Merge branch 'misc' into devel

Conflicts:
	arch/arm/mm/init.c
diff --git a/Documentation/arm/memory.txt b/Documentation/arm/memory.txt
index eb0fae1..771d48d 100644
--- a/Documentation/arm/memory.txt
+++ b/Documentation/arm/memory.txt
@@ -33,7 +33,13 @@
 
 fffe0000	fffeffff	XScale cache flush area.  This is used
 				in proc-xscale.S to flush the whole data
-				cache.  Free for other usage on non-XScale.
+				cache. (XScale does not have TCM.)
+
+fffe8000	fffeffff	DTCM mapping area for platforms with
+				DTCM mounted inside the CPU.
+
+fffe0000	fffe7fff	ITCM mapping area for platforms with
+				ITCM mounted inside the CPU.
 
 fff00000	fffdffff	Fixmap mapping region.  Addresses provided
 				by fix_to_virt() will be located here.
diff --git a/Documentation/arm/tcm.txt b/Documentation/arm/tcm.txt
index 77fd937..7c15871 100644
--- a/Documentation/arm/tcm.txt
+++ b/Documentation/arm/tcm.txt
@@ -19,8 +19,8 @@
 system control coprocessor. Documentation from ARM can be found
 at http://infocenter.arm.com, search for "TCM Status Register"
 to see documents for all CPUs. Reading this register you can
-determine if ITCM (bit 0) and/or DTCM (bit 16) is present in the
-machine.
+determine if ITCM (bits 1-0) and/or DTCM (bit 17-16) is present
+in the machine.
 
 There is further a TCM region register (search for "TCM Region
 Registers" at the ARM site) that can report and modify the location
@@ -35,7 +35,15 @@
 the MMU, but notice that the TCM if often used in situations where
 the MMU is turned off. To avoid confusion the current Linux
 implementation will map the TCM 1 to 1 from physical to virtual
-memory in the location specified by the machine.
+memory in the location specified by the kernel. Currently Linux
+will map ITCM to 0xfffe0000 and on, and DTCM to 0xfffe8000 and
+on, supporting a maximum of 32KiB of ITCM and 32KiB of DTCM.
+
+Newer versions of the region registers also support dividing these
+TCMs in two separate banks, so for example an 8KiB ITCM is divided
+into two 4KiB banks with its own control registers. The idea is to
+be able to lock and hide one of the banks for use by the secure
+world (TrustZone).
 
 TCM is used for a few things:
 
@@ -65,18 +73,18 @@
   memory. Such a heap is great for things like saving
   device state when shutting off device power domains.
 
-A machine that has TCM memory shall select HAVE_TCM in
-arch/arm/Kconfig for itself, and then the
-rest of the functionality will depend on the physical
-location and size of ITCM and DTCM to be defined in
-mach/memory.h for the machine. Code that needs to use
-TCM shall #include <asm/tcm.h> If the TCM is not located
-at the place given in memory.h it will be moved using
-the TCM Region registers.
+A machine that has TCM memory shall select HAVE_TCM from
+arch/arm/Kconfig for itself. Code that needs to use TCM shall
+#include <asm/tcm.h>
 
 Functions to go into itcm can be tagged like this:
 int __tcmfunc foo(int bar);
 
+Since these are marked to become long_calls and you may want
+to have functions called locally inside the TCM without
+wasting space, there is also the __tcmlocalfunc prefix that
+will make the call relative.
+
 Variables to go into dtcm can be tagged like this:
 int __tcmdata foo;
 
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 4047f57..ba1ac08 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -57,7 +57,7 @@
 config GENERIC_CLOCKEVENTS_BROADCAST
 	bool
 	depends on GENERIC_CLOCKEVENTS
-	default y if SMP && !LOCAL_TIMERS
+	default y if SMP
 
 config HAVE_TCM
 	bool
@@ -1263,8 +1263,7 @@
 	  disabled, perf events will use software events only.
 
 config SPARSE_IRQ
-	bool "Support sparse irq numbering"
-	depends on EXPERIMENTAL
+	def_bool n
 	help
 	  This enables support for sparse irqs. This is useful in general
 	  as most CPUs have a fairly sparse array of IRQ vectors, which
@@ -1272,8 +1271,6 @@
 	  number of off-chip IRQs will want to treat this as
 	  experimental until they have been independently verified.
 
-	  If you don't know what to do here, say N.
-
 source "mm/Kconfig"
 
 config FORCE_MAX_ZONEORDER
diff --git a/arch/arm/include/asm/mach/map.h b/arch/arm/include/asm/mach/map.h
index 742c2aaeb..d2fedb5 100644
--- a/arch/arm/include/asm/mach/map.h
+++ b/arch/arm/include/asm/mach/map.h
@@ -27,6 +27,8 @@
 #define MT_MEMORY		9
 #define MT_ROM			10
 #define MT_MEMORY_NONCACHED	11
+#define MT_MEMORY_DTCM		12
+#define MT_MEMORY_ITCM		13
 
 #ifdef CONFIG_MMU
 extern void iotable_init(struct map_desc *, int);
diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index 82df0ae..23c2e8e 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -124,6 +124,15 @@
 #endif /* !CONFIG_MMU */
 
 /*
+ * We fix the TCM memories max 32 KiB ITCM resp DTCM at these
+ * locations
+ */
+#ifdef CONFIG_HAVE_TCM
+#define ITCM_OFFSET	UL(0xfffe0000)
+#define DTCM_OFFSET	UL(0xfffe8000)
+#endif
+
+/*
  * Physical vs virtual RAM address space conversion.  These are
  * private definitions which should NOT be used outside memory.h
  * files.  Use virt_to_phys/phys_to_virt/__pa/__va instead.
diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h
index 5f4f480..8ba1ccf 100644
--- a/arch/arm/include/asm/system.h
+++ b/arch/arm/include/asm/system.h
@@ -83,7 +83,7 @@
 
 void hook_fault_code(int nr, int (*fn)(unsigned long, unsigned int,
 				       struct pt_regs *),
-		     int sig, const char *name);
+		     int sig, int code, const char *name);
 
 #define xchg(ptr,x) \
 	((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr))))
diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c
index 81e9898..1fc74cb 100644
--- a/arch/arm/kernel/machine_kexec.c
+++ b/arch/arm/kernel/machine_kexec.c
@@ -37,10 +37,6 @@
 {
 }
 
-void machine_shutdown(void)
-{
-}
-
 void machine_crash_shutdown(struct pt_regs *regs)
 {
 	local_irq_disable();
@@ -78,7 +74,11 @@
 			   (unsigned long) reboot_code_buffer + KEXEC_CONTROL_PAGE_SIZE);
 	printk(KERN_INFO "Bye!\n");
 
-	cpu_proc_fin();
+	local_irq_disable();
+	local_fiq_disable();
 	setup_mm_for_reboot(0); /* mode is not used, so just pass 0*/
+	flush_cache_all();
+	cpu_proc_fin();
+	flush_cache_all();
 	cpu_reset(reboot_code_buffer_phys);
 }
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index a4a9cc8..2e2ec97 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -29,6 +29,7 @@
 #include <linux/utsname.h>
 #include <linux/uaccess.h>
 
+#include <asm/cacheflush.h>
 #include <asm/leds.h>
 #include <asm/processor.h>
 #include <asm/system.h>
@@ -84,10 +85,9 @@
 
 void arm_machine_restart(char mode, const char *cmd)
 {
-	/*
-	 * Clean and disable cache, and turn off interrupts
-	 */
-	cpu_proc_fin();
+	/* Disable interrupts first */
+	local_irq_disable();
+	local_fiq_disable();
 
 	/*
 	 * Tell the mm system that we are going to reboot -
@@ -96,6 +96,15 @@
 	 */
 	setup_mm_for_reboot(mode);
 
+	/* Clean and invalidate caches */
+	flush_cache_all();
+
+	/* Turn off caching */
+	cpu_proc_fin();
+
+	/* Push out any further dirty data, and ensure cache is empty */
+	flush_cache_all();
+
 	/*
 	 * Now call the architecture specific reboot code.
 	 */
@@ -189,19 +198,29 @@
 
 __setup("reboot=", reboot_setup);
 
-void machine_halt(void)
+void machine_shutdown(void)
 {
+#ifdef CONFIG_SMP
+	smp_send_stop();
+#endif
 }
 
+void machine_halt(void)
+{
+	machine_shutdown();
+	while (1);
+}
 
 void machine_power_off(void)
 {
+	machine_shutdown();
 	if (pm_power_off)
 		pm_power_off();
 }
 
 void machine_restart(char *cmd)
 {
+	machine_shutdown();
 	arm_pm_restart(reboot_mode, cmd);
 }
 
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index b8c3d0f..40dc74f 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -429,7 +429,11 @@
 {
 	send_ipi_message(mask, IPI_TIMER);
 }
+#else
+#define smp_timer_broadcast	NULL
+#endif
 
+#ifndef CONFIG_LOCAL_TIMERS
 static void broadcast_timer_set_mode(enum clock_event_mode mode,
 	struct clock_event_device *evt)
 {
@@ -444,7 +448,6 @@
 	evt->rating	= 400;
 	evt->mult	= 1;
 	evt->set_mode	= broadcast_timer_set_mode;
-	evt->broadcast	= smp_timer_broadcast;
 
 	clockevents_register_device(evt);
 }
@@ -456,6 +459,7 @@
 	struct clock_event_device *evt = &per_cpu(percpu_clockevent, cpu);
 
 	evt->cpumask = cpumask_of(cpu);
+	evt->broadcast = smp_timer_broadcast;
 
 	local_timer_setup(evt);
 }
@@ -467,10 +471,13 @@
  */
 static void ipi_cpu_stop(unsigned int cpu)
 {
-	spin_lock(&stop_lock);
-	printk(KERN_CRIT "CPU%u: stopping\n", cpu);
-	dump_stack();
-	spin_unlock(&stop_lock);
+	if (system_state == SYSTEM_BOOTING ||
+	    system_state == SYSTEM_RUNNING) {
+		spin_lock(&stop_lock);
+		printk(KERN_CRIT "CPU%u: stopping\n", cpu);
+		dump_stack();
+		spin_unlock(&stop_lock);
+	}
 
 	set_cpu_online(cpu, false);
 
diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c
index 7c5f0c0..35882fb 100644
--- a/arch/arm/kernel/smp_twd.c
+++ b/arch/arm/kernel/smp_twd.c
@@ -132,7 +132,8 @@
 	twd_calibrate_rate();
 
 	clk->name = "local_timer";
-	clk->features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT;
+	clk->features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT |
+			CLOCK_EVT_FEAT_C3STOP;
 	clk->rating = 350;
 	clk->set_mode = twd_set_mode;
 	clk->set_next_event = twd_set_next_event;
diff --git a/arch/arm/kernel/tcm.c b/arch/arm/kernel/tcm.c
index e503038..26685c2 100644
--- a/arch/arm/kernel/tcm.c
+++ b/arch/arm/kernel/tcm.c
@@ -13,38 +13,35 @@
 #include <linux/ioport.h>
 #include <linux/genalloc.h>
 #include <linux/string.h> /* memcpy */
-#include <asm/page.h> /* PAGE_SHIFT */
 #include <asm/cputype.h>
 #include <asm/mach/map.h>
 #include <mach/memory.h>
 #include "tcm.h"
 
-/* Scream and warn about misuse */
-#if !defined(ITCM_OFFSET) || !defined(ITCM_END) || \
-    !defined(DTCM_OFFSET) || !defined(DTCM_END)
-#error "TCM support selected but offsets not defined!"
-#endif
-
 static struct gen_pool *tcm_pool;
 
 /* TCM section definitions from the linker */
 extern char __itcm_start, __sitcm_text, __eitcm_text;
 extern char __dtcm_start, __sdtcm_data, __edtcm_data;
 
+/* These will be increased as we run */
+u32 dtcm_end = DTCM_OFFSET;
+u32 itcm_end = ITCM_OFFSET;
+
 /*
  * TCM memory resources
  */
 static struct resource dtcm_res = {
 	.name = "DTCM RAM",
 	.start = DTCM_OFFSET,
-	.end = DTCM_END,
+	.end = DTCM_OFFSET,
 	.flags = IORESOURCE_MEM
 };
 
 static struct resource itcm_res = {
 	.name = "ITCM RAM",
 	.start = ITCM_OFFSET,
-	.end = ITCM_END,
+	.end = ITCM_OFFSET,
 	.flags = IORESOURCE_MEM
 };
 
@@ -52,8 +49,8 @@
 	{
 		.virtual	= DTCM_OFFSET,
 		.pfn		= __phys_to_pfn(DTCM_OFFSET),
-		.length		= (DTCM_END - DTCM_OFFSET + 1),
-		.type		= MT_UNCACHED
+		.length		= 0,
+		.type		= MT_MEMORY_DTCM
 	}
 };
 
@@ -61,8 +58,8 @@
 	{
 		.virtual	= ITCM_OFFSET,
 		.pfn		= __phys_to_pfn(ITCM_OFFSET),
-		.length		= (ITCM_END - ITCM_OFFSET + 1),
-		.type		= MT_UNCACHED
+		.length		= 0,
+		.type		= MT_MEMORY_ITCM
 	}
 };
 
@@ -93,14 +90,24 @@
 }
 EXPORT_SYMBOL(tcm_free);
 
-
-static void __init setup_tcm_bank(u8 type, u32 offset, u32 expected_size)
+static int __init setup_tcm_bank(u8 type, u8 bank, u8 banks,
+				  u32 *offset)
 {
 	const int tcm_sizes[16] = { 0, -1, -1, 4, 8, 16, 32, 64, 128,
 				    256, 512, 1024, -1, -1, -1, -1 };
 	u32 tcm_region;
 	int tcm_size;
 
+	/*
+	 * If there are more than one TCM bank of this type,
+	 * select the TCM bank to operate on in the TCM selection
+	 * register.
+	 */
+	if (banks > 1)
+		asm("mcr	p15, 0, %0, c9, c2, 0"
+		    : /* No output operands */
+		    : "r" (bank));
+
 	/* Read the special TCM region register c9, 0 */
 	if (!type)
 		asm("mrc	p15, 0, %0, c9, c1, 0"
@@ -111,26 +118,24 @@
 
 	tcm_size = tcm_sizes[(tcm_region >> 2) & 0x0f];
 	if (tcm_size < 0) {
-		pr_err("CPU: %sTCM of unknown size!\n",
-			type ? "I" : "D");
+		pr_err("CPU: %sTCM%d of unknown size\n",
+		       type ? "I" : "D", bank);
+		return -EINVAL;
+	} else if (tcm_size > 32) {
+		pr_err("CPU: %sTCM%d larger than 32k found\n",
+		       type ? "I" : "D", bank);
+		return -EINVAL;
 	} else {
-		pr_info("CPU: found %sTCM %dk @ %08x, %senabled\n",
+		pr_info("CPU: found %sTCM%d %dk @ %08x, %senabled\n",
 			type ? "I" : "D",
+			bank,
 			tcm_size,
 			(tcm_region & 0xfffff000U),
 			(tcm_region & 1) ? "" : "not ");
 	}
 
-	if (tcm_size != expected_size) {
-		pr_crit("CPU: %sTCM was detected %dk but expected %dk!\n",
-		       type ? "I" : "D",
-		       tcm_size,
-		       expected_size);
-		/* Adjust to the expected size? what can we do... */
-	}
-
 	/* Force move the TCM bank to where we want it, enable */
-	tcm_region = offset | (tcm_region & 0x00000ffeU) | 1;
+	tcm_region = *offset | (tcm_region & 0x00000ffeU) | 1;
 
 	if (!type)
 		asm("mcr	p15, 0, %0, c9, c1, 0"
@@ -141,10 +146,15 @@
 		    : /* No output operands */
 		    : "r" (tcm_region));
 
-	pr_debug("CPU: moved %sTCM %dk to %08x, enabled\n",
-		 type ? "I" : "D",
-		 tcm_size,
-		 (tcm_region & 0xfffff000U));
+	/* Increase offset */
+	*offset += (tcm_size << 10);
+
+	pr_info("CPU: moved %sTCM%d %dk to %08x, enabled\n",
+		type ? "I" : "D",
+		bank,
+		tcm_size,
+		(tcm_region & 0xfffff000U));
+	return 0;
 }
 
 /*
@@ -153,34 +163,52 @@
 void __init tcm_init(void)
 {
 	u32 tcm_status = read_cpuid_tcmstatus();
+	u8 dtcm_banks = (tcm_status >> 16) & 0x03;
+	u8 itcm_banks = (tcm_status & 0x03);
 	char *start;
 	char *end;
 	char *ram;
+	int ret;
+	int i;
 
 	/* Setup DTCM if present */
-	if (tcm_status & (1 << 16)) {
-		setup_tcm_bank(0, DTCM_OFFSET,
-			       (DTCM_END - DTCM_OFFSET + 1) >> 10);
+	if (dtcm_banks > 0) {
+		for (i = 0; i < dtcm_banks; i++) {
+			ret = setup_tcm_bank(0, i, dtcm_banks, &dtcm_end);
+			if (ret)
+				return;
+		}
+		dtcm_res.end = dtcm_end - 1;
 		request_resource(&iomem_resource, &dtcm_res);
+		dtcm_iomap[0].length = dtcm_end - DTCM_OFFSET;
 		iotable_init(dtcm_iomap, 1);
 		/* Copy data from RAM to DTCM */
 		start = &__sdtcm_data;
 		end   = &__edtcm_data;
 		ram   = &__dtcm_start;
+		/* This means you compiled more code than fits into DTCM */
+		BUG_ON((end - start) > (dtcm_end - DTCM_OFFSET));
 		memcpy(start, ram, (end-start));
 		pr_debug("CPU DTCM: copied data from %p - %p\n", start, end);
 	}
 
 	/* Setup ITCM if present */
-	if (tcm_status & 1) {
-		setup_tcm_bank(1, ITCM_OFFSET,
-			       (ITCM_END - ITCM_OFFSET + 1) >> 10);
+	if (itcm_banks > 0) {
+		for (i = 0; i < itcm_banks; i++) {
+			ret = setup_tcm_bank(1, i, itcm_banks, &itcm_end);
+			if (ret)
+				return;
+		}
+		itcm_res.end = itcm_end - 1;
 		request_resource(&iomem_resource, &itcm_res);
+		itcm_iomap[0].length = itcm_end - ITCM_OFFSET;
 		iotable_init(itcm_iomap, 1);
 		/* Copy code from RAM to ITCM */
 		start = &__sitcm_text;
 		end   = &__eitcm_text;
 		ram   = &__itcm_start;
+		/* This means you compiled more code than fits into ITCM */
+		BUG_ON((end - start) > (itcm_end - ITCM_OFFSET));
 		memcpy(start, ram, (end-start));
 		pr_debug("CPU ITCM: copied code from %p - %p\n", start, end);
 	}
@@ -208,10 +236,10 @@
 	pr_debug("Setting up TCM memory pool\n");
 
 	/* Add the rest of DTCM to the TCM pool */
-	if (tcm_status & (1 << 16)) {
-		if (dtcm_pool_start < DTCM_END) {
+	if (tcm_status & (0x03 << 16)) {
+		if (dtcm_pool_start < dtcm_end) {
 			ret = gen_pool_add(tcm_pool, dtcm_pool_start,
-					   DTCM_END - dtcm_pool_start + 1, -1);
+					   dtcm_end - dtcm_pool_start, -1);
 			if (ret) {
 				pr_err("CPU DTCM: could not add DTCM " \
 				       "remainder to pool!\n");
@@ -219,16 +247,16 @@
 			}
 			pr_debug("CPU DTCM: Added %08x bytes @ %08x to " \
 				 "the TCM memory pool\n",
-				 DTCM_END - dtcm_pool_start + 1,
+				 dtcm_end - dtcm_pool_start,
 				 dtcm_pool_start);
 		}
 	}
 
 	/* Add the rest of ITCM to the TCM pool */
-	if (tcm_status & 1) {
-		if (itcm_pool_start < ITCM_END) {
+	if (tcm_status & 0x03) {
+		if (itcm_pool_start < itcm_end) {
 			ret = gen_pool_add(tcm_pool, itcm_pool_start,
-					   ITCM_END - itcm_pool_start + 1, -1);
+					   itcm_end - itcm_pool_start, -1);
 			if (ret) {
 				pr_err("CPU ITCM: could not add ITCM " \
 				       "remainder to pool!\n");
@@ -236,7 +264,7 @@
 			}
 			pr_debug("CPU ITCM: Added %08x bytes @ %08x to " \
 				 "the TCM memory pool\n",
-				 ITCM_END - itcm_pool_start + 1,
+				 itcm_end - itcm_pool_start,
 				 itcm_pool_start);
 		}
 	}
diff --git a/arch/arm/mach-integrator/pci_v3.c b/arch/arm/mach-integrator/pci_v3.c
index 9cef059..6467d99 100644
--- a/arch/arm/mach-integrator/pci_v3.c
+++ b/arch/arm/mach-integrator/pci_v3.c
@@ -505,10 +505,10 @@
 	/*
 	 * Hook in our fault handler for PCI errors
 	 */
-	hook_fault_code(4, v3_pci_fault, SIGBUS, "external abort on linefetch");
-	hook_fault_code(6, v3_pci_fault, SIGBUS, "external abort on linefetch");
-	hook_fault_code(8, v3_pci_fault, SIGBUS, "external abort on non-linefetch");
-	hook_fault_code(10, v3_pci_fault, SIGBUS, "external abort on non-linefetch");
+	hook_fault_code(4, v3_pci_fault, SIGBUS, 0, "external abort on linefetch");
+	hook_fault_code(6, v3_pci_fault, SIGBUS, 0, "external abort on linefetch");
+	hook_fault_code(8, v3_pci_fault, SIGBUS, 0, "external abort on non-linefetch");
+	hook_fault_code(10, v3_pci_fault, SIGBUS, 0, "external abort on non-linefetch");
 
 	spin_lock_irqsave(&v3_lock, flags);
 
diff --git a/arch/arm/mach-iop13xx/pci.c b/arch/arm/mach-iop13xx/pci.c
index 6d5a908..773ea0c 100644
--- a/arch/arm/mach-iop13xx/pci.c
+++ b/arch/arm/mach-iop13xx/pci.c
@@ -987,7 +987,7 @@
 		iop13xx_atux_setup();
 	}
 
-	hook_fault_code(16+6, iop13xx_pci_abort, SIGBUS,
+	hook_fault_code(16+6, iop13xx_pci_abort, SIGBUS, 0,
 			"imprecise external abort");
 }
 
diff --git a/arch/arm/mach-ixp2000/pci.c b/arch/arm/mach-ixp2000/pci.c
index 90771ca..f797c5f 100644
--- a/arch/arm/mach-ixp2000/pci.c
+++ b/arch/arm/mach-ixp2000/pci.c
@@ -209,7 +209,7 @@
 			"the needed workaround has not been configured in");
 #endif
 
-	hook_fault_code(16+6, ixp2000_pci_abort_handler, SIGBUS,
+	hook_fault_code(16+6, ixp2000_pci_abort_handler, SIGBUS, 0,
 				"PCI config cycle to non-existent device");
 }
 
diff --git a/arch/arm/mach-ixp23xx/pci.c b/arch/arm/mach-ixp23xx/pci.c
index 4b0e598..563819a 100644
--- a/arch/arm/mach-ixp23xx/pci.c
+++ b/arch/arm/mach-ixp23xx/pci.c
@@ -229,7 +229,7 @@
 {
 	ixp23xx_pci_common_init();
 
-	hook_fault_code(16+6, ixp23xx_pci_abort_handler, SIGBUS,
+	hook_fault_code(16+6, ixp23xx_pci_abort_handler, SIGBUS, 0,
 			"PCI config cycle to non-existent device");
 
 	*IXP23XX_PCI_ADDR_EXT = 0x0000e000;
diff --git a/arch/arm/mach-ixp4xx/common-pci.c b/arch/arm/mach-ixp4xx/common-pci.c
index 333d04c..61cd4d6 100644
--- a/arch/arm/mach-ixp4xx/common-pci.c
+++ b/arch/arm/mach-ixp4xx/common-pci.c
@@ -382,7 +382,8 @@
 
 
 	/* hook in our fault handler for PCI errors */
-	hook_fault_code(16+6, abort_handler, SIGBUS, "imprecise external abort");
+	hook_fault_code(16+6, abort_handler, SIGBUS, 0,
+			"imprecise external abort");
 
 	pr_debug("setup PCI-AHB(inbound) and AHB-PCI(outbound) address mappings\n");
 
diff --git a/arch/arm/mach-ks8695/pci.c b/arch/arm/mach-ks8695/pci.c
index 7849966..5fcd082 100644
--- a/arch/arm/mach-ks8695/pci.c
+++ b/arch/arm/mach-ks8695/pci.c
@@ -268,8 +268,8 @@
 	__raw_writel(0, KS8695_PCI_VA + KS8695_PIOBAC);
 
 	/* hook in fault handlers */
-	hook_fault_code(8, ks8695_pci_fault, SIGBUS, "external abort on non-linefetch");
-	hook_fault_code(10, ks8695_pci_fault, SIGBUS, "external abort on non-linefetch");
+	hook_fault_code(8, ks8695_pci_fault, SIGBUS, 0, "external abort on non-linefetch");
+	hook_fault_code(10, ks8695_pci_fault, SIGBUS, 0, "external abort on non-linefetch");
 }
 
 static void ks8695_show_pciregs(void)
diff --git a/arch/arm/mach-u300/include/mach/memory.h b/arch/arm/mach-u300/include/mach/memory.h
index ab000df..bf134bc 100644
--- a/arch/arm/mach-u300/include/mach/memory.h
+++ b/arch/arm/mach-u300/include/mach/memory.h
@@ -35,14 +35,6 @@
 #endif
 
 /*
- * TCM memory whereabouts
- */
-#define ITCM_OFFSET	0xffff2000
-#define ITCM_END	0xffff3fff
-#define DTCM_OFFSET	0xffff4000
-#define DTCM_END	0xffff5fff
-
-/*
  * We enable a real big DMA buffer if need be.
  */
 #define CONSISTENT_DMA_SIZE SZ_4M
diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c
index 6f98c35..d073b64 100644
--- a/arch/arm/mm/alignment.c
+++ b/arch/arm/mm/alignment.c
@@ -924,8 +924,20 @@
 		ai_usermode = UM_FIXUP;
 	}
 
-	hook_fault_code(1, do_alignment, SIGILL, "alignment exception");
-	hook_fault_code(3, do_alignment, SIGILL, "alignment exception");
+	hook_fault_code(1, do_alignment, SIGBUS, BUS_ADRALN,
+			"alignment exception");
+
+	/*
+	 * ARMv6K and ARMv7 use fault status 3 (0b00011) as Access Flag section
+	 * fault, not as alignment error.
+	 *
+	 * TODO: handle ARMv6K properly. Runtime check for 'K' extension is
+	 * needed.
+	 */
+	if (cpu_architecture() <= CPU_ARCH_ARMv6) {
+		hook_fault_code(3, do_alignment, SIGBUS, BUS_ADRALN,
+				"alignment exception");
+	}
 
 	return 0;
 }
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 9e7742f..c704eed 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -183,6 +183,8 @@
 __dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot)
 {
 	struct arm_vmregion *c;
+	size_t align;
+	int bit;
 
 	if (!consistent_pte[0]) {
 		printk(KERN_ERR "%s: not initialised\n", __func__);
@@ -191,9 +193,20 @@
 	}
 
 	/*
+	 * Align the virtual region allocation - maximum alignment is
+	 * a section size, minimum is a page size.  This helps reduce
+	 * fragmentation of the DMA space, and also prevents allocations
+	 * smaller than a section from crossing a section boundary.
+	 */
+	bit = fls(size - 1) + 1;
+	if (bit > SECTION_SHIFT)
+		bit = SECTION_SHIFT;
+	align = 1 << bit;
+
+	/*
 	 * Allocate a virtual address in the consistent mapping region.
 	 */
-	c = arm_vmregion_alloc(&consistent_head, size,
+	c = arm_vmregion_alloc(&consistent_head, align, size,
 			    gfp & ~(__GFP_DMA | __GFP_HIGHMEM));
 	if (c) {
 		pte_t *pte;
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index cbfb2ed..23b0b03 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -413,7 +413,16 @@
 	pmd_k = pmd_offset(pgd_k, addr);
 	pmd   = pmd_offset(pgd, addr);
 
-	if (pmd_none(*pmd_k))
+	/*
+	 * On ARM one Linux PGD entry contains two hardware entries (see page
+	 * tables layout in pgtable.h). We normally guarantee that we always
+	 * fill both L1 entries. But create_mapping() doesn't follow the rule.
+	 * It can create inidividual L1 entries, so here we have to call
+	 * pmd_none() check for the entry really corresponded to address, not
+	 * for the first of pair.
+	 */
+	index = (addr >> SECTION_SHIFT) & 1;
+	if (pmd_none(pmd_k[index]))
 		goto bad_area;
 
 	copy_pmd(pmd, pmd_k);
@@ -463,15 +472,10 @@
 	 * defines these to be "precise" aborts.
 	 */
 	{ do_bad,		SIGSEGV, 0,		"vector exception"		   },
-	{ do_bad,		SIGILL,	 BUS_ADRALN,	"alignment exception"		   },
+	{ do_bad,		SIGBUS,	 BUS_ADRALN,	"alignment exception"		   },
 	{ do_bad,		SIGKILL, 0,		"terminal exception"		   },
-	{ do_bad,		SIGILL,	 BUS_ADRALN,	"alignment exception"		   },
-/* Do we need runtime check ? */
-#if __LINUX_ARM_ARCH__ < 6
+	{ do_bad,		SIGBUS,	 BUS_ADRALN,	"alignment exception"		   },
 	{ do_bad,		SIGBUS,	 0,		"external abort on linefetch"	   },
-#else
-	{ do_translation_fault,	SIGSEGV, SEGV_MAPERR,	"I-cache maintenance fault"	   },
-#endif
 	{ do_translation_fault,	SIGSEGV, SEGV_MAPERR,	"section translation fault"	   },
 	{ do_bad,		SIGBUS,	 0,		"external abort on linefetch"	   },
 	{ do_page_fault,	SIGSEGV, SEGV_MAPERR,	"page translation fault"	   },
@@ -508,13 +512,15 @@
 
 void __init
 hook_fault_code(int nr, int (*fn)(unsigned long, unsigned int, struct pt_regs *),
-		int sig, const char *name)
+		int sig, int code, const char *name)
 {
-	if (nr >= 0 && nr < ARRAY_SIZE(fsr_info)) {
-		fsr_info[nr].fn   = fn;
-		fsr_info[nr].sig  = sig;
-		fsr_info[nr].name = name;
-	}
+	if (nr < 0 || nr >= ARRAY_SIZE(fsr_info))
+		BUG();
+
+	fsr_info[nr].fn   = fn;
+	fsr_info[nr].sig  = sig;
+	fsr_info[nr].code = code;
+	fsr_info[nr].name = name;
 }
 
 /*
@@ -594,3 +600,25 @@
 	arm_notify_die("", regs, &info, ifsr, 0);
 }
 
+static int __init exceptions_init(void)
+{
+	if (cpu_architecture() >= CPU_ARCH_ARMv6) {
+		hook_fault_code(4, do_translation_fault, SIGSEGV, SEGV_MAPERR,
+				"I-cache maintenance fault");
+	}
+
+	if (cpu_architecture() >= CPU_ARCH_ARMv7) {
+		/*
+		 * TODO: Access flag faults introduced in ARMv6K.
+		 * Runtime check for 'K' extension is needed
+		 */
+		hook_fault_code(3, do_bad, SIGSEGV, SEGV_MAPERR,
+				"section access flag fault");
+		hook_fault_code(6, do_bad, SIGSEGV, SEGV_MAPERR,
+				"section access flag fault");
+	}
+
+	return 0;
+}
+
+arch_initcall(exceptions_init);
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 599d121..240b68d 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -432,6 +432,11 @@
 {
 	unsigned long reserved_pages, free_pages;
 	int i;
+#ifdef CONFIG_HAVE_TCM
+	/* These pointers are filled in on TCM detection */
+	extern u32 dtcm_end;
+	extern u32 itcm_end;
+#endif
 
 	max_mapnr   = pfn_to_page(max_pfn + PHYS_PFN_OFFSET) - mem_map;
 
@@ -503,6 +508,10 @@
 
 	printk(KERN_NOTICE "Virtual kernel memory layout:\n"
 			"    vector  : 0x%08lx - 0x%08lx   (%4ld kB)\n"
+#ifdef CONFIG_HAVE_TCM
+			"    DTCM    : 0x%08lx - 0x%08lx   (%4ld kB)\n"
+			"    ITCM    : 0x%08lx - 0x%08lx   (%4ld kB)\n"
+#endif
 			"    fixmap  : 0x%08lx - 0x%08lx   (%4ld kB)\n"
 #ifdef CONFIG_MMU
 			"    DMA     : 0x%08lx - 0x%08lx   (%4ld MB)\n"
@@ -519,6 +528,10 @@
 
 			MLK(UL(CONFIG_VECTORS_BASE), UL(CONFIG_VECTORS_BASE) +
 				(PAGE_SIZE)),
+#ifdef CONFIG_HAVE_TCM
+			MLK(DTCM_OFFSET, (unsigned long) dtcm_end),
+			MLK(ITCM_OFFSET, (unsigned long) itcm_end),
+#endif
 			MLK(FIXADDR_START, FIXADDR_TOP),
 #ifdef CONFIG_MMU
 			MLM(CONSISTENT_BASE, CONSISTENT_END),
diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c
index 03f1193..ab50627 100644
--- a/arch/arm/mm/ioremap.c
+++ b/arch/arm/mm/ioremap.c
@@ -42,78 +42,11 @@
  */
 #define VM_ARM_SECTION_MAPPING	0x80000000
 
-static int remap_area_pte(pmd_t *pmd, unsigned long addr, unsigned long end,
-			  unsigned long phys_addr, const struct mem_type *type)
-{
-	pgprot_t prot = __pgprot(type->prot_pte);
-	pte_t *pte;
-
-	pte = pte_alloc_kernel(pmd, addr);
-	if (!pte)
-		return -ENOMEM;
-
-	do {
-		if (!pte_none(*pte))
-			goto bad;
-
-		set_pte_ext(pte, pfn_pte(phys_addr >> PAGE_SHIFT, prot), 0);
-		phys_addr += PAGE_SIZE;
-	} while (pte++, addr += PAGE_SIZE, addr != end);
-	return 0;
-
- bad:
-	printk(KERN_CRIT "remap_area_pte: page already exists\n");
-	BUG();
-}
-
-static inline int remap_area_pmd(pgd_t *pgd, unsigned long addr,
-				 unsigned long end, unsigned long phys_addr,
-				 const struct mem_type *type)
-{
-	unsigned long next;
-	pmd_t *pmd;
-	int ret = 0;
-
-	pmd = pmd_alloc(&init_mm, pgd, addr);
-	if (!pmd)
-		return -ENOMEM;
-
-	do {
-		next = pmd_addr_end(addr, end);
-		ret = remap_area_pte(pmd, addr, next, phys_addr, type);
-		if (ret)
-			return ret;
-		phys_addr += next - addr;
-	} while (pmd++, addr = next, addr != end);
-	return ret;
-}
-
-static int remap_area_pages(unsigned long start, unsigned long pfn,
-			    size_t size, const struct mem_type *type)
-{
-	unsigned long addr = start;
-	unsigned long next, end = start + size;
-	unsigned long phys_addr = __pfn_to_phys(pfn);
-	pgd_t *pgd;
-	int err = 0;
-
-	BUG_ON(addr >= end);
-	pgd = pgd_offset_k(addr);
-	do {
-		next = pgd_addr_end(addr, end);
-		err = remap_area_pmd(pgd, addr, next, phys_addr, type);
-		if (err)
-			break;
-		phys_addr += next - addr;
-	} while (pgd++, addr = next, addr != end);
-
-	return err;
-}
-
 int ioremap_page(unsigned long virt, unsigned long phys,
 		 const struct mem_type *mtype)
 {
-	return remap_area_pages(virt, __phys_to_pfn(phys), PAGE_SIZE, mtype);
+	return ioremap_page_range(virt, virt + PAGE_SIZE, phys,
+				  __pgprot(mtype->prot_pte));
 }
 EXPORT_SYMBOL(ioremap_page);
 
@@ -300,7 +233,8 @@
 		err = remap_area_sections(addr, pfn, size, type);
 	} else
 #endif
-		err = remap_area_pages(addr, pfn, size, type);
+		err = ioremap_page_range(addr, addr + size, __pfn_to_phys(pfn),
+					 __pgprot(type->prot_pte));
 
 	if (err) {
  		vunmap((void *)addr);
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index d5541ad..6e1c4f6 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -257,6 +257,19 @@
 		.prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
 		.domain    = DOMAIN_KERNEL,
 	},
+	[MT_MEMORY_DTCM] = {
+		.prot_pte	= L_PTE_PRESENT | L_PTE_YOUNG |
+		                  L_PTE_DIRTY | L_PTE_WRITE,
+		.prot_l1	= PMD_TYPE_TABLE,
+		.prot_sect	= PMD_TYPE_SECT | PMD_SECT_XN,
+		.domain		= DOMAIN_KERNEL,
+	},
+	[MT_MEMORY_ITCM] = {
+		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
+				L_PTE_USER | L_PTE_EXEC,
+		.prot_l1   = PMD_TYPE_TABLE,
+		.domain    = DOMAIN_IO,
+	},
 };
 
 const struct mem_type *get_mem_type(unsigned int type)
diff --git a/arch/arm/mm/proc-arm1020.S b/arch/arm/mm/proc-arm1020.S
index 72507c6..203a4e9 100644
--- a/arch/arm/mm/proc-arm1020.S
+++ b/arch/arm/mm/proc-arm1020.S
@@ -79,15 +79,11 @@
  * cpu_arm1020_proc_fin()
  */
 ENTRY(cpu_arm1020_proc_fin)
-	stmfd	sp!, {lr}
-	mov	ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
-	msr	cpsr_c, ip
-	bl	arm1020_flush_kern_cache_all
 	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
 	bic	r0, r0, #0x1000 		@ ...i............
 	bic	r0, r0, #0x000e 		@ ............wca.
 	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
-	ldmfd	sp!, {pc}
+	mov	pc, lr
 
 /*
  * cpu_arm1020_reset(loc)
diff --git a/arch/arm/mm/proc-arm1020e.S b/arch/arm/mm/proc-arm1020e.S
index d278298..1a511e7 100644
--- a/arch/arm/mm/proc-arm1020e.S
+++ b/arch/arm/mm/proc-arm1020e.S
@@ -79,15 +79,11 @@
  * cpu_arm1020e_proc_fin()
  */
 ENTRY(cpu_arm1020e_proc_fin)
-	stmfd	sp!, {lr}
-	mov	ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
-	msr	cpsr_c, ip
-	bl	arm1020e_flush_kern_cache_all
 	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
 	bic	r0, r0, #0x1000 		@ ...i............
 	bic	r0, r0, #0x000e 		@ ............wca.
 	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
-	ldmfd	sp!, {pc}
+	mov	pc, lr
 
 /*
  * cpu_arm1020e_reset(loc)
diff --git a/arch/arm/mm/proc-arm1022.S b/arch/arm/mm/proc-arm1022.S
index ce13e4a..1ffa4eb 100644
--- a/arch/arm/mm/proc-arm1022.S
+++ b/arch/arm/mm/proc-arm1022.S
@@ -68,15 +68,11 @@
  * cpu_arm1022_proc_fin()
  */
 ENTRY(cpu_arm1022_proc_fin)
-	stmfd	sp!, {lr}
-	mov	ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
-	msr	cpsr_c, ip
-	bl	arm1022_flush_kern_cache_all
 	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
 	bic	r0, r0, #0x1000 		@ ...i............
 	bic	r0, r0, #0x000e 		@ ............wca.
 	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
-	ldmfd	sp!, {pc}
+	mov	pc, lr
 
 /*
  * cpu_arm1022_reset(loc)
diff --git a/arch/arm/mm/proc-arm1026.S b/arch/arm/mm/proc-arm1026.S
index 636672a..5697c34 100644
--- a/arch/arm/mm/proc-arm1026.S
+++ b/arch/arm/mm/proc-arm1026.S
@@ -68,15 +68,11 @@
  * cpu_arm1026_proc_fin()
  */
 ENTRY(cpu_arm1026_proc_fin)
-	stmfd	sp!, {lr}
-	mov	ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
-	msr	cpsr_c, ip
-	bl	arm1026_flush_kern_cache_all
 	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
 	bic	r0, r0, #0x1000 		@ ...i............
 	bic	r0, r0, #0x000e 		@ ............wca.
 	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
-	ldmfd	sp!, {pc}
+	mov	pc, lr
 
 /*
  * cpu_arm1026_reset(loc)
diff --git a/arch/arm/mm/proc-arm6_7.S b/arch/arm/mm/proc-arm6_7.S
index 795dc61..64e0b32 100644
--- a/arch/arm/mm/proc-arm6_7.S
+++ b/arch/arm/mm/proc-arm6_7.S
@@ -184,8 +184,6 @@
 
 ENTRY(cpu_arm6_proc_fin)
 ENTRY(cpu_arm7_proc_fin)
-		mov	r0, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
-		msr	cpsr_c, r0
 		mov	r0, #0x31			@ ....S..DP...M
 		mcr	p15, 0, r0, c1, c0, 0		@ disable caches
 		mov	pc, lr
diff --git a/arch/arm/mm/proc-arm720.S b/arch/arm/mm/proc-arm720.S
index 0b62de2..9d96824 100644
--- a/arch/arm/mm/proc-arm720.S
+++ b/arch/arm/mm/proc-arm720.S
@@ -54,15 +54,11 @@
 		mov	pc, lr
 
 ENTRY(cpu_arm720_proc_fin)
-		stmfd	sp!, {lr}
-		mov	ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
-		msr	cpsr_c, ip
 		mrc	p15, 0, r0, c1, c0, 0
 		bic	r0, r0, #0x1000			@ ...i............
 		bic	r0, r0, #0x000e			@ ............wca.
 		mcr	p15, 0, r0, c1, c0, 0		@ disable caches
-		mcr	p15, 0, r1, c7, c7, 0		@ invalidate cache
-		ldmfd	sp!, {pc}
+		mov	pc, lr
 
 /*
  * Function: arm720_proc_do_idle(void)
diff --git a/arch/arm/mm/proc-arm740.S b/arch/arm/mm/proc-arm740.S
index 01860cd..6c1a9ab 100644
--- a/arch/arm/mm/proc-arm740.S
+++ b/arch/arm/mm/proc-arm740.S
@@ -36,15 +36,11 @@
  * cpu_arm740_proc_fin()
  */
 ENTRY(cpu_arm740_proc_fin)
-	stmfd	sp!, {lr}
-	mov	ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
-	msr	cpsr_c, ip
 	mrc	p15, 0, r0, c1, c0, 0
 	bic	r0, r0, #0x3f000000		@ bank/f/lock/s
 	bic	r0, r0, #0x0000000c		@ w-buffer/cache
 	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
-	mcr	p15, 0, r0, c7, c0, 0		@ invalidate cache
-	ldmfd	sp!, {pc}
+	mov	pc, lr
 
 /*
  * cpu_arm740_reset(loc)
diff --git a/arch/arm/mm/proc-arm7tdmi.S b/arch/arm/mm/proc-arm7tdmi.S
index 1201b98..6a850db 100644
--- a/arch/arm/mm/proc-arm7tdmi.S
+++ b/arch/arm/mm/proc-arm7tdmi.S
@@ -36,8 +36,6 @@
  * cpu_arm7tdmi_proc_fin()
  */
 ENTRY(cpu_arm7tdmi_proc_fin)
-		mov	r0, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
-		msr	cpsr_c, r0
 		mov	pc, lr
 
 /*
diff --git a/arch/arm/mm/proc-arm920.S b/arch/arm/mm/proc-arm920.S
index 8be8199..86f80aa 100644
--- a/arch/arm/mm/proc-arm920.S
+++ b/arch/arm/mm/proc-arm920.S
@@ -69,19 +69,11 @@
  * cpu_arm920_proc_fin()
  */
 ENTRY(cpu_arm920_proc_fin)
-	stmfd	sp!, {lr}
-	mov	ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
-	msr	cpsr_c, ip
-#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
-	bl	arm920_flush_kern_cache_all
-#else
-	bl	v4wt_flush_kern_cache_all
-#endif
 	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
 	bic	r0, r0, #0x1000			@ ...i............
 	bic	r0, r0, #0x000e			@ ............wca.
 	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
-	ldmfd	sp!, {pc}
+	mov	pc, lr
 
 /*
  * cpu_arm920_reset(loc)
diff --git a/arch/arm/mm/proc-arm922.S b/arch/arm/mm/proc-arm922.S
index c0ff8e4..f76ce9b 100644
--- a/arch/arm/mm/proc-arm922.S
+++ b/arch/arm/mm/proc-arm922.S
@@ -71,19 +71,11 @@
  * cpu_arm922_proc_fin()
  */
 ENTRY(cpu_arm922_proc_fin)
-	stmfd	sp!, {lr}
-	mov	ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
-	msr	cpsr_c, ip
-#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
-	bl	arm922_flush_kern_cache_all
-#else
-	bl	v4wt_flush_kern_cache_all
-#endif
 	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
 	bic	r0, r0, #0x1000			@ ...i............
 	bic	r0, r0, #0x000e			@ ............wca.
 	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
-	ldmfd	sp!, {pc}
+	mov	pc, lr
 
 /*
  * cpu_arm922_reset(loc)
diff --git a/arch/arm/mm/proc-arm925.S b/arch/arm/mm/proc-arm925.S
index 3c6cffe..657bd3f 100644
--- a/arch/arm/mm/proc-arm925.S
+++ b/arch/arm/mm/proc-arm925.S
@@ -92,15 +92,11 @@
  * cpu_arm925_proc_fin()
  */
 ENTRY(cpu_arm925_proc_fin)
-	stmfd	sp!, {lr}
-	mov	ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
-	msr	cpsr_c, ip
-	bl	arm925_flush_kern_cache_all
 	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
 	bic	r0, r0, #0x1000			@ ...i............
 	bic	r0, r0, #0x000e			@ ............wca.
 	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
-	ldmfd	sp!, {pc}
+	mov	pc, lr
 
 /*
  * cpu_arm925_reset(loc)
diff --git a/arch/arm/mm/proc-arm926.S b/arch/arm/mm/proc-arm926.S
index 75b707c..73f1f3c 100644
--- a/arch/arm/mm/proc-arm926.S
+++ b/arch/arm/mm/proc-arm926.S
@@ -61,15 +61,11 @@
  * cpu_arm926_proc_fin()
  */
 ENTRY(cpu_arm926_proc_fin)
-	stmfd	sp!, {lr}
-	mov	ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
-	msr	cpsr_c, ip
-	bl	arm926_flush_kern_cache_all
 	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
 	bic	r0, r0, #0x1000			@ ...i............
 	bic	r0, r0, #0x000e			@ ............wca.
 	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
-	ldmfd	sp!, {pc}
+	mov	pc, lr
 
 /*
  * cpu_arm926_reset(loc)
diff --git a/arch/arm/mm/proc-arm940.S b/arch/arm/mm/proc-arm940.S
index 1af1657..fffb061 100644
--- a/arch/arm/mm/proc-arm940.S
+++ b/arch/arm/mm/proc-arm940.S
@@ -37,15 +37,11 @@
  * cpu_arm940_proc_fin()
  */
 ENTRY(cpu_arm940_proc_fin)
-	stmfd	sp!, {lr}
-	mov	ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
-	msr	cpsr_c, ip
-	bl	arm940_flush_kern_cache_all
 	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
 	bic	r0, r0, #0x00001000		@ i-cache
 	bic	r0, r0, #0x00000004		@ d-cache
 	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
-	ldmfd	sp!, {pc}
+	mov	pc, lr
 
 /*
  * cpu_arm940_reset(loc)
diff --git a/arch/arm/mm/proc-arm946.S b/arch/arm/mm/proc-arm946.S
index 1664b6a..249a605 100644
--- a/arch/arm/mm/proc-arm946.S
+++ b/arch/arm/mm/proc-arm946.S
@@ -44,15 +44,11 @@
  * cpu_arm946_proc_fin()
  */
 ENTRY(cpu_arm946_proc_fin)
-	stmfd	sp!, {lr}
-	mov	ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
-	msr	cpsr_c, ip
-	bl	arm946_flush_kern_cache_all
 	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
 	bic	r0, r0, #0x00001000		@ i-cache
 	bic	r0, r0, #0x00000004		@ d-cache
 	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
-	ldmfd	sp!, {pc}
+	mov	pc, lr
 
 /*
  * cpu_arm946_reset(loc)
diff --git a/arch/arm/mm/proc-arm9tdmi.S b/arch/arm/mm/proc-arm9tdmi.S
index 28545c2..db47566 100644
--- a/arch/arm/mm/proc-arm9tdmi.S
+++ b/arch/arm/mm/proc-arm9tdmi.S
@@ -36,8 +36,6 @@
  * cpu_arm9tdmi_proc_fin()
  */
 ENTRY(cpu_arm9tdmi_proc_fin)
-		mov	r0, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
-		msr	cpsr_c, r0
 		mov	pc, lr
 
 /*
diff --git a/arch/arm/mm/proc-fa526.S b/arch/arm/mm/proc-fa526.S
index 08f5ac2..7803fdf 100644
--- a/arch/arm/mm/proc-fa526.S
+++ b/arch/arm/mm/proc-fa526.S
@@ -39,17 +39,13 @@
  * cpu_fa526_proc_fin()
  */
 ENTRY(cpu_fa526_proc_fin)
-	stmfd	sp!, {lr}
-	mov	ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
-	msr	cpsr_c, ip
-	bl	fa_flush_kern_cache_all
 	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
 	bic	r0, r0, #0x1000			@ ...i............
 	bic	r0, r0, #0x000e			@ ............wca.
 	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
 	nop
 	nop
-	ldmfd	sp!, {pc}
+	mov	pc, lr
 
 /*
  * cpu_fa526_reset(loc)
diff --git a/arch/arm/mm/proc-feroceon.S b/arch/arm/mm/proc-feroceon.S
index 53e6323..b304d01 100644
--- a/arch/arm/mm/proc-feroceon.S
+++ b/arch/arm/mm/proc-feroceon.S
@@ -75,11 +75,6 @@
  * cpu_feroceon_proc_fin()
  */
 ENTRY(cpu_feroceon_proc_fin)
-	stmfd	sp!, {lr}
-	mov	ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
-	msr	cpsr_c, ip
-	bl	feroceon_flush_kern_cache_all
-
 #if defined(CONFIG_CACHE_FEROCEON_L2) && \
 	!defined(CONFIG_CACHE_FEROCEON_L2_WRITETHROUGH)
 	mov	r0, #0
@@ -91,7 +86,7 @@
 	bic	r0, r0, #0x1000			@ ...i............
 	bic	r0, r0, #0x000e			@ ............wca.
 	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
-	ldmfd	sp!, {pc}
+	mov	pc, lr
 
 /*
  * cpu_feroceon_reset(loc)
diff --git a/arch/arm/mm/proc-mohawk.S b/arch/arm/mm/proc-mohawk.S
index caa3115..5f6892f 100644
--- a/arch/arm/mm/proc-mohawk.S
+++ b/arch/arm/mm/proc-mohawk.S
@@ -51,15 +51,11 @@
  * cpu_mohawk_proc_fin()
  */
 ENTRY(cpu_mohawk_proc_fin)
-	stmfd	sp!, {lr}
-	mov	ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
-	msr	cpsr_c, ip
-	bl	mohawk_flush_kern_cache_all
 	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
 	bic	r0, r0, #0x1800			@ ...iz...........
 	bic	r0, r0, #0x0006			@ .............ca.
 	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
-	ldmfd	sp!, {pc}
+	mov	pc, lr
 
 /*
  * cpu_mohawk_reset(loc)
diff --git a/arch/arm/mm/proc-sa110.S b/arch/arm/mm/proc-sa110.S
index 7b706b3..a201eb0 100644
--- a/arch/arm/mm/proc-sa110.S
+++ b/arch/arm/mm/proc-sa110.S
@@ -44,17 +44,13 @@
  * cpu_sa110_proc_fin()
  */
 ENTRY(cpu_sa110_proc_fin)
-	stmfd	sp!, {lr}
-	mov	ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
-	msr	cpsr_c, ip
-	bl	v4wb_flush_kern_cache_all	@ clean caches
-1:	mov	r0, #0
+	mov	r0, #0
 	mcr	p15, 0, r0, c15, c2, 2		@ Disable clock switching
 	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
 	bic	r0, r0, #0x1000			@ ...i............
 	bic	r0, r0, #0x000e			@ ............wca.
 	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
-	ldmfd	sp!, {pc}
+	mov	pc, lr
 
 /*
  * cpu_sa110_reset(loc)
diff --git a/arch/arm/mm/proc-sa1100.S b/arch/arm/mm/proc-sa1100.S
index 5c47760..7ddc480 100644
--- a/arch/arm/mm/proc-sa1100.S
+++ b/arch/arm/mm/proc-sa1100.S
@@ -55,16 +55,12 @@
  *  - Clean and turn off caches.
  */
 ENTRY(cpu_sa1100_proc_fin)
-	stmfd	sp!, {lr}
-	mov	ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
-	msr	cpsr_c, ip
-	bl	v4wb_flush_kern_cache_all
 	mcr	p15, 0, ip, c15, c2, 2		@ Disable clock switching
 	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
 	bic	r0, r0, #0x1000			@ ...i............
 	bic	r0, r0, #0x000e			@ ............wca.
 	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
-	ldmfd	sp!, {pc}
+	mov	pc, lr
 
 /*
  * cpu_sa1100_reset(loc)
diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S
index 2f5a3c2..22aac85 100644
--- a/arch/arm/mm/proc-v6.S
+++ b/arch/arm/mm/proc-v6.S
@@ -42,14 +42,11 @@
 	mov	pc, lr
 
 ENTRY(cpu_v6_proc_fin)
-	stmfd	sp!, {lr}
-	cpsid	if				@ disable interrupts
-	bl	v6_flush_kern_cache_all
 	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
 	bic	r0, r0, #0x1000			@ ...i............
 	bic	r0, r0, #0x0006			@ .............ca.
 	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
-	ldmfd	sp!, {pc}
+	mov	pc, lr
 
 /*
  *	cpu_v6_reset(loc)
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 8071bcd..6a8506d 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -45,14 +45,11 @@
 ENDPROC(cpu_v7_proc_init)
 
 ENTRY(cpu_v7_proc_fin)
-	stmfd	sp!, {lr}
-	cpsid	if				@ disable interrupts
-	bl	v7_flush_kern_cache_all
 	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
 	bic	r0, r0, #0x1000			@ ...i............
 	bic	r0, r0, #0x0006			@ .............ca.
 	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
-	ldmfd	sp!, {pc}
+	mov	pc, lr
 ENDPROC(cpu_v7_proc_fin)
 
 /*
diff --git a/arch/arm/mm/proc-xsc3.S b/arch/arm/mm/proc-xsc3.S
index e5797f1..361a51e 100644
--- a/arch/arm/mm/proc-xsc3.S
+++ b/arch/arm/mm/proc-xsc3.S
@@ -90,15 +90,11 @@
  * cpu_xsc3_proc_fin()
  */
 ENTRY(cpu_xsc3_proc_fin)
-	str	lr, [sp, #-4]!
-	mov	r0, #PSR_F_BIT|PSR_I_BIT|SVC_MODE
-	msr	cpsr_c, r0
-	bl	xsc3_flush_kern_cache_all	@ clean caches
 	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
 	bic	r0, r0, #0x1800			@ ...IZ...........
 	bic	r0, r0, #0x0006			@ .............CA.
 	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
-	ldr	pc, [sp], #4
+	mov	pc, lr
 
 /*
  * cpu_xsc3_reset(loc)
diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S
index 63037e2..1407597 100644
--- a/arch/arm/mm/proc-xscale.S
+++ b/arch/arm/mm/proc-xscale.S
@@ -124,15 +124,11 @@
  * cpu_xscale_proc_fin()
  */
 ENTRY(cpu_xscale_proc_fin)
-	str	lr, [sp, #-4]!
-	mov	r0, #PSR_F_BIT|PSR_I_BIT|SVC_MODE
-	msr	cpsr_c, r0
-	bl	xscale_flush_kern_cache_all	@ clean caches
 	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
 	bic	r0, r0, #0x1800			@ ...IZ...........
 	bic	r0, r0, #0x0006			@ .............CA.
 	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
-	ldr	pc, [sp], #4
+	mov	pc, lr
 
 /*
  * cpu_xscale_reset(loc)
diff --git a/arch/arm/mm/vmregion.c b/arch/arm/mm/vmregion.c
index 19e09bdb..935993e 100644
--- a/arch/arm/mm/vmregion.c
+++ b/arch/arm/mm/vmregion.c
@@ -35,7 +35,8 @@
  */
 
 struct arm_vmregion *
-arm_vmregion_alloc(struct arm_vmregion_head *head, size_t size, gfp_t gfp)
+arm_vmregion_alloc(struct arm_vmregion_head *head, size_t align,
+		   size_t size, gfp_t gfp)
 {
 	unsigned long addr = head->vm_start, end = head->vm_end - size;
 	unsigned long flags;
@@ -58,7 +59,7 @@
 			goto nospc;
 		if ((addr + size) <= c->vm_start)
 			goto found;
-		addr = c->vm_end;
+		addr = ALIGN(c->vm_end, align);
 		if (addr > end)
 			goto nospc;
 	}
diff --git a/arch/arm/mm/vmregion.h b/arch/arm/mm/vmregion.h
index 6b2cdbd..15e9f04 100644
--- a/arch/arm/mm/vmregion.h
+++ b/arch/arm/mm/vmregion.h
@@ -21,7 +21,7 @@
 	int			vm_active;
 };
 
-struct arm_vmregion *arm_vmregion_alloc(struct arm_vmregion_head *, size_t, gfp_t);
+struct arm_vmregion *arm_vmregion_alloc(struct arm_vmregion_head *, size_t, size_t, gfp_t);
 struct arm_vmregion *arm_vmregion_find(struct arm_vmregion_head *, unsigned long);
 struct arm_vmregion *arm_vmregion_find_remove(struct arm_vmregion_head *, unsigned long);
 void arm_vmregion_free(struct arm_vmregion_head *, struct arm_vmregion *);
diff --git a/arch/arm/plat-iop/pci.c b/arch/arm/plat-iop/pci.c
index ce31f31..43f2b15 100644
--- a/arch/arm/plat-iop/pci.c
+++ b/arch/arm/plat-iop/pci.c
@@ -359,7 +359,7 @@
 	DBG("ATU: IOP3XX_ATUCMD=0x%04x\n", *IOP3XX_ATUCMD);
 	DBG("ATU: IOP3XX_ATUCR=0x%08x\n", *IOP3XX_ATUCR);
 
-	hook_fault_code(16+6, iop3xx_pci_abort, SIGBUS, "imprecise external abort");
+	hook_fault_code(16+6, iop3xx_pci_abort, SIGBUS, 0, "imprecise external abort");
 }
 
 /* for platforms that might be host-bus-adapters */
diff --git a/drivers/gpio/pl061.c b/drivers/gpio/pl061.c
index ee568c8..5005990 100644
--- a/drivers/gpio/pl061.c
+++ b/drivers/gpio/pl061.c
@@ -232,7 +232,7 @@
 	desc->chip->unmask(irq);
 }
 
-static int __init pl061_probe(struct amba_device *dev, struct amba_id *id)
+static int pl061_probe(struct amba_device *dev, struct amba_id *id)
 {
 	struct pl061_platform_data *pdata;
 	struct pl061_gpio *chip;
@@ -333,7 +333,7 @@
 	return ret;
 }
 
-static struct amba_id pl061_ids[] __initdata = {
+static struct amba_id pl061_ids[] = {
 	{
 		.id	= 0x00041061,
 		.mask	= 0x000fffff,
diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index 4917af9..7edae83 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c
@@ -26,7 +26,6 @@
 #include <linux/amba/mmci.h>
 #include <linux/regulator/consumer.h>
 
-#include <asm/cacheflush.h>
 #include <asm/div64.h>
 #include <asm/io.h>
 #include <asm/sizes.h>
@@ -37,12 +36,39 @@
 
 static unsigned int fmax = 515633;
 
+/**
+ * struct variant_data - MMCI variant-specific quirks
+ * @clkreg: default value for MCICLOCK register
+ * @clkreg_enable: enable value for MMCICLOCK register
+ * @datalength_bits: number of bits in the MMCIDATALENGTH register
+ */
+struct variant_data {
+	unsigned int		clkreg;
+	unsigned int		clkreg_enable;
+	unsigned int		datalength_bits;
+};
+
+static struct variant_data variant_arm = {
+	.datalength_bits	= 16,
+};
+
+static struct variant_data variant_u300 = {
+	.clkreg_enable		= 1 << 13, /* HWFCEN */
+	.datalength_bits	= 16,
+};
+
+static struct variant_data variant_ux500 = {
+	.clkreg			= MCI_CLK_ENABLE,
+	.clkreg_enable		= 1 << 14, /* HWFCEN */
+	.datalength_bits	= 24,
+};
 /*
  * This must be called with host->lock held
  */
 static void mmci_set_clkreg(struct mmci_host *host, unsigned int desired)
 {
-	u32 clk = 0;
+	struct variant_data *variant = host->variant;
+	u32 clk = variant->clkreg;
 
 	if (desired) {
 		if (desired >= host->mclk) {
@@ -54,8 +80,8 @@
 				clk = 255;
 			host->cclk = host->mclk / (2 * (clk + 1));
 		}
-		if (host->hw_designer == AMBA_VENDOR_ST)
-			clk |= MCI_ST_FCEN; /* Bug fix in ST IP block */
+
+		clk |= variant->clkreg_enable;
 		clk |= MCI_CLK_ENABLE;
 		/* This hasn't proven to be worthwhile */
 		/* clk |= MCI_CLK_PWRSAVE; */
@@ -98,6 +124,18 @@
 	host->data = NULL;
 }
 
+static void mmci_init_sg(struct mmci_host *host, struct mmc_data *data)
+{
+	unsigned int flags = SG_MITER_ATOMIC;
+
+	if (data->flags & MMC_DATA_READ)
+		flags |= SG_MITER_TO_SG;
+	else
+		flags |= SG_MITER_FROM_SG;
+
+	sg_miter_start(&host->sg_miter, data->sg, data->sg_len, flags);
+}
+
 static void mmci_start_data(struct mmci_host *host, struct mmc_data *data)
 {
 	unsigned int datactrl, timeout, irqmask;
@@ -109,7 +147,7 @@
 		data->blksz, data->blocks, data->flags);
 
 	host->data = data;
-	host->size = data->blksz;
+	host->size = data->blksz * data->blocks;
 	host->data_xfered = 0;
 
 	mmci_init_sg(host, data);
@@ -210,8 +248,17 @@
 		 * We hit an error condition.  Ensure that any data
 		 * partially written to a page is properly coherent.
 		 */
-		if (host->sg_len && data->flags & MMC_DATA_READ)
-			flush_dcache_page(sg_page(host->sg_ptr));
+		if (data->flags & MMC_DATA_READ) {
+			struct sg_mapping_iter *sg_miter = &host->sg_miter;
+			unsigned long flags;
+
+			local_irq_save(flags);
+			if (sg_miter_next(sg_miter)) {
+				flush_dcache_page(sg_miter->page);
+				sg_miter_stop(sg_miter);
+			}
+			local_irq_restore(flags);
+		}
 	}
 	if (status & MCI_DATAEND) {
 		mmci_stop_data(host);
@@ -314,15 +361,18 @@
 static irqreturn_t mmci_pio_irq(int irq, void *dev_id)
 {
 	struct mmci_host *host = dev_id;
+	struct sg_mapping_iter *sg_miter = &host->sg_miter;
 	void __iomem *base = host->base;
+	unsigned long flags;
 	u32 status;
 
 	status = readl(base + MMCISTATUS);
 
 	dev_dbg(mmc_dev(host->mmc), "irq1 (pio) %08x\n", status);
 
+	local_irq_save(flags);
+
 	do {
-		unsigned long flags;
 		unsigned int remain, len;
 		char *buffer;
 
@@ -336,11 +386,11 @@
 		if (!(status & (MCI_TXFIFOHALFEMPTY|MCI_RXDATAAVLBL)))
 			break;
 
-		/*
-		 * Map the current scatter buffer.
-		 */
-		buffer = mmci_kmap_atomic(host, &flags) + host->sg_off;
-		remain = host->sg_ptr->length - host->sg_off;
+		if (!sg_miter_next(sg_miter))
+			break;
+
+		buffer = sg_miter->addr;
+		remain = sg_miter->length;
 
 		len = 0;
 		if (status & MCI_RXACTIVE)
@@ -348,31 +398,24 @@
 		if (status & MCI_TXACTIVE)
 			len = mmci_pio_write(host, buffer, remain, status);
 
-		/*
-		 * Unmap the buffer.
-		 */
-		mmci_kunmap_atomic(host, buffer, &flags);
+		sg_miter->consumed = len;
 
-		host->sg_off += len;
 		host->size -= len;
 		remain -= len;
 
 		if (remain)
 			break;
 
-		/*
-		 * If we were reading, and we have completed this
-		 * page, ensure that the data cache is coherent.
-		 */
 		if (status & MCI_RXACTIVE)
-			flush_dcache_page(sg_page(host->sg_ptr));
-
-		if (!mmci_next_sg(host))
-			break;
+			flush_dcache_page(sg_miter->page);
 
 		status = readl(base + MMCISTATUS);
 	} while (1);
 
+	sg_miter_stop(sg_miter);
+
+	local_irq_restore(flags);
+
 	/*
 	 * If we're nearing the end of the read, switch to
 	 * "any data available" mode.
@@ -477,16 +520,9 @@
 			/* This implicitly enables the regulator */
 			mmc_regulator_set_ocr(host->vcc, ios->vdd);
 #endif
-		/*
-		 * The translate_vdd function is not used if you have
-		 * an external regulator, or your design is really weird.
-		 * Using it would mean sending in power control BOTH using
-		 * a regulator AND the 4 MMCIPWR bits. If we don't have
-		 * a regulator, we might have some other platform specific
-		 * power control behind this translate function.
-		 */
-		if (!host->vcc && host->plat->translate_vdd)
-			pwr |= host->plat->translate_vdd(mmc_dev(mmc), ios->vdd);
+		if (host->plat->vdd_handler)
+			pwr |= host->plat->vdd_handler(mmc_dev(mmc), ios->vdd,
+						       ios->power_mode);
 		/* The ST version does not have this, fall through to POWER_ON */
 		if (host->hw_designer != AMBA_VENDOR_ST) {
 			pwr |= MCI_PWR_UP;
@@ -551,21 +587,10 @@
 	.get_cd		= mmci_get_cd,
 };
 
-static void mmci_check_status(unsigned long data)
-{
-	struct mmci_host *host = (struct mmci_host *)data;
-	unsigned int status = mmci_get_cd(host->mmc);
-
-	if (status ^ host->oldstat)
-		mmc_detect_change(host->mmc, 0);
-
-	host->oldstat = status;
-	mod_timer(&host->timer, jiffies + HZ);
-}
-
 static int __devinit mmci_probe(struct amba_device *dev, struct amba_id *id)
 {
 	struct mmci_platform_data *plat = dev->dev.platform_data;
+	struct variant_data *variant = id->data;
 	struct mmci_host *host;
 	struct mmc_host *mmc;
 	int ret;
@@ -609,6 +634,7 @@
 		goto clk_free;
 
 	host->plat = plat;
+	host->variant = variant;
 	host->mclk = clk_get_rate(host->clk);
 	/*
 	 * According to the spec, mclk is max 100 MHz,
@@ -669,6 +695,7 @@
 	if (host->vcc == NULL)
 		mmc->ocr_avail = plat->ocr_mask;
 	mmc->caps = plat->capabilities;
+	mmc->caps |= MMC_CAP_NEEDS_POLL;
 
 	/*
 	 * We can do SGIO
@@ -677,10 +704,11 @@
 	mmc->max_phys_segs = NR_SG;
 
 	/*
-	 * Since we only have a 16-bit data length register, we must
-	 * ensure that we don't exceed 2^16-1 bytes in a single request.
+	 * Since only a certain number of bits are valid in the data length
+	 * register, we must ensure that we don't exceed 2^num-1 bytes in a
+	 * single request.
 	 */
-	mmc->max_req_size = 65535;
+	mmc->max_req_size = (1 << variant->datalength_bits) - 1;
 
 	/*
 	 * Set the maximum segment size.  Since we aren't doing DMA
@@ -734,7 +762,6 @@
 	writel(MCI_IRQENABLE, host->base + MMCIMASK0);
 
 	amba_set_drvdata(dev, mmc);
-	host->oldstat = mmci_get_cd(host->mmc);
 
 	mmc_add_host(mmc);
 
@@ -742,12 +769,6 @@
 		mmc_hostname(mmc), amba_rev(dev), amba_config(dev),
 		(unsigned long long)dev->res.start, dev->irq[0], dev->irq[1]);
 
-	init_timer(&host->timer);
-	host->timer.data = (unsigned long)host;
-	host->timer.function = mmci_check_status;
-	host->timer.expires = jiffies + HZ;
-	add_timer(&host->timer);
-
 	return 0;
 
  irq0_free:
@@ -781,8 +802,6 @@
 	if (mmc) {
 		struct mmci_host *host = mmc_priv(mmc);
 
-		del_timer_sync(&host->timer);
-
 		mmc_remove_host(mmc);
 
 		writel(0, host->base + MMCIMASK0);
@@ -856,19 +875,28 @@
 	{
 		.id	= 0x00041180,
 		.mask	= 0x000fffff,
+		.data	= &variant_arm,
 	},
 	{
 		.id	= 0x00041181,
 		.mask	= 0x000fffff,
+		.data	= &variant_arm,
 	},
 	/* ST Micro variants */
 	{
 		.id     = 0x00180180,
 		.mask   = 0x00ffffff,
+		.data	= &variant_u300,
 	},
 	{
 		.id     = 0x00280180,
 		.mask   = 0x00ffffff,
+		.data	= &variant_u300,
+	},
+	{
+		.id     = 0x00480180,
+		.mask   = 0x00ffffff,
+		.data	= &variant_ux500,
 	},
 	{ 0, 0 },
 };
diff --git a/drivers/mmc/host/mmci.h b/drivers/mmc/host/mmci.h
index d77062e..68970cf 100644
--- a/drivers/mmc/host/mmci.h
+++ b/drivers/mmc/host/mmci.h
@@ -28,8 +28,6 @@
 #define MCI_4BIT_BUS		(1 << 11)
 /* 8bit wide buses supported in ST Micro versions */
 #define MCI_ST_8BIT_BUS		(1 << 12)
-/* HW flow control on the ST Micro version */
-#define MCI_ST_FCEN		(1 << 13)
 
 #define MMCIARGUMENT		0x008
 #define MMCICOMMAND		0x00c
@@ -145,6 +143,7 @@
 #define NR_SG		16
 
 struct clk;
+struct variant_data;
 
 struct mmci_host {
 	void __iomem		*base;
@@ -164,6 +163,7 @@
 	unsigned int		cclk;
 	u32			pwr;
 	struct mmci_platform_data *plat;
+	struct variant_data	*variant;
 
 	u8			hw_designer;
 	u8			hw_revision:4;
@@ -171,42 +171,9 @@
 	struct timer_list	timer;
 	unsigned int		oldstat;
 
-	unsigned int		sg_len;
-
 	/* pio stuff */
-	struct scatterlist	*sg_ptr;
-	unsigned int		sg_off;
+	struct sg_mapping_iter	sg_miter;
 	unsigned int		size;
 	struct regulator	*vcc;
 };
 
-static inline void mmci_init_sg(struct mmci_host *host, struct mmc_data *data)
-{
-	/*
-	 * Ideally, we want the higher levels to pass us a scatter list.
-	 */
-	host->sg_len = data->sg_len;
-	host->sg_ptr = data->sg;
-	host->sg_off = 0;
-}
-
-static inline int mmci_next_sg(struct mmci_host *host)
-{
-	host->sg_ptr++;
-	host->sg_off = 0;
-	return --host->sg_len;
-}
-
-static inline char *mmci_kmap_atomic(struct mmci_host *host, unsigned long *flags)
-{
-	struct scatterlist *sg = host->sg_ptr;
-
-	local_irq_save(*flags);
-	return kmap_atomic(sg_page(sg), KM_BIO_SRC_IRQ) + sg->offset;
-}
-
-static inline void mmci_kunmap_atomic(struct mmci_host *host, void *buffer, unsigned long *flags)
-{
-	kunmap_atomic(buffer, KM_BIO_SRC_IRQ);
-	local_irq_restore(*flags);
-}
diff --git a/drivers/rtc/rtc-pl031.c b/drivers/rtc/rtc-pl031.c
index 3587d99..71bbefc 100644
--- a/drivers/rtc/rtc-pl031.c
+++ b/drivers/rtc/rtc-pl031.c
@@ -456,7 +456,7 @@
 	.irq_set_freq = pl031_irq_set_freq,
 };
 
-static struct amba_id pl031_ids[] __initdata = {
+static struct amba_id pl031_ids[] = {
 	{
 		.id = 0x00041031,
 		.mask = 0x000fffff,
diff --git a/drivers/serial/amba-pl010.c b/drivers/serial/amba-pl010.c
index b09a638..50441ff 100644
--- a/drivers/serial/amba-pl010.c
+++ b/drivers/serial/amba-pl010.c
@@ -782,7 +782,7 @@
 	return 0;
 }
 
-static struct amba_id pl010_ids[] __initdata = {
+static struct amba_id pl010_ids[] = {
 	{
 		.id	= 0x00041010,
 		.mask	= 0x000fffff,
diff --git a/drivers/serial/amba-pl011.c b/drivers/serial/amba-pl011.c
index eb4cb48..6ca7a44 100644
--- a/drivers/serial/amba-pl011.c
+++ b/drivers/serial/amba-pl011.c
@@ -69,9 +69,12 @@
 struct uart_amba_port {
 	struct uart_port	port;
 	struct clk		*clk;
-	unsigned int		im;	/* interrupt mask */
+	unsigned int		im;		/* interrupt mask */
 	unsigned int		old_status;
-	unsigned int		ifls;	/* vendor-specific */
+	unsigned int		ifls;		/* vendor-specific */
+	unsigned int		lcrh_tx;	/* vendor-specific */
+	unsigned int		lcrh_rx;	/* vendor-specific */
+	bool			oversampling;   /* vendor-specific */
 	bool			autorts;
 };
 
@@ -79,16 +82,25 @@
 struct vendor_data {
 	unsigned int		ifls;
 	unsigned int		fifosize;
+	unsigned int		lcrh_tx;
+	unsigned int		lcrh_rx;
+	bool			oversampling;
 };
 
 static struct vendor_data vendor_arm = {
 	.ifls			= UART011_IFLS_RX4_8|UART011_IFLS_TX4_8,
 	.fifosize		= 16,
+	.lcrh_tx		= UART011_LCRH,
+	.lcrh_rx		= UART011_LCRH,
+	.oversampling		= false,
 };
 
 static struct vendor_data vendor_st = {
 	.ifls			= UART011_IFLS_RX_HALF|UART011_IFLS_TX_HALF,
 	.fifosize		= 64,
+	.lcrh_tx		= ST_UART011_LCRH_TX,
+	.lcrh_rx		= ST_UART011_LCRH_RX,
+	.oversampling		= true,
 };
 
 static void pl011_stop_tx(struct uart_port *port)
@@ -327,12 +339,12 @@
 	unsigned int lcr_h;
 
 	spin_lock_irqsave(&uap->port.lock, flags);
-	lcr_h = readw(uap->port.membase + UART011_LCRH);
+	lcr_h = readw(uap->port.membase + uap->lcrh_tx);
 	if (break_state == -1)
 		lcr_h |= UART01x_LCRH_BRK;
 	else
 		lcr_h &= ~UART01x_LCRH_BRK;
-	writew(lcr_h, uap->port.membase + UART011_LCRH);
+	writew(lcr_h, uap->port.membase + uap->lcrh_tx);
 	spin_unlock_irqrestore(&uap->port.lock, flags);
 }
 
@@ -393,7 +405,17 @@
 	writew(cr, uap->port.membase + UART011_CR);
 	writew(0, uap->port.membase + UART011_FBRD);
 	writew(1, uap->port.membase + UART011_IBRD);
-	writew(0, uap->port.membase + UART011_LCRH);
+	writew(0, uap->port.membase + uap->lcrh_rx);
+	if (uap->lcrh_tx != uap->lcrh_rx) {
+		int i;
+		/*
+		 * Wait 10 PCLKs before writing LCRH_TX register,
+		 * to get this delay write read only register 10 times
+		 */
+		for (i = 0; i < 10; ++i)
+			writew(0xff, uap->port.membase + UART011_MIS);
+		writew(0, uap->port.membase + uap->lcrh_tx);
+	}
 	writew(0, uap->port.membase + UART01x_DR);
 	while (readw(uap->port.membase + UART01x_FR) & UART01x_FR_BUSY)
 		barrier();
@@ -422,10 +444,19 @@
 	return retval;
 }
 
+static void pl011_shutdown_channel(struct uart_amba_port *uap,
+					unsigned int lcrh)
+{
+      unsigned long val;
+
+      val = readw(uap->port.membase + lcrh);
+      val &= ~(UART01x_LCRH_BRK | UART01x_LCRH_FEN);
+      writew(val, uap->port.membase + lcrh);
+}
+
 static void pl011_shutdown(struct uart_port *port)
 {
 	struct uart_amba_port *uap = (struct uart_amba_port *)port;
-	unsigned long val;
 
 	/*
 	 * disable all interrupts
@@ -450,9 +481,9 @@
 	/*
 	 * disable break condition and fifos
 	 */
-	val = readw(uap->port.membase + UART011_LCRH);
-	val &= ~(UART01x_LCRH_BRK | UART01x_LCRH_FEN);
-	writew(val, uap->port.membase + UART011_LCRH);
+	pl011_shutdown_channel(uap, uap->lcrh_rx);
+	if (uap->lcrh_rx != uap->lcrh_tx)
+		pl011_shutdown_channel(uap, uap->lcrh_tx);
 
 	/*
 	 * Shut down the clock producer
@@ -472,8 +503,13 @@
 	/*
 	 * Ask the core to calculate the divisor for us.
 	 */
-	baud = uart_get_baud_rate(port, termios, old, 0, port->uartclk/16);
-	quot = port->uartclk * 4 / baud;
+	baud = uart_get_baud_rate(port, termios, old, 0,
+				  port->uartclk/(uap->oversampling ? 8 : 16));
+
+	if (baud > port->uartclk/16)
+		quot = DIV_ROUND_CLOSEST(port->uartclk * 8, baud);
+	else
+		quot = DIV_ROUND_CLOSEST(port->uartclk * 4, baud);
 
 	switch (termios->c_cflag & CSIZE) {
 	case CS5:
@@ -552,6 +588,13 @@
 		uap->autorts = false;
 	}
 
+	if (uap->oversampling) {
+		if (baud > port->uartclk/16)
+			old_cr |= ST_UART011_CR_OVSFACT;
+		else
+			old_cr &= ~ST_UART011_CR_OVSFACT;
+	}
+
 	/* Set baud rate */
 	writew(quot & 0x3f, port->membase + UART011_FBRD);
 	writew(quot >> 6, port->membase + UART011_IBRD);
@@ -561,7 +604,17 @@
 	 * NOTE: MUST BE WRITTEN AFTER UARTLCR_M & UARTLCR_L
 	 * ----------^----------^----------^----------^-----
 	 */
-	writew(lcr_h, port->membase + UART011_LCRH);
+	writew(lcr_h, port->membase + uap->lcrh_rx);
+	if (uap->lcrh_rx != uap->lcrh_tx) {
+		int i;
+		/*
+		 * Wait 10 PCLKs before writing LCRH_TX register,
+		 * to get this delay write read only register 10 times
+		 */
+		for (i = 0; i < 10; ++i)
+			writew(0xff, uap->port.membase + UART011_MIS);
+		writew(lcr_h, port->membase + uap->lcrh_tx);
+	}
 	writew(old_cr, port->membase + UART011_CR);
 
 	spin_unlock_irqrestore(&port->lock, flags);
@@ -688,7 +741,7 @@
 	if (readw(uap->port.membase + UART011_CR) & UART01x_CR_UARTEN) {
 		unsigned int lcr_h, ibrd, fbrd;
 
-		lcr_h = readw(uap->port.membase + UART011_LCRH);
+		lcr_h = readw(uap->port.membase + uap->lcrh_tx);
 
 		*parity = 'n';
 		if (lcr_h & UART01x_LCRH_PEN) {
@@ -707,6 +760,12 @@
 		fbrd = readw(uap->port.membase + UART011_FBRD);
 
 		*baud = uap->port.uartclk * 4 / (64 * ibrd + fbrd);
+
+		if (uap->oversampling) {
+			if (readw(uap->port.membase + UART011_CR)
+				  & ST_UART011_CR_OVSFACT)
+				*baud *= 2;
+		}
 	}
 }
 
@@ -800,6 +859,9 @@
 	}
 
 	uap->ifls = vendor->ifls;
+	uap->lcrh_rx = vendor->lcrh_rx;
+	uap->lcrh_tx = vendor->lcrh_tx;
+	uap->oversampling = vendor->oversampling;
 	uap->port.dev = &dev->dev;
 	uap->port.mapbase = dev->res.start;
 	uap->port.membase = base;
@@ -868,7 +930,7 @@
 }
 #endif
 
-static struct amba_id pl011_ids[] __initdata = {
+static struct amba_id pl011_ids[] = {
 	{
 		.id	= 0x00041011,
 		.mask	= 0x000fffff,
diff --git a/include/linux/amba/mmci.h b/include/linux/amba/mmci.h
index 7e466fe..ca84ce7 100644
--- a/include/linux/amba/mmci.h
+++ b/include/linux/amba/mmci.h
@@ -15,9 +15,10 @@
  * @ocr_mask: available voltages on the 4 pins from the block, this
  * is ignored if a regulator is used, see the MMC_VDD_* masks in
  * mmc/host.h
- * @translate_vdd: a callback function to translate a MMC_VDD_*
- * mask into a value to be binary or:ed and written into the
- * MMCIPWR register of the block
+ * @vdd_handler: a callback function to translate a MMC_VDD_*
+ * mask into a value to be binary (or set some other custom bits
+ * in MMCIPWR) or:ed and written into the MMCIPWR register of the
+ * block.  May also control external power based on the power_mode.
  * @status: if no GPIO read function was given to the block in
  * gpio_wp (below) this function will be called to determine
  * whether a card is present in the MMC slot or not
@@ -29,7 +30,8 @@
 struct mmci_platform_data {
 	unsigned int f_max;
 	unsigned int ocr_mask;
-	u32 (*translate_vdd)(struct device *, unsigned int);
+	u32 (*vdd_handler)(struct device *, unsigned int vdd,
+			   unsigned char power_mode);
 	unsigned int (*status)(struct device *);
 	int	gpio_wp;
 	int	gpio_cd;
diff --git a/include/linux/amba/serial.h b/include/linux/amba/serial.h
index 5a5a7fd..e1b634b 100644
--- a/include/linux/amba/serial.h
+++ b/include/linux/amba/serial.h
@@ -38,10 +38,12 @@
 #define UART01x_FR		0x18	/* Flag register (Read only). */
 #define UART010_IIR		0x1C	/* Interrupt indentification register (Read). */
 #define UART010_ICR		0x1C	/* Interrupt clear register (Write). */
+#define ST_UART011_LCRH_RX	0x1C    /* Rx line control register. */
 #define UART01x_ILPR		0x20	/* IrDA low power counter register. */
 #define UART011_IBRD		0x24	/* Integer baud rate divisor register. */
 #define UART011_FBRD		0x28	/* Fractional baud rate divisor register. */
 #define UART011_LCRH		0x2c	/* Line control register. */
+#define ST_UART011_LCRH_TX	0x2c    /* Tx Line control register. */
 #define UART011_CR		0x30	/* Control register. */
 #define UART011_IFLS		0x34	/* Interrupt fifo level select. */
 #define UART011_IMSC		0x38	/* Interrupt mask. */
@@ -84,6 +86,7 @@
 #define UART010_CR_TIE 		0x0020
 #define UART010_CR_RIE 		0x0010
 #define UART010_CR_MSIE		0x0008
+#define ST_UART011_CR_OVSFACT	0x0008	/* Oversampling factor */
 #define UART01x_CR_IIRLP	0x0004	/* SIR low power mode */
 #define UART01x_CR_SIREN	0x0002	/* SIR enable */
 #define UART01x_CR_UARTEN	0x0001	/* UART enable */
diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c
index 250ed11..44524cc 100644
--- a/lib/atomic64_test.c
+++ b/lib/atomic64_test.c
@@ -114,7 +114,7 @@
 	BUG_ON(v.counter != r);
 
 #if defined(CONFIG_X86) || defined(CONFIG_MIPS) || defined(CONFIG_PPC) || \
-    defined(CONFIG_S390) || defined(_ASM_GENERIC_ATOMIC64_H)
+    defined(CONFIG_S390) || defined(_ASM_GENERIC_ATOMIC64_H) || defined(CONFIG_ARM)
 	INIT(onestwos);
 	BUG_ON(atomic64_dec_if_positive(&v) != (onestwos - 1));
 	r -= one;