Merge branch 'misc' into devel

Conflicts:
	arch/arm/Kconfig
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 38bf684..1fd3f28 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -63,6 +63,10 @@
 	depends on GENERIC_CLOCKEVENTS
 	default y if SMP
 
+config KTIME_SCALAR
+	bool
+	default y
+
 config HAVE_TCM
 	bool
 	select GENERIC_ALLOCATOR
@@ -178,11 +182,6 @@
 config ARCH_MTD_XIP
 	bool
 
-config ARM_L1_CACHE_SHIFT_6
-	bool
-	help
-	  Setting ARM L1 cache line size to 64 Bytes.
-
 config VECTORS_BASE
 	hex
 	default 0xffff0000 if MMU || CPU_HIGH_VECTOR
@@ -1152,7 +1151,7 @@
 
 config PL310_ERRATA_588369
 	bool "Clean & Invalidate maintenance operations do not invalidate clean lines"
-	depends on CACHE_L2X0 && ARCH_OMAP4
+	depends on CACHE_L2X0
 	help
 	   The PL310 L2 cache controller implements three types of Clean &
 	   Invalidate maintenance operations: by Physical Address
@@ -1161,8 +1160,7 @@
 	   clean operation followed immediately by an invalidate operation,
 	   both performing to the same memory location. This functionality
 	   is not correctly implemented in PL310 as clean lines are not
-	   invalidated as a result of these operations. Note that this errata
-	   uses Texas Instrument's secure monitor api.
+	   invalidated as a result of these operations.
 
 config ARM_ERRATA_720789
 	bool "ARM errata: TLBIASIDIS and TLBIMVAIS operations can broadcast a faulty ASID"
@@ -1176,6 +1174,17 @@
 	  tables. The workaround changes the TLB flushing routines to invalidate
 	  entries regardless of the ASID.
 
+config PL310_ERRATA_727915
+	bool "Background Clean & Invalidate by Way operation can cause data corruption"
+	depends on CACHE_L2X0
+	help
+	  PL310 implements the Clean & Invalidate by Way L2 cache maintenance
+	  operation (offset 0x7FC). This operation runs in background so that
+	  PL310 can handle normal accesses while it is in progress. Under very
+	  rare circumstances, due to this erratum, write data can be lost when
+	  PL310 treats a cacheable write transaction during a Clean &
+	  Invalidate by Way operation.
+
 config ARM_ERRATA_743622
 	bool "ARM errata: Faulty hazard checking in the Store Buffer may lead to data corruption"
 	depends on CPU_V7
@@ -1430,6 +1439,37 @@
 
 	  If unsure, say N.
 
+config THUMB2_AVOID_R_ARM_THM_JUMP11
+	bool "Work around buggy Thumb-2 short branch relocations in gas"
+	depends on THUMB2_KERNEL && MODULES
+	default y
+	help
+	  Various binutils versions can resolve Thumb-2 branches to
+	  locally-defined, preemptible global symbols as short-range "b.n"
+	  branch instructions.
+
+	  This is a problem, because there's no guarantee the final
+	  destination of the symbol, or any candidate locations for a
+	  trampoline, are within range of the branch.  For this reason, the
+	  kernel does not support fixing up the R_ARM_THM_JUMP11 (102)
+	  relocation in modules at all, and it makes little sense to add
+	  support.
+
+	  The symptom is that the kernel fails with an "unsupported
+	  relocation" error when loading some modules.
+
+	  Until fixed tools are available, passing
+	  -fno-optimize-sibling-calls to gcc should prevent gcc generating
+	  code which hits this problem, at the cost of a bit of extra runtime
+	  stack usage in some cases.
+
+	  The problem is described in more detail at:
+	      https://bugs.launchpad.net/binutils-linaro/+bug/725126
+
+	  Only Thumb-2 kernels are affected.
+
+	  Unless you are sure your tools don't have this problem, say Y.
+
 config ARM_ASM_UNIFIED
 	bool
 
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index cd56c91..55eca9e 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -105,6 +105,10 @@
 AFLAGS_NOWARN	:=$(call as-option,-Wa$(comma)-mno-warn-deprecated,-Wa$(comma)-W)
 CFLAGS_THUMB2	:=-mthumb $(AFLAGS_AUTOIT) $(AFLAGS_NOWARN)
 AFLAGS_THUMB2	:=$(CFLAGS_THUMB2) -Wa$(comma)-mthumb
+# Work around buggy relocation from gas if requested:
+ifeq ($(CONFIG_THUMB2_AVOID_R_ARM_THM_JUMP11),y)
+CFLAGS_MODULE	+=-fno-optimize-sibling-calls
+endif
 endif
 
 # Need -Uarm for gcc < 3.x
@@ -281,7 +285,7 @@
 zImage Image xipImage bootpImage uImage: vmlinux
 	$(Q)$(MAKE) $(build)=$(boot) MACHINE=$(MACHINE) $(boot)/$@
 
-zinstall install: vmlinux
+zinstall uinstall install: vmlinux
 	$(Q)$(MAKE) $(build)=$(boot) MACHINE=$(MACHINE) $@
 
 # We use MRPROPER_FILES and CLEAN_FILES now
@@ -302,6 +306,7 @@
   echo  '                  (supply initrd image via make variable INITRD=<path>)'
   echo  '  install       - Install uncompressed kernel'
   echo  '  zinstall      - Install compressed kernel'
+  echo  '  uinstall      - Install U-Boot wrapped compressed kernel'
   echo  '                  Install using (your) ~/bin/$(INSTALLKERNEL) or'
   echo  '                  (distribution) /sbin/$(INSTALLKERNEL) or'
   echo  '                  install to $$(INSTALL_PATH) and run lilo'
diff --git a/arch/arm/boot/Makefile b/arch/arm/boot/Makefile
index 4d26f2c..9128fdd 100644
--- a/arch/arm/boot/Makefile
+++ b/arch/arm/boot/Makefile
@@ -99,6 +99,10 @@
 	$(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \
 	$(obj)/zImage System.map "$(INSTALL_PATH)"
 
+uinstall: $(obj)/uImage
+	$(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \
+	$(obj)/uImage System.map "$(INSTALL_PATH)"
+
 zi:
 	$(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \
 	$(obj)/zImage System.map "$(INSTALL_PATH)"
diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile
index 5f3a161..f9f77c6 100644
--- a/arch/arm/boot/compressed/Makefile
+++ b/arch/arm/boot/compressed/Makefile
@@ -98,9 +98,11 @@
 EXTRA_CFLAGS  := -fpic -fno-builtin
 EXTRA_AFLAGS  := -Wa,-march=all
 
+# Provide size of uncompressed kernel to the decompressor via a linker symbol.
+LDFLAGS_vmlinux = --defsym _image_size=$(shell stat -c "%s" $(obj)/../Image)
 # Supply ZRELADDR to the decompressor via a linker symbol.
 ifneq ($(CONFIG_AUTO_ZRELADDR),y)
-LDFLAGS_vmlinux := --defsym zreladdr=$(ZRELADDR)
+LDFLAGS_vmlinux += --defsym zreladdr=$(ZRELADDR)
 endif
 ifeq ($(CONFIG_CPU_ENDIAN_BE8),y)
 LDFLAGS_vmlinux += --be8
diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
index 7193884..3985921 100644
--- a/arch/arm/boot/compressed/head.S
+++ b/arch/arm/boot/compressed/head.S
@@ -128,14 +128,14 @@
 		.arm				@ Always enter in ARM state
 start:
 		.type	start,#function
- THUMB(		adr	r12, BSYM(1f)	)
- THUMB(		bx	r12		)
- THUMB(		.rept	6		)
- ARM(		.rept	8		)
+		.rept	7
 		mov	r0, r0
 		.endr
+   ARM(		mov	r0, r0		)
+   ARM(		b	1f		)
+ THUMB(		adr	r12, BSYM(1f)	)
+ THUMB(		bx	r12		)
 
-		b	1f
 		.word	0x016f2818		@ Magic numbers to help the loader
 		.word	start			@ absolute load/run zImage address
 		.word	_edata			@ zImage end address
@@ -174,9 +174,7 @@
 		 */
 
 		.text
-		adr	r0, LC0
-		ldmia	r0, {r1, r2, r3, r5, r6, r11, ip}
-		ldr	sp, [r0, #28]
+
 #ifdef CONFIG_AUTO_ZRELADDR
 		@ determine final kernel image address
 		mov	r4, pc
@@ -185,35 +183,108 @@
 #else
 		ldr	r4, =zreladdr
 #endif
-		subs	r0, r0, r1		@ calculate the delta offset
 
-						@ if delta is zero, we are
-		beq	not_relocated		@ running at the address we
-						@ were linked at.
+		bl	cache_on
+
+restart:	adr	r0, LC0
+		ldmia	r0, {r1, r2, r3, r5, r6, r9, r11, r12}
+		ldr	sp, [r0, #32]
 
 		/*
-		 * We're running at a different address.  We need to fix
-		 * up various pointers:
-		 *   r5 - zImage base address (_start)
-		 *   r6 - size of decompressed image
-		 *   r11 - GOT start
-		 *   ip - GOT end
+		 * We might be running at a different address.  We need
+		 * to fix up various pointers.
 		 */
-		add	r5, r5, r0
+		sub	r0, r0, r1		@ calculate the delta offset
+		add	r5, r5, r0		@ _start
+		add	r6, r6, r0		@ _edata
+
+#ifndef CONFIG_ZBOOT_ROM
+		/* malloc space is above the relocated stack (64k max) */
+		add	sp, sp, r0
+		add	r10, sp, #0x10000
+#else
+		/*
+		 * With ZBOOT_ROM the bss/stack is non relocatable,
+		 * but someone could still run this code from RAM,
+		 * in which case our reference is _edata.
+		 */
+		mov	r10, r6
+#endif
+
+/*
+ * Check to see if we will overwrite ourselves.
+ *   r4  = final kernel address
+ *   r5  = start of this image
+ *   r9  = size of decompressed image
+ *   r10 = end of this image, including  bss/stack/malloc space if non XIP
+ * We basically want:
+ *   r4 >= r10 -> OK
+ *   r4 + image length <= r5 -> OK
+ */
+		cmp	r4, r10
+		bhs	wont_overwrite
+		add	r10, r4, r9
+		cmp	r10, r5
+		bls	wont_overwrite
+
+/*
+ * Relocate ourselves past the end of the decompressed kernel.
+ *   r5  = start of this image
+ *   r6  = _edata
+ *   r10 = end of the decompressed kernel
+ * Because we always copy ahead, we need to do it from the end and go
+ * backward in case the source and destination overlap.
+ */
+		/* Round up to next 256-byte boundary. */
+		add	r10, r10, #256
+		bic	r10, r10, #255
+
+		sub	r9, r6, r5		@ size to copy
+		add	r9, r9, #31		@ rounded up to a multiple
+		bic	r9, r9, #31		@ ... of 32 bytes
+		add	r6, r9, r5
+		add	r9, r9, r10
+
+1:		ldmdb	r6!, {r0 - r3, r10 - r12, lr}
+		cmp	r6, r5
+		stmdb	r9!, {r0 - r3, r10 - r12, lr}
+		bhi	1b
+
+		/* Preserve offset to relocated code. */
+		sub	r6, r9, r6
+
+		bl	cache_clean_flush
+
+		adr	r0, BSYM(restart)
+		add	r0, r0, r6
+		mov	pc, r0
+
+wont_overwrite:
+/*
+ * If delta is zero, we are running at the address we were linked at.
+ *   r0  = delta
+ *   r2  = BSS start
+ *   r3  = BSS end
+ *   r4  = kernel execution address
+ *   r7  = architecture ID
+ *   r8  = atags pointer
+ *   r11 = GOT start
+ *   r12 = GOT end
+ *   sp  = stack pointer
+ */
+		teq	r0, #0
+		beq	not_relocated
 		add	r11, r11, r0
-		add	ip, ip, r0
+		add	r12, r12, r0
 
 #ifndef CONFIG_ZBOOT_ROM
 		/*
 		 * If we're running fully PIC === CONFIG_ZBOOT_ROM = n,
 		 * we need to fix up pointers into the BSS region.
-		 *   r2 - BSS start
-		 *   r3 - BSS end
-		 *   sp - stack pointer
+		 * Note that the stack pointer has already been fixed up.
 		 */
 		add	r2, r2, r0
 		add	r3, r3, r0
-		add	sp, sp, r0
 
 		/*
 		 * Relocate all entries in the GOT table.
@@ -221,7 +292,7 @@
 1:		ldr	r1, [r11, #0]		@ relocate entries in the GOT
 		add	r1, r1, r0		@ table.  This fixes up the
 		str	r1, [r11], #4		@ C references.
-		cmp	r11, ip
+		cmp	r11, r12
 		blo	1b
 #else
 
@@ -234,7 +305,7 @@
 		cmphs	r3, r1			@ _end < entry
 		addlo	r1, r1, r0		@ table.  This fixes up the
 		str	r1, [r11], #4		@ C references.
-		cmp	r11, ip
+		cmp	r11, r12
 		blo	1b
 #endif
 
@@ -246,76 +317,24 @@
 		cmp	r2, r3
 		blo	1b
 
-		/*
-		 * The C runtime environment should now be setup
-		 * sufficiently.  Turn the cache on, set up some
-		 * pointers, and start decompressing.
-		 */
-		bl	cache_on
-
+/*
+ * The C runtime environment should now be setup sufficiently.
+ * Set up some pointers, and start decompressing.
+ *   r4  = kernel execution address
+ *   r7  = architecture ID
+ *   r8  = atags pointer
+ */
+		mov	r0, r4
 		mov	r1, sp			@ malloc space above stack
 		add	r2, sp, #0x10000	@ 64k max
-
-/*
- * Check to see if we will overwrite ourselves.
- *   r4 = final kernel address
- *   r5 = start of this image
- *   r6 = size of decompressed image
- *   r2 = end of malloc space (and therefore this image)
- * We basically want:
- *   r4 >= r2 -> OK
- *   r4 + image length <= r5 -> OK
- */
-		cmp	r4, r2
-		bhs	wont_overwrite
-		add	r0, r4, r6
-		cmp	r0, r5
-		bls	wont_overwrite
-
-		mov	r5, r2			@ decompress after malloc space
-		mov	r0, r5
 		mov	r3, r7
 		bl	decompress_kernel
-
-		add	r0, r0, #127 + 128	@ alignment + stack
-		bic	r0, r0, #127		@ align the kernel length
-/*
- * r0     = decompressed kernel length
- * r1-r3  = unused
- * r4     = kernel execution address
- * r5     = decompressed kernel start
- * r7     = architecture ID
- * r8     = atags pointer
- * r9-r12,r14 = corrupted
- */
-		add	r1, r5, r0		@ end of decompressed kernel
-		adr	r2, reloc_start
-		ldr	r3, LC1
-		add	r3, r2, r3
-1:		ldmia	r2!, {r9 - r12, r14}	@ copy relocation code
-		stmia	r1!, {r9 - r12, r14}
-		ldmia	r2!, {r9 - r12, r14}
-		stmia	r1!, {r9 - r12, r14}
-		cmp	r2, r3
-		blo	1b
-		mov	sp, r1
-		add	sp, sp, #128		@ relocate the stack
-
 		bl	cache_clean_flush
- ARM(		add	pc, r5, r0		) @ call relocation code
- THUMB(		add	r12, r5, r0		)
- THUMB(		mov	pc, r12			) @ call relocation code
-
-/*
- * We're not in danger of overwriting ourselves.  Do this the simple way.
- *
- * r4     = kernel execution address
- * r7     = architecture ID
- */
-wont_overwrite:	mov	r0, r4
-		mov	r3, r7
-		bl	decompress_kernel
-		b	call_kernel
+		bl	cache_off
+		mov	r0, #0			@ must be zero
+		mov	r1, r7			@ restore architecture number
+		mov	r2, r8			@ restore atags pointer
+		mov	pc, r4			@ call kernel
 
 		.align	2
 		.type	LC0, #object
@@ -323,11 +342,11 @@
 		.word	__bss_start		@ r2
 		.word	_end			@ r3
 		.word	_start			@ r5
-		.word	_image_size		@ r6
+		.word	_edata			@ r6
+		.word	_image_size		@ r9
 		.word	_got_start		@ r11
 		.word	_got_end		@ ip
 		.word	user_stack_end		@ sp
-LC1:		.word	reloc_end - reloc_start
 		.size	LC0, . - LC0
 
 #ifdef CONFIG_ARCH_RPC
@@ -353,7 +372,7 @@
  * On exit,
  *  r0, r1, r2, r3, r9, r10, r12 corrupted
  * This routine must preserve:
- *  r4, r5, r6, r7, r8
+ *  r4, r7, r8
  */
 		.align	5
 cache_on:	mov	r3, #8			@ cache_on function
@@ -551,43 +570,6 @@
 #endif
 
 /*
- * All code following this line is relocatable.  It is relocated by
- * the above code to the end of the decompressed kernel image and
- * executed there.  During this time, we have no stacks.
- *
- * r0     = decompressed kernel length
- * r1-r3  = unused
- * r4     = kernel execution address
- * r5     = decompressed kernel start
- * r7     = architecture ID
- * r8     = atags pointer
- * r9-r12,r14 = corrupted
- */
-		.align	5
-reloc_start:	add	r9, r5, r0
-		sub	r9, r9, #128		@ do not copy the stack
-		debug_reloc_start
-		mov	r1, r4
-1:
-		.rept	4
-		ldmia	r5!, {r0, r2, r3, r10 - r12, r14}	@ relocate kernel
-		stmia	r1!, {r0, r2, r3, r10 - r12, r14}
-		.endr
-
-		cmp	r5, r9
-		blo	1b
-		mov	sp, r1
-		add	sp, sp, #128		@ relocate the stack
-		debug_reloc_end
-
-call_kernel:	bl	cache_clean_flush
-		bl	cache_off
-		mov	r0, #0			@ must be zero
-		mov	r1, r7			@ restore architecture number
-		mov	r2, r8			@ restore atags pointer
-		mov	pc, r4			@ call kernel
-
-/*
  * Here follow the relocatable cache support functions for the
  * various processors.  This is a generic hook for locating an
  * entry and jumping to an instruction at the specified offset
@@ -791,7 +773,7 @@
  * On exit,
  *  r0, r1, r2, r3, r9, r12 corrupted
  * This routine must preserve:
- *  r4, r6, r7
+ *  r4, r7, r8
  */
 		.align	5
 cache_off:	mov	r3, #12			@ cache_off function
@@ -866,7 +848,7 @@
  * On exit,
  *  r1, r2, r3, r9, r10, r11, r12 corrupted
  * This routine must preserve:
- *  r0, r4, r5, r6, r7
+ *  r4, r6, r7, r8
  */
 		.align	5
 cache_clean_flush:
@@ -1088,7 +1070,6 @@
 #endif
 
 		.ltorg
-reloc_end:
 
 		.align
 		.section ".stack", "aw", %nobits
diff --git a/arch/arm/boot/compressed/vmlinux.lds.in b/arch/arm/boot/compressed/vmlinux.lds.in
index 366a924..5309909 100644
--- a/arch/arm/boot/compressed/vmlinux.lds.in
+++ b/arch/arm/boot/compressed/vmlinux.lds.in
@@ -43,9 +43,6 @@
 
   _etext = .;
 
-  /* Assume size of decompressed image is 4x the compressed image */
-  _image_size = (_etext - _text) * 4;
-
   _got_start = .;
   .got			: { *(.got) }
   _got_end = .;
diff --git a/arch/arm/common/gic.c b/arch/arm/common/gic.c
index 2243772..cb6b041 100644
--- a/arch/arm/common/gic.c
+++ b/arch/arm/common/gic.c
@@ -44,6 +44,19 @@
 	void __iomem *cpu_base;
 };
 
+/*
+ * Supported arch specific GIC irq extension.
+ * Default make them NULL.
+ */
+struct irq_chip gic_arch_extn = {
+	.irq_ack	= NULL,
+	.irq_mask	= NULL,
+	.irq_unmask	= NULL,
+	.irq_retrigger	= NULL,
+	.irq_set_type	= NULL,
+	.irq_set_wake	= NULL,
+};
+
 #ifndef MAX_GIC_NR
 #define MAX_GIC_NR	1
 #endif
@@ -74,6 +87,8 @@
 static void gic_ack_irq(struct irq_data *d)
 {
 	spin_lock(&irq_controller_lock);
+	if (gic_arch_extn.irq_ack)
+		gic_arch_extn.irq_ack(d);
 	writel(gic_irq(d), gic_cpu_base(d) + GIC_CPU_EOI);
 	spin_unlock(&irq_controller_lock);
 }
@@ -84,6 +99,8 @@
 
 	spin_lock(&irq_controller_lock);
 	writel(mask, gic_dist_base(d) + GIC_DIST_ENABLE_CLEAR + (gic_irq(d) / 32) * 4);
+	if (gic_arch_extn.irq_mask)
+		gic_arch_extn.irq_mask(d);
 	spin_unlock(&irq_controller_lock);
 }
 
@@ -92,6 +109,8 @@
 	u32 mask = 1 << (d->irq % 32);
 
 	spin_lock(&irq_controller_lock);
+	if (gic_arch_extn.irq_unmask)
+		gic_arch_extn.irq_unmask(d);
 	writel(mask, gic_dist_base(d) + GIC_DIST_ENABLE_SET + (gic_irq(d) / 32) * 4);
 	spin_unlock(&irq_controller_lock);
 }
@@ -116,6 +135,9 @@
 
 	spin_lock(&irq_controller_lock);
 
+	if (gic_arch_extn.irq_set_type)
+		gic_arch_extn.irq_set_type(d, type);
+
 	val = readl(base + GIC_DIST_CONFIG + confoff);
 	if (type == IRQ_TYPE_LEVEL_HIGH)
 		val &= ~confmask;
@@ -141,32 +163,54 @@
 	return 0;
 }
 
+static int gic_retrigger(struct irq_data *d)
+{
+	if (gic_arch_extn.irq_retrigger)
+		return gic_arch_extn.irq_retrigger(d);
+
+	return -ENXIO;
+}
+
 #ifdef CONFIG_SMP
-static int
-gic_set_cpu(struct irq_data *d, const struct cpumask *mask_val, bool force)
+static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val,
+			    bool force)
 {
 	void __iomem *reg = gic_dist_base(d) + GIC_DIST_TARGET + (gic_irq(d) & ~3);
 	unsigned int shift = (d->irq % 4) * 8;
 	unsigned int cpu = cpumask_first(mask_val);
-	u32 val;
-	struct irq_desc *desc;
+	u32 val, mask, bit;
+
+	if (cpu >= 8)
+		return -EINVAL;
+
+	mask = 0xff << shift;
+	bit = 1 << (cpu + shift);
 
 	spin_lock(&irq_controller_lock);
-	desc = irq_to_desc(d->irq);
-	if (desc == NULL) {
-		spin_unlock(&irq_controller_lock);
-		return -EINVAL;
-	}
 	d->node = cpu;
-	val = readl(reg) & ~(0xff << shift);
-	val |= 1 << (cpu + shift);
-	writel(val, reg);
+	val = readl(reg) & ~mask;
+	writel(val | bit, reg);
 	spin_unlock(&irq_controller_lock);
 
 	return 0;
 }
 #endif
 
+#ifdef CONFIG_PM
+static int gic_set_wake(struct irq_data *d, unsigned int on)
+{
+	int ret = -ENXIO;
+
+	if (gic_arch_extn.irq_set_wake)
+		ret = gic_arch_extn.irq_set_wake(d, on);
+
+	return ret;
+}
+
+#else
+#define gic_set_wake	NULL
+#endif
+
 static void gic_handle_cascade_irq(unsigned int irq, struct irq_desc *desc)
 {
 	struct gic_chip_data *chip_data = get_irq_data(irq);
@@ -202,9 +246,11 @@
 	.irq_mask		= gic_mask_irq,
 	.irq_unmask		= gic_unmask_irq,
 	.irq_set_type		= gic_set_type,
+	.irq_retrigger		= gic_retrigger,
 #ifdef CONFIG_SMP
-	.irq_set_affinity	= gic_set_cpu,
+	.irq_set_affinity	= gic_set_affinity,
 #endif
+	.irq_set_wake		= gic_set_wake,
 };
 
 void __init gic_cascade_irq(unsigned int gic_nr, unsigned int irq)
diff --git a/arch/arm/include/asm/a.out-core.h b/arch/arm/include/asm/a.out-core.h
index 93d04ac..92f10cb 100644
--- a/arch/arm/include/asm/a.out-core.h
+++ b/arch/arm/include/asm/a.out-core.h
@@ -32,11 +32,7 @@
 	dump->u_dsize = (tsk->mm->brk - tsk->mm->start_data + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	dump->u_ssize = 0;
 
-	dump->u_debugreg[0] = tsk->thread.debug.bp[0].address;
-	dump->u_debugreg[1] = tsk->thread.debug.bp[1].address;
-	dump->u_debugreg[2] = tsk->thread.debug.bp[0].insn.arm;
-	dump->u_debugreg[3] = tsk->thread.debug.bp[1].insn.arm;
-	dump->u_debugreg[4] = tsk->thread.debug.nsaved;
+	memset(dump->u_debugreg, 0, sizeof(dump->u_debugreg));
 
 	if (dump->start_stack < 0x04000000)
 		dump->u_ssize = (0x04000000 - dump->start_stack) >> PAGE_SHIFT;
diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h
index 20ae96c..ed5bc9e 100644
--- a/arch/arm/include/asm/cputype.h
+++ b/arch/arm/include/asm/cputype.h
@@ -23,6 +23,8 @@
 #define CPUID_EXT_ISAR4	"c2, 4"
 #define CPUID_EXT_ISAR5	"c2, 5"
 
+extern unsigned int processor_id;
+
 #ifdef CONFIG_CPU_CP15
 #define read_cpuid(reg)							\
 	({								\
@@ -43,7 +45,6 @@
 		__val;							\
 	})
 #else
-extern unsigned int processor_id;
 #define read_cpuid(reg) (processor_id)
 #define read_cpuid_ext(reg) 0
 #endif
diff --git a/arch/arm/include/asm/hardware/gic.h b/arch/arm/include/asm/hardware/gic.h
index 84557d3..0691f9d 100644
--- a/arch/arm/include/asm/hardware/gic.h
+++ b/arch/arm/include/asm/hardware/gic.h
@@ -34,6 +34,7 @@
 
 #ifndef __ASSEMBLY__
 extern void __iomem *gic_cpu_base_addr;
+extern struct irq_chip gic_arch_extn;
 
 void gic_init(unsigned int, unsigned int, void __iomem *, void __iomem *);
 void gic_secondary_init(unsigned int);
diff --git a/arch/arm/include/asm/highmem.h b/arch/arm/include/asm/highmem.h
index 7080e2c..a4edd19 100644
--- a/arch/arm/include/asm/highmem.h
+++ b/arch/arm/include/asm/highmem.h
@@ -19,13 +19,38 @@
 
 extern pte_t *pkmap_page_table;
 
-#define ARCH_NEEDS_KMAP_HIGH_GET
-
 extern void *kmap_high(struct page *page);
-extern void *kmap_high_get(struct page *page);
 extern void kunmap_high(struct page *page);
 
 /*
+ * The reason for kmap_high_get() is to ensure that the currently kmap'd
+ * page usage count does not decrease to zero while we're using its
+ * existing virtual mapping in an atomic context.  With a VIVT cache this
+ * is essential to do, but with a VIPT cache this is only an optimization
+ * so not to pay the price of establishing a second mapping if an existing
+ * one can be used.  However, on platforms without hardware TLB maintenance
+ * broadcast, we simply cannot use ARCH_NEEDS_KMAP_HIGH_GET at all since
+ * the locking involved must also disable IRQs which is incompatible with
+ * the IPI mechanism used by global TLB operations.
+ */
+#define ARCH_NEEDS_KMAP_HIGH_GET
+#if defined(CONFIG_SMP) && defined(CONFIG_CPU_TLB_V6)
+#undef ARCH_NEEDS_KMAP_HIGH_GET
+#if defined(CONFIG_HIGHMEM) && defined(CONFIG_CPU_CACHE_VIVT)
+#error "The sum of features in your kernel config cannot be supported together"
+#endif
+#endif
+
+#ifdef ARCH_NEEDS_KMAP_HIGH_GET
+extern void *kmap_high_get(struct page *page);
+#else
+static inline void *kmap_high_get(struct page *page)
+{
+	return NULL;
+}
+#endif
+
+/*
  * The following functions are already defined by <linux/highmem.h>
  * when CONFIG_HIGHMEM is not set.
  */
diff --git a/arch/arm/include/asm/mach/irq.h b/arch/arm/include/asm/mach/irq.h
index 22ac140..febe495 100644
--- a/arch/arm/include/asm/mach/irq.h
+++ b/arch/arm/include/asm/mach/irq.h
@@ -34,4 +34,35 @@
 	raw_spin_unlock(&desc->lock);			\
 } while(0)
 
+#ifndef __ASSEMBLY__
+/*
+ * Entry/exit functions for chained handlers where the primary IRQ chip
+ * may implement either fasteoi or level-trigger flow control.
+ */
+static inline void chained_irq_enter(struct irq_chip *chip,
+				     struct irq_desc *desc)
+{
+	/* FastEOI controllers require no action on entry. */
+	if (chip->irq_eoi)
+		return;
+
+	if (chip->irq_mask_ack) {
+		chip->irq_mask_ack(&desc->irq_data);
+	} else {
+		chip->irq_mask(&desc->irq_data);
+		if (chip->irq_ack)
+			chip->irq_ack(&desc->irq_data);
+	}
+}
+
+static inline void chained_irq_exit(struct irq_chip *chip,
+				    struct irq_desc *desc)
+{
+	if (chip->irq_eoi)
+		chip->irq_eoi(&desc->irq_data);
+	else
+		chip->irq_unmask(&desc->irq_data);
+}
+#endif
+
 #endif
diff --git a/arch/arm/include/asm/outercache.h b/arch/arm/include/asm/outercache.h
index fc19009..348d513 100644
--- a/arch/arm/include/asm/outercache.h
+++ b/arch/arm/include/asm/outercache.h
@@ -31,6 +31,7 @@
 #ifdef CONFIG_OUTER_CACHE_SYNC
 	void (*sync)(void);
 #endif
+	void (*set_debug)(unsigned long);
 };
 
 #ifdef CONFIG_OUTER_CACHE
diff --git a/arch/arm/include/asm/processor.h b/arch/arm/include/asm/processor.h
index 7a1f03c..b2d9df5 100644
--- a/arch/arm/include/asm/processor.h
+++ b/arch/arm/include/asm/processor.h
@@ -29,19 +29,7 @@
 #define STACK_TOP_MAX	TASK_SIZE
 #endif
 
-union debug_insn {
-	u32	arm;
-	u16	thumb;
-};
-
-struct debug_entry {
-	u32			address;
-	union debug_insn	insn;
-};
-
 struct debug_info {
-	int			nsaved;
-	struct debug_entry	bp[2];
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 	struct perf_event	*hbp[ARM_MAX_HBP_SLOTS];
 #endif
diff --git a/arch/arm/include/asm/ptrace.h b/arch/arm/include/asm/ptrace.h
index 783d50f..a8ff22b 100644
--- a/arch/arm/include/asm/ptrace.h
+++ b/arch/arm/include/asm/ptrace.h
@@ -130,8 +130,6 @@
 
 #ifdef __KERNEL__
 
-#define arch_has_single_step()	(1)
-
 #define user_mode(regs)	\
 	(((regs)->ARM_cpsr & 0xf) == 0)
 
diff --git a/arch/arm/include/asm/traps.h b/arch/arm/include/asm/traps.h
index 1b960d5..f90756d 100644
--- a/arch/arm/include/asm/traps.h
+++ b/arch/arm/include/asm/traps.h
@@ -45,6 +45,7 @@
 
 extern void __init early_trap_init(void);
 extern void dump_backtrace_entry(unsigned long where, unsigned long from, unsigned long frame);
+extern void ptrace_break(struct task_struct *tsk, struct pt_regs *regs);
 
 extern void *vectors_page;
 
diff --git a/arch/arm/include/asm/user.h b/arch/arm/include/asm/user.h
index 05ac4b0..35917b3 100644
--- a/arch/arm/include/asm/user.h
+++ b/arch/arm/include/asm/user.h
@@ -71,7 +71,7 @@
 				/* the registers. */
   unsigned long magic;		/* To uniquely identify a core file */
   char u_comm[32];		/* User command that was responsible */
-  int u_debugreg[8];
+  int u_debugreg[8];		/* No longer used */
   struct user_fp u_fp;		/* FP state */
   struct user_fp_struct * u_fp0;/* Used by gdb to help find the values for */
   				/* the FP registers. */
diff --git a/arch/arm/kernel/bios32.c b/arch/arm/kernel/bios32.c
index c6273a3..d86fcd4 100644
--- a/arch/arm/kernel/bios32.c
+++ b/arch/arm/kernel/bios32.c
@@ -583,6 +583,11 @@
 			 * Assign resources.
 			 */
 			pci_bus_assign_resources(bus);
+
+			/*
+			 * Enable bridges
+			 */
+			pci_enable_bridges(bus);
 		}
 
 		/*
diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S
index 8f57515..c84b57d 100644
--- a/arch/arm/kernel/head-common.S
+++ b/arch/arm/kernel/head-common.S
@@ -25,83 +25,6 @@
  * machine ID for example).
  */
 	__HEAD
-__error_a:
-#ifdef CONFIG_DEBUG_LL
-	mov	r4, r1				@ preserve machine ID
-	adr	r0, str_a1
-	bl	printascii
-	mov	r0, r4
-	bl	printhex8
-	adr	r0, str_a2
-	bl	printascii
-	adr	r3, __lookup_machine_type_data
-	ldmia	r3, {r4, r5, r6}		@ get machine desc list
-	sub	r4, r3, r4			@ get offset between virt&phys
-	add	r5, r5, r4			@ convert virt addresses to
-	add	r6, r6, r4			@ physical address space
-1:	ldr	r0, [r5, #MACHINFO_TYPE]	@ get machine type
-	bl	printhex8
-	mov	r0, #'\t'
-	bl	printch
-	ldr     r0, [r5, #MACHINFO_NAME]	@ get machine name
-	add	r0, r0, r4
-	bl	printascii
-	mov	r0, #'\n'
-	bl	printch
-	add	r5, r5, #SIZEOF_MACHINE_DESC	@ next machine_desc
-	cmp	r5, r6
-	blo	1b
-	adr	r0, str_a3
-	bl	printascii
-	b	__error
-ENDPROC(__error_a)
-
-str_a1:	.asciz	"\nError: unrecognized/unsupported machine ID (r1 = 0x"
-str_a2:	.asciz	").\n\nAvailable machine support:\n\nID (hex)\tNAME\n"
-str_a3:	.asciz	"\nPlease check your kernel config and/or bootloader.\n"
-	.align
-#else
-	b	__error
-#endif
-
-/*
- * Lookup machine architecture in the linker-build list of architectures.
- * Note that we can't use the absolute addresses for the __arch_info
- * lists since we aren't running with the MMU on (and therefore, we are
- * not in the correct address space).  We have to calculate the offset.
- *
- *  r1 = machine architecture number
- * Returns:
- *  r3, r4, r6 corrupted
- *  r5 = mach_info pointer in physical address space
- */
-__lookup_machine_type:
-	adr	r3, __lookup_machine_type_data
-	ldmia	r3, {r4, r5, r6}
-	sub	r3, r3, r4			@ get offset between virt&phys
-	add	r5, r5, r3			@ convert virt addresses to
-	add	r6, r6, r3			@ physical address space
-1:	ldr	r3, [r5, #MACHINFO_TYPE]	@ get machine type
-	teq	r3, r1				@ matches loader number?
-	beq	2f				@ found
-	add	r5, r5, #SIZEOF_MACHINE_DESC	@ next machine_desc
-	cmp	r5, r6
-	blo	1b
-	mov	r5, #0				@ unknown machine
-2:	mov	pc, lr
-ENDPROC(__lookup_machine_type)
-
-/*
- * Look in arch/arm/kernel/arch.[ch] for information about the
- * __arch_info structures.
- */
-	.align	2
-	.type	__lookup_machine_type_data, %object
-__lookup_machine_type_data:
-	.long	.
-	.long	__arch_info_begin
-	.long	__arch_info_end
-	.size	__lookup_machine_type_data, . - __lookup_machine_type_data
 
 /* Determine validity of the r2 atags pointer.  The heuristic requires
  * that the pointer be aligned, in the first 16k of physical RAM and
@@ -109,8 +32,6 @@
  * of this function may be more lenient with the physical address and
  * may also be able to move the ATAGS block if necessary.
  *
- * r8  = machinfo
- *
  * Returns:
  *  r2 either valid atags pointer, or zero
  *  r5, r6 corrupted
@@ -185,17 +106,6 @@
 	.size	__mmap_switched_data, . - __mmap_switched_data
 
 /*
- * This provides a C-API version of __lookup_machine_type
- */
-ENTRY(lookup_machine_type)
-	stmfd	sp!, {r4 - r6, lr}
-	mov	r1, r0
-	bl	__lookup_machine_type
-	mov	r0, r5
-	ldmfd	sp!, {r4 - r6, pc}
-ENDPROC(lookup_machine_type)
-
-/*
  * This provides a C-API version of __lookup_processor_type
  */
 ENTRY(lookup_processor_type)
diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S
index 814ce1a..6b1e0ad 100644
--- a/arch/arm/kernel/head-nommu.S
+++ b/arch/arm/kernel/head-nommu.S
@@ -44,9 +44,6 @@
 	bl	__lookup_processor_type		@ r5=procinfo r9=cpuid
 	movs	r10, r5				@ invalid processor (r5=0)?
 	beq	__error_p				@ yes, error 'p'
-	bl	__lookup_machine_type		@ r5=machinfo
-	movs	r8, r5				@ invalid machine (r5=0)?
-	beq	__error_a			@ yes, error 'a'
 
 	adr	lr, BSYM(__after_proc_init)	@ return (PIC) address
  ARM(	add	pc, r10, #PROCINFO_INITFUNC	)
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index f06ff9f..60fe279 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -87,14 +87,10 @@
 	movs	r10, r5				@ invalid processor (r5=0)?
  THUMB( it	eq )		@ force fixup-able long branch encoding
 	beq	__error_p			@ yes, error 'p'
-	bl	__lookup_machine_type		@ r5=machinfo
-	movs	r8, r5				@ invalid machine (r5=0)?
- THUMB( it	eq )		@ force fixup-able long branch encoding
-	beq	__error_a			@ yes, error 'a'
 
 	/*
 	 * r1 = machine no, r2 = atags,
-	 * r8 = machinfo, r9 = cpuid, r10 = procinfo
+	 * r9 = cpuid, r10 = procinfo
 	 */
 	bl	__vet_atags
 #ifdef CONFIG_SMP_ON_UP
@@ -105,7 +101,7 @@
 	/*
 	 * The following calls CPU specific code in a position independent
 	 * manner.  See arch/arm/mm/proc-*.S for details.  r10 = base of
-	 * xxx_proc_info structure selected by __lookup_machine_type
+	 * xxx_proc_info structure selected by __lookup_processor_type
 	 * above.  On return, the CPU will be ready for the MMU to be
 	 * turned on, and r0 will hold the CPU control register value.
 	 */
@@ -124,7 +120,6 @@
  * amount which are required to get the kernel running, which
  * generally means mapping in the kernel code.
  *
- * r8  = machinfo
  * r9  = cpuid
  * r10 = procinfo
  *
diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c
index 28536e3..3535d37 100644
--- a/arch/arm/kernel/irq.c
+++ b/arch/arm/kernel/irq.c
@@ -179,14 +179,21 @@
 
 #ifdef CONFIG_HOTPLUG_CPU
 
-static void route_irq(struct irq_desc *desc, unsigned int irq, unsigned int cpu)
+static bool migrate_one_irq(struct irq_data *d)
 {
-	pr_debug("IRQ%u: moving from cpu%u to cpu%u\n", irq, desc->irq_data.node, cpu);
+	unsigned int cpu = cpumask_any_and(d->affinity, cpu_online_mask);
+	bool ret = false;
 
-	raw_spin_lock_irq(&desc->lock);
-	desc->irq_data.chip->irq_set_affinity(&desc->irq_data,
-					      cpumask_of(cpu), false);
-	raw_spin_unlock_irq(&desc->lock);
+	if (cpu >= nr_cpu_ids) {
+		cpu = cpumask_any(cpu_online_mask);
+		ret = true;
+	}
+
+	pr_debug("IRQ%u: moving from cpu%u to cpu%u\n", d->irq, d->node, cpu);
+
+	d->chip->irq_set_affinity(d, cpumask_of(cpu), true);
+
+	return ret;
 }
 
 /*
@@ -198,25 +205,30 @@
 {
 	unsigned int i, cpu = smp_processor_id();
 	struct irq_desc *desc;
+	unsigned long flags;
+
+	local_irq_save(flags);
 
 	for_each_irq_desc(i, desc) {
 		struct irq_data *d = &desc->irq_data;
+		bool affinity_broken = false;
 
-		if (d->node == cpu) {
-			unsigned int newcpu = cpumask_any_and(d->affinity,
-							      cpu_online_mask);
-			if (newcpu >= nr_cpu_ids) {
-				if (printk_ratelimit())
-					printk(KERN_INFO "IRQ%u no longer affine to CPU%u\n",
-					       i, cpu);
+		raw_spin_lock(&desc->lock);
+		do {
+			if (desc->action == NULL)
+				break;
 
-				cpumask_setall(d->affinity);
-				newcpu = cpumask_any_and(d->affinity,
-							 cpu_online_mask);
-			}
+			if (d->node != cpu)
+				break;
 
-			route_irq(desc, i, newcpu);
-		}
+			affinity_broken = migrate_one_irq(d);
+		} while (0);
+		raw_spin_unlock(&desc->lock);
+
+		if (affinity_broken && printk_ratelimit())
+			pr_warning("IRQ%u no longer affine to CPU%u\n", i, cpu);
 	}
+
+	local_irq_restore(flags);
 }
 #endif /* CONFIG_HOTPLUG_CPU */
diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c
index 6d4105e..6fcf22c 100644
--- a/arch/arm/kernel/module.c
+++ b/arch/arm/kernel/module.c
@@ -76,6 +76,7 @@
 	for (i = 0; i < relsec->sh_size / sizeof(Elf32_Rel); i++, rel++) {
 		unsigned long loc;
 		Elf32_Sym *sym;
+		const char *symname;
 		s32 offset;
 #ifdef CONFIG_THUMB2_KERNEL
 		u32 upper, lower, sign, j1, j2;
@@ -83,18 +84,18 @@
 
 		offset = ELF32_R_SYM(rel->r_info);
 		if (offset < 0 || offset > (symsec->sh_size / sizeof(Elf32_Sym))) {
-			printk(KERN_ERR "%s: bad relocation, section %d reloc %d\n",
+			pr_err("%s: section %u reloc %u: bad relocation sym offset\n",
 				module->name, relindex, i);
 			return -ENOEXEC;
 		}
 
 		sym = ((Elf32_Sym *)symsec->sh_addr) + offset;
+		symname = strtab + sym->st_name;
 
 		if (rel->r_offset < 0 || rel->r_offset > dstsec->sh_size - sizeof(u32)) {
-			printk(KERN_ERR "%s: out of bounds relocation, "
-				"section %d reloc %d offset %d size %d\n",
-				module->name, relindex, i, rel->r_offset,
-				dstsec->sh_size);
+			pr_err("%s: section %u reloc %u sym '%s': out of bounds relocation, offset %d size %u\n",
+			       module->name, relindex, i, symname,
+			       rel->r_offset, dstsec->sh_size);
 			return -ENOEXEC;
 		}
 
@@ -120,10 +121,10 @@
 			if (offset & 3 ||
 			    offset <= (s32)0xfe000000 ||
 			    offset >= (s32)0x02000000) {
-				printk(KERN_ERR
-				       "%s: relocation out of range, section "
-				       "%d reloc %d sym '%s'\n", module->name,
-				       relindex, i, strtab + sym->st_name);
+				pr_err("%s: section %u reloc %u sym '%s': relocation %u out of range (%#lx -> %#x)\n",
+				       module->name, relindex, i, symname,
+				       ELF32_R_TYPE(rel->r_info), loc,
+				       sym->st_value);
 				return -ENOEXEC;
 			}
 
@@ -196,10 +197,10 @@
 			if (!(offset & 1) ||
 			    offset <= (s32)0xff000000 ||
 			    offset >= (s32)0x01000000) {
-				printk(KERN_ERR
-				       "%s: relocation out of range, section "
-				       "%d reloc %d sym '%s'\n", module->name,
-				       relindex, i, strtab + sym->st_name);
+				pr_err("%s: section %u reloc %u sym '%s': relocation %u out of range (%#lx -> %#x)\n",
+				       module->name, relindex, i, symname,
+				       ELF32_R_TYPE(rel->r_info), loc,
+				       sym->st_value);
 				return -ENOEXEC;
 			}
 
diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index b13e70f..2bf27f3 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -26,8 +26,6 @@
 #include <asm/system.h>
 #include <asm/traps.h>
 
-#include "ptrace.h"
-
 #define REG_PC	15
 #define REG_PSR	16
 /*
@@ -184,389 +182,12 @@
 	return ret;
 }
 
-static inline int
-read_u32(struct task_struct *task, unsigned long addr, u32 *res)
-{
-	int ret;
-
-	ret = access_process_vm(task, addr, res, sizeof(*res), 0);
-
-	return ret == sizeof(*res) ? 0 : -EIO;
-}
-
-static inline int
-read_instr(struct task_struct *task, unsigned long addr, u32 *res)
-{
-	int ret;
-
-	if (addr & 1) {
-		u16 val;
-		ret = access_process_vm(task, addr & ~1, &val, sizeof(val), 0);
-		ret = ret == sizeof(val) ? 0 : -EIO;
-		*res = val;
-	} else {
-		u32 val;
-		ret = access_process_vm(task, addr & ~3, &val, sizeof(val), 0);
-		ret = ret == sizeof(val) ? 0 : -EIO;
-		*res = val;
-	}
-	return ret;
-}
-
-/*
- * Get value of register `rn' (in the instruction)
- */
-static unsigned long
-ptrace_getrn(struct task_struct *child, unsigned long insn)
-{
-	unsigned int reg = (insn >> 16) & 15;
-	unsigned long val;
-
-	val = get_user_reg(child, reg);
-	if (reg == 15)
-		val += 8;
-
-	return val;
-}
-
-/*
- * Get value of operand 2 (in an ALU instruction)
- */
-static unsigned long
-ptrace_getaluop2(struct task_struct *child, unsigned long insn)
-{
-	unsigned long val;
-	int shift;
-	int type;
-
-	if (insn & 1 << 25) {
-		val = insn & 255;
-		shift = (insn >> 8) & 15;
-		type = 3;
-	} else {
-		val = get_user_reg (child, insn & 15);
-
-		if (insn & (1 << 4))
-			shift = (int)get_user_reg (child, (insn >> 8) & 15);
-		else
-			shift = (insn >> 7) & 31;
-
-		type = (insn >> 5) & 3;
-	}
-
-	switch (type) {
-	case 0:	val <<= shift;	break;
-	case 1:	val >>= shift;	break;
-	case 2:
-		val = (((signed long)val) >> shift);
-		break;
-	case 3:
- 		val = (val >> shift) | (val << (32 - shift));
-		break;
-	}
-	return val;
-}
-
-/*
- * Get value of operand 2 (in a LDR instruction)
- */
-static unsigned long
-ptrace_getldrop2(struct task_struct *child, unsigned long insn)
-{
-	unsigned long val;
-	int shift;
-	int type;
-
-	val = get_user_reg(child, insn & 15);
-	shift = (insn >> 7) & 31;
-	type = (insn >> 5) & 3;
-
-	switch (type) {
-	case 0:	val <<= shift;	break;
-	case 1:	val >>= shift;	break;
-	case 2:
-		val = (((signed long)val) >> shift);
-		break;
-	case 3:
- 		val = (val >> shift) | (val << (32 - shift));
-		break;
-	}
-	return val;
-}
-
-#define OP_MASK	0x01e00000
-#define OP_AND	0x00000000
-#define OP_EOR	0x00200000
-#define OP_SUB	0x00400000
-#define OP_RSB	0x00600000
-#define OP_ADD	0x00800000
-#define OP_ADC	0x00a00000
-#define OP_SBC	0x00c00000
-#define OP_RSC	0x00e00000
-#define OP_ORR	0x01800000
-#define OP_MOV	0x01a00000
-#define OP_BIC	0x01c00000
-#define OP_MVN	0x01e00000
-
-static unsigned long
-get_branch_address(struct task_struct *child, unsigned long pc, unsigned long insn)
-{
-	u32 alt = 0;
-
-	switch (insn & 0x0e000000) {
-	case 0x00000000:
-	case 0x02000000: {
-		/*
-		 * data processing
-		 */
-		long aluop1, aluop2, ccbit;
-
-	        if ((insn & 0x0fffffd0) == 0x012fff10) {
-		        /*
-			 * bx or blx
-			 */
-			alt = get_user_reg(child, insn & 15);
-			break;
-		}
-
-
-		if ((insn & 0xf000) != 0xf000)
-			break;
-
-		aluop1 = ptrace_getrn(child, insn);
-		aluop2 = ptrace_getaluop2(child, insn);
-		ccbit  = get_user_reg(child, REG_PSR) & PSR_C_BIT ? 1 : 0;
-
-		switch (insn & OP_MASK) {
-		case OP_AND: alt = aluop1 & aluop2;		break;
-		case OP_EOR: alt = aluop1 ^ aluop2;		break;
-		case OP_SUB: alt = aluop1 - aluop2;		break;
-		case OP_RSB: alt = aluop2 - aluop1;		break;
-		case OP_ADD: alt = aluop1 + aluop2;		break;
-		case OP_ADC: alt = aluop1 + aluop2 + ccbit;	break;
-		case OP_SBC: alt = aluop1 - aluop2 + ccbit;	break;
-		case OP_RSC: alt = aluop2 - aluop1 + ccbit;	break;
-		case OP_ORR: alt = aluop1 | aluop2;		break;
-		case OP_MOV: alt = aluop2;			break;
-		case OP_BIC: alt = aluop1 & ~aluop2;		break;
-		case OP_MVN: alt = ~aluop2;			break;
-		}
-		break;
-	}
-
-	case 0x04000000:
-	case 0x06000000:
-		/*
-		 * ldr
-		 */
-		if ((insn & 0x0010f000) == 0x0010f000) {
-			unsigned long base;
-
-			base = ptrace_getrn(child, insn);
-			if (insn & 1 << 24) {
-				long aluop2;
-
-				if (insn & 0x02000000)
-					aluop2 = ptrace_getldrop2(child, insn);
-				else
-					aluop2 = insn & 0xfff;
-
-				if (insn & 1 << 23)
-					base += aluop2;
-				else
-					base -= aluop2;
-			}
-			read_u32(child, base, &alt);
-		}
-		break;
-
-	case 0x08000000:
-		/*
-		 * ldm
-		 */
-		if ((insn & 0x00108000) == 0x00108000) {
-			unsigned long base;
-			unsigned int nr_regs;
-
-			if (insn & (1 << 23)) {
-				nr_regs = hweight16(insn & 65535) << 2;
-
-				if (!(insn & (1 << 24)))
-					nr_regs -= 4;
-			} else {
-				if (insn & (1 << 24))
-					nr_regs = -4;
-				else
-					nr_regs = 0;
-			}
-
-			base = ptrace_getrn(child, insn);
-
-			read_u32(child, base + nr_regs, &alt);
-			break;
-		}
-		break;
-
-	case 0x0a000000: {
-		/*
-		 * bl or b
-		 */
-		signed long displ;
-		/* It's a branch/branch link: instead of trying to
-		 * figure out whether the branch will be taken or not,
-		 * we'll put a breakpoint at both locations.  This is
-		 * simpler, more reliable, and probably not a whole lot
-		 * slower than the alternative approach of emulating the
-		 * branch.
-		 */
-		displ = (insn & 0x00ffffff) << 8;
-		displ = (displ >> 6) + 8;
-		if (displ != 0 && displ != 4)
-			alt = pc + displ;
-	    }
-	    break;
-	}
-
-	return alt;
-}
-
-static int
-swap_insn(struct task_struct *task, unsigned long addr,
-	  void *old_insn, void *new_insn, int size)
-{
-	int ret;
-
-	ret = access_process_vm(task, addr, old_insn, size, 0);
-	if (ret == size)
-		ret = access_process_vm(task, addr, new_insn, size, 1);
-	return ret;
-}
-
-static void
-add_breakpoint(struct task_struct *task, struct debug_info *dbg, unsigned long addr)
-{
-	int nr = dbg->nsaved;
-
-	if (nr < 2) {
-		u32 new_insn = BREAKINST_ARM;
-		int res;
-
-		res = swap_insn(task, addr, &dbg->bp[nr].insn, &new_insn, 4);
-
-		if (res == 4) {
-			dbg->bp[nr].address = addr;
-			dbg->nsaved += 1;
-		}
-	} else
-		printk(KERN_ERR "ptrace: too many breakpoints\n");
-}
-
-/*
- * Clear one breakpoint in the user program.  We copy what the hardware
- * does and use bit 0 of the address to indicate whether this is a Thumb
- * breakpoint or an ARM breakpoint.
- */
-static void clear_breakpoint(struct task_struct *task, struct debug_entry *bp)
-{
-	unsigned long addr = bp->address;
-	union debug_insn old_insn;
-	int ret;
-
-	if (addr & 1) {
-		ret = swap_insn(task, addr & ~1, &old_insn.thumb,
-				&bp->insn.thumb, 2);
-
-		if (ret != 2 || old_insn.thumb != BREAKINST_THUMB)
-			printk(KERN_ERR "%s:%d: corrupted Thumb breakpoint at "
-				"0x%08lx (0x%04x)\n", task->comm,
-				task_pid_nr(task), addr, old_insn.thumb);
-	} else {
-		ret = swap_insn(task, addr & ~3, &old_insn.arm,
-				&bp->insn.arm, 4);
-
-		if (ret != 4 || old_insn.arm != BREAKINST_ARM)
-			printk(KERN_ERR "%s:%d: corrupted ARM breakpoint at "
-				"0x%08lx (0x%08x)\n", task->comm,
-				task_pid_nr(task), addr, old_insn.arm);
-	}
-}
-
-void ptrace_set_bpt(struct task_struct *child)
-{
-	struct pt_regs *regs;
-	unsigned long pc;
-	u32 insn;
-	int res;
-
-	regs = task_pt_regs(child);
-	pc = instruction_pointer(regs);
-
-	if (thumb_mode(regs)) {
-		printk(KERN_WARNING "ptrace: can't handle thumb mode\n");
-		return;
-	}
-
-	res = read_instr(child, pc, &insn);
-	if (!res) {
-		struct debug_info *dbg = &child->thread.debug;
-		unsigned long alt;
-
-		dbg->nsaved = 0;
-
-		alt = get_branch_address(child, pc, insn);
-		if (alt)
-			add_breakpoint(child, dbg, alt);
-
-		/*
-		 * Note that we ignore the result of setting the above
-		 * breakpoint since it may fail.  When it does, this is
-		 * not so much an error, but a forewarning that we may
-		 * be receiving a prefetch abort shortly.
-		 *
-		 * If we don't set this breakpoint here, then we can
-		 * lose control of the thread during single stepping.
-		 */
-		if (!alt || predicate(insn) != PREDICATE_ALWAYS)
-			add_breakpoint(child, dbg, pc + 4);
-	}
-}
-
-/*
- * Ensure no single-step breakpoint is pending.  Returns non-zero
- * value if child was being single-stepped.
- */
-void ptrace_cancel_bpt(struct task_struct *child)
-{
-	int i, nsaved = child->thread.debug.nsaved;
-
-	child->thread.debug.nsaved = 0;
-
-	if (nsaved > 2) {
-		printk("ptrace_cancel_bpt: bogus nsaved: %d!\n", nsaved);
-		nsaved = 2;
-	}
-
-	for (i = 0; i < nsaved; i++)
-		clear_breakpoint(child, &child->thread.debug.bp[i]);
-}
-
-void user_disable_single_step(struct task_struct *task)
-{
-	task->ptrace &= ~PT_SINGLESTEP;
-	ptrace_cancel_bpt(task);
-}
-
-void user_enable_single_step(struct task_struct *task)
-{
-	task->ptrace |= PT_SINGLESTEP;
-}
-
 /*
  * Called by kernel/ptrace.c when detaching..
  */
 void ptrace_disable(struct task_struct *child)
 {
-	user_disable_single_step(child);
+	/* Nothing to do. */
 }
 
 /*
@@ -576,8 +197,6 @@
 {
 	siginfo_t info;
 
-	ptrace_cancel_bpt(tsk);
-
 	info.si_signo = SIGTRAP;
 	info.si_errno = 0;
 	info.si_code  = TRAP_BRKPT;
diff --git a/arch/arm/kernel/ptrace.h b/arch/arm/kernel/ptrace.h
deleted file mode 100644
index 3926605..0000000
--- a/arch/arm/kernel/ptrace.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- *  linux/arch/arm/kernel/ptrace.h
- *
- *  Copyright (C) 2000-2003 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#include <linux/ptrace.h>
-
-extern void ptrace_cancel_bpt(struct task_struct *);
-extern void ptrace_set_bpt(struct task_struct *);
-extern void ptrace_break(struct task_struct *, struct pt_regs *);
-
-/*
- * Send SIGTRAP if we're single-stepping
- */
-static inline void single_step_trap(struct task_struct *task)
-{
-	if (task->ptrace & PT_SINGLESTEP) {
-		ptrace_cancel_bpt(task);
-		send_sig(SIGTRAP, task, 1);
-	}
-}
-
-static inline void single_step_clear(struct task_struct *task)
-{
-	if (task->ptrace & PT_SINGLESTEP)
-		ptrace_cancel_bpt(task);
-}
-
-static inline void single_step_set(struct task_struct *task)
-{
-	if (task->ptrace & PT_SINGLESTEP)
-		ptrace_set_bpt(task);
-}
diff --git a/arch/arm/kernel/return_address.c b/arch/arm/kernel/return_address.c
index df246da..0b13a72 100644
--- a/arch/arm/kernel/return_address.c
+++ b/arch/arm/kernel/return_address.c
@@ -9,6 +9,7 @@
  * the Free Software Foundation.
  */
 #include <linux/module.h>
+#include <linux/ftrace.h>
 
 #if defined(CONFIG_FRAME_POINTER) && !defined(CONFIG_ARM_UNWIND)
 #include <linux/sched.h>
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 5ea4fb71..db23828 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -308,7 +308,44 @@
  * already provide the required functionality.
  */
 extern struct proc_info_list *lookup_processor_type(unsigned int);
-extern struct machine_desc *lookup_machine_type(unsigned int);
+
+static void __init early_print(const char *str, ...)
+{
+	extern void printascii(const char *);
+	char buf[256];
+	va_list ap;
+
+	va_start(ap, str);
+	vsnprintf(buf, sizeof(buf), str, ap);
+	va_end(ap);
+
+#ifdef CONFIG_DEBUG_LL
+	printascii(buf);
+#endif
+	printk("%s", buf);
+}
+
+static struct machine_desc * __init lookup_machine_type(unsigned int type)
+{
+	extern struct machine_desc __arch_info_begin[], __arch_info_end[];
+	struct machine_desc *p;
+
+	for (p = __arch_info_begin; p < __arch_info_end; p++)
+		if (type == p->nr)
+			return p;
+
+	early_print("\n"
+		"Error: unrecognized/unsupported machine ID (r1 = 0x%08x).\n\n"
+		"Available machine support:\n\nID (hex)\tNAME\n", type);
+
+	for (p = __arch_info_begin; p < __arch_info_end; p++)
+		early_print("%08x\t%s\n", p->nr, p->name);
+
+	early_print("\nPlease check your kernel config and/or bootloader.\n");
+
+	while (true)
+		/* can't use cpu_relax() here as it may require MMU setup */;
+}
 
 static void __init feat_v6_fixup(void)
 {
diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index abaf844..cb83983 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -20,7 +20,6 @@
 #include <asm/unistd.h>
 #include <asm/vfp.h>
 
-#include "ptrace.h"
 #include "signal.h"
 
 #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
@@ -348,8 +347,6 @@
 	if (restore_sigframe(regs, frame))
 		goto badframe;
 
-	single_step_trap(current);
-
 	return regs->ARM_r0;
 
 badframe:
@@ -383,8 +380,6 @@
 	if (do_sigaltstack(&frame->sig.uc.uc_stack, NULL, regs->ARM_sp) == -EFAULT)
 		goto badframe;
 
-	single_step_trap(current);
-
 	return regs->ARM_r0;
 
 badframe:
@@ -706,8 +701,6 @@
 	if (try_to_freeze())
 		goto no_signal;
 
-	single_step_clear(current);
-
 	signr = get_signal_to_deliver(&info, &ka, regs, NULL);
 	if (signr > 0) {
 		sigset_t *oldset;
@@ -726,7 +719,6 @@
 			if (test_thread_flag(TIF_RESTORE_SIGMASK))
 				clear_thread_flag(TIF_RESTORE_SIGMASK);
 		}
-		single_step_set(current);
 		return;
 	}
 
@@ -772,7 +764,6 @@
 			sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
 		}
 	}
-	single_step_set(current);
 }
 
 asmlinkage void
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index ee57640..21ac43f 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -23,6 +23,7 @@
 #include <linux/kexec.h>
 #include <linux/delay.h>
 #include <linux/init.h>
+#include <linux/sched.h>
 
 #include <asm/atomic.h>
 #include <asm/cacheflush.h>
@@ -32,7 +33,6 @@
 #include <asm/unwind.h>
 #include <asm/tls.h>
 
-#include "ptrace.h"
 #include "signal.h"
 
 static const char *handler[]= { "prefetch abort", "data abort", "address exception", "interrupt" };
@@ -256,7 +256,7 @@
 	return ret;
 }
 
-DEFINE_SPINLOCK(die_lock);
+static DEFINE_SPINLOCK(die_lock);
 
 /*
  * This function is protected against re-entrancy.
diff --git a/arch/arm/mach-footbridge/include/mach/hardware.h b/arch/arm/mach-footbridge/include/mach/hardware.h
index 51dd902..b6fdf23 100644
--- a/arch/arm/mach-footbridge/include/mach/hardware.h
+++ b/arch/arm/mach-footbridge/include/mach/hardware.h
@@ -23,26 +23,33 @@
  * 0xf9000000	0x50000000	1MB	Cache flush
  * 0xf0000000	0x80000000	16MB	ISA memory
  */
+
+#ifdef CONFIG_MMU
+#define MMU_IO(a, b)	(a)
+#else
+#define MMU_IO(a, b)	(b)
+#endif
+
 #define XBUS_SIZE		0x00100000
-#define XBUS_BASE		0xff800000
+#define XBUS_BASE		MMU_IO(0xff800000, 0x40000000)
 
 #define ARMCSR_SIZE		0x00100000
-#define ARMCSR_BASE		0xfe000000
+#define ARMCSR_BASE		MMU_IO(0xfe000000, 0x42000000)
 
 #define WFLUSH_SIZE		0x00100000
-#define WFLUSH_BASE		0xfd000000
+#define WFLUSH_BASE		MMU_IO(0xfd000000, 0x78000000)
 
 #define PCIIACK_SIZE		0x00100000
-#define PCIIACK_BASE		0xfc000000
+#define PCIIACK_BASE		MMU_IO(0xfc000000, 0x79000000)
 
 #define PCICFG1_SIZE		0x01000000
-#define PCICFG1_BASE		0xfb000000
+#define PCICFG1_BASE		MMU_IO(0xfb000000, 0x7a000000)
 
 #define PCICFG0_SIZE		0x01000000
-#define PCICFG0_BASE		0xfa000000
+#define PCICFG0_BASE		MMU_IO(0xfa000000, 0x7b000000)
 
 #define PCIMEM_SIZE		0x01000000
-#define PCIMEM_BASE		0xf0000000
+#define PCIMEM_BASE		MMU_IO(0xf0000000, 0x80000000)
 
 #define XBUS_LEDS		((volatile unsigned char *)(XBUS_BASE + 0x12000))
 #define XBUS_LED_AMBER		(1 << 0)
diff --git a/arch/arm/mach-footbridge/include/mach/io.h b/arch/arm/mach-footbridge/include/mach/io.h
index 101a4fe..32e4cc3 100644
--- a/arch/arm/mach-footbridge/include/mach/io.h
+++ b/arch/arm/mach-footbridge/include/mach/io.h
@@ -14,8 +14,14 @@
 #ifndef __ASM_ARM_ARCH_IO_H
 #define __ASM_ARM_ARCH_IO_H
 
-#define PCIO_SIZE		0x00100000
-#define PCIO_BASE		0xff000000
+#ifdef CONFIG_MMU
+#define MMU_IO(a, b)	(a)
+#else
+#define MMU_IO(a, b)	(b)
+#endif
+
+#define PCIO_SIZE       0x00100000
+#define PCIO_BASE       MMU_IO(0xff000000, 0x7c000000)
 
 #define IO_SPACE_LIMIT 0xffff
 
diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig
index 1a2cf62..b69fa0a 100644
--- a/arch/arm/mach-omap2/Kconfig
+++ b/arch/arm/mach-omap2/Kconfig
@@ -45,6 +45,7 @@
 	select CPU_V7
 	select ARM_GIC
 	select PL310_ERRATA_588369
+	select PL310_ERRATA_727915
 	select ARM_ERRATA_720789
 	select ARCH_HAS_OPP
 	select PM_OPP if PM
diff --git a/arch/arm/mach-omap2/omap4-common.c b/arch/arm/mach-omap2/omap4-common.c
index 1926864..9ef8c29 100644
--- a/arch/arm/mach-omap2/omap4-common.c
+++ b/arch/arm/mach-omap2/omap4-common.c
@@ -52,6 +52,12 @@
 	omap_smc1(0x102, 0x0);
 }
 
+static void omap4_l2x0_set_debug(unsigned long val)
+{
+	/* Program PL310 L2 Cache controller debug register */
+	omap_smc1(0x100, val);
+}
+
 static int __init omap_l2_cache_init(void)
 {
 	u32 aux_ctrl = 0;
@@ -99,6 +105,7 @@
 	 * specific one
 	*/
 	outer_cache.disable = omap4_l2x0_disable;
+	outer_cache.set_debug = omap4_l2x0_set_debug;
 
 	return 0;
 }
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index e4509ba..05b26a0 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -845,6 +845,11 @@
 	help
 	  This option enables the L2 cache on XScale3.
 
+config ARM_L1_CACHE_SHIFT_6
+	bool
+	help
+	  Setting ARM L1 cache line size to 64 Bytes.
+
 config ARM_L1_CACHE_SHIFT
 	int
 	default 6 if ARM_L1_CACHE_SHIFT_6
diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index f2ce38e..ef59099 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -73,18 +73,24 @@
 	writel_relaxed(addr, base + L2X0_INV_LINE_PA);
 }
 
-#ifdef CONFIG_PL310_ERRATA_588369
-static void debug_writel(unsigned long val)
-{
-	extern void omap_smc1(u32 fn, u32 arg);
+#if defined(CONFIG_PL310_ERRATA_588369) || defined(CONFIG_PL310_ERRATA_727915)
 
-	/*
-	 * Texas Instrument secure monitor api to modify the
-	 * PL310 Debug Control Register.
-	 */
-	omap_smc1(0x100, val);
+#define debug_writel(val)	outer_cache.set_debug(val)
+
+static void l2x0_set_debug(unsigned long val)
+{
+	writel_relaxed(val, l2x0_base + L2X0_DEBUG_CTRL);
+}
+#else
+/* Optimised out for non-errata case */
+static inline void debug_writel(unsigned long val)
+{
 }
 
+#define l2x0_set_debug	NULL
+#endif
+
+#ifdef CONFIG_PL310_ERRATA_588369
 static inline void l2x0_flush_line(unsigned long addr)
 {
 	void __iomem *base = l2x0_base;
@@ -97,11 +103,6 @@
 }
 #else
 
-/* Optimised out for non-errata case */
-static inline void debug_writel(unsigned long val)
-{
-}
-
 static inline void l2x0_flush_line(unsigned long addr)
 {
 	void __iomem *base = l2x0_base;
@@ -125,9 +126,11 @@
 
 	/* clean all ways */
 	spin_lock_irqsave(&l2x0_lock, flags);
+	debug_writel(0x03);
 	writel_relaxed(l2x0_way_mask, l2x0_base + L2X0_CLEAN_INV_WAY);
 	cache_wait_way(l2x0_base + L2X0_CLEAN_INV_WAY, l2x0_way_mask);
 	cache_sync();
+	debug_writel(0x00);
 	spin_unlock_irqrestore(&l2x0_lock, flags);
 }
 
@@ -335,6 +338,7 @@
 	outer_cache.flush_all = l2x0_flush_all;
 	outer_cache.inv_all = l2x0_inv_all;
 	outer_cache.disable = l2x0_disable;
+	outer_cache.set_debug = l2x0_set_debug;
 
 	printk(KERN_INFO "%s cache controller enabled\n", type);
 	printk(KERN_INFO "l2x0: %d ways, CACHE_ID 0x%08x, AUX_CTRL 0x%08x, Cache size: %d B\n",
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 3c67e92..ff7b43b 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -827,16 +827,6 @@
 			 * rather difficult.
 			 */
 			reason = "with VIPT aliasing cache";
-		} else if (is_smp() && tlb_ops_need_broadcast()) {
-			/*
-			 * kmap_high needs to occasionally flush TLB entries,
-			 * however, if the TLB entries need to be broadcast
-			 * we may deadlock:
-			 *  kmap_high(irqs off)->flush_all_zero_pkmaps->
-			 *  flush_tlb_kernel_range->smp_call_function_many
-			 *   (must not be called with irqs off)
-			 */
-			reason = "without hardware TLB ops broadcasting";
 		}
 		if (reason) {
 			printk(KERN_CRIT "HIGHMEM is not supported %s, ignoring high memory\n",
diff --git a/arch/arm/mm/vmregion.c b/arch/arm/mm/vmregion.c
index 935993e..036fdbf 100644
--- a/arch/arm/mm/vmregion.c
+++ b/arch/arm/mm/vmregion.c
@@ -38,7 +38,7 @@
 arm_vmregion_alloc(struct arm_vmregion_head *head, size_t align,
 		   size_t size, gfp_t gfp)
 {
-	unsigned long addr = head->vm_start, end = head->vm_end - size;
+	unsigned long start = head->vm_start, addr = head->vm_end;
 	unsigned long flags;
 	struct arm_vmregion *c, *new;
 
@@ -54,21 +54,20 @@
 
 	spin_lock_irqsave(&head->vm_lock, flags);
 
-	list_for_each_entry(c, &head->vm_list, vm_list) {
-		if ((addr + size) < addr)
-			goto nospc;
-		if ((addr + size) <= c->vm_start)
+	addr = rounddown(addr - size, align);
+	list_for_each_entry_reverse(c, &head->vm_list, vm_list) {
+		if (addr >= c->vm_end)
 			goto found;
-		addr = ALIGN(c->vm_end, align);
-		if (addr > end)
+		addr = rounddown(c->vm_start - size, align);
+		if (addr < start)
 			goto nospc;
 	}
 
  found:
 	/*
-	 * Insert this entry _before_ the one we found.
+	 * Insert this entry after the one we found.
 	 */
-	list_add_tail(&new->vm_list, &c->vm_list);
+	list_add(&new->vm_list, &c->vm_list);
 	new->vm_start = addr;
 	new->vm_end = addr + size;
 	new->vm_active = 1;
diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c
index 0797cb5..bbf3da0 100644
--- a/arch/arm/vfp/vfpmodule.c
+++ b/arch/arm/vfp/vfpmodule.c
@@ -153,7 +153,7 @@
  * Raise a SIGFPE for the current process.
  * sicode describes the signal being raised.
  */
-void vfp_raise_sigfpe(unsigned int sicode, struct pt_regs *regs)
+static void vfp_raise_sigfpe(unsigned int sicode, struct pt_regs *regs)
 {
 	siginfo_t info;
 
@@ -489,8 +489,11 @@
 
 /*
  * VFP hardware can lose all context when a CPU goes offline.
- * Safely clear our held state when a CPU has been killed, and
- * re-enable access to VFP when the CPU comes back online.
+ * As we will be running in SMP mode with CPU hotplug, we will save the
+ * hardware state at every thread switch.  We clear our held state when
+ * a CPU has been killed, indicating that the VFP hardware doesn't contain
+ * a threads VFP state.  When a CPU starts up, we re-enable access to the
+ * VFP hardware.
  *
  * Both CPU_DYING and CPU_STARTING are called on the CPU which
  * is being offlined/onlined.