Merge of master.kernel.org:/home/rmk/linux-2.6-serial.git
diff --git a/arch/arm/configs/ixdp2800_defconfig b/arch/arm/configs/ixdp2800_defconfig
index d36f991..7be3521 100644
--- a/arch/arm/configs/ixdp2800_defconfig
+++ b/arch/arm/configs/ixdp2800_defconfig
@@ -133,7 +133,7 @@
 #
 CONFIG_ZBOOT_ROM_TEXT=0x0
 CONFIG_ZBOOT_ROM_BSS=0x0
-CONFIG_CMDLINE="console=ttyS0,9600 root=/dev/nfs ip=bootp mem=64M@0x0 pci=firmware"
+CONFIG_CMDLINE="console=ttyS0,9600 root=/dev/nfs ip=bootp mem=64M@0x0"
 # CONFIG_XIP_KERNEL is not set
 
 #
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 2a5c3fe..080df90 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -269,6 +269,12 @@
 	add	r5, sp, #S_PC
 	ldmia	r7, {r2 - r4}			@ Get USR pc, cpsr
 
+#if __LINUX_ARM_ARCH__ < 6
+	@ make sure our user space atomic helper is aborted
+	cmp	r2, #VIRT_OFFSET
+	bichs	r3, r3, #PSR_Z_BIT
+#endif
+
 	@
 	@ We are now ready to fill in the remaining blanks on the stack:
 	@
@@ -499,8 +505,12 @@
 	mra	r4, r5, acc0
 	stmia   ip, {r4, r5}
 #endif
+#ifdef CONFIG_HAS_TLS_REG
+	mcr	p15, 0, r3, c13, c0, 3		@ set TLS register
+#else
 	mov	r4, #0xffff0fff
-	str	r3, [r4, #-3]			@ Set TLS ptr
+	str	r3, [r4, #-15]			@ TLS val at 0xffff0ff0
+#endif
 	mcr	p15, 0, r6, c3, c0, 0		@ Set domain register
 #ifdef CONFIG_VFP
 	@ Always disable VFP so we can lazily save/restore the old
@@ -519,6 +529,207 @@
 	ldmib	r2, {r4 - sl, fp, sp, pc}	@ Load all regs saved previously
 
 	__INIT
+
+/*
+ * User helpers.
+ *
+ * These are segment of kernel provided user code reachable from user space
+ * at a fixed address in kernel memory.  This is used to provide user space
+ * with some operations which require kernel help because of unimplemented
+ * native feature and/or instructions in many ARM CPUs. The idea is for
+ * this code to be executed directly in user mode for best efficiency but
+ * which is too intimate with the kernel counter part to be left to user
+ * libraries.  In fact this code might even differ from one CPU to another
+ * depending on the available  instruction set and restrictions like on
+ * SMP systems.  In other words, the kernel reserves the right to change
+ * this code as needed without warning. Only the entry points and their
+ * results are guaranteed to be stable.
+ *
+ * Each segment is 32-byte aligned and will be moved to the top of the high
+ * vector page.  New segments (if ever needed) must be added in front of
+ * existing ones.  This mechanism should be used only for things that are
+ * really small and justified, and not be abused freely.
+ *
+ * User space is expected to implement those things inline when optimizing
+ * for a processor that has the necessary native support, but only if such
+ * resulting binaries are already to be incompatible with earlier ARM
+ * processors due to the use of unsupported instructions other than what
+ * is provided here.  In other words don't make binaries unable to run on
+ * earlier processors just for the sake of not using these kernel helpers
+ * if your compiled code is not going to use the new instructions for other
+ * purpose.
+ */
+
+	.align	5
+	.globl	__kuser_helper_start
+__kuser_helper_start:
+
+/*
+ * Reference prototype:
+ *
+ *	int __kernel_cmpxchg(int oldval, int newval, int *ptr)
+ *
+ * Input:
+ *
+ *	r0 = oldval
+ *	r1 = newval
+ *	r2 = ptr
+ *	lr = return address
+ *
+ * Output:
+ *
+ *	r0 = returned value (zero or non-zero)
+ *	C flag = set if r0 == 0, clear if r0 != 0
+ *
+ * Clobbered:
+ *
+ *	r3, ip, flags
+ *
+ * Definition and user space usage example:
+ *
+ *	typedef int (__kernel_cmpxchg_t)(int oldval, int newval, int *ptr);
+ *	#define __kernel_cmpxchg (*(__kernel_cmpxchg_t *)0xffff0fc0)
+ *
+ * Atomically store newval in *ptr if *ptr is equal to oldval for user space.
+ * Return zero if *ptr was changed or non-zero if no exchange happened.
+ * The C flag is also set if *ptr was changed to allow for assembly
+ * optimization in the calling code.
+ *
+ * For example, a user space atomic_add implementation could look like this:
+ *
+ * #define atomic_add(ptr, val) \
+ *	({ register unsigned int *__ptr asm("r2") = (ptr); \
+ *	   register unsigned int __result asm("r1"); \
+ *	   asm volatile ( \
+ *	       "1: @ atomic_add\n\t" \
+ *	       "ldr	r0, [r2]\n\t" \
+ *	       "mov	r3, #0xffff0fff\n\t" \
+ *	       "add	lr, pc, #4\n\t" \
+ *	       "add	r1, r0, %2\n\t" \
+ *	       "add	pc, r3, #(0xffff0fc0 - 0xffff0fff)\n\t" \
+ *	       "bcc	1b" \
+ *	       : "=&r" (__result) \
+ *	       : "r" (__ptr), "rIL" (val) \
+ *	       : "r0","r3","ip","lr","cc","memory" ); \
+ *	   __result; })
+ */
+
+__kuser_cmpxchg:				@ 0xffff0fc0
+
+#if __LINUX_ARM_ARCH__ < 6
+
+#ifdef CONFIG_SMP  /* sanity check */
+#error "CONFIG_SMP on a machine supporting pre-ARMv6 processors?"
+#endif
+
+	/*
+	 * Theory of operation:
+	 *
+	 * We set the Z flag before loading oldval. If ever an exception
+	 * occurs we can not be sure the loaded value will still be the same
+	 * when the exception returns, therefore the user exception handler
+	 * will clear the Z flag whenever the interrupted user code was
+	 * actually from the kernel address space (see the usr_entry macro).
+	 *
+	 * The post-increment on the str is used to prevent a race with an
+	 * exception happening just after the str instruction which would
+	 * clear the Z flag although the exchange was done.
+	 */
+	teq	ip, ip			@ set Z flag
+	ldr	ip, [r2]		@ load current val
+	add	r3, r2, #1		@ prepare store ptr
+	teqeq	ip, r0			@ compare with oldval if still allowed
+	streq	r1, [r3, #-1]!		@ store newval if still allowed
+	subs	r0, r2, r3		@ if r2 == r3 the str occured
+	mov	pc, lr
+
+#else
+
+	ldrex	r3, [r2]
+	subs	r3, r3, r0
+	strexeq	r3, r1, [r2]
+	rsbs	r0, r3, #0
+	mov	pc, lr
+
+#endif
+
+	.align	5
+
+/*
+ * Reference prototype:
+ *
+ *	int __kernel_get_tls(void)
+ *
+ * Input:
+ *
+ *	lr = return address
+ *
+ * Output:
+ *
+ *	r0 = TLS value
+ *
+ * Clobbered:
+ *
+ *	the Z flag might be lost
+ *
+ * Definition and user space usage example:
+ *
+ *	typedef int (__kernel_get_tls_t)(void);
+ *	#define __kernel_get_tls (*(__kernel_get_tls_t *)0xffff0fe0)
+ *
+ * Get the TLS value as previously set via the __ARM_NR_set_tls syscall.
+ *
+ * This could be used as follows:
+ *
+ * #define __kernel_get_tls() \
+ *	({ register unsigned int __val asm("r0"); \
+ *         asm( "mov r0, #0xffff0fff; mov lr, pc; sub pc, r0, #31" \
+ *	        : "=r" (__val) : : "lr","cc" ); \
+ *	   __val; })
+ */
+
+__kuser_get_tls:				@ 0xffff0fe0
+
+#ifndef CONFIG_HAS_TLS_REG
+
+#ifdef CONFIG_SMP  /* sanity check */
+#error "CONFIG_SMP without CONFIG_HAS_TLS_REG is wrong"
+#endif
+
+	ldr	r0, [pc, #(16 - 8)]		@ TLS stored at 0xffff0ff0
+	mov	pc, lr
+
+#else
+
+	mrc	p15, 0, r0, c13, c0, 3		@ read TLS register
+	mov	pc, lr
+
+#endif
+
+	.rep	5
+	.word	0			@ pad up to __kuser_helper_version
+	.endr
+
+/*
+ * Reference declaration:
+ *
+ *	extern unsigned int __kernel_helper_version;
+ *
+ * Definition and user space usage example:
+ *
+ *	#define __kernel_helper_version (*(unsigned int *)0xffff0ffc)
+ *
+ * User space may read this to determine the curent number of helpers
+ * available.
+ */
+
+__kuser_helper_version:				@ 0xffff0ffc
+	.word	((__kuser_helper_end - __kuser_helper_start) >> 5)
+
+	.globl	__kuser_helper_end
+__kuser_helper_end:
+
+
 /*
  * Vector stubs.
  *
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 0078aeb..3a001fe 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -450,13 +450,17 @@
 
 	case NR(set_tls):
 		thread->tp_value = regs->ARM_r0;
+#ifdef CONFIG_HAS_TLS_REG
+		asm ("mcr p15, 0, %0, c13, c0, 3" : : "r" (regs->ARM_r0) );
+#else
 		/*
-		 * Our user accessible TLS ptr is located at 0xffff0ffc.
-		 * On SMP read access to this address must raise a fault
-		 * and be emulated from the data abort handler.
-		 * m
+		 * User space must never try to access this directly.
+		 * Expect your app to break eventually if you do so.
+		 * The user helper at 0xffff0fe0 must be used instead.
+		 * (see entry-armv.S for details)
 		 */
-		*((unsigned long *)0xffff0ffc) = thread->tp_value;
+		*((unsigned int *)0xffff0ff0) = regs->ARM_r0;
+#endif
 		return 0;
 
 	default:
@@ -493,6 +497,41 @@
 	return 0;
 }
 
+#if defined(CONFIG_CPU_32v6) && !defined(CONFIG_HAS_TLS_REG)
+
+/*
+ * We might be running on an ARMv6+ processor which should have the TLS
+ * register, but for some reason we can't use it and have to emulate it.
+ */
+
+static int get_tp_trap(struct pt_regs *regs, unsigned int instr)
+{
+	int reg = (instr >> 12) & 15;
+	if (reg == 15)
+		return 1;
+	regs->uregs[reg] = current_thread_info()->tp_value;
+	regs->ARM_pc += 4;
+	return 0;
+}
+
+static struct undef_hook arm_mrc_hook = {
+	.instr_mask	= 0x0fff0fff,
+	.instr_val	= 0x0e1d0f70,
+	.cpsr_mask	= PSR_T_BIT,
+	.cpsr_val	= 0,
+	.fn		= get_tp_trap,
+};
+
+static int __init arm_mrc_hook_init(void)
+{
+	register_undef_hook(&arm_mrc_hook);
+	return 0;
+}
+
+late_initcall(arm_mrc_hook_init);
+
+#endif
+
 void __bad_xchg(volatile void *ptr, int size)
 {
 	printk("xchg: bad data size: pc 0x%p, ptr 0x%p, size %d\n",
@@ -580,14 +619,17 @@
 {
 	extern char __stubs_start[], __stubs_end[];
 	extern char __vectors_start[], __vectors_end[];
+	extern char __kuser_helper_start[], __kuser_helper_end[];
+	int kuser_sz = __kuser_helper_end - __kuser_helper_start;
 
 	/*
-	 * Copy the vectors and stubs (in entry-armv.S) into the
-	 * vector page, mapped at 0xffff0000, and ensure these are
-	 * visible to the instruction stream.
+	 * Copy the vectors, stubs and kuser helpers (in entry-armv.S)
+	 * into the vector page, mapped at 0xffff0000, and ensure these
+	 * are visible to the instruction stream.
 	 */
 	memcpy((void *)0xffff0000, __vectors_start, __vectors_end - __vectors_start);
 	memcpy((void *)0xffff0200, __stubs_start, __stubs_end - __stubs_start);
+	memcpy((void *)0xffff1000 - kuser_sz, __kuser_helper_start, kuser_sz);
 	flush_icache_range(0xffff0000, 0xffff0000 + PAGE_SIZE);
 	modify_domain(DOMAIN_USER, DOMAIN_CLIENT);
 }
diff --git a/arch/arm/mach-ixp2000/ixdp2800.c b/arch/arm/mach-ixp2000/ixdp2800.c
index c4683aa..aec13c7 100644
--- a/arch/arm/mach-ixp2000/ixdp2800.c
+++ b/arch/arm/mach-ixp2000/ixdp2800.c
@@ -65,19 +65,102 @@
 /*************************************************************************
  * IXDP2800 PCI
  *************************************************************************/
+static void __init ixdp2800_slave_disable_pci_master(void)
+{
+	*IXP2000_PCI_CMDSTAT &= ~(PCI_COMMAND_MASTER | PCI_COMMAND_MEMORY);
+}
+
+static void __init ixdp2800_master_wait_for_slave(void)
+{
+	volatile u32 *addr;
+
+	printk(KERN_INFO "IXDP2800: waiting for slave NPU to configure "
+			 "its BAR sizes\n");
+
+	addr = ixp2000_pci_config_addr(0, IXDP2X00_SLAVE_NPU_DEVFN,
+					PCI_BASE_ADDRESS_1);
+	do {
+		*addr = 0xffffffff;
+		cpu_relax();
+	} while (*addr != 0xfe000008);
+
+	addr = ixp2000_pci_config_addr(0, IXDP2X00_SLAVE_NPU_DEVFN,
+					PCI_BASE_ADDRESS_2);
+	do {
+		*addr = 0xffffffff;
+		cpu_relax();
+	} while (*addr != 0xc0000008);
+
+	/*
+	 * Configure the slave's SDRAM BAR by hand.
+	 */
+	*addr = 0x40000008;
+}
+
+static void __init ixdp2800_slave_wait_for_master_enable(void)
+{
+	printk(KERN_INFO "IXDP2800: waiting for master NPU to enable us\n");
+
+	while ((*IXP2000_PCI_CMDSTAT & PCI_COMMAND_MASTER) == 0)
+		cpu_relax();
+}
+
 void __init ixdp2800_pci_preinit(void)
 {
 	printk("ixdp2x00_pci_preinit called\n");
 
-	*IXP2000_PCI_ADDR_EXT =  0x0000e000;
+	*IXP2000_PCI_ADDR_EXT = 0x0001e000;
 
-	*IXP2000_PCI_DRAM_BASE_ADDR_MASK = (0x40000000 - 1) & ~0xfffff;
+	if (!ixdp2x00_master_npu())
+		ixdp2800_slave_disable_pci_master();
+
 	*IXP2000_PCI_SRAM_BASE_ADDR_MASK = (0x2000000 - 1) & ~0x3ffff;
+	*IXP2000_PCI_DRAM_BASE_ADDR_MASK = (0x40000000 - 1) & ~0xfffff;
 
 	ixp2000_pci_preinit();
+
+	if (ixdp2x00_master_npu()) {
+		/*
+		 * Wait until the slave set its SRAM/SDRAM BAR sizes
+		 * correctly before we proceed to scan and enumerate
+		 * the bus.
+		 */
+		ixdp2800_master_wait_for_slave();
+
+		/*
+		 * We configure the SDRAM BARs by hand because they
+		 * are 1G and fall outside of the regular allocated
+		 * PCI address space.
+		 */
+		*IXP2000_PCI_SDRAM_BAR = 0x00000008;
+	} else {
+		/*
+		 * Wait for the master to complete scanning the bus
+		 * and assigning resources before we proceed to scan
+		 * the bus ourselves.  Set pci=firmware to honor the
+		 * master's resource assignment.
+		 */
+		ixdp2800_slave_wait_for_master_enable();
+		pcibios_setup("firmware");
+	}
 }
 
-int ixdp2800_pci_setup(int nr, struct pci_sys_data *sys)
+/*
+ * We assign the SDRAM BARs for the two IXP2800 CPUs by hand, outside
+ * of the regular PCI window, because there's only 512M of outbound PCI
+ * memory window on each IXP, while we need 1G for each of the BARs.
+ */
+static void __devinit ixp2800_pci_fixup(struct pci_dev *dev)
+{
+	if (machine_is_ixdp2800()) {
+		dev->resource[2].start = 0;
+		dev->resource[2].end   = 0;
+		dev->resource[2].flags = 0;
+	}
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IXP2800, ixp2800_pci_fixup);
+
+static int __init ixdp2800_pci_setup(int nr, struct pci_sys_data *sys)
 {
 	sys->mem_offset = 0x00000000;
 
@@ -129,22 +212,47 @@
 	} else return IRQ_IXP2000_PCIB; /* Slave NIC interrupt */
 }
 
-static void ixdp2800_pci_postinit(void)
+static void __init ixdp2800_master_enable_slave(void)
 {
-	struct pci_dev *dev;
+	volatile u32 *addr;
 
-	if (ixdp2x00_master_npu()) {
-		dev = pci_find_slot(1, IXDP2800_SLAVE_ENET_DEVFN);
-		pci_remove_bus_device(dev);
-	} else {
-		dev = pci_find_slot(1, IXDP2800_MASTER_ENET_DEVFN);
-		pci_remove_bus_device(dev);
+	printk(KERN_INFO "IXDP2800: enabling slave NPU\n");
 
+	addr = (volatile u32 *)ixp2000_pci_config_addr(0,
+					IXDP2X00_SLAVE_NPU_DEVFN,
+					PCI_COMMAND);
+
+	*addr |= PCI_COMMAND_MASTER;
+}
+
+static void __init ixdp2800_master_wait_for_slave_bus_scan(void)
+{
+	volatile u32 *addr;
+
+	printk(KERN_INFO "IXDP2800: waiting for slave to finish bus scan\n");
+
+	addr = (volatile u32 *)ixp2000_pci_config_addr(0,
+					IXDP2X00_SLAVE_NPU_DEVFN,
+					PCI_COMMAND);
+	while ((*addr & PCI_COMMAND_MEMORY) == 0)
+		cpu_relax();
+}
+
+static void __init ixdp2800_slave_signal_bus_scan_completion(void)
+{
+	printk(KERN_INFO "IXDP2800: bus scan done, signaling master\n");
+	*IXP2000_PCI_CMDSTAT |= PCI_COMMAND_MEMORY;
+}
+
+static void __init ixdp2800_pci_postinit(void)
+{
+	if (!ixdp2x00_master_npu()) {
 		ixdp2x00_slave_pci_postinit();
+		ixdp2800_slave_signal_bus_scan_completion();
 	}
 }
 
-struct hw_pci ixdp2800_pci __initdata = {
+struct __initdata hw_pci ixdp2800_pci __initdata = {
 	.nr_controllers	= 1,
 	.setup		= ixdp2800_pci_setup,
 	.preinit	= ixdp2800_pci_preinit,
@@ -155,8 +263,21 @@
 
 int __init ixdp2800_pci_init(void)
 {
-	if (machine_is_ixdp2800())
+	if (machine_is_ixdp2800()) {
+		struct pci_dev *dev;
+
 		pci_common_init(&ixdp2800_pci);
+		if (ixdp2x00_master_npu()) {
+			dev = pci_find_slot(1, IXDP2800_SLAVE_ENET_DEVFN);
+			pci_remove_bus_device(dev);
+
+			ixdp2800_master_enable_slave();
+			ixdp2800_master_wait_for_slave_bus_scan();
+		} else {
+			dev = pci_find_slot(1, IXDP2800_MASTER_ENET_DEVFN);
+			pci_remove_bus_device(dev);
+		}
+	}
 
 	return 0;
 }
diff --git a/arch/arm/mach-ixp2000/pci.c b/arch/arm/mach-ixp2000/pci.c
index 831f8ff..5ff2f27 100644
--- a/arch/arm/mach-ixp2000/pci.c
+++ b/arch/arm/mach-ixp2000/pci.c
@@ -37,7 +37,7 @@
 
 static int clear_master_aborts(void);
 
-static u32 *
+u32 *
 ixp2000_pci_config_addr(unsigned int bus_nr, unsigned int devfn, int where)
 {
 	u32 *paddress;
@@ -208,15 +208,15 @@
  * use our own resource space.
  */
 static struct resource ixp2000_pci_mem_space = {
-	.start	= 0x00000000,
+	.start	= 0xe0000000,
 	.end	= 0xffffffff,
 	.flags	= IORESOURCE_MEM,
 	.name	= "PCI Mem Space"
 };
 
 static struct resource ixp2000_pci_io_space = {
-	.start	= 0x00000000,
-	.end	= 0xffffffff,
+	.start	= 0x00010000,
+	.end	= 0x0001ffff,
 	.flags	= IORESOURCE_IO,
 	.name	= "PCI I/O Space"
 };
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index 5b670c9..007766a 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -409,3 +409,17 @@
 	depends on CPU_ARM1020
 	help
 	  Say Y here to disable branch prediction.  If unsure, say N.
+
+config HAS_TLS_REG
+	bool
+	depends on CPU_32v6 && !CPU_32v5 && !CPU_32v4 && !CPU_32v3
+	help
+	  This selects support for the CP15 thread register.
+	  It is defined to be available on ARMv6 or later.  However
+	  if the kernel is configured to support multiple CPUs including
+	  a pre-ARMv6 processors, or if a given ARMv6 processor doesn't
+	  implement the thread register for some reason, then access to
+	  this register from user space must be trapped and emulated.
+	  If user space is relying on the __kuser_get_tls code then
+	  there should not be any impact.
+
diff --git a/arch/arm/mm/abort-ev6.S b/arch/arm/mm/abort-ev6.S
index 38b2cbb..8f76f3d 100644
--- a/arch/arm/mm/abort-ev6.S
+++ b/arch/arm/mm/abort-ev6.S
@@ -1,5 +1,6 @@
 #include <linux/linkage.h>
 #include <asm/assembler.h>
+#include "abort-macro.S"
 /*
  * Function: v6_early_abort
  *
@@ -13,11 +14,26 @@
  *	   : sp = pointer to registers
  *
  * Purpose : obtain information about current aborted instruction.
+ * Note: we read user space.  This means we might cause a data
+ * abort here if the I-TLB and D-TLB aren't seeing the same
+ * picture.  Unfortunately, this does happen.  We live with it.
  */
 	.align	5
 ENTRY(v6_early_abort)
 	mrc	p15, 0, r1, c5, c0, 0		@ get FSR
 	mrc	p15, 0, r0, c6, c0, 0		@ get FAR
+/*
+ * Faulty SWP instruction on 1136 doesn't set bit 11 in DFSR.
+ * The test below covers all the write situations, including Java bytecodes
+ */
+	bic	r1, r1, #1 << 11 | 1 << 10	@ clear bits 11 and 10 of FSR
+	tst	r3, #PSR_J_BIT			@ Java?
+	movne	pc, lr
+	do_thumb_abort
+	ldreq	r3, [r2]			@ read aborted ARM instruction
+	do_ldrd_abort
+	tst	r3, #1 << 20			@ L = 0 -> write
+	orreq	r1, r1, #1 << 11		@ yes.
 	mov	pc, lr
 
 
diff --git a/arch/arm/mm/mm-armv.c b/arch/arm/mm/mm-armv.c
index f5a87db8..585dfb8 100644
--- a/arch/arm/mm/mm-armv.c
+++ b/arch/arm/mm/mm-armv.c
@@ -411,9 +411,10 @@
 		mem_types[MT_MEMORY].prot_sect &= ~PMD_BIT4;
 		mem_types[MT_ROM].prot_sect &= ~PMD_BIT4;
 		/*
-		 * Mark cache clean areas read only from SVC mode
-		 * and no access from userspace.
+		 * Mark cache clean areas and XIP ROM read only
+		 * from SVC mode and no access from userspace.
 		 */
+		mem_types[MT_ROM].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
 		mem_types[MT_MINICLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
 		mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
 	}
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S
index 3c73dc8..fe1918c 100644
--- a/arch/i386/kernel/entry.S
+++ b/arch/i386/kernel/entry.S
@@ -260,11 +260,9 @@
 .section .fixup,"ax"
 iret_exc:
 	sti
-	movl $__USER_DS, %edx
-	movl %edx, %ds
-	movl %edx, %es
-	movl $11,%eax
-	call do_exit
+	pushl $0			# no error code
+	pushl $do_iret_error
+	jmp error_code
 .previous
 .section __ex_table,"a"
 	.align 4
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c
index 6c0e383..d708194 100644
--- a/arch/i386/kernel/traps.c
+++ b/arch/i386/kernel/traps.c
@@ -451,6 +451,7 @@
 DO_ERROR(11, SIGBUS,  "segment not present", segment_not_present)
 DO_ERROR(12, SIGBUS,  "stack segment", stack_segment)
 DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0)
+DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0)
 
 fastcall void do_general_protection(struct pt_regs * regs, long error_code)
 {
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index b30001f..4fc0cb7 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -223,7 +223,7 @@
 	}
 	if ((val == CPUFREQ_PRECHANGE  && ci->old < ci->new) ||
 	    (val == CPUFREQ_POSTCHANGE && ci->old > ci->new) ||
-	    (val == CPUFREQ_RESUMECHANGE)) {
+	    (val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE)) {
 		loops_per_jiffy = cpufreq_scale(l_p_j_ref, l_p_j_ref_freq, ci->new);
 		dprintk("scaling loops_per_jiffy to %lu for frequency %u kHz\n", loops_per_jiffy, ci->new);
 	}
@@ -866,11 +866,90 @@
 
 
 /**
+ *	cpufreq_suspend - let the low level driver prepare for suspend
+ */
+
+static int cpufreq_suspend(struct sys_device * sysdev, u32 state)
+{
+	int cpu = sysdev->id;
+	unsigned int ret = 0;
+	unsigned int cur_freq = 0;
+	struct cpufreq_policy *cpu_policy;
+
+	dprintk("resuming cpu %u\n", cpu);
+
+	if (!cpu_online(cpu))
+		return 0;
+
+	/* we may be lax here as interrupts are off. Nonetheless
+	 * we need to grab the correct cpu policy, as to check
+	 * whether we really run on this CPU.
+	 */
+
+	cpu_policy = cpufreq_cpu_get(cpu);
+	if (!cpu_policy)
+		return -EINVAL;
+
+	/* only handle each CPU group once */
+	if (unlikely(cpu_policy->cpu != cpu)) {
+		cpufreq_cpu_put(cpu_policy);
+		return 0;
+	}
+
+	if (cpufreq_driver->suspend) {
+		ret = cpufreq_driver->suspend(cpu_policy, state);
+		if (ret) {
+			printk(KERN_ERR "cpufreq: suspend failed in ->suspend "
+					"step on CPU %u\n", cpu_policy->cpu);
+			cpufreq_cpu_put(cpu_policy);
+			return ret;
+		}
+	}
+
+
+	if (cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)
+		goto out;
+
+	if (cpufreq_driver->get)
+		cur_freq = cpufreq_driver->get(cpu_policy->cpu);
+
+	if (!cur_freq || !cpu_policy->cur) {
+		printk(KERN_ERR "cpufreq: suspend failed to assert current "
+		       "frequency is what timing core thinks it is.\n");
+		goto out;
+	}
+
+	if (unlikely(cur_freq != cpu_policy->cur)) {
+		struct cpufreq_freqs freqs;
+
+		if (!(cpufreq_driver->flags & CPUFREQ_PM_NO_WARN))
+			printk(KERN_DEBUG "Warning: CPU frequency is %u, "
+			       "cpufreq assumed %u kHz.\n",
+			       cur_freq, cpu_policy->cur);
+
+		freqs.cpu = cpu;
+		freqs.old = cpu_policy->cur;
+		freqs.new = cur_freq;
+
+		notifier_call_chain(&cpufreq_transition_notifier_list,
+				    CPUFREQ_SUSPENDCHANGE, &freqs);
+		adjust_jiffies(CPUFREQ_SUSPENDCHANGE, &freqs);
+
+		cpu_policy->cur = cur_freq;
+	}
+
+ out:
+	cpufreq_cpu_put(cpu_policy);
+	return 0;
+}
+
+/**
  *	cpufreq_resume -  restore proper CPU frequency handling after resume
  *
  *	1.) resume CPUfreq hardware support (cpufreq_driver->resume())
  *	2.) if ->target and !CPUFREQ_CONST_LOOPS: verify we're in sync
- *	3.) schedule call cpufreq_update_policy() ASAP as interrupts are restored.
+ *	3.) schedule call cpufreq_update_policy() ASAP as interrupts are
+ *	    restored.
  */
 static int cpufreq_resume(struct sys_device * sysdev)
 {
@@ -915,7 +994,9 @@
 			cur_freq = cpufreq_driver->get(cpu_policy->cpu);
 
 		if (!cur_freq || !cpu_policy->cur) {
-			printk(KERN_ERR "cpufreq: resume failed to assert current frequency is what timing core thinks it is.\n");
+			printk(KERN_ERR "cpufreq: resume failed to assert "
+					"current frequency is what timing core "
+					"thinks it is.\n");
 			goto out;
 		}
 
@@ -923,13 +1004,15 @@
 			struct cpufreq_freqs freqs;
 
 			printk(KERN_WARNING "Warning: CPU frequency is %u, "
-			       "cpufreq assumed %u kHz.\n", cur_freq, cpu_policy->cur);
+					"cpufreq assumed %u kHz.\n",
+					cur_freq, cpu_policy->cur);
 
 			freqs.cpu = cpu;
 			freqs.old = cpu_policy->cur;
 			freqs.new = cur_freq;
 
-			notifier_call_chain(&cpufreq_transition_notifier_list, CPUFREQ_RESUMECHANGE, &freqs);
+			notifier_call_chain(&cpufreq_transition_notifier_list,
+					CPUFREQ_RESUMECHANGE, &freqs);
 			adjust_jiffies(CPUFREQ_RESUMECHANGE, &freqs);
 
 			cpu_policy->cur = cur_freq;
@@ -945,6 +1028,7 @@
 static struct sysdev_driver cpufreq_sysdev_driver = {
 	.add		= cpufreq_add_dev,
 	.remove		= cpufreq_remove_dev,
+	.suspend	= cpufreq_suspend,
 	.resume		= cpufreq_resume,
 };
 
diff --git a/include/asm-arm/arch-ixp2000/platform.h b/include/asm-arm/arch-ixp2000/platform.h
index 509e44d..901bba6 100644
--- a/include/asm-arm/arch-ixp2000/platform.h
+++ b/include/asm-arm/arch-ixp2000/platform.h
@@ -121,6 +121,7 @@
 
 struct pci_sys_data;
 
+u32 *ixp2000_pci_config_addr(unsigned int bus, unsigned int devfn, int where);
 void ixp2000_pci_preinit(void);
 int ixp2000_pci_setup(int, struct pci_sys_data*);
 struct pci_bus* ixp2000_pci_scan_bus(int, struct pci_sys_data*);
diff --git a/include/asm-arm/io.h b/include/asm-arm/io.h
index 69bc7a3..658ffa3 100644
--- a/include/asm-arm/io.h
+++ b/include/asm-arm/io.h
@@ -99,12 +99,16 @@
  */
 #ifdef __io
 #define outb(v,p)		__raw_writeb(v,__io(p))
-#define outw(v,p)		__raw_writew(cpu_to_le16(v),__io(p))
-#define outl(v,p)		__raw_writel(cpu_to_le32(v),__io(p))
+#define outw(v,p)		__raw_writew((__force __u16) \
+					cpu_to_le16(v),__io(p))
+#define outl(v,p)		__raw_writel((__force __u32) \
+					cpu_to_le32(v),__io(p))
 
-#define inb(p)	({ unsigned int __v = __raw_readb(__io(p)); __v; })
-#define inw(p)	({ unsigned int __v = le16_to_cpu(__raw_readw(__io(p))); __v; })
-#define inl(p)	({ unsigned int __v = le32_to_cpu(__raw_readl(__io(p))); __v; })
+#define inb(p)	({ __u8 __v = __raw_readb(__io(p)); __v; })
+#define inw(p)	({ __u16 __v = le16_to_cpu((__force __le16) \
+			__raw_readw(__io(p))); __v; })
+#define inl(p)	({ __u32 __v = le32_to_cpu((__force __le32) \
+			__raw_readl(__io(p))); __v; })
 
 #define outsb(p,d,l)		__raw_writesb(__io(p),d,l)
 #define outsw(p,d,l)		__raw_writesw(__io(p),d,l)
@@ -149,9 +153,11 @@
  * IO port primitives for more information.
  */
 #ifdef __mem_pci
-#define readb(c) ({ unsigned int __v = __raw_readb(__mem_pci(c)); __v; })
-#define readw(c) ({ unsigned int __v = le16_to_cpu(__raw_readw(__mem_pci(c))); __v; })
-#define readl(c) ({ unsigned int __v = le32_to_cpu(__raw_readl(__mem_pci(c))); __v; })
+#define readb(c) ({ __u8  __v = __raw_readb(__mem_pci(c)); __v; })
+#define readw(c) ({ __u16 __v = le16_to_cpu((__force __le16) \
+					__raw_readw(__mem_pci(c))); __v; })
+#define readl(c) ({ __u32 __v = le32_to_cpu((__force __le32) \
+					__raw_readl(__mem_pci(c))); __v; })
 #define readb_relaxed(addr) readb(addr)
 #define readw_relaxed(addr) readw(addr)
 #define readl_relaxed(addr) readl(addr)
@@ -161,8 +167,10 @@
 #define readsl(p,d,l)		__raw_readsl(__mem_pci(p),d,l)
 
 #define writeb(v,c)		__raw_writeb(v,__mem_pci(c))
-#define writew(v,c)		__raw_writew(cpu_to_le16(v),__mem_pci(c))
-#define writel(v,c)		__raw_writel(cpu_to_le32(v),__mem_pci(c))
+#define writew(v,c)		__raw_writew((__force __u16) \
+					cpu_to_le16(v),__mem_pci(c))
+#define writel(v,c)		__raw_writel((__force __u32) \
+					cpu_to_le32(v),__mem_pci(c))
 
 #define writesb(p,d,l)		__raw_writesb(__mem_pci(p),d,l)
 #define writesw(p,d,l)		__raw_writesw(__mem_pci(p),d,l)
diff --git a/include/asm-arm/unistd.h b/include/asm-arm/unistd.h
index a19ec09..ace2748 100644
--- a/include/asm-arm/unistd.h
+++ b/include/asm-arm/unistd.h
@@ -359,8 +359,7 @@
 #define __ARM_NR_cacheflush		(__ARM_NR_BASE+2)
 #define __ARM_NR_usr26			(__ARM_NR_BASE+3)
 #define __ARM_NR_usr32			(__ARM_NR_BASE+4)
-
-#define __ARM_NR_set_tls		(__ARM_NR_BASE+0x800)
+#define __ARM_NR_set_tls		(__ARM_NR_BASE+5)
 
 #define __sys2(x) #x
 #define __sys1(x) __sys2(x)
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 910eca3..f21af06 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -103,6 +103,7 @@
 #define CPUFREQ_PRECHANGE	(0)
 #define CPUFREQ_POSTCHANGE	(1)
 #define CPUFREQ_RESUMECHANGE	(8)
+#define CPUFREQ_SUSPENDCHANGE	(9)
 
 struct cpufreq_freqs {
 	unsigned int cpu;	/* cpu nr */
@@ -200,6 +201,7 @@
 
 	/* optional */
 	int	(*exit)		(struct cpufreq_policy *policy);
+	int	(*suspend)	(struct cpufreq_policy *policy, u32 state);
 	int	(*resume)	(struct cpufreq_policy *policy);
 	struct freq_attr	**attr;
 };
@@ -211,7 +213,8 @@
 #define CPUFREQ_CONST_LOOPS 	0x02	/* loops_per_jiffy or other kernel
 					 * "constants" aren't affected by
 					 * frequency transitions */
-
+#define CPUFREQ_PM_NO_WARN	0x04	/* don't warn on suspend/resume speed
+					 * mismatches */
 
 int cpufreq_register_driver(struct cpufreq_driver *driver_data);
 int cpufreq_unregister_driver(struct cpufreq_driver *driver_data);
diff --git a/kernel/exit.c b/kernel/exit.c
index 39d35935..93851bc 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -517,8 +517,6 @@
 	 */
 	BUG_ON(p == reaper || reaper->exit_state >= EXIT_ZOMBIE);
 	p->real_parent = reaper;
-	if (p->parent == p->real_parent)
-		BUG();
 }
 
 static inline void reparent_thread(task_t *p, task_t *father, int traced)