Merge branches 'x86/cleanups', 'x86/fpu' and 'x86/urgent' into x86/core
diff --git a/Documentation/ABI/testing/sysfs-class-uwb_rc b/Documentation/ABI/testing/sysfs-class-uwb_rc
index a0d18db..6a5fd07 100644
--- a/Documentation/ABI/testing/sysfs-class-uwb_rc
+++ b/Documentation/ABI/testing/sysfs-class-uwb_rc
@@ -32,14 +32,16 @@
 Description:
                 Write:
 
-                <channel> [<bpst offset>]
+                <channel>
 
-                to start beaconing on a specific channel, or stop
-                beaconing if <channel> is -1.  Valid channels depends
-                on the radio controller's supported band groups.
+                to force a specific channel to be used when beaconing,
+                or, if <channel> is -1, to prohibit beaconing.  If
+                <channel> is 0, then the default channel selection
+                algorithm will be used.  Valid channels depend on the
+                radio controller's supported band groups.
 
-                <bpst offset> may be used to try and join a specific
-                beacon group if more than one was found during a scan.
+                Reading returns the currently active channel, or -1 if
+                the radio controller is not beaconing.
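+
+                For example (the controller name uwb0 is illustrative),
+                forcing channel 13 and reading it back:
+
+                    echo 13 > /sys/class/uwb_rc/uwb0/beacon
+                    cat /sys/class/uwb_rc/uwb0/beacon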
 
 What:           /sys/class/uwb_rc/uwbN/scan
 Date:           July 2008
diff --git a/Documentation/cpu-hotplug.txt b/Documentation/cpu-hotplug.txt
index 94bbc27..9d620c1 100644
--- a/Documentation/cpu-hotplug.txt
+++ b/Documentation/cpu-hotplug.txt
@@ -50,16 +50,17 @@
   			cpu_possible_map = cpu_present_map + additional_cpus
 
 (*) Option valid only for following architectures
-- x86_64, ia64
+- ia64
 
-ia64 and x86_64 use the number of disabled local apics in ACPI tables MADT
-to determine the number of potentially hot-pluggable cpus. The implementation
-should only rely on this to count the # of cpus, but *MUST* not rely on the
-apicid values in those tables for disabled apics. In the event BIOS doesn't
-mark such hot-pluggable cpus as disabled entries, one could use this
-parameter "additional_cpus=x" to represent those cpus in the cpu_possible_map.
+ia64 uses the number of disabled local apics in ACPI tables MADT to
+determine the number of potentially hot-pluggable cpus. The implementation
+should only rely on this to count the # of cpus, but *MUST* not rely
+on the apicid values in those tables for disabled apics. In the event
+BIOS doesn't mark such hot-pluggable cpus as disabled entries, one could
+use this parameter "additional_cpus=x" to represent those cpus in the
+cpu_possible_map.
 
-possible_cpus=n		[s390 only] use this to set hotpluggable cpus.
+possible_cpus=n		[s390,x86_64] use this to set hotpluggable cpus.
 			This option sets possible_cpus bits in
 			cpu_possible_map. Thus keeping the numbers of bits set
 			constant even if the machine gets rebooted.
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index dc7c681..df18d87 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -310,15 +310,6 @@
 
 ---------------------------
 
-What: ide-scsi (BLK_DEV_IDESCSI)
-When: 2.6.29
-Why:  The 2.6 kernel supports direct writing to ide CD drives, which
-      eliminates the need for ide-scsi. The new method is more
-      efficient in every way.
-Who:  FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
-
----------------------------
-
 What:	i2c_attach_client(), i2c_detach_client(), i2c_driver->detach_client()
 When:	2.6.29 (ideally) or 2.6.30 (more likely)
 Why:	Deprecated by the new (standard) device driver binding model. Use
diff --git a/Documentation/filesystems/devpts.txt b/Documentation/filesystems/devpts.txt
new file mode 100644
index 0000000..68dffd8
--- /dev/null
+++ b/Documentation/filesystems/devpts.txt
@@ -0,0 +1,132 @@
+
+To support containers, we now allow multiple instances of the devpts
+filesystem, such that indices of ptys allocated in one instance are
+independent of indices allocated in other instances of devpts.
+
+To preserve backward compatibility, this support for multiple instances is
+enabled only if:
+
+	- CONFIG_DEVPTS_MULTIPLE_INSTANCES=y, and
+	- '-o newinstance' mount option is specified while mounting devpts
+
+IOW, devpts now supports both single-instance and multi-instance semantics.
+
+If CONFIG_DEVPTS_MULTIPLE_INSTANCES=n, there is no change in behavior; this
+is referred to as the "legacy" mode. In this mode, the new mount options
+(-o newinstance and -o ptmxmode) will be ignored with a 'bogus option' message
+on the console.
+
+If CONFIG_DEVPTS_MULTIPLE_INSTANCES=y and devpts is mounted without the
+'newinstance' option (as in current start-up scripts), the new mount binds
+to the initial kernel mount of devpts. This mode is referred to as the
+'single-instance' mode and the current, single-instance semantics are
+preserved, i.e. PTYs are common across the system.
+
+The only difference between this single-instance mode and the legacy mode
+is the presence of a new '/dev/pts/ptmx' node with permissions 0000, which
+can safely be ignored.
+
+If CONFIG_DEVPTS_MULTIPLE_INSTANCES=y and the 'newinstance' option is
+specified, the mount is considered to be in the multi-instance mode and a new
+instance of the devpts fs is created. Any ptys created in this instance are
+independent of ptys in other instances of devpts. Like in the single-instance
+mode, the /dev/pts/ptmx node is present. To effectively use the multi-instance
+mode, opens of /dev/ptmx must be redirected to '/dev/pts/ptmx' using a symlink
+or bind-mount.
+
+Eg: A container startup script could do the following:
+
+	$ chmod 0666 /dev/pts/ptmx
+	$ rm /dev/ptmx
+	$ ln -s pts/ptmx /dev/ptmx
+	$ ns_exec -cm /bin/bash
+
+	# We are now in new container
+
+	$ umount /dev/pts
+	$ mount -t devpts -o newinstance lxcpts /dev/pts
+	$ sshd -p 1234
+
+where 'ns_exec -cm /bin/bash' calls clone() with the CLONE_NEWNS flag and execs
+/bin/bash in the child process.  A pty created by the sshd is not visible in
+the original mount of /dev/pts.
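+
+A minimal sketch of what such a helper might do (this assumes the glibc
+clone() wrapper; error handling omitted):
+
+	#define _GNU_SOURCE
+	#include <sched.h>
+	#include <signal.h>
+	#include <unistd.h>
+	#include <sys/wait.h>
+
+	static char stack[1024 * 1024];
+
+	static int child(void *arg)
+	{
+		char *argv[] = { "/bin/bash", NULL };
+		/* Runs with its own copy of the mount namespace. */
+		execv(argv[0], argv);
+		return 1;
+	}
+
+	int main(void)
+	{
+		/* CLONE_NEWNS requires CAP_SYS_ADMIN, i.e. run as root. */
+		pid_t pid = clone(child, stack + sizeof(stack),
+				  CLONE_NEWNS | SIGCHLD, NULL);
+		waitpid(pid, NULL, 0);
+		return 0;
+	}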
+
+User-space changes
+------------------
+
+In multi-instance mode (i.e. the '-o newinstance' mount option is specified at
+least once), the following user-space issues should be noted.
+
+1. If -o newinstance mount option is never used, /dev/pts/ptmx can be ignored
+   and no change is needed to system-startup scripts.
+
+2. To effectively use multi-instance mode (i.e. -o newinstance is specified)
+   administrators or startup scripts should "redirect" open of /dev/ptmx to
+   /dev/pts/ptmx using either a bind mount or symlink.
+
+	$ mount -t devpts -o newinstance devpts /dev/pts
+
+   followed by either
+
+	$ rm /dev/ptmx
+	$ ln -s pts/ptmx /dev/ptmx
+	$ chmod 666 /dev/pts/ptmx
+   or
+	$ mount -o bind /dev/pts/ptmx /dev/ptmx
+
+3. The '/dev/ptmx -> pts/ptmx' symlink is the preferred method since it
+   enables better error-reporting and treats both single-instance and
+   multi-instance mounts similarly.
+
+   But this method requires that system-startup scripts set the mode of
+   /dev/pts/ptmx correctly (default mode is 0000). The scripts can set the
+   mode by either
+
+   	- adding the ptmxmode mount option to the devpts entry in /etc/fstab, or
+	- using 'chmod 0666 /dev/pts/ptmx'
+
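+   For instance, an fstab entry along these lines (the exact mode is only an
+   example) would set the node's mode at mount time:
+
+	devpts  /dev/pts  devpts  ptmxmode=0666  0  0
+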
+4. If multi-instance mode mount is needed for containers, but the system
+   startup scripts have not yet been updated, container-startup scripts
+   should bind-mount /dev/pts/ptmx onto /dev/ptmx to avoid breaking single-
+   instance mounts.
+
+   Or, in general, container-startup scripts should use:
+
+	mount -t devpts -o newinstance -o ptmxmode=0666 devpts /dev/pts
+	if [ ! -L /dev/ptmx ]; then
+		mount -o bind /dev/pts/ptmx /dev/ptmx
+	fi
+
+   When all devpts mounts are multi-instance, /dev/ptmx can permanently be
+   a symlink to pts/ptmx and the bind mount can be ignored.
+
+5. A multi-instance mount that is not accompanied by the /dev/ptmx to
+   /dev/pts/ptmx redirection would result in an unusable/unreachable pty.
+
+	mount -t devpts -o newinstance lxcpts /dev/pts
+
+   immediately followed by:
+
+	open("/dev/ptmx")
+
+    would create a pty, say /dev/pts/7, in the initial kernel mount.
+    But /dev/pts/7 would be invisible in the new mount.
+
+6. The permissions for the /dev/pts/ptmx node should be specified when mounting
+   /dev/pts, using the '-o ptmxmode=%o' mount option (default is 0000).
+
+	mount -t devpts -o newinstance -o ptmxmode=0644 devpts /dev/pts
+
+   The permissions can later be changed as usual with 'chmod'.
+
+	chmod 666 /dev/pts/ptmx
+
+7. A mount of devpts without the 'newinstance' option results in binding to
+   the initial kernel mount.  This behavior, while preserving legacy semantics,
+   does not provide strict isolation in a container environment, i.e. by
+   mounting devpts without the 'newinstance' option, a container could
+   get visibility into the 'host' or root container's devpts.
+   
+   To work around this and have strict isolation, all mounts of devpts,
+   including the mount in the root container, should use the newinstance
+   option.
diff --git a/Documentation/usb/wusb-cbaf b/Documentation/usb/wusb-cbaf
index 2e78b70..426ddaa 100644
--- a/Documentation/usb/wusb-cbaf
+++ b/Documentation/usb/wusb-cbaf
@@ -80,12 +80,6 @@
     start)
         for dev in ${2:-$hdevs}
           do
-          uwb_rc=$(readlink -f $dev/uwb_rc)
-          if cat $uwb_rc/beacon | grep -q -- "-1"
-              then
-              echo 13 0 > $uwb_rc/beacon
-              echo I: started beaconing on ch 13 on $(basename $uwb_rc) >&2
-          fi
           echo $host_CHID > $dev/wusb_chid
           echo I: started host $(basename $dev) >&2
         done
@@ -95,9 +89,6 @@
           do
           echo 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 > $dev/wusb_chid
           echo I: stopped host $(basename $dev) >&2
-          uwb_rc=$(readlink -f $dev/uwb_rc)
-          echo -1 | cat > $uwb_rc/beacon
-          echo I: stopped beaconing on $(basename $uwb_rc) >&2
         done
         ;;
     set-chid)
diff --git a/MAINTAINERS b/MAINTAINERS
index ceb32ee..befacf0 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2049,6 +2049,12 @@
 W:	http://artax.karlin.mff.cuni.cz/~mikulas/vyplody/hpfs/index-e.cgi
 S:	Maintained
 
+HSO	3G Modem Driver (hso.c)
+P:	Denis Joseph Barrow
+M:	d.barow@option.com
+W:	http://www.pharscape.org
+S:	Maintained
+
 HTCPEN TOUCHSCREEN DRIVER
 P:	Pau Oliva Fora
 M:	pof@eslack.org
@@ -2146,11 +2152,6 @@
 L:	linux-kernel@vger.kernel.org
 S:	Maintained
 
-IDE-SCSI DRIVER
-L:	linux-ide@vger.kernel.org
-L:	linux-scsi@vger.kernel.org
-S:	Orphan
-
 IDLE-I7300
 P:	Andy Henroid
 M:	andrew.d.henroid@intel.com
@@ -2541,8 +2542,6 @@
 S:	Supported
 
 KERNEL VIRTUAL MACHINE For Itanium (KVM/IA64)
-P:	Anthony Xu
-M:	anthony.xu@intel.com
 P:	Xiantao Zhang
 M:	xiantao.zhang@intel.com
 L:	kvm-ia64@vger.kernel.org
@@ -2635,13 +2634,13 @@
 S:	Maintained
 
 LINUX FOR POWERPC (32-BIT AND 64-BIT)
-P:	Paul Mackerras
-M:	paulus@samba.org
 P:	Benjamin Herrenschmidt
 M:	benh@kernel.crashing.org
+P:	Paul Mackerras
+M:	paulus@samba.org
 W:	http://www.penguinppc.org/
 L:	linuxppc-dev@ozlabs.org
-T:	git kernel.org:/pub/scm/linux/kernel/git/paulus/powerpc.git
+T:	git kernel.org:/pub/scm/linux/kernel/git/benh/powerpc.git
 S:	Supported
 
 LINUX FOR POWER MACINTOSH
diff --git a/arch/alpha/include/asm/smp.h b/arch/alpha/include/asm/smp.h
index 544c69a..547e909 100644
--- a/arch/alpha/include/asm/smp.h
+++ b/arch/alpha/include/asm/smp.h
@@ -45,7 +45,6 @@
 #define raw_smp_processor_id()	(current_thread_info()->cpu)
 
 extern int smp_num_cpus;
-#define cpu_possible_map	cpu_present_map
 
 extern void arch_send_call_function_single_ipi(int cpu);
 extern void arch_send_call_function_ipi(cpumask_t mask);
diff --git a/arch/alpha/kernel/irq.c b/arch/alpha/kernel/irq.c
index c626a82..d0f1620 100644
--- a/arch/alpha/kernel/irq.c
+++ b/arch/alpha/kernel/irq.c
@@ -55,7 +55,7 @@
 	last_cpu = cpu;
 
 	irq_desc[irq].affinity = cpumask_of_cpu(cpu);
-	irq_desc[irq].chip->set_affinity(irq, cpumask_of_cpu(cpu));
+	irq_desc[irq].chip->set_affinity(irq, cpumask_of(cpu));
 	return 0;
 }
 #endif /* CONFIG_SMP */
diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c
index 351407e..f238370 100644
--- a/arch/alpha/kernel/process.c
+++ b/arch/alpha/kernel/process.c
@@ -94,6 +94,7 @@
 		flags |= 0x00040000UL; /* "remain halted" */
 		*pflags = flags;
 		cpu_clear(cpuid, cpu_present_map);
+		cpu_clear(cpuid, cpu_possible_map);
 		halt();
 	}
 #endif
@@ -120,6 +121,7 @@
 #ifdef CONFIG_SMP
 	/* Wait for the secondaries to halt. */
 	cpu_clear(boot_cpuid, cpu_present_map);
+	cpu_clear(boot_cpuid, cpu_possible_map);
 	while (cpus_weight(cpu_present_map))
 		barrier();
 #endif
diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c
index cf7da10..d953e51 100644
--- a/arch/alpha/kernel/smp.c
+++ b/arch/alpha/kernel/smp.c
@@ -70,11 +70,6 @@
 /* Set to a secondary's cpuid when it comes online.  */
 static int smp_secondary_alive __devinitdata = 0;
 
-/* Which cpus ids came online.  */
-cpumask_t cpu_online_map;
-
-EXPORT_SYMBOL(cpu_online_map);
-
 int smp_num_probed;		/* Internal processor count */
 int smp_num_cpus = 1;		/* Number that came online.  */
 EXPORT_SYMBOL(smp_num_cpus);
@@ -440,6 +435,7 @@
 				((char *)cpubase + i*hwrpb->processor_size);
 			if ((cpu->flags & 0x1cc) == 0x1cc) {
 				smp_num_probed++;
+				cpu_set(i, cpu_possible_map);
 				cpu_set(i, cpu_present_map);
 				cpu->pal_revision = boot_cpu_palrev;
 			}
@@ -473,6 +469,7 @@
 
 	/* Nothing to do on a UP box, or when told not to.  */
 	if (smp_num_probed == 1 || max_cpus == 0) {
+		cpu_possible_map = cpumask_of_cpu(boot_cpuid);
 		cpu_present_map = cpumask_of_cpu(boot_cpuid);
 		printk(KERN_INFO "SMP mode deactivated.\n");
 		return;
diff --git a/arch/alpha/kernel/sys_dp264.c b/arch/alpha/kernel/sys_dp264.c
index c71b0fd..ab44c16 100644
--- a/arch/alpha/kernel/sys_dp264.c
+++ b/arch/alpha/kernel/sys_dp264.c
@@ -177,19 +177,19 @@
 }
 
 static void
-dp264_set_affinity(unsigned int irq, cpumask_t affinity)
+dp264_set_affinity(unsigned int irq, const struct cpumask *affinity)
 { 
 	spin_lock(&dp264_irq_lock);
-	cpu_set_irq_affinity(irq, affinity);
+	cpu_set_irq_affinity(irq, *affinity);
 	tsunami_update_irq_hw(cached_irq_mask);
 	spin_unlock(&dp264_irq_lock);
 }
 
 static void
-clipper_set_affinity(unsigned int irq, cpumask_t affinity)
+clipper_set_affinity(unsigned int irq, const struct cpumask *affinity)
 { 
 	spin_lock(&dp264_irq_lock);
-	cpu_set_irq_affinity(irq - 16, affinity);
+	cpu_set_irq_affinity(irq - 16, *affinity);
 	tsunami_update_irq_hw(cached_irq_mask);
 	spin_unlock(&dp264_irq_lock);
 }
diff --git a/arch/alpha/kernel/sys_titan.c b/arch/alpha/kernel/sys_titan.c
index 52c91cc..27f840a 100644
--- a/arch/alpha/kernel/sys_titan.c
+++ b/arch/alpha/kernel/sys_titan.c
@@ -158,10 +158,10 @@
 }
 
 static void
-titan_set_irq_affinity(unsigned int irq, cpumask_t affinity)
+titan_set_irq_affinity(unsigned int irq, const struct cpumask *affinity)
 { 
 	spin_lock(&titan_irq_lock);
-	titan_cpu_set_irq_affinity(irq - 16, affinity);
+	titan_cpu_set_irq_affinity(irq - 16, *affinity);
 	titan_update_irq_hw(titan_cached_irq_mask);
 	spin_unlock(&titan_irq_lock);
 }
diff --git a/arch/arm/common/gic.c b/arch/arm/common/gic.c
index 7fc9860..c6884ba 100644
--- a/arch/arm/common/gic.c
+++ b/arch/arm/common/gic.c
@@ -109,11 +109,11 @@
 }
 
 #ifdef CONFIG_SMP
-static void gic_set_cpu(unsigned int irq, cpumask_t mask_val)
+static void gic_set_cpu(unsigned int irq, const struct cpumask *mask_val)
 {
 	void __iomem *reg = gic_dist_base(irq) + GIC_DIST_TARGET + (gic_irq(irq) & ~3);
 	unsigned int shift = (irq % 4) * 8;
-	unsigned int cpu = first_cpu(mask_val);
+	unsigned int cpu = cpumask_first(mask_val);
 	u32 val;
 
 	spin_lock(&irq_controller_lock);
diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c
index 2f3eb79..7141cee 100644
--- a/arch/arm/kernel/irq.c
+++ b/arch/arm/kernel/irq.c
@@ -174,7 +174,7 @@
 	pr_debug("IRQ%u: moving from cpu%u to cpu%u\n", irq, desc->cpu, cpu);
 
 	spin_lock_irq(&desc->lock);
-	desc->chip->set_affinity(irq, cpumask_of_cpu(cpu));
+	desc->chip->set_affinity(irq, cpumask_of(cpu));
 	spin_unlock_irq(&desc->lock);
 }
 
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 019237d..55fa7ff 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -34,16 +34,6 @@
 #include <asm/ptrace.h>
 
 /*
- * bitmask of present and online CPUs.
- * The present bitmask indicates that the CPU is physically present.
- * The online bitmask indicates that the CPU is up and running.
- */
-cpumask_t cpu_possible_map;
-EXPORT_SYMBOL(cpu_possible_map);
-cpumask_t cpu_online_map;
-EXPORT_SYMBOL(cpu_online_map);
-
-/*
  * as from 2.5, kernels no longer have an init_tasks structure
  * so we need some other way of telling a new secondary core
  * where to place its SVC stack
diff --git a/arch/arm/mach-at91/at91rm9200_time.c b/arch/arm/mach-at91/at91rm9200_time.c
index d140eae..1ff1bda 100644
--- a/arch/arm/mach-at91/at91rm9200_time.c
+++ b/arch/arm/mach-at91/at91rm9200_time.c
@@ -178,7 +178,6 @@
 	.features	= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
 	.shift		= 32,
 	.rating		= 150,
-	.cpumask	= CPU_MASK_CPU0,
 	.set_next_event	= clkevt32k_next_event,
 	.set_mode	= clkevt32k_mode,
 };
@@ -206,7 +205,7 @@
 	clkevt.mult = div_sc(AT91_SLOW_CLOCK, NSEC_PER_SEC, clkevt.shift);
 	clkevt.max_delta_ns = clockevent_delta2ns(AT91_ST_ALMV, &clkevt);
 	clkevt.min_delta_ns = clockevent_delta2ns(2, &clkevt) + 1;
-	clkevt.cpumask = cpumask_of_cpu(0);
+	clkevt.cpumask = cpumask_of(0);
 	clockevents_register_device(&clkevt);
 
 	/* register clocksource */
diff --git a/arch/arm/mach-at91/at91sam926x_time.c b/arch/arm/mach-at91/at91sam926x_time.c
index 122fd77..b63e1d5 100644
--- a/arch/arm/mach-at91/at91sam926x_time.c
+++ b/arch/arm/mach-at91/at91sam926x_time.c
@@ -91,7 +91,6 @@
 	.features	= CLOCK_EVT_FEAT_PERIODIC,
 	.shift		= 32,
 	.rating		= 100,
-	.cpumask	= CPU_MASK_CPU0,
 	.set_mode	= pit_clkevt_mode,
 };
 
@@ -173,6 +172,7 @@
 
 	/* Set up and register clockevents */
 	pit_clkevt.mult = div_sc(pit_rate, NSEC_PER_SEC, pit_clkevt.shift);
+	pit_clkevt.cpumask = cpumask_of(0);
 	clockevents_register_device(&pit_clkevt);
 }
 
diff --git a/arch/arm/mach-davinci/time.c b/arch/arm/mach-davinci/time.c
index 3b9a296..f8bcd29 100644
--- a/arch/arm/mach-davinci/time.c
+++ b/arch/arm/mach-davinci/time.c
@@ -322,7 +322,7 @@
 	clockevent_davinci.min_delta_ns =
 		clockevent_delta2ns(1, &clockevent_davinci);
 
-	clockevent_davinci.cpumask = cpumask_of_cpu(0);
+	clockevent_davinci.cpumask = cpumask_of(0);
 	clockevents_register_device(&clockevent_davinci);
 }
 
diff --git a/arch/arm/mach-imx/time.c b/arch/arm/mach-imx/time.c
index a11765f..aff0ebc 100644
--- a/arch/arm/mach-imx/time.c
+++ b/arch/arm/mach-imx/time.c
@@ -184,7 +184,7 @@
 	clockevent_imx.min_delta_ns =
 		clockevent_delta2ns(0xf, &clockevent_imx);
 
-	clockevent_imx.cpumask = cpumask_of_cpu(0);
+	clockevent_imx.cpumask = cpumask_of(0);
 
 	clockevents_register_device(&clockevent_imx);
 
diff --git a/arch/arm/mach-ixp4xx/common.c b/arch/arm/mach-ixp4xx/common.c
index 7766f46..f4656d2 100644
--- a/arch/arm/mach-ixp4xx/common.c
+++ b/arch/arm/mach-ixp4xx/common.c
@@ -487,7 +487,7 @@
 		clockevent_delta2ns(0xfffffffe, &clockevent_ixp4xx);
 	clockevent_ixp4xx.min_delta_ns =
 		clockevent_delta2ns(0xf, &clockevent_ixp4xx);
-	clockevent_ixp4xx.cpumask = cpumask_of_cpu(0);
+	clockevent_ixp4xx.cpumask = cpumask_of(0);
 
 	clockevents_register_device(&clockevent_ixp4xx);
 	return 0;
diff --git a/arch/arm/mach-msm/timer.c b/arch/arm/mach-msm/timer.c
index 345a14c..444d9c0 100644
--- a/arch/arm/mach-msm/timer.c
+++ b/arch/arm/mach-msm/timer.c
@@ -182,7 +182,7 @@
 			clockevent_delta2ns(0xf0000000 >> clock->shift, ce);
 		/* 4 gets rounded down to 3 */
 		ce->min_delta_ns = clockevent_delta2ns(4, ce);
-		ce->cpumask = cpumask_of_cpu(0);
+		ce->cpumask = cpumask_of(0);
 
 		cs->mult = clocksource_hz2mult(clock->freq, cs->shift);
 		res = clocksource_register(cs);
diff --git a/arch/arm/mach-ns9xxx/time-ns9360.c b/arch/arm/mach-ns9xxx/time-ns9360.c
index a63424d..41df697 100644
--- a/arch/arm/mach-ns9xxx/time-ns9360.c
+++ b/arch/arm/mach-ns9xxx/time-ns9360.c
@@ -173,7 +173,7 @@
 	ns9360_clockevent_device.min_delta_ns =
 		clockevent_delta2ns(1, &ns9360_clockevent_device);
 
-	ns9360_clockevent_device.cpumask = cpumask_of_cpu(0);
+	ns9360_clockevent_device.cpumask = cpumask_of(0);
 	clockevents_register_device(&ns9360_clockevent_device);
 
 	setup_irq(IRQ_NS9360_TIMER0 + TIMER_CLOCKEVENT,
diff --git a/arch/arm/mach-omap1/time.c b/arch/arm/mach-omap1/time.c
index 2cf7e32..495a32c 100644
--- a/arch/arm/mach-omap1/time.c
+++ b/arch/arm/mach-omap1/time.c
@@ -173,7 +173,7 @@
 	clockevent_mpu_timer1.min_delta_ns =
 		clockevent_delta2ns(1, &clockevent_mpu_timer1);
 
-	clockevent_mpu_timer1.cpumask = cpumask_of_cpu(0);
+	clockevent_mpu_timer1.cpumask = cpumask_of(0);
 	clockevents_register_device(&clockevent_mpu_timer1);
 }
 
diff --git a/arch/arm/mach-omap1/timer32k.c b/arch/arm/mach-omap1/timer32k.c
index 705367e..fd3f739 100644
--- a/arch/arm/mach-omap1/timer32k.c
+++ b/arch/arm/mach-omap1/timer32k.c
@@ -187,7 +187,7 @@
 	clockevent_32k_timer.min_delta_ns =
 		clockevent_delta2ns(1, &clockevent_32k_timer);
 
-	clockevent_32k_timer.cpumask = cpumask_of_cpu(0);
+	clockevent_32k_timer.cpumask = cpumask_of(0);
 	clockevents_register_device(&clockevent_32k_timer);
 }
 
diff --git a/arch/arm/mach-omap2/timer-gp.c b/arch/arm/mach-omap2/timer-gp.c
index 589393b..ae60363 100644
--- a/arch/arm/mach-omap2/timer-gp.c
+++ b/arch/arm/mach-omap2/timer-gp.c
@@ -120,7 +120,7 @@
 	clockevent_gpt.min_delta_ns =
 		clockevent_delta2ns(1, &clockevent_gpt);
 
-	clockevent_gpt.cpumask = cpumask_of_cpu(0);
+	clockevent_gpt.cpumask = cpumask_of(0);
 	clockevents_register_device(&clockevent_gpt);
 }
 
diff --git a/arch/arm/mach-pxa/time.c b/arch/arm/mach-pxa/time.c
index 0016241..95656a7 100644
--- a/arch/arm/mach-pxa/time.c
+++ b/arch/arm/mach-pxa/time.c
@@ -122,7 +122,6 @@
 	.features	= CLOCK_EVT_FEAT_ONESHOT,
 	.shift		= 32,
 	.rating		= 200,
-	.cpumask	= CPU_MASK_CPU0,
 	.set_next_event	= pxa_osmr0_set_next_event,
 	.set_mode	= pxa_osmr0_set_mode,
 };
@@ -163,6 +162,7 @@
 		clockevent_delta2ns(0x7fffffff, &ckevt_pxa_osmr0);
 	ckevt_pxa_osmr0.min_delta_ns =
 		clockevent_delta2ns(MIN_OSCR_DELTA * 2, &ckevt_pxa_osmr0) + 1;
+	ckevt_pxa_osmr0.cpumask = cpumask_of(0);
 
 	cksrc_pxa_oscr0.mult =
 		clocksource_hz2mult(clock_tick_rate, cksrc_pxa_oscr0.shift);
diff --git a/arch/arm/mach-realview/core.c b/arch/arm/mach-realview/core.c
index 5f1d559..bd2aa4f 100644
--- a/arch/arm/mach-realview/core.c
+++ b/arch/arm/mach-realview/core.c
@@ -624,7 +624,7 @@
 	.set_mode	= timer_set_mode,
 	.set_next_event	= timer_set_next_event,
 	.rating		= 300,
-	.cpumask	= CPU_MASK_ALL,
+	.cpumask	= cpu_all_mask,
 };
 
 static void __init realview_clockevents_init(unsigned int timer_irq)
diff --git a/arch/arm/mach-realview/localtimer.c b/arch/arm/mach-realview/localtimer.c
index 9019ef2..67d6d9c 100644
--- a/arch/arm/mach-realview/localtimer.c
+++ b/arch/arm/mach-realview/localtimer.c
@@ -154,7 +154,7 @@
 	clk->set_mode		= local_timer_set_mode;
 	clk->set_next_event	= local_timer_set_next_event;
 	clk->irq		= IRQ_LOCALTIMER;
-	clk->cpumask		= cpumask_of_cpu(cpu);
+	clk->cpumask		= cpumask_of(cpu);
 	clk->shift		= 20;
 	clk->mult		= div_sc(mpcore_timer_rate, NSEC_PER_SEC, clk->shift);
 	clk->max_delta_ns	= clockevent_delta2ns(0xffffffff, clk);
@@ -193,7 +193,7 @@
 	clk->rating		= 200;
 	clk->set_mode		= dummy_timer_set_mode;
 	clk->broadcast		= smp_timer_broadcast;
-	clk->cpumask		= cpumask_of_cpu(cpu);
+	clk->cpumask		= cpumask_of(cpu);
 
 	clockevents_register_device(clk);
 }
diff --git a/arch/arm/mach-sa1100/time.c b/arch/arm/mach-sa1100/time.c
index 8c5e727..711c029 100644
--- a/arch/arm/mach-sa1100/time.c
+++ b/arch/arm/mach-sa1100/time.c
@@ -73,7 +73,6 @@
 	.features	= CLOCK_EVT_FEAT_ONESHOT,
 	.shift		= 32,
 	.rating		= 200,
-	.cpumask	= CPU_MASK_CPU0,
 	.set_next_event	= sa1100_osmr0_set_next_event,
 	.set_mode	= sa1100_osmr0_set_mode,
 };
@@ -110,6 +109,7 @@
 		clockevent_delta2ns(0x7fffffff, &ckevt_sa1100_osmr0);
 	ckevt_sa1100_osmr0.min_delta_ns =
 		clockevent_delta2ns(MIN_OSCR_DELTA * 2, &ckevt_sa1100_osmr0) + 1;
+	ckevt_sa1100_osmr0.cpumask = cpumask_of(0);
 
 	cksrc_sa1100_oscr.mult =
 		clocksource_hz2mult(CLOCK_TICK_RATE, cksrc_sa1100_oscr.shift);
diff --git a/arch/arm/mach-versatile/core.c b/arch/arm/mach-versatile/core.c
index df25aa1..1c43494 100644
--- a/arch/arm/mach-versatile/core.c
+++ b/arch/arm/mach-versatile/core.c
@@ -1005,7 +1005,7 @@
 	timer0_clockevent.min_delta_ns =
 		clockevent_delta2ns(0xf, &timer0_clockevent);
 
-	timer0_clockevent.cpumask = cpumask_of_cpu(0);
+	timer0_clockevent.cpumask = cpumask_of(0);
 	clockevents_register_device(&timer0_clockevent);
 }
 
diff --git a/arch/arm/oprofile/op_model_mpcore.c b/arch/arm/oprofile/op_model_mpcore.c
index 4de366e..6d6bd58 100644
--- a/arch/arm/oprofile/op_model_mpcore.c
+++ b/arch/arm/oprofile/op_model_mpcore.c
@@ -260,10 +260,10 @@
 static void em_route_irq(int irq, unsigned int cpu)
 {
 	struct irq_desc *desc = irq_desc + irq;
-	cpumask_t mask = cpumask_of_cpu(cpu);
+	const struct cpumask *mask = cpumask_of(cpu);
 
 	spin_lock_irq(&desc->lock);
-	desc->affinity = mask;
+	desc->affinity = *mask;
 	desc->chip->set_affinity(irq, mask);
 	spin_unlock_irq(&desc->lock);
 }
diff --git a/arch/arm/plat-mxc/time.c b/arch/arm/plat-mxc/time.c
index fd28f51..758a129 100644
--- a/arch/arm/plat-mxc/time.c
+++ b/arch/arm/plat-mxc/time.c
@@ -190,7 +190,7 @@
 	clockevent_mxc.min_delta_ns =
 			clockevent_delta2ns(0xff, &clockevent_mxc);
 
-	clockevent_mxc.cpumask = cpumask_of_cpu(0);
+	clockevent_mxc.cpumask = cpumask_of(0);
 
 	clockevents_register_device(&clockevent_mxc);
 
diff --git a/arch/arm/plat-orion/time.c b/arch/arm/plat-orion/time.c
index 544d6b3..6fa2923 100644
--- a/arch/arm/plat-orion/time.c
+++ b/arch/arm/plat-orion/time.c
@@ -149,7 +149,6 @@
 	.features	= CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PERIODIC,
 	.shift		= 32,
 	.rating		= 300,
-	.cpumask	= CPU_MASK_CPU0,
 	.set_next_event	= orion_clkevt_next_event,
 	.set_mode	= orion_clkevt_mode,
 };
@@ -199,5 +198,6 @@
 	orion_clkevt.mult = div_sc(tclk, NSEC_PER_SEC, orion_clkevt.shift);
 	orion_clkevt.max_delta_ns = clockevent_delta2ns(0xfffffffe, &orion_clkevt);
 	orion_clkevt.min_delta_ns = clockevent_delta2ns(1, &orion_clkevt);
+	orion_clkevt.cpumask = cpumask_of(0);
 	clockevents_register_device(&orion_clkevt);
 }
diff --git a/arch/avr32/kernel/time.c b/arch/avr32/kernel/time.c
index 283481d..0ff46bf 100644
--- a/arch/avr32/kernel/time.c
+++ b/arch/avr32/kernel/time.c
@@ -106,7 +106,6 @@
 	.features	= CLOCK_EVT_FEAT_ONESHOT,
 	.shift		= 16,
 	.rating		= 50,
-	.cpumask	= CPU_MASK_CPU0,
 	.set_next_event	= comparator_next_event,
 	.set_mode	= comparator_mode,
 };
@@ -134,6 +133,7 @@
 	comparator.mult = div_sc(counter_hz, NSEC_PER_SEC, comparator.shift);
 	comparator.max_delta_ns = clockevent_delta2ns((u32)~0, &comparator);
 	comparator.min_delta_ns = clockevent_delta2ns(50, &comparator) + 1;
+	comparator.cpumask = cpumask_of(0);
 
 	sysreg_write(COMPARE, 0);
 	timer_irqaction.dev_id = &comparator;
diff --git a/arch/blackfin/kernel/time-ts.c b/arch/blackfin/kernel/time-ts.c
index e887efc..0ed2bad 100644
--- a/arch/blackfin/kernel/time-ts.c
+++ b/arch/blackfin/kernel/time-ts.c
@@ -162,7 +162,6 @@
 	.name		= "bfin_core_timer",
 	.features	= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
 	.shift		= 32,
-	.cpumask	= CPU_MASK_CPU0,
 	.set_next_event = bfin_timer_set_next_event,
 	.set_mode	= bfin_timer_set_mode,
 };
@@ -193,6 +192,7 @@
 	clockevent_bfin.mult = div_sc(timer_clk, NSEC_PER_SEC, clockevent_bfin.shift);
 	clockevent_bfin.max_delta_ns = clockevent_delta2ns(-1, &clockevent_bfin);
 	clockevent_bfin.min_delta_ns = clockevent_delta2ns(100, &clockevent_bfin);
+	clockevent_bfin.cpumask = cpumask_of(0);
 	clockevents_register_device(&clockevent_bfin);
 
 	return 0;
diff --git a/arch/cris/arch-v32/kernel/irq.c b/arch/cris/arch-v32/kernel/irq.c
index 173c141..295131f 100644
--- a/arch/cris/arch-v32/kernel/irq.c
+++ b/arch/cris/arch-v32/kernel/irq.c
@@ -325,11 +325,11 @@
 {
 }
 
-void set_affinity_crisv32_irq(unsigned int irq, cpumask_t dest)
+void set_affinity_crisv32_irq(unsigned int irq, const struct cpumask *dest)
 {
 	unsigned long flags;
 	spin_lock_irqsave(&irq_lock, flags);
-	irq_allocations[irq - FIRST_IRQ].mask = dest;
+	irq_allocations[irq - FIRST_IRQ].mask = *dest;
 	spin_unlock_irqrestore(&irq_lock, flags);
 }
 
diff --git a/arch/cris/arch-v32/kernel/smp.c b/arch/cris/arch-v32/kernel/smp.c
index 52e16c6..9dac173 100644
--- a/arch/cris/arch-v32/kernel/smp.c
+++ b/arch/cris/arch-v32/kernel/smp.c
@@ -29,11 +29,7 @@
 spinlock_t cris_atomic_locks[] = { [0 ... LOCK_COUNT - 1] = SPIN_LOCK_UNLOCKED};
 
 /* CPU masks */
-cpumask_t cpu_online_map = CPU_MASK_NONE;
-EXPORT_SYMBOL(cpu_online_map);
 cpumask_t phys_cpu_present_map = CPU_MASK_NONE;
-cpumask_t cpu_possible_map;
-EXPORT_SYMBOL(cpu_possible_map);
 EXPORT_SYMBOL(phys_cpu_present_map);
 
 /* Variables used during SMP boot */
diff --git a/arch/cris/include/asm/smp.h b/arch/cris/include/asm/smp.h
index dba33ab..c615a06 100644
--- a/arch/cris/include/asm/smp.h
+++ b/arch/cris/include/asm/smp.h
@@ -4,7 +4,6 @@
 #include <linux/cpumask.h>
 
 extern cpumask_t phys_cpu_present_map;
-extern cpumask_t cpu_possible_map;
 
 #define raw_smp_processor_id() (current_thread_info()->cpu)
 
diff --git a/arch/ia64/hp/sim/hpsim_irq.c b/arch/ia64/hp/sim/hpsim_irq.c
index c2f58ff..cc0a318 100644
--- a/arch/ia64/hp/sim/hpsim_irq.c
+++ b/arch/ia64/hp/sim/hpsim_irq.c
@@ -22,7 +22,7 @@
 }
 
 static void
-hpsim_set_affinity_noop (unsigned int a, cpumask_t b)
+hpsim_set_affinity_noop(unsigned int a, const struct cpumask *b)
 {
 }
 
diff --git a/arch/ia64/include/asm/kvm.h b/arch/ia64/include/asm/kvm.h
index f38472a..68aa6da 100644
--- a/arch/ia64/include/asm/kvm.h
+++ b/arch/ia64/include/asm/kvm.h
@@ -166,8 +166,6 @@
 };
 
 struct kvm_regs {
-	char *saved_guest;
-	char *saved_stack;
 	struct saved_vpd vpd;
 	/*Arch-regs*/
 	int mp_state;
@@ -200,6 +198,10 @@
 	unsigned long fp_psr;       /*used for lazy float register */
 	unsigned long saved_gp;
 	/*for phycial  emulation */
+
+	union context saved_guest;
+
+	unsigned long reserved[64];	/* for future use */
 };
 
 struct kvm_sregs {
diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
index c60d324..0560f3f 100644
--- a/arch/ia64/include/asm/kvm_host.h
+++ b/arch/ia64/include/asm/kvm_host.h
@@ -23,17 +23,6 @@
 #ifndef __ASM_KVM_HOST_H
 #define __ASM_KVM_HOST_H
 
-
-#include <linux/types.h>
-#include <linux/mm.h>
-#include <linux/kvm.h>
-#include <linux/kvm_para.h>
-#include <linux/kvm_types.h>
-
-#include <asm/pal.h>
-#include <asm/sal.h>
-
-#define KVM_MAX_VCPUS 4
 #define KVM_MEMORY_SLOTS 32
 /* memory slots that does not exposed to userspace */
 #define KVM_PRIVATE_MEM_SLOTS 4
@@ -50,70 +39,132 @@
 #define EXIT_REASON_EXTERNAL_INTERRUPT	6
 #define EXIT_REASON_IPI			7
 #define EXIT_REASON_PTC_G		8
+#define EXIT_REASON_DEBUG		20
 
 /*Define vmm address space and vm data space.*/
-#define KVM_VMM_SIZE (16UL<<20)
+#define KVM_VMM_SIZE (__IA64_UL_CONST(16)<<20)
 #define KVM_VMM_SHIFT 24
-#define KVM_VMM_BASE 0xD000000000000000UL
-#define VMM_SIZE (8UL<<20)
+#define KVM_VMM_BASE 0xD000000000000000
+#define VMM_SIZE (__IA64_UL_CONST(8)<<20)
 
 /*
  * Define vm_buffer, used by PAL Services, base address.
- * Note: vmbuffer is in the VMM-BLOCK, the size must be < 8M
+ * Note: vm_buffer is in the VMM-BLOCK, the size must be < 8M
  */
 #define KVM_VM_BUFFER_BASE (KVM_VMM_BASE + VMM_SIZE)
-#define KVM_VM_BUFFER_SIZE (8UL<<20)
+#define KVM_VM_BUFFER_SIZE (__IA64_UL_CONST(8)<<20)
 
-/*Define Virtual machine data layout.*/
-#define KVM_VM_DATA_SHIFT  24
-#define KVM_VM_DATA_SIZE (1UL << KVM_VM_DATA_SHIFT)
-#define KVM_VM_DATA_BASE (KVM_VMM_BASE + KVM_VMM_SIZE)
+/*
+ * kvm guest's data area looks as follows:
+ *
+ *            +----------------------+	-------	KVM_VM_DATA_SIZE
+ *	      |	    vcpu[n]'s data   |	 |     ___________________KVM_STK_OFFSET
+ *     	      |			     |	 |    /			  |
+ *     	      |	       ..........    |	 |   /vcpu's struct&stack |
+ *     	      |	       ..........    |	 |  /---------------------|---- 0
+ *	      |	    vcpu[5]'s data   |	 | /	   vpd		  |
+ *	      |	    vcpu[4]'s data   |	 |/-----------------------|
+ *	      |	    vcpu[3]'s data   |	 /	   vtlb		  |
+ *	      |	    vcpu[2]'s data   |	/|------------------------|
+ *	      |	    vcpu[1]'s data   |/  |	   vhpt		  |
+ *	      |	    vcpu[0]'s data   |____________________________|
+ *            +----------------------+	 |
+ *	      |	   memory dirty log  |	 |
+ *            +----------------------+	 |
+ *	      |	   vm's data struct  |	 |
+ *            +----------------------+	 |
+ *	      |			     |	 |
+ *	      |			     |	 |
+ *	      |			     |	 |
+ *	      |			     |	 |
+ *	      |			     |	 |
+ *	      |			     |	 |
+ *	      |			     |	 |
+ *	      |	  vm's p2m table  |	 |
+ *	      |			     |	 |
+ *            |			     |	 |
+ *	      |			     |	 |  |
+ * vm's data->|			     |   |  |
+ *	      +----------------------+ ------- 0
+ * To support large memory, needs to increase the size of p2m.
+ * To support more vcpus, needs to ensure it has enough space to
+ * hold vcpus' data.
+ */
 
+#define KVM_VM_DATA_SHIFT	26
+#define KVM_VM_DATA_SIZE	(__IA64_UL_CONST(1) << KVM_VM_DATA_SHIFT)
+#define KVM_VM_DATA_BASE	(KVM_VMM_BASE + KVM_VM_DATA_SIZE)
 
-#define KVM_P2M_BASE    KVM_VM_DATA_BASE
-#define KVM_P2M_OFS     0
-#define KVM_P2M_SIZE    (8UL << 20)
+#define KVM_P2M_BASE		KVM_VM_DATA_BASE
+#define KVM_P2M_SIZE		(__IA64_UL_CONST(24) << 20)
 
-#define KVM_VHPT_BASE   (KVM_P2M_BASE + KVM_P2M_SIZE)
-#define KVM_VHPT_OFS    KVM_P2M_SIZE
-#define KVM_VHPT_BLOCK_SIZE   (2UL << 20)
-#define VHPT_SHIFT      18
-#define VHPT_SIZE       (1UL << VHPT_SHIFT)
-#define VHPT_NUM_ENTRIES (1<<(VHPT_SHIFT-5))
+#define VHPT_SHIFT		16
+#define VHPT_SIZE		(__IA64_UL_CONST(1) << VHPT_SHIFT)
+#define VHPT_NUM_ENTRIES	(__IA64_UL_CONST(1) << (VHPT_SHIFT-5))
 
-#define KVM_VTLB_BASE   (KVM_VHPT_BASE+KVM_VHPT_BLOCK_SIZE)
-#define KVM_VTLB_OFS    (KVM_VHPT_OFS+KVM_VHPT_BLOCK_SIZE)
-#define KVM_VTLB_BLOCK_SIZE   (1UL<<20)
-#define VTLB_SHIFT      17
-#define VTLB_SIZE       (1UL<<VTLB_SHIFT)
-#define VTLB_NUM_ENTRIES (1<<(VTLB_SHIFT-5))
+#define VTLB_SHIFT		16
+#define VTLB_SIZE		(__IA64_UL_CONST(1) << VTLB_SHIFT)
+#define VTLB_NUM_ENTRIES	(1UL << (VHPT_SHIFT-5))
 
-#define KVM_VPD_BASE   (KVM_VTLB_BASE+KVM_VTLB_BLOCK_SIZE)
-#define KVM_VPD_OFS    (KVM_VTLB_OFS+KVM_VTLB_BLOCK_SIZE)
-#define KVM_VPD_BLOCK_SIZE   (2UL<<20)
-#define VPD_SHIFT       16
-#define VPD_SIZE        (1UL<<VPD_SHIFT)
+#define VPD_SHIFT		16
+#define VPD_SIZE		(__IA64_UL_CONST(1) << VPD_SHIFT)
 
-#define KVM_VCPU_BASE   (KVM_VPD_BASE+KVM_VPD_BLOCK_SIZE)
-#define KVM_VCPU_OFS    (KVM_VPD_OFS+KVM_VPD_BLOCK_SIZE)
-#define KVM_VCPU_BLOCK_SIZE   (2UL<<20)
-#define VCPU_SHIFT 18
-#define VCPU_SIZE (1UL<<VCPU_SHIFT)
-#define MAX_VCPU_NUM KVM_VCPU_BLOCK_SIZE/VCPU_SIZE
+#define VCPU_STRUCT_SHIFT	16
+#define VCPU_STRUCT_SIZE	(__IA64_UL_CONST(1) << VCPU_STRUCT_SHIFT)
 
-#define KVM_VM_BASE     (KVM_VCPU_BASE+KVM_VCPU_BLOCK_SIZE)
-#define KVM_VM_OFS      (KVM_VCPU_OFS+KVM_VCPU_BLOCK_SIZE)
-#define KVM_VM_BLOCK_SIZE     (1UL<<19)
+#define KVM_STK_OFFSET		VCPU_STRUCT_SIZE
 
-#define KVM_MEM_DIRTY_LOG_BASE (KVM_VM_BASE+KVM_VM_BLOCK_SIZE)
-#define KVM_MEM_DIRTY_LOG_OFS  (KVM_VM_OFS+KVM_VM_BLOCK_SIZE)
-#define KVM_MEM_DIRTY_LOG_SIZE (1UL<<19)
+#define KVM_VM_STRUCT_SHIFT	19
+#define KVM_VM_STRUCT_SIZE	(__IA64_UL_CONST(1) << KVM_VM_STRUCT_SHIFT)
 
-/* Get vpd, vhpt, tlb, vcpu, base*/
-#define VPD_ADDR(n) (KVM_VPD_BASE+n*VPD_SIZE)
-#define VHPT_ADDR(n) (KVM_VHPT_BASE+n*VHPT_SIZE)
-#define VTLB_ADDR(n) (KVM_VTLB_BASE+n*VTLB_SIZE)
-#define VCPU_ADDR(n) (KVM_VCPU_BASE+n*VCPU_SIZE)
+#define KVM_MEM_DIRY_LOG_SHIFT	19
+#define KVM_MEM_DIRTY_LOG_SIZE (__IA64_UL_CONST(1) << KVM_MEM_DIRY_LOG_SHIFT)
+
+#ifndef __ASSEMBLY__
+
+/*Define the max vcpus and memory for Guests.*/
+#define KVM_MAX_VCPUS	(KVM_VM_DATA_SIZE - KVM_P2M_SIZE - KVM_VM_STRUCT_SIZE -\
+			KVM_MEM_DIRTY_LOG_SIZE) / sizeof(struct kvm_vcpu_data)
+#define KVM_MAX_MEM_SIZE (KVM_P2M_SIZE >> 3 << PAGE_SHIFT)
+
+#define VMM_LOG_LEN 256
+
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/kvm.h>
+#include <linux/kvm_para.h>
+#include <linux/kvm_types.h>
+
+#include <asm/pal.h>
+#include <asm/sal.h>
+#include <asm/page.h>
+
+struct kvm_vcpu_data {
+	char vcpu_vhpt[VHPT_SIZE];
+	char vcpu_vtlb[VTLB_SIZE];
+	char vcpu_vpd[VPD_SIZE];
+	char vcpu_struct[VCPU_STRUCT_SIZE];
+};
+
+struct kvm_vm_data {
+	char kvm_p2m[KVM_P2M_SIZE];
+	char kvm_vm_struct[KVM_VM_STRUCT_SIZE];
+	char kvm_mem_dirty_log[KVM_MEM_DIRTY_LOG_SIZE];
+	struct kvm_vcpu_data vcpu_data[KVM_MAX_VCPUS];
+};
+
+#define VCPU_BASE(n)	KVM_VM_DATA_BASE + \
+				offsetof(struct kvm_vm_data, vcpu_data[n])
+#define VM_BASE		KVM_VM_DATA_BASE + \
+				offsetof(struct kvm_vm_data, kvm_vm_struct)
+#define KVM_MEM_DIRTY_LOG_BASE	KVM_VM_DATA_BASE + \
+				offsetof(struct kvm_vm_data, kvm_mem_dirty_log)
+
+#define VHPT_BASE(n) (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vhpt))
+#define VTLB_BASE(n) (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vtlb))
+#define VPD_BASE(n)  (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vpd))
+#define VCPU_STRUCT_BASE(n)	(VCPU_BASE(n) + \
+				offsetof(struct kvm_vcpu_data, vcpu_struct))
 
 /*IO section definitions*/
 #define IOREQ_READ      1
@@ -389,6 +440,7 @@
 
 	unsigned long opcode;
 	unsigned long cause;
+	char log_buf[VMM_LOG_LEN];
 	union context host;
 	union context guest;
 };
@@ -403,14 +455,13 @@
 };
 
 struct kvm_arch {
+	spinlock_t dirty_log_lock;
+
 	unsigned long	vm_base;
 	unsigned long	metaphysical_rr0;
 	unsigned long	metaphysical_rr4;
 	unsigned long	vmm_init_rr;
-	unsigned long	vhpt_base;
-	unsigned long	vtlb_base;
-	unsigned long 	vpd_base;
-	spinlock_t dirty_log_lock;
+
 	struct kvm_ioapic *vioapic;
 	struct kvm_vm_stat stat;
 	struct kvm_sal_data rdv_sal_data;
@@ -512,7 +563,7 @@
 
 static inline struct kvm_pt_regs *vcpu_regs(struct kvm_vcpu *v)
 {
-	return (struct kvm_pt_regs *) ((unsigned long) v + IA64_STK_OFFSET) - 1;
+	return (struct kvm_pt_regs *) ((unsigned long) v + KVM_STK_OFFSET) - 1;
 }
 
 typedef int kvm_vmm_entry(void);
@@ -531,5 +582,6 @@
 void kvm_sal_emul(struct kvm_vcpu *vcpu);
 
 static inline void kvm_inject_nmi(struct kvm_vcpu *vcpu) {}
+#endif /* __ASSEMBLY__*/
 
 #endif
diff --git a/arch/ia64/include/asm/smp.h b/arch/ia64/include/asm/smp.h
index 12d96e0..21c4023 100644
--- a/arch/ia64/include/asm/smp.h
+++ b/arch/ia64/include/asm/smp.h
@@ -57,7 +57,6 @@
 
 extern char no_int_routing __devinitdata;
 
-extern cpumask_t cpu_online_map;
 extern cpumask_t cpu_core_map[NR_CPUS];
 DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
 extern int smp_num_siblings;
diff --git a/arch/ia64/include/asm/topology.h b/arch/ia64/include/asm/topology.h
index 35bcb64..a3cc9f6 100644
--- a/arch/ia64/include/asm/topology.h
+++ b/arch/ia64/include/asm/topology.h
@@ -55,7 +55,6 @@
 void build_cpu_to_node_map(void);
 
 #define SD_CPU_INIT (struct sched_domain) {		\
-	.span			= CPU_MASK_NONE,	\
 	.parent			= NULL,			\
 	.child			= NULL,			\
 	.groups			= NULL,			\
@@ -80,7 +79,6 @@
 
 /* sched_domains SD_NODE_INIT for IA64 NUMA machines */
 #define SD_NODE_INIT (struct sched_domain) {		\
-	.span			= CPU_MASK_NONE,	\
 	.parent			= NULL,			\
 	.child			= NULL,			\
 	.groups			= NULL,			\
diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c
index 5c4674a..c8adecd 100644
--- a/arch/ia64/kernel/iosapic.c
+++ b/arch/ia64/kernel/iosapic.c
@@ -330,25 +330,25 @@
 
 
 static void
-iosapic_set_affinity (unsigned int irq, cpumask_t mask)
+iosapic_set_affinity(unsigned int irq, const struct cpumask *mask)
 {
 #ifdef CONFIG_SMP
 	u32 high32, low32;
-	int dest, rte_index;
+	int cpu, dest, rte_index;
 	int redir = (irq & IA64_IRQ_REDIRECTED) ? 1 : 0;
 	struct iosapic_rte_info *rte;
 	struct iosapic *iosapic;
 
 	irq &= (~IA64_IRQ_REDIRECTED);
 
-	cpus_and(mask, mask, cpu_online_map);
-	if (cpus_empty(mask))
+	cpu = cpumask_first_and(cpu_online_mask, mask);
+	if (cpu >= nr_cpu_ids)
 		return;
 
-	if (irq_prepare_move(irq, first_cpu(mask)))
+	if (irq_prepare_move(irq, cpu))
 		return;
 
-	dest = cpu_physical_id(first_cpu(mask));
+	dest = cpu_physical_id(cpu);
 
 	if (!iosapic_intr_info[irq].count)
 		return;			/* not an IOSAPIC interrupt */
diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c
index 7fd18f5..0b6db53 100644
--- a/arch/ia64/kernel/irq.c
+++ b/arch/ia64/kernel/irq.c
@@ -133,7 +133,6 @@
  */
 static void migrate_irqs(void)
 {
-	cpumask_t	mask;
 	irq_desc_t *desc;
 	int 		irq, new_cpu;
 
@@ -152,15 +151,14 @@
 		if (desc->status == IRQ_PER_CPU)
 			continue;
 
-		cpus_and(mask, irq_desc[irq].affinity, cpu_online_map);
-		if (any_online_cpu(mask) == NR_CPUS) {
+		if (cpumask_any_and(&irq_desc[irq].affinity, cpu_online_mask)
+		    >= nr_cpu_ids) {
 			/*
 			 * Save it for phase 2 processing
 			 */
 			vectors_in_migration[irq] = irq;
 
 			new_cpu = any_online_cpu(cpu_online_map);
-			mask = cpumask_of_cpu(new_cpu);
 
 			/*
 			 * Al three are essential, currently WARN_ON.. maybe panic?
@@ -168,7 +166,8 @@
 			if (desc->chip && desc->chip->disable &&
 				desc->chip->enable && desc->chip->set_affinity) {
 				desc->chip->disable(irq);
-				desc->chip->set_affinity(irq, mask);
+				desc->chip->set_affinity(irq,
+							 cpumask_of(new_cpu));
 				desc->chip->enable(irq);
 			} else {
 				WARN_ON((!(desc->chip) || !(desc->chip->disable) ||
diff --git a/arch/ia64/kernel/msi_ia64.c b/arch/ia64/kernel/msi_ia64.c
index 702a09c..8903393 100644
--- a/arch/ia64/kernel/msi_ia64.c
+++ b/arch/ia64/kernel/msi_ia64.c
@@ -49,11 +49,12 @@
 static struct irq_chip	ia64_msi_chip;
 
 #ifdef CONFIG_SMP
-static void ia64_set_msi_irq_affinity(unsigned int irq, cpumask_t cpu_mask)
+static void ia64_set_msi_irq_affinity(unsigned int irq,
+				      const cpumask_t *cpu_mask)
 {
 	struct msi_msg msg;
 	u32 addr, data;
-	int cpu = first_cpu(cpu_mask);
+	int cpu = first_cpu(*cpu_mask);
 
 	if (!cpu_online(cpu))
 		return;
@@ -166,12 +167,11 @@
 
 #ifdef CONFIG_DMAR
 #ifdef CONFIG_SMP
-static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
+static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	struct irq_cfg *cfg = irq_cfg + irq;
 	struct msi_msg msg;
-	int cpu = first_cpu(mask);
-
+	int cpu = cpumask_first(mask);
 
 	if (!cpu_online(cpu))
 		return;
@@ -187,7 +187,7 @@
 	msg.address_lo |= MSI_ADDR_DESTID_CPU(cpu_physical_id(cpu));
 
 	dmar_msi_write(irq, &msg);
-	irq_desc[irq].affinity = mask;
+	irq_desc[irq].affinity = *mask;
 }
 #endif /* CONFIG_SMP */
 
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
index 1dcbb85..1146399 100644
--- a/arch/ia64/kernel/smpboot.c
+++ b/arch/ia64/kernel/smpboot.c
@@ -131,12 +131,6 @@
  */
 DEFINE_PER_CPU(int, cpu_state);
 
-/* Bitmasks of currently online, and possible CPUs */
-cpumask_t cpu_online_map;
-EXPORT_SYMBOL(cpu_online_map);
-cpumask_t cpu_possible_map = CPU_MASK_NONE;
-EXPORT_SYMBOL(cpu_possible_map);
-
 cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
 EXPORT_SYMBOL(cpu_core_map);
 DEFINE_PER_CPU_SHARED_ALIGNED(cpumask_t, cpu_sibling_map);
@@ -688,7 +682,7 @@
 {
 	int new_cpei_cpu;
 	irq_desc_t *desc = NULL;
-	cpumask_t 	mask;
+	const struct cpumask *mask;
 	int 		retval = 0;
 
 	/*
@@ -701,7 +695,7 @@
 			 * Now re-target the CPEI to a different processor
 			 */
 			new_cpei_cpu = any_online_cpu(cpu_online_map);
-			mask = cpumask_of_cpu(new_cpei_cpu);
+			mask = cpumask_of(new_cpei_cpu);
 			set_cpei_target_cpu(new_cpei_cpu);
 			desc = irq_desc + ia64_cpe_irq;
 			/*
diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c
index c75b914..a8d61a3 100644
--- a/arch/ia64/kernel/topology.c
+++ b/arch/ia64/kernel/topology.c
@@ -219,7 +219,7 @@
 	cpumask_t shared_cpu_map;
 
 	cpus_and(shared_cpu_map, this_leaf->shared_cpu_map, cpu_online_map);
-	len = cpumask_scnprintf(buf, NR_CPUS+1, shared_cpu_map);
+	len = cpumask_scnprintf(buf, NR_CPUS+1, &shared_cpu_map);
 	len += sprintf(buf+len, "\n");
 	return len;
 }
diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile
index 92cef66..76464dc 100644
--- a/arch/ia64/kvm/Makefile
+++ b/arch/ia64/kvm/Makefile
@@ -60,7 +60,7 @@
 
 CFLAGS_vcpu.o += -mfixed-range=f2-f5,f12-f127
 kvm-intel-objs = vmm.o vmm_ivt.o trampoline.o vcpu.o optvfault.o mmio.o \
-	vtlb.o process.o
+	vtlb.o process.o kvm_lib.o
 #Add link memcpy and memset to avoid possible structure assignment error
 kvm-intel-objs += memcpy.o memset.o
 obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/arch/ia64/kvm/asm-offsets.c b/arch/ia64/kvm/asm-offsets.c
index 4e3dc13..0c3564a 100644
--- a/arch/ia64/kvm/asm-offsets.c
+++ b/arch/ia64/kvm/asm-offsets.c
@@ -24,19 +24,10 @@
 
 #include <linux/autoconf.h>
 #include <linux/kvm_host.h>
+#include <linux/kbuild.h>
 
 #include "vcpu.h"
 
-#define task_struct kvm_vcpu
-
-#define DEFINE(sym, val) \
-	asm volatile("\n->" #sym " (%0) " #val : : "i" (val))
-
-#define BLANK() asm volatile("\n->" : :)
-
-#define OFFSET(_sym, _str, _mem) \
-    DEFINE(_sym, offsetof(_str, _mem));
-
 void foo(void)
 {
 	DEFINE(VMM_TASK_SIZE, sizeof(struct kvm_vcpu));
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index af1464f..0f5ebd9 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -180,7 +180,6 @@
 
 	switch (ext) {
 	case KVM_CAP_IRQCHIP:
-	case KVM_CAP_USER_MEMORY:
 	case KVM_CAP_MP_STATE:
 
 		r = 1;
@@ -439,7 +438,6 @@
 		expires = div64_u64(itc_diff, cyc_per_usec);
 		kt = ktime_set(0, 1000 * expires);
 
-		down_read(&vcpu->kvm->slots_lock);
 		vcpu->arch.ht_active = 1;
 		hrtimer_start(p_ht, kt, HRTIMER_MODE_ABS);
 
@@ -452,7 +450,6 @@
 			if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
 				vcpu->arch.mp_state =
 					KVM_MP_STATE_RUNNABLE;
-		up_read(&vcpu->kvm->slots_lock);
 
 		if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE)
 			return -EINTR;
@@ -476,6 +473,13 @@
 	return 1;
 }
 
+static int handle_vcpu_debug(struct kvm_vcpu *vcpu,
+				struct kvm_run *kvm_run)
+{
+	printk("VMM: %s", vcpu->arch.log_buf);
+	return 1;
+}
+
 static int (*kvm_vti_exit_handlers[])(struct kvm_vcpu *vcpu,
 		struct kvm_run *kvm_run) = {
 	[EXIT_REASON_VM_PANIC]              = handle_vm_error,
@@ -487,6 +491,7 @@
 	[EXIT_REASON_EXTERNAL_INTERRUPT]    = handle_external_interrupt,
 	[EXIT_REASON_IPI]		    = handle_ipi,
 	[EXIT_REASON_PTC_G]		    = handle_global_purge,
+	[EXIT_REASON_DEBUG]		    = handle_vcpu_debug,
 
 };
 
@@ -698,27 +703,24 @@
 	return r;
 }
 
-/*
- * Allocate 16M memory for every vm to hold its specific data.
- * Its memory map is defined in kvm_host.h.
- */
 static struct kvm *kvm_alloc_kvm(void)
 {
 
 	struct kvm *kvm;
 	uint64_t  vm_base;
 
+	BUG_ON(sizeof(struct kvm) > KVM_VM_STRUCT_SIZE);
+
 	vm_base = __get_free_pages(GFP_KERNEL, get_order(KVM_VM_DATA_SIZE));
 
 	if (!vm_base)
 		return ERR_PTR(-ENOMEM);
-	printk(KERN_DEBUG"kvm: VM data's base Address:0x%lx\n", vm_base);
 
-	/* Zero all pages before use! */
 	memset((void *)vm_base, 0, KVM_VM_DATA_SIZE);
-
-	kvm = (struct kvm *)(vm_base + KVM_VM_OFS);
+	kvm = (struct kvm *)(vm_base +
+			offsetof(struct kvm_vm_data, kvm_vm_struct));
 	kvm->arch.vm_base = vm_base;
+	printk(KERN_DEBUG"kvm: vm's data area:0x%lx\n", vm_base);
 
 	return kvm;
 }
@@ -760,21 +762,12 @@
 
 static void kvm_init_vm(struct kvm *kvm)
 {
-	long vm_base;
-
 	BUG_ON(!kvm);
 
 	kvm->arch.metaphysical_rr0 = GUEST_PHYSICAL_RR0;
 	kvm->arch.metaphysical_rr4 = GUEST_PHYSICAL_RR4;
 	kvm->arch.vmm_init_rr = VMM_INIT_RR;
 
-	vm_base = kvm->arch.vm_base;
-	if (vm_base) {
-		kvm->arch.vhpt_base = vm_base + KVM_VHPT_OFS;
-		kvm->arch.vtlb_base = vm_base + KVM_VTLB_OFS;
-		kvm->arch.vpd_base  = vm_base + KVM_VPD_OFS;
-	}
-
 	/*
 	 *Fill P2M entries for MMIO/IO ranges
 	 */
@@ -838,9 +831,8 @@
 
 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 {
-	int i;
 	struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
-	int r;
+	int i;
 
 	vcpu_load(vcpu);
 
@@ -857,18 +849,7 @@
 
 	vpd->vpr = regs->vpd.vpr;
 
-	r = -EFAULT;
-	r = copy_from_user(&vcpu->arch.guest, regs->saved_guest,
-						sizeof(union context));
-	if (r)
-		goto out;
-	r = copy_from_user(vcpu + 1, regs->saved_stack +
-			sizeof(struct kvm_vcpu),
-			IA64_STK_OFFSET - sizeof(struct kvm_vcpu));
-	if (r)
-		goto out;
-	vcpu->arch.exit_data =
-		((struct kvm_vcpu *)(regs->saved_stack))->arch.exit_data;
+	memcpy(&vcpu->arch.guest, &regs->saved_guest, sizeof(union context));
 
 	RESTORE_REGS(mp_state);
 	RESTORE_REGS(vmm_rr);
@@ -902,9 +883,8 @@
 	set_bit(KVM_REQ_RESUME, &vcpu->requests);
 
 	vcpu_put(vcpu);
-	r = 0;
-out:
-	return r;
+
+	return 0;
 }
 
 long kvm_arch_vm_ioctl(struct file *filp,
@@ -1166,10 +1146,11 @@
 		/*Set entry address for first run.*/
 		regs->cr_iip = PALE_RESET_ENTRY;
 
-		/*Initilize itc offset for vcpus*/
+		/*Initialize itc offset for vcpus*/
 		itc_offset = 0UL - ia64_getreg(_IA64_REG_AR_ITC);
-		for (i = 0; i < MAX_VCPU_NUM; i++) {
-			v = (struct kvm_vcpu *)((char *)vcpu + VCPU_SIZE * i);
+		for (i = 0; i < KVM_MAX_VCPUS; i++) {
+			v = (struct kvm_vcpu *)((char *)vcpu +
+					sizeof(struct kvm_vcpu_data) * i);
 			v->arch.itc_offset = itc_offset;
 			v->arch.last_itc = 0;
 		}
@@ -1183,7 +1164,7 @@
 	vcpu->arch.apic->vcpu = vcpu;
 
 	p_ctx->gr[1] = 0;
-	p_ctx->gr[12] = (unsigned long)((char *)vmm_vcpu + IA64_STK_OFFSET);
+	p_ctx->gr[12] = (unsigned long)((char *)vmm_vcpu + KVM_STK_OFFSET);
 	p_ctx->gr[13] = (unsigned long)vmm_vcpu;
 	p_ctx->psr = 0x1008522000UL;
 	p_ctx->ar[40] = FPSR_DEFAULT; /*fpsr*/
@@ -1218,12 +1199,12 @@
 	vcpu->arch.hlt_timer.function = hlt_timer_fn;
 
 	vcpu->arch.last_run_cpu = -1;
-	vcpu->arch.vpd = (struct vpd *)VPD_ADDR(vcpu->vcpu_id);
+	vcpu->arch.vpd = (struct vpd *)VPD_BASE(vcpu->vcpu_id);
 	vcpu->arch.vsa_base = kvm_vsa_base;
 	vcpu->arch.__gp = kvm_vmm_gp;
 	vcpu->arch.dirty_log_lock_pa = __pa(&kvm->arch.dirty_log_lock);
-	vcpu->arch.vhpt.hash = (struct thash_data *)VHPT_ADDR(vcpu->vcpu_id);
-	vcpu->arch.vtlb.hash = (struct thash_data *)VTLB_ADDR(vcpu->vcpu_id);
+	vcpu->arch.vhpt.hash = (struct thash_data *)VHPT_BASE(vcpu->vcpu_id);
+	vcpu->arch.vtlb.hash = (struct thash_data *)VTLB_BASE(vcpu->vcpu_id);
 	init_ptce_info(vcpu);
 
 	r = 0;
@@ -1273,12 +1254,22 @@
 	int r;
 	int cpu;
 
+	BUG_ON(sizeof(struct kvm_vcpu) > VCPU_STRUCT_SIZE/2);
+
+	r = -EINVAL;
+	if (id >= KVM_MAX_VCPUS) {
+		printk(KERN_ERR"kvm: Can't configure vcpus > %ld",
+				KVM_MAX_VCPUS);
+		goto fail;
+	}
+
 	r = -ENOMEM;
 	if (!vm_base) {
 		printk(KERN_ERR"kvm: Create vcpu[%d] error!\n", id);
 		goto fail;
 	}
-	vcpu = (struct kvm_vcpu *)(vm_base + KVM_VCPU_OFS + VCPU_SIZE * id);
+	vcpu = (struct kvm_vcpu *)(vm_base + offsetof(struct kvm_vm_data,
+					vcpu_data[id].vcpu_struct));
 	vcpu->kvm = kvm;
 
 	cpu = get_cpu();
@@ -1374,9 +1365,9 @@
 
 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 {
-	int i;
-	int r;
 	struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
+	int i;
+
 	vcpu_load(vcpu);
 
 	for (i = 0; i < 16; i++) {
@@ -1391,14 +1382,8 @@
 	regs->vpd.vpsr = vpd->vpsr;
 	regs->vpd.vpr = vpd->vpr;
 
-	r = -EFAULT;
-	r = copy_to_user(regs->saved_guest, &vcpu->arch.guest,
-					sizeof(union context));
-	if (r)
-		goto out;
-	r = copy_to_user(regs->saved_stack, (void *)vcpu, IA64_STK_OFFSET);
-	if (r)
-		goto out;
+	memcpy(&regs->saved_guest, &vcpu->arch.guest, sizeof(union context));
+
 	SAVE_REGS(mp_state);
 	SAVE_REGS(vmm_rr);
 	memcpy(regs->itrs, vcpu->arch.itrs, sizeof(struct thash_data) * NITRS);
@@ -1426,10 +1411,9 @@
 	SAVE_REGS(metaphysical_saved_rr4);
 	SAVE_REGS(fp_psr);
 	SAVE_REGS(saved_gp);
+
 	vcpu_put(vcpu);
-	r = 0;
-out:
-	return r;
+	return 0;
 }
 
 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
@@ -1457,6 +1441,9 @@
 	struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot];
 	unsigned long base_gfn = memslot->base_gfn;
 
+	if (base_gfn + npages > (KVM_MAX_MEM_SIZE >> PAGE_SHIFT))
+		return -ENOMEM;
+
 	for (i = 0; i < npages; i++) {
 		pfn = gfn_to_pfn(kvm, base_gfn + i);
 		if (!kvm_is_mmio_pfn(pfn)) {
@@ -1631,8 +1618,8 @@
 	struct kvm_memory_slot *memslot;
 	int r, i;
 	long n, base;
-	unsigned long *dirty_bitmap = (unsigned long *)((void *)kvm - KVM_VM_OFS
-					+ KVM_MEM_DIRTY_LOG_OFS);
+	unsigned long *dirty_bitmap = (unsigned long *)(kvm->arch.vm_base +
+			offsetof(struct kvm_vm_data, kvm_mem_dirty_log));
 
 	r = -EINVAL;
 	if (log->slot >= KVM_MEMORY_SLOTS)
diff --git a/arch/ia64/kvm/kvm_lib.c b/arch/ia64/kvm/kvm_lib.c
new file mode 100644
index 0000000..a85cb61
--- /dev/null
+++ b/arch/ia64/kvm/kvm_lib.c
@@ -0,0 +1,15 @@
+/*
+ * kvm_lib.c: Compile some libraries for kvm-intel module.
+ *
+ *	Just include kernel's library, and disable symbols export.
+ * 	Copyright (C) 2008, Intel Corporation.
+ *  	Xiantao Zhang  (xiantao.zhang@intel.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#undef CONFIG_MODULES
+#include "../../../lib/vsprintf.c"
+#include "../../../lib/ctype.c"
diff --git a/arch/ia64/kvm/kvm_minstate.h b/arch/ia64/kvm/kvm_minstate.h
index 2cc41d1..b2bcaa2 100644
--- a/arch/ia64/kvm/kvm_minstate.h
+++ b/arch/ia64/kvm/kvm_minstate.h
@@ -24,6 +24,8 @@
 #include <asm/asmmacro.h>
 #include <asm/types.h>
 #include <asm/kregs.h>
+#include <asm/kvm_host.h>
+
 #include "asm-offsets.h"
 
 #define KVM_MINSTATE_START_SAVE_MIN	     					\
@@ -33,7 +35,7 @@
 	addl r22 = VMM_RBS_OFFSET,r1;            /* compute base of RBS */	\
 	;;									\
 	lfetch.fault.excl.nt1 [r22];						\
-	addl r1 = IA64_STK_OFFSET-VMM_PT_REGS_SIZE,r1;  /* compute base of memory stack */  \
+	addl r1 = KVM_STK_OFFSET-VMM_PT_REGS_SIZE, r1;  \
 	mov r23 = ar.bspstore;			/* save ar.bspstore */          \
 	;;									\
 	mov ar.bspstore = r22;				/* switch to kernel RBS */\
diff --git a/arch/ia64/kvm/misc.h b/arch/ia64/kvm/misc.h
index e585c46..dd979e0 100644
--- a/arch/ia64/kvm/misc.h
+++ b/arch/ia64/kvm/misc.h
@@ -27,7 +27,8 @@
  */
 static inline uint64_t *kvm_host_get_pmt(struct kvm *kvm)
 {
-	return (uint64_t *)(kvm->arch.vm_base + KVM_P2M_OFS);
+	return (uint64_t *)(kvm->arch.vm_base +
+				offsetof(struct kvm_vm_data, kvm_p2m));
 }
 
 static inline void kvm_set_pmt_entry(struct kvm *kvm, gfn_t gfn,
diff --git a/arch/ia64/kvm/mmio.c b/arch/ia64/kvm/mmio.c
index 7f1a858..21f63ff 100644
--- a/arch/ia64/kvm/mmio.c
+++ b/arch/ia64/kvm/mmio.c
@@ -66,31 +66,25 @@
 
 	switch (addr) {
 	case PIB_OFST_INTA:
-		/*panic_domain(NULL, "Undefined write on PIB INTA\n");*/
-		panic_vm(v);
+		panic_vm(v, "Undefined write on PIB INTA\n");
 		break;
 	case PIB_OFST_XTP:
 		if (length == 1) {
 			vlsapic_write_xtp(v, val);
 		} else {
-			/*panic_domain(NULL,
-			"Undefined write on PIB XTP\n");*/
-			panic_vm(v);
+			panic_vm(v, "Undefined write on PIB XTP\n");
 		}
 		break;
 	default:
 		if (PIB_LOW_HALF(addr)) {
-			/*lower half */
+			/*Lower half */
 			if (length != 8)
-				/*panic_domain(NULL,
-				"Can't LHF write with size %ld!\n",
-				length);*/
-				panic_vm(v);
+				panic_vm(v, "Can't LHF write with size %ld!\n",
+						length);
 			else
 				vlsapic_write_ipi(v, addr, val);
-		} else {   /*	upper half
-				printk("IPI-UHF write %lx\n",addr);*/
-			panic_vm(v);
+		} else {   /*Upper half */
+			panic_vm(v, "IPI-UHF write %lx\n", addr);
 		}
 		break;
 	}
@@ -108,22 +102,18 @@
 		if (length == 1) /* 1 byte load */
 			; /* There is no i8259, there is no INTA access*/
 		else
-			/*panic_domain(NULL,"Undefined read on PIB INTA\n"); */
-			panic_vm(v);
+			panic_vm(v, "Undefined read on PIB INTA\n");
 
 		break;
 	case PIB_OFST_XTP:
 		if (length == 1) {
 			result = VLSAPIC_XTP(v);
-			/* printk("read xtp %lx\n", result); */
 		} else {
-			/*panic_domain(NULL,
-			"Undefined read on PIB XTP\n");*/
-			panic_vm(v);
+			panic_vm(v, "Undefined read on PIB XTP\n");
 		}
 		break;
 	default:
-		panic_vm(v);
+		panic_vm(v, "Undefined addr access for lsapic!\n");
 		break;
 	}
 	return result;
@@ -162,7 +152,7 @@
 			/* it's necessary to ensure zero extending */
 			*dest = p->u.ioreq.data & (~0UL >> (64-(s*8)));
 	} else
-		panic_vm(vcpu);
+		panic_vm(vcpu, "Unhandled mmio access returned!\n");
 out:
 	local_irq_restore(psr);
 	return ;
@@ -324,7 +314,9 @@
 		return;
 	} else {
 		inst_type = -1;
-		panic_vm(vcpu);
+		panic_vm(vcpu, "Unsupported MMIO access instruction! \
+				Bundle[0]=0x%lx, Bundle[1]=0x%lx\n",
+				bundle.i64[0], bundle.i64[1]);
 	}
 
 	size = 1 << size;
@@ -335,7 +327,7 @@
 		if (inst_type == SL_INTEGER)
 			vcpu_set_gr(vcpu, inst.M1.r1, data, 0);
 		else
-			panic_vm(vcpu);
+			panic_vm(vcpu, "Unsupported instruction type!\n");
 
 	}
 	vcpu_increment_iip(vcpu);
diff --git a/arch/ia64/kvm/process.c b/arch/ia64/kvm/process.c
index 8008173..552d077 100644
--- a/arch/ia64/kvm/process.c
+++ b/arch/ia64/kvm/process.c
@@ -527,7 +527,8 @@
 	vector = vec2off[vec];
 
 	if (!(vpsr & IA64_PSR_IC) && (vector != IA64_DATA_NESTED_TLB_VECTOR)) {
-		panic_vm(vcpu);
+		panic_vm(vcpu, "Interruption with vector 0x%lx occurs "
+						"with psr.ic = 0\n", vector);
 		return;
 	}
 
@@ -586,7 +587,7 @@
 		vcpu_set_gr(vcpu, 10, p->u.pal_data.ret.v1, 0);
 		vcpu_set_gr(vcpu, 11, p->u.pal_data.ret.v2, 0);
 	} else
-		panic_vm(vcpu);
+		panic_vm(vcpu, "Mis-set for exit reason!\n");
 }
 
 static void set_sal_call_data(struct kvm_vcpu *vcpu)
@@ -614,7 +615,7 @@
 		vcpu_set_gr(vcpu, 10, p->u.sal_data.ret.r10, 0);
 		vcpu_set_gr(vcpu, 11, p->u.sal_data.ret.r11, 0);
 	} else
-		panic_vm(vcpu);
+		panic_vm(vcpu, "Mis-set for exit reason!\n");
 }
 
 void  kvm_ia64_handle_break(unsigned long ifa, struct kvm_pt_regs *regs,
@@ -680,7 +681,7 @@
 	vpsr = VCPU(vcpu, vpsr);
 	isr = vpsr & IA64_PSR_RI;
 	if (!(vpsr & IA64_PSR_IC))
-		panic_vm(vcpu);
+		panic_vm(vcpu, "Trying to inject one IRQ with psr.ic=0\n");
 	reflect_interruption(0, isr, 0, 12, regs); /* EXT IRQ */
 }
 
@@ -941,8 +942,20 @@
 	ia64_set_pta(vcpu->arch.vhpt.pta.val);
 }
 
+static void vmm_sanity_check(struct kvm_vcpu *vcpu)
+{
+	struct exit_ctl_data *p = &vcpu->arch.exit_data;
+
+	if (!vmm_sanity && p->exit_reason != EXIT_REASON_DEBUG) {
+		panic_vm(vcpu, "Failed to do vmm sanity check, "
+			"it may be caused by a crashed vmm!!\n\n");
+	}
+}
+
 static void kvm_do_resume_op(struct kvm_vcpu *vcpu)
 {
+	vmm_sanity_check(vcpu); /* Guarantee vcpu running on a healthy vmm! */
+
 	if (test_and_clear_bit(KVM_REQ_RESUME, &vcpu->requests)) {
 		vcpu_do_resume(vcpu);
 		return;
@@ -968,3 +981,11 @@
 						1, 0, 0, 0, 0, 0);
 	kvm_do_resume_op(vcpu);
 }
+
+void vmm_panic_handler(u64 vec)
+{
+	struct kvm_vcpu *vcpu = current_vcpu;
+	vmm_sanity = 0;
+	panic_vm(vcpu, "Unexpected interruption occurs in VMM, vector:0x%lx\n",
+			vec2off[vec]);
+}
diff --git a/arch/ia64/kvm/vcpu.c b/arch/ia64/kvm/vcpu.c
index e44027c..ecd526b 100644
--- a/arch/ia64/kvm/vcpu.c
+++ b/arch/ia64/kvm/vcpu.c
@@ -816,8 +816,9 @@
 	unsigned long vitv = VCPU(vcpu, itv);
 
 	if (vcpu->vcpu_id == 0) {
-		for (i = 0; i < MAX_VCPU_NUM; i++) {
-			v = (struct kvm_vcpu *)((char *)vcpu + VCPU_SIZE * i);
+		for (i = 0; i < KVM_MAX_VCPUS; i++) {
+			v = (struct kvm_vcpu *)((char *)vcpu +
+					sizeof(struct kvm_vcpu_data) * i);
 			VMX(v, itc_offset) = itc_offset;
 			VMX(v, last_itc) = 0;
 		}
@@ -1650,7 +1651,8 @@
 	 * Otherwise panic
 	 */
 	if (val & (IA64_PSR_PK | IA64_PSR_IS | IA64_PSR_VM))
-		panic_vm(vcpu);
+		panic_vm(vcpu, "Only support guests with vpsr.pk=0 "
+				"& vpsr.is=0\n");
 
 	/*
 	 * For those IA64_PSR bits: id/da/dd/ss/ed/ia
@@ -2103,7 +2105,7 @@
 
 	if (is_physical_mode(vcpu)) {
 		if (vcpu->arch.mode_flags & GUEST_PHY_EMUL)
-			panic_vm(vcpu);
+			panic_vm(vcpu, "Machine Status conflicts!\n");
 
 		ia64_set_rr((VRN0 << VRN_SHIFT), vcpu->arch.metaphysical_rr0);
 		ia64_dv_serialize_data();
@@ -2152,10 +2154,70 @@
 	return 0;
 }
 
-void panic_vm(struct kvm_vcpu *v)
+static void kvm_show_registers(struct kvm_pt_regs *regs)
 {
-	struct exit_ctl_data *p = &v->arch.exit_data;
+	unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri;
 
+	struct kvm_vcpu *vcpu = current_vcpu;
+	if (vcpu != NULL)
+		printk("vcpu 0x%p vcpu_id %d\n",
+		       vcpu, vcpu->vcpu_id);
+
+	printk("psr : %016lx ifs : %016lx ip  : [<%016lx>]\n",
+	       regs->cr_ipsr, regs->cr_ifs, ip);
+
+	printk("unat: %016lx pfs : %016lx rsc : %016lx\n",
+	       regs->ar_unat, regs->ar_pfs, regs->ar_rsc);
+	printk("rnat: %016lx bspstore: %016lx pr  : %016lx\n",
+	       regs->ar_rnat, regs->ar_bspstore, regs->pr);
+	printk("ldrs: %016lx ccv : %016lx fpsr: %016lx\n",
+	       regs->loadrs, regs->ar_ccv, regs->ar_fpsr);
+	printk("csd : %016lx ssd : %016lx\n", regs->ar_csd, regs->ar_ssd);
+	printk("b0  : %016lx b6  : %016lx b7  : %016lx\n", regs->b0,
+							regs->b6, regs->b7);
+	printk("f6  : %05lx%016lx f7  : %05lx%016lx\n",
+	       regs->f6.u.bits[1], regs->f6.u.bits[0],
+	       regs->f7.u.bits[1], regs->f7.u.bits[0]);
+	printk("f8  : %05lx%016lx f9  : %05lx%016lx\n",
+	       regs->f8.u.bits[1], regs->f8.u.bits[0],
+	       regs->f9.u.bits[1], regs->f9.u.bits[0]);
+	printk("f10 : %05lx%016lx f11 : %05lx%016lx\n",
+	       regs->f10.u.bits[1], regs->f10.u.bits[0],
+	       regs->f11.u.bits[1], regs->f11.u.bits[0]);
+
+	printk("r1  : %016lx r2  : %016lx r3  : %016lx\n", regs->r1,
+							regs->r2, regs->r3);
+	printk("r8  : %016lx r9  : %016lx r10 : %016lx\n", regs->r8,
+							regs->r9, regs->r10);
+	printk("r11 : %016lx r12 : %016lx r13 : %016lx\n", regs->r11,
+							regs->r12, regs->r13);
+	printk("r14 : %016lx r15 : %016lx r16 : %016lx\n", regs->r14,
+							regs->r15, regs->r16);
+	printk("r17 : %016lx r18 : %016lx r19 : %016lx\n", regs->r17,
+							regs->r18, regs->r19);
+	printk("r20 : %016lx r21 : %016lx r22 : %016lx\n", regs->r20,
+							regs->r21, regs->r22);
+	printk("r23 : %016lx r24 : %016lx r25 : %016lx\n", regs->r23,
+							regs->r24, regs->r25);
+	printk("r26 : %016lx r27 : %016lx r28 : %016lx\n", regs->r26,
+							regs->r27, regs->r28);
+	printk("r29 : %016lx r30 : %016lx r31 : %016lx\n", regs->r29,
+							regs->r30, regs->r31);
+
+}
+
+void panic_vm(struct kvm_vcpu *v, const char *fmt, ...)
+{
+	va_list args;
+	char buf[256];
+
+	struct kvm_pt_regs *regs = vcpu_regs(v);
+	struct exit_ctl_data *p = &v->arch.exit_data;
+	va_start(args, fmt);
+	vsnprintf(buf, sizeof(buf), fmt, args);
+	va_end(args);
+	printk("%s", buf);
+	kvm_show_registers(regs);
 	p->exit_reason = EXIT_REASON_VM_PANIC;
 	vmm_transition(v);
 	/*Never to return*/
diff --git a/arch/ia64/kvm/vcpu.h b/arch/ia64/kvm/vcpu.h
index e9b2a4e..b2f12a5 100644
--- a/arch/ia64/kvm/vcpu.h
+++ b/arch/ia64/kvm/vcpu.h
@@ -737,9 +737,12 @@
 void kvm_init_vhpt(struct kvm_vcpu *v);
 void thash_init(struct thash_cb *hcb, u64 sz);
 
-void panic_vm(struct kvm_vcpu *v);
+void panic_vm(struct kvm_vcpu *v, const char *fmt, ...);
 
 extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2, u64 arg3,
 		u64 arg4, u64 arg5, u64 arg6, u64 arg7);
+
+extern long vmm_sanity;
+
 #endif
 #endif	/* __VCPU_H__ */
diff --git a/arch/ia64/kvm/vmm.c b/arch/ia64/kvm/vmm.c
index 2275bf4..9eee5c0 100644
--- a/arch/ia64/kvm/vmm.c
+++ b/arch/ia64/kvm/vmm.c
@@ -20,6 +20,7 @@
  */
 
 
+#include<linux/kernel.h>
 #include<linux/module.h>
 #include<asm/fpswa.h>
 
@@ -31,6 +32,8 @@
 extern char kvm_ia64_ivt;
 extern fpswa_interface_t *vmm_fpswa_interface;
 
+long vmm_sanity = 1;
+
 struct kvm_vmm_info vmm_info = {
 	.module	     = THIS_MODULE,
 	.vmm_entry   = vmm_entry,
@@ -62,5 +65,31 @@
 {
 	_vmm_raw_spin_unlock(lock);
 }
+
+static void vcpu_debug_exit(struct kvm_vcpu *vcpu)
+{
+	struct exit_ctl_data *p = &vcpu->arch.exit_data;
+	long psr;
+
+	local_irq_save(psr);
+	p->exit_reason = EXIT_REASON_DEBUG;
+	vmm_transition(vcpu);
+	local_irq_restore(psr);
+}
+
+asmlinkage int printk(const char *fmt, ...)
+{
+	struct kvm_vcpu *vcpu = current_vcpu;
+	va_list args;
+	int r;
+
+	memset(vcpu->arch.log_buf, 0, VMM_LOG_LEN);
+	va_start(args, fmt);
+	r = vsnprintf(vcpu->arch.log_buf, VMM_LOG_LEN, fmt, args);
+	va_end(args);
+	vcpu_debug_exit(vcpu);
+	return r;
+}
+
 module_init(kvm_vmm_init)
 module_exit(kvm_vmm_exit)
diff --git a/arch/ia64/kvm/vmm_ivt.S b/arch/ia64/kvm/vmm_ivt.S
index c1d7251..3ef1a01 100644
--- a/arch/ia64/kvm/vmm_ivt.S
+++ b/arch/ia64/kvm/vmm_ivt.S
@@ -1,5 +1,5 @@
 /*
- * /ia64/kvm_ivt.S
+ * arch/ia64/kvm/vmm_ivt.S
  *
  * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co
  *      Stephane Eranian <eranian@hpl.hp.com>
@@ -70,32 +70,39 @@
 # define PSR_DEFAULT_BITS   0
 #endif
 
-
 #define KVM_FAULT(n)    \
-    kvm_fault_##n:;          \
-    mov r19=n;;          \
-    br.sptk.many kvm_fault_##n;         \
-    ;;                  \
-
+	kvm_fault_##n:;          \
+	mov r19=n;;          \
+	br.sptk.many kvm_vmm_panic;         \
+	;;                  \
 
 #define KVM_REFLECT(n)    \
-    mov r31=pr;           \
-    mov r19=n;       /* prepare to save predicates */ \
-    mov r29=cr.ipsr;      \
-    ;;      \
-    tbit.z p6,p7=r29,IA64_PSR_VM_BIT;       \
-(p7)br.sptk.many kvm_dispatch_reflection;        \
-    br.sptk.many kvm_panic;      \
+	mov r31=pr;           \
+	mov r19=n;       /* prepare to save predicates */ \
+	mov r29=cr.ipsr;      \
+	;;      \
+	tbit.z p6,p7=r29,IA64_PSR_VM_BIT;       \
+(p7)	br.sptk.many kvm_dispatch_reflection;        \
+	br.sptk.many kvm_vmm_panic;      \
 
-
-GLOBAL_ENTRY(kvm_panic)
-    br.sptk.many kvm_panic
-    ;;
-END(kvm_panic)
-
-
-
-
+GLOBAL_ENTRY(kvm_vmm_panic)
+	KVM_SAVE_MIN_WITH_COVER_R19
+	alloc r14=ar.pfs,0,0,1,0
+	mov out0=r15
+	adds r3=8,r2                // set up second base pointer
+	;;
+	ssm psr.ic
+	;;
+	srlz.i    // guarantee that interruption collection is on
+	;;
+	//(p15) ssm psr.i               // restore psr.i
+	addl r14=@gprel(ia64_leave_hypervisor),gp
+	;;
+	KVM_SAVE_REST
+	mov rp=r14
+	;;
+	br.call.sptk.many b6=vmm_panic_handler;
+END(kvm_vmm_panic)
 
     .section .text.ivt,"ax"
 
@@ -105,308 +112,307 @@
 ///////////////////////////////////////////////////////////////
 // 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47)
 ENTRY(kvm_vhpt_miss)
-    KVM_FAULT(0)
+	KVM_FAULT(0)
 END(kvm_vhpt_miss)
 
-
     .org kvm_ia64_ivt+0x400
 ////////////////////////////////////////////////////////////////
 // 0x0400 Entry 1 (size 64 bundles) ITLB (21)
 ENTRY(kvm_itlb_miss)
-    mov r31 = pr
-    mov r29=cr.ipsr;
-    ;;
-    tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
-    (p6) br.sptk kvm_alt_itlb_miss
-    mov r19 = 1
-    br.sptk kvm_itlb_miss_dispatch
-    KVM_FAULT(1);
+	mov r31 = pr
+	mov r29=cr.ipsr;
+	;;
+	tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
+(p6)	br.sptk kvm_alt_itlb_miss
+	mov r19 = 1
+	br.sptk kvm_itlb_miss_dispatch
+	KVM_FAULT(1);
 END(kvm_itlb_miss)
 
     .org kvm_ia64_ivt+0x0800
 //////////////////////////////////////////////////////////////////
 // 0x0800 Entry 2 (size 64 bundles) DTLB (9,48)
 ENTRY(kvm_dtlb_miss)
-    mov r31 = pr
-    mov r29=cr.ipsr;
-    ;;
-    tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
-(p6)br.sptk kvm_alt_dtlb_miss
-    br.sptk kvm_dtlb_miss_dispatch
+	mov r31 = pr
+	mov r29=cr.ipsr;
+	;;
+	tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
+(p6)	br.sptk kvm_alt_dtlb_miss
+	br.sptk kvm_dtlb_miss_dispatch
 END(kvm_dtlb_miss)
 
      .org kvm_ia64_ivt+0x0c00
 ////////////////////////////////////////////////////////////////////
 // 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19)
 ENTRY(kvm_alt_itlb_miss)
-    mov r16=cr.ifa    // get address that caused the TLB miss
-    ;;
-    movl r17=PAGE_KERNEL
-    mov r24=cr.ipsr
-    movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
-    ;;
-    and r19=r19,r16     // clear ed, reserved bits, and PTE control bits
-    ;;
-    or r19=r17,r19      // insert PTE control bits into r19
-    ;;
-    movl r20=IA64_GRANULE_SHIFT<<2
-    ;;
-    mov cr.itir=r20
-    ;;
-    itc.i r19		// insert the TLB entry
-    mov pr=r31,-1
-    rfi
+	mov r16=cr.ifa    // get address that caused the TLB miss
+	;;
+	movl r17=PAGE_KERNEL
+	mov r24=cr.ipsr
+	movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
+	;;
+	and r19=r19,r16     // clear ed, reserved bits, and PTE control bits
+	;;
+	or r19=r17,r19      // insert PTE control bits into r19
+	;;
+	movl r20=IA64_GRANULE_SHIFT<<2
+	;;
+	mov cr.itir=r20
+	;;
+	itc.i r19		// insert the TLB entry
+	mov pr=r31,-1
+	rfi
 END(kvm_alt_itlb_miss)
 
     .org kvm_ia64_ivt+0x1000
 /////////////////////////////////////////////////////////////////////
 // 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46)
 ENTRY(kvm_alt_dtlb_miss)
-    mov r16=cr.ifa		// get address that caused the TLB miss
-    ;;
-    movl r17=PAGE_KERNEL
-    movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
-    mov r24=cr.ipsr
-    ;;
-    and r19=r19,r16     // clear ed, reserved bits, and PTE control bits
-    ;;
-    or r19=r19,r17	// insert PTE control bits into r19
-    ;;
-    movl r20=IA64_GRANULE_SHIFT<<2
-    ;;
-    mov cr.itir=r20
-    ;;
-    itc.d r19		// insert the TLB entry
-    mov pr=r31,-1
-    rfi
+	mov r16=cr.ifa		// get address that caused the TLB miss
+	;;
+	movl r17=PAGE_KERNEL
+	movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
+	mov r24=cr.ipsr
+	;;
+	and r19=r19,r16     // clear ed, reserved bits, and PTE control bits
+	;;
+	or r19=r19,r17	// insert PTE control bits into r19
+	;;
+	movl r20=IA64_GRANULE_SHIFT<<2
+	;;
+	mov cr.itir=r20
+	;;
+	itc.d r19		// insert the TLB entry
+	mov pr=r31,-1
+	rfi
 END(kvm_alt_dtlb_miss)
 
     .org kvm_ia64_ivt+0x1400
 //////////////////////////////////////////////////////////////////////
 // 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45)
 ENTRY(kvm_nested_dtlb_miss)
-    KVM_FAULT(5)
+	KVM_FAULT(5)
 END(kvm_nested_dtlb_miss)
 
     .org kvm_ia64_ivt+0x1800
 /////////////////////////////////////////////////////////////////////
 // 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24)
 ENTRY(kvm_ikey_miss)
-    KVM_REFLECT(6)
+	KVM_REFLECT(6)
 END(kvm_ikey_miss)
 
     .org kvm_ia64_ivt+0x1c00
 /////////////////////////////////////////////////////////////////////
 // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51)
 ENTRY(kvm_dkey_miss)
-    KVM_REFLECT(7)
+	KVM_REFLECT(7)
 END(kvm_dkey_miss)
 
     .org kvm_ia64_ivt+0x2000
 ////////////////////////////////////////////////////////////////////
 // 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54)
 ENTRY(kvm_dirty_bit)
-    KVM_REFLECT(8)
+	KVM_REFLECT(8)
 END(kvm_dirty_bit)
 
     .org kvm_ia64_ivt+0x2400
 ////////////////////////////////////////////////////////////////////
 // 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27)
 ENTRY(kvm_iaccess_bit)
-    KVM_REFLECT(9)
+	KVM_REFLECT(9)
 END(kvm_iaccess_bit)
 
     .org kvm_ia64_ivt+0x2800
 ///////////////////////////////////////////////////////////////////
 // 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55)
 ENTRY(kvm_daccess_bit)
-    KVM_REFLECT(10)
+	KVM_REFLECT(10)
 END(kvm_daccess_bit)
 
     .org kvm_ia64_ivt+0x2c00
 /////////////////////////////////////////////////////////////////
 // 0x2c00 Entry 11 (size 64 bundles) Break instruction (33)
 ENTRY(kvm_break_fault)
-    mov r31=pr
-    mov r19=11
-    mov r29=cr.ipsr
-    ;;
-    KVM_SAVE_MIN_WITH_COVER_R19
-    ;;
-    alloc r14=ar.pfs,0,0,4,0 // now it's safe (must be first in insn group!)
-    mov out0=cr.ifa
-    mov out2=cr.isr     // FIXME: pity to make this slow access twice
-    mov out3=cr.iim     // FIXME: pity to make this slow access twice
-    adds r3=8,r2                // set up second base pointer
-    ;;
-    ssm psr.ic
-    ;;
-    srlz.i                  // guarantee that interruption collection is on
-    ;;
-    //(p15)ssm psr.i               // restore psr.i
-    addl r14=@gprel(ia64_leave_hypervisor),gp
-    ;;
-    KVM_SAVE_REST
-    mov rp=r14
-    ;;
-    adds out1=16,sp
-    br.call.sptk.many b6=kvm_ia64_handle_break
-    ;;
+	mov r31=pr
+	mov r19=11
+	mov r29=cr.ipsr
+	;;
+	KVM_SAVE_MIN_WITH_COVER_R19
+	;;
+	alloc r14=ar.pfs,0,0,4,0 //(must be first in insn group!)
+	mov out0=cr.ifa
+	mov out2=cr.isr     // FIXME: pity to make this slow access twice
+	mov out3=cr.iim     // FIXME: pity to make this slow access twice
+	adds r3=8,r2                // set up second base pointer
+	;;
+	ssm psr.ic
+	;;
+	srlz.i         // guarantee that interruption collection is on
+	;;
+	//(p15)ssm psr.i               // restore psr.i
+	addl r14=@gprel(ia64_leave_hypervisor),gp
+	;;
+	KVM_SAVE_REST
+	mov rp=r14
+	;;
+	adds out1=16,sp
+	br.call.sptk.many b6=kvm_ia64_handle_break
+	;;
 END(kvm_break_fault)
 
     .org kvm_ia64_ivt+0x3000
 /////////////////////////////////////////////////////////////////
 // 0x3000 Entry 12 (size 64 bundles) External Interrupt (4)
 ENTRY(kvm_interrupt)
-    mov r31=pr		// prepare to save predicates
-    mov r19=12
-    mov r29=cr.ipsr
-    ;;
-    tbit.z p6,p7=r29,IA64_PSR_VM_BIT
-    tbit.z p0,p15=r29,IA64_PSR_I_BIT
-    ;;
-(p7) br.sptk kvm_dispatch_interrupt
-    ;;
-    mov r27=ar.rsc		/* M */
-    mov r20=r1			/* A */
-    mov r25=ar.unat		/* M */
-    mov r26=ar.pfs		/* I */
-    mov r28=cr.iip		/* M */
-    cover			/* B (or nothing) */
-    ;;
-    mov r1=sp
-    ;;
-    invala			/* M */
-    mov r30=cr.ifs
-    ;;
-    addl r1=-VMM_PT_REGS_SIZE,r1
-    ;;
-    adds r17=2*L1_CACHE_BYTES,r1	/* really: biggest cache-line size */
-    adds r16=PT(CR_IPSR),r1
-    ;;
-    lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES
-    st8 [r16]=r29			/* save cr.ipsr */
-    ;;
-    lfetch.fault.excl.nt1 [r17]
-    mov r29=b0
-    ;;
-    adds r16=PT(R8),r1  	/* initialize first base pointer */
-    adds r17=PT(R9),r1  	/* initialize second base pointer */
-    mov r18=r0      		/* make sure r18 isn't NaT */
-    ;;
+	mov r31=pr		// prepare to save predicates
+	mov r19=12
+	mov r29=cr.ipsr
+	;;
+	tbit.z p6,p7=r29,IA64_PSR_VM_BIT
+	tbit.z p0,p15=r29,IA64_PSR_I_BIT
+	;;
+(p7)	br.sptk kvm_dispatch_interrupt
+	;;
+	mov r27=ar.rsc		/* M */
+	mov r20=r1			/* A */
+	mov r25=ar.unat		/* M */
+	mov r26=ar.pfs		/* I */
+	mov r28=cr.iip		/* M */
+	cover			/* B (or nothing) */
+	;;
+	mov r1=sp
+	;;
+	invala			/* M */
+	mov r30=cr.ifs
+	;;
+	addl r1=-VMM_PT_REGS_SIZE,r1
+	;;
+	adds r17=2*L1_CACHE_BYTES,r1	/* really: biggest cache-line size */
+	adds r16=PT(CR_IPSR),r1
+	;;
+	lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES
+	st8 [r16]=r29			/* save cr.ipsr */
+	;;
+	lfetch.fault.excl.nt1 [r17]
+	mov r29=b0
+	;;
+	adds r16=PT(R8),r1  	/* initialize first base pointer */
+	adds r17=PT(R9),r1  	/* initialize second base pointer */
+	mov r18=r0      		/* make sure r18 isn't NaT */
+	;;
 .mem.offset 0,0; st8.spill [r16]=r8,16
 .mem.offset 8,0; st8.spill [r17]=r9,16
         ;;
 .mem.offset 0,0; st8.spill [r16]=r10,24
 .mem.offset 8,0; st8.spill [r17]=r11,24
         ;;
-    st8 [r16]=r28,16		/* save cr.iip */
-    st8 [r17]=r30,16		/* save cr.ifs */
-    mov r8=ar.fpsr		/* M */
-    mov r9=ar.csd
-    mov r10=ar.ssd
-    movl r11=FPSR_DEFAULT	/* L-unit */
-    ;;
-    st8 [r16]=r25,16		/* save ar.unat */
-    st8 [r17]=r26,16		/* save ar.pfs */
-    shl r18=r18,16		/* compute ar.rsc to be used for "loadrs" */
-    ;;
-    st8 [r16]=r27,16		/* save ar.rsc */
-    adds r17=16,r17		/* skip over ar_rnat field */
-    ;;
-    st8 [r17]=r31,16		/* save predicates */
-    adds r16=16,r16		/* skip over ar_bspstore field */
-    ;;
-    st8 [r16]=r29,16		/* save b0 */
-    st8 [r17]=r18,16		/* save ar.rsc value for "loadrs" */
-    ;;
+	st8 [r16]=r28,16		/* save cr.iip */
+	st8 [r17]=r30,16		/* save cr.ifs */
+	mov r8=ar.fpsr		/* M */
+	mov r9=ar.csd
+	mov r10=ar.ssd
+	movl r11=FPSR_DEFAULT	/* L-unit */
+	;;
+	st8 [r16]=r25,16		/* save ar.unat */
+	st8 [r17]=r26,16		/* save ar.pfs */
+	shl r18=r18,16		/* compute ar.rsc to be used for "loadrs" */
+	;;
+	st8 [r16]=r27,16		/* save ar.rsc */
+	adds r17=16,r17		/* skip over ar_rnat field */
+	;;
+	st8 [r17]=r31,16		/* save predicates */
+	adds r16=16,r16		/* skip over ar_bspstore field */
+	;;
+	st8 [r16]=r29,16		/* save b0 */
+	st8 [r17]=r18,16		/* save ar.rsc value for "loadrs" */
+	;;
 .mem.offset 0,0; st8.spill [r16]=r20,16    /* save original r1 */
 .mem.offset 8,0; st8.spill [r17]=r12,16
-    adds r12=-16,r1
-    /* switch to kernel memory stack (with 16 bytes of scratch) */
-    ;;
+	adds r12=-16,r1
+	/* switch to kernel memory stack (with 16 bytes of scratch) */
+	;;
 .mem.offset 0,0; st8.spill [r16]=r13,16
 .mem.offset 8,0; st8.spill [r17]=r8,16 /* save ar.fpsr */
-    ;;
+	;;
 .mem.offset 0,0; st8.spill [r16]=r15,16
 .mem.offset 8,0; st8.spill [r17]=r14,16
-    dep r14=-1,r0,60,4
-    ;;
+	dep r14=-1,r0,60,4
+	;;
 .mem.offset 0,0; st8.spill [r16]=r2,16
 .mem.offset 8,0; st8.spill [r17]=r3,16
-    adds r2=VMM_PT_REGS_R16_OFFSET,r1
-    adds r14 = VMM_VCPU_GP_OFFSET,r13
-    ;;
-    mov r8=ar.ccv
-    ld8 r14 = [r14]
-    ;;
-    mov r1=r14       /* establish kernel global pointer */
-    ;;                                          \
-    bsw.1
-    ;;
-    alloc r14=ar.pfs,0,0,1,0	// must be first in an insn group
-    mov out0=r13
-    ;;
-    ssm psr.ic
-    ;;
-    srlz.i
-    ;;
-    //(p15) ssm psr.i
-    adds r3=8,r2		// set up second base pointer for SAVE_REST
-    srlz.i			// ensure everybody knows psr.ic is back on
-    ;;
+	adds r2=VMM_PT_REGS_R16_OFFSET,r1
+	adds r14 = VMM_VCPU_GP_OFFSET,r13
+	;;
+	mov r8=ar.ccv
+	ld8 r14 = [r14]
+	;;
+	mov r1=r14       /* establish kernel global pointer */
+	;;                                          \
+	bsw.1
+	;;
+	alloc r14=ar.pfs,0,0,1,0	// must be first in an insn group
+	mov out0=r13
+	;;
+	ssm psr.ic
+	;;
+	srlz.i
+	;;
+	//(p15) ssm psr.i
+	adds r3=8,r2		// set up second base pointer for SAVE_REST
+	srlz.i			// ensure everybody knows psr.ic is back on
+	;;
 .mem.offset 0,0; st8.spill [r2]=r16,16
 .mem.offset 8,0; st8.spill [r3]=r17,16
-    ;;
+	;;
 .mem.offset 0,0; st8.spill [r2]=r18,16
 .mem.offset 8,0; st8.spill [r3]=r19,16
-    ;;
+	;;
 .mem.offset 0,0; st8.spill [r2]=r20,16
 .mem.offset 8,0; st8.spill [r3]=r21,16
-    mov r18=b6
-    ;;
+	mov r18=b6
+	;;
 .mem.offset 0,0; st8.spill [r2]=r22,16
 .mem.offset 8,0; st8.spill [r3]=r23,16
-    mov r19=b7
-    ;;
+	mov r19=b7
+	;;
 .mem.offset 0,0; st8.spill [r2]=r24,16
 .mem.offset 8,0; st8.spill [r3]=r25,16
-    ;;
+	;;
 .mem.offset 0,0; st8.spill [r2]=r26,16
 .mem.offset 8,0; st8.spill [r3]=r27,16
-    ;;
+	;;
 .mem.offset 0,0; st8.spill [r2]=r28,16
 .mem.offset 8,0; st8.spill [r3]=r29,16
-    ;;
+	;;
 .mem.offset 0,0; st8.spill [r2]=r30,16
 .mem.offset 8,0; st8.spill [r3]=r31,32
-    ;;
-    mov ar.fpsr=r11       /* M-unit */
-    st8 [r2]=r8,8         /* ar.ccv */
-    adds r24=PT(B6)-PT(F7),r3
-    ;;
-    stf.spill [r2]=f6,32
-    stf.spill [r3]=f7,32
-    ;;
-    stf.spill [r2]=f8,32
-    stf.spill [r3]=f9,32
-    ;;
-    stf.spill [r2]=f10
-    stf.spill [r3]=f11
-    adds r25=PT(B7)-PT(F11),r3
-    ;;
-    st8 [r24]=r18,16       /* b6 */
-    st8 [r25]=r19,16       /* b7 */
-    ;;
-    st8 [r24]=r9           /* ar.csd */
-    st8 [r25]=r10          /* ar.ssd */
-    ;;
-    srlz.d		// make sure we see the effect of cr.ivr
-    addl r14=@gprel(ia64_leave_nested),gp
-    ;;
-    mov rp=r14
-    br.call.sptk.many b6=kvm_ia64_handle_irq
-    ;;
+	;;
+	mov ar.fpsr=r11       /* M-unit */
+	st8 [r2]=r8,8         /* ar.ccv */
+	adds r24=PT(B6)-PT(F7),r3
+	;;
+	stf.spill [r2]=f6,32
+	stf.spill [r3]=f7,32
+	;;
+	stf.spill [r2]=f8,32
+	stf.spill [r3]=f9,32
+	;;
+	stf.spill [r2]=f10
+	stf.spill [r3]=f11
+	adds r25=PT(B7)-PT(F11),r3
+	;;
+	st8 [r24]=r18,16       /* b6 */
+	st8 [r25]=r19,16       /* b7 */
+	;;
+	st8 [r24]=r9           /* ar.csd */
+	st8 [r25]=r10          /* ar.ssd */
+	;;
+	srlz.d		// make sure we see the effect of cr.ivr
+	addl r14=@gprel(ia64_leave_nested),gp
+	;;
+	mov rp=r14
+	br.call.sptk.many b6=kvm_ia64_handle_irq
+	;;
 END(kvm_interrupt)
 
     .global kvm_dispatch_vexirq
@@ -414,387 +420,385 @@
 //////////////////////////////////////////////////////////////////////
 // 0x3400 Entry 13 (size 64 bundles) Reserved
 ENTRY(kvm_virtual_exirq)
-    mov r31=pr
-    mov r19=13
-    mov r30 =r0
-    ;;
+	mov r31=pr
+	mov r19=13
+	mov r30 =r0
+	;;
 kvm_dispatch_vexirq:
-    cmp.eq p6,p0 = 1,r30
-    ;;
-(p6)add r29 = VMM_VCPU_SAVED_GP_OFFSET,r21
-    ;;
-(p6)ld8 r1 = [r29]
-    ;;
-    KVM_SAVE_MIN_WITH_COVER_R19
-    alloc r14=ar.pfs,0,0,1,0
-    mov out0=r13
+	cmp.eq p6,p0 = 1,r30
+	;;
+(p6)	add r29 = VMM_VCPU_SAVED_GP_OFFSET,r21
+	;;
+(p6)	ld8 r1 = [r29]
+	;;
+	KVM_SAVE_MIN_WITH_COVER_R19
+	alloc r14=ar.pfs,0,0,1,0
+	mov out0=r13
 
-    ssm psr.ic
-    ;;
-    srlz.i                  // guarantee that interruption collection is on
-    ;;
-    //(p15) ssm psr.i               // restore psr.i
-    adds r3=8,r2                // set up second base pointer
-    ;;
-    KVM_SAVE_REST
-    addl r14=@gprel(ia64_leave_hypervisor),gp
-    ;;
-    mov rp=r14
-    br.call.sptk.many b6=kvm_vexirq
+	ssm psr.ic
+	;;
+	srlz.i // guarantee that interruption collection is on
+	;;
+	//(p15) ssm psr.i               // restore psr.i
+	adds r3=8,r2                // set up second base pointer
+	;;
+	KVM_SAVE_REST
+	addl r14=@gprel(ia64_leave_hypervisor),gp
+	;;
+	mov rp=r14
+	br.call.sptk.many b6=kvm_vexirq
 END(kvm_virtual_exirq)
 
     .org kvm_ia64_ivt+0x3800
 /////////////////////////////////////////////////////////////////////
 // 0x3800 Entry 14 (size 64 bundles) Reserved
-    KVM_FAULT(14)
-    // this code segment is from 2.6.16.13
-
+	KVM_FAULT(14)
+	// this code segment is from 2.6.16.13
 
     .org kvm_ia64_ivt+0x3c00
 ///////////////////////////////////////////////////////////////////////
 // 0x3c00 Entry 15 (size 64 bundles) Reserved
-    KVM_FAULT(15)
-
+	KVM_FAULT(15)
 
     .org kvm_ia64_ivt+0x4000
 ///////////////////////////////////////////////////////////////////////
 // 0x4000 Entry 16 (size 64 bundles) Reserved
-    KVM_FAULT(16)
+	KVM_FAULT(16)
 
     .org kvm_ia64_ivt+0x4400
 //////////////////////////////////////////////////////////////////////
 // 0x4400 Entry 17 (size 64 bundles) Reserved
-    KVM_FAULT(17)
+	KVM_FAULT(17)
 
     .org kvm_ia64_ivt+0x4800
 //////////////////////////////////////////////////////////////////////
 // 0x4800 Entry 18 (size 64 bundles) Reserved
-    KVM_FAULT(18)
+	KVM_FAULT(18)
 
     .org kvm_ia64_ivt+0x4c00
 //////////////////////////////////////////////////////////////////////
 // 0x4c00 Entry 19 (size 64 bundles) Reserved
-    KVM_FAULT(19)
+	KVM_FAULT(19)
 
     .org kvm_ia64_ivt+0x5000
 //////////////////////////////////////////////////////////////////////
 // 0x5000 Entry 20 (size 16 bundles) Page Not Present
 ENTRY(kvm_page_not_present)
-    KVM_REFLECT(20)
+	KVM_REFLECT(20)
 END(kvm_page_not_present)
 
     .org kvm_ia64_ivt+0x5100
 ///////////////////////////////////////////////////////////////////////
 // 0x5100 Entry 21 (size 16 bundles) Key Permission vector
 ENTRY(kvm_key_permission)
-    KVM_REFLECT(21)
+	KVM_REFLECT(21)
 END(kvm_key_permission)
 
     .org kvm_ia64_ivt+0x5200
 //////////////////////////////////////////////////////////////////////
 // 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26)
 ENTRY(kvm_iaccess_rights)
-    KVM_REFLECT(22)
+	KVM_REFLECT(22)
 END(kvm_iaccess_rights)
 
     .org kvm_ia64_ivt+0x5300
 //////////////////////////////////////////////////////////////////////
 // 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53)
 ENTRY(kvm_daccess_rights)
-    KVM_REFLECT(23)
+	KVM_REFLECT(23)
 END(kvm_daccess_rights)
 
     .org kvm_ia64_ivt+0x5400
 /////////////////////////////////////////////////////////////////////
 // 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39)
 ENTRY(kvm_general_exception)
-   KVM_REFLECT(24)
-   KVM_FAULT(24)
+	KVM_REFLECT(24)
+	KVM_FAULT(24)
 END(kvm_general_exception)
 
     .org kvm_ia64_ivt+0x5500
 //////////////////////////////////////////////////////////////////////
 // 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35)
 ENTRY(kvm_disabled_fp_reg)
-    KVM_REFLECT(25)
+	KVM_REFLECT(25)
 END(kvm_disabled_fp_reg)
 
     .org kvm_ia64_ivt+0x5600
 ////////////////////////////////////////////////////////////////////
 // 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50)
 ENTRY(kvm_nat_consumption)
-    KVM_REFLECT(26)
+	KVM_REFLECT(26)
 END(kvm_nat_consumption)
 
     .org kvm_ia64_ivt+0x5700
 /////////////////////////////////////////////////////////////////////
 // 0x5700 Entry 27 (size 16 bundles) Speculation (40)
 ENTRY(kvm_speculation_vector)
-    KVM_REFLECT(27)
+	KVM_REFLECT(27)
 END(kvm_speculation_vector)
 
     .org kvm_ia64_ivt+0x5800
 /////////////////////////////////////////////////////////////////////
 // 0x5800 Entry 28 (size 16 bundles) Reserved
-    KVM_FAULT(28)
+	KVM_FAULT(28)
 
     .org kvm_ia64_ivt+0x5900
 ///////////////////////////////////////////////////////////////////
 // 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56)
 ENTRY(kvm_debug_vector)
-    KVM_FAULT(29)
+	KVM_FAULT(29)
 END(kvm_debug_vector)
 
     .org kvm_ia64_ivt+0x5a00
 ///////////////////////////////////////////////////////////////
 // 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57)
 ENTRY(kvm_unaligned_access)
-    KVM_REFLECT(30)
+	KVM_REFLECT(30)
 END(kvm_unaligned_access)
 
     .org kvm_ia64_ivt+0x5b00
 //////////////////////////////////////////////////////////////////////
 // 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57)
 ENTRY(kvm_unsupported_data_reference)
-    KVM_REFLECT(31)
+	KVM_REFLECT(31)
 END(kvm_unsupported_data_reference)
 
     .org kvm_ia64_ivt+0x5c00
 ////////////////////////////////////////////////////////////////////
 // 0x5c00 Entry 32 (size 16 bundles) Floating Point FAULT (65)
 ENTRY(kvm_floating_point_fault)
-    KVM_REFLECT(32)
+	KVM_REFLECT(32)
 END(kvm_floating_point_fault)
 
     .org kvm_ia64_ivt+0x5d00
 /////////////////////////////////////////////////////////////////////
 // 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66)
 ENTRY(kvm_floating_point_trap)
-    KVM_REFLECT(33)
+	KVM_REFLECT(33)
 END(kvm_floating_point_trap)
 
     .org kvm_ia64_ivt+0x5e00
 //////////////////////////////////////////////////////////////////////
 // 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66)
 ENTRY(kvm_lower_privilege_trap)
-    KVM_REFLECT(34)
+	KVM_REFLECT(34)
 END(kvm_lower_privilege_trap)
 
     .org kvm_ia64_ivt+0x5f00
 //////////////////////////////////////////////////////////////////////
 // 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68)
 ENTRY(kvm_taken_branch_trap)
-    KVM_REFLECT(35)
+	KVM_REFLECT(35)
 END(kvm_taken_branch_trap)
 
     .org kvm_ia64_ivt+0x6000
 ////////////////////////////////////////////////////////////////////
 // 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69)
 ENTRY(kvm_single_step_trap)
-    KVM_REFLECT(36)
+	KVM_REFLECT(36)
 END(kvm_single_step_trap)
     .global kvm_virtualization_fault_back
     .org kvm_ia64_ivt+0x6100
 /////////////////////////////////////////////////////////////////////
 // 0x6100 Entry 37 (size 16 bundles) Virtualization Fault
 ENTRY(kvm_virtualization_fault)
-    mov r31=pr
-    adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21
-    ;;
-    st8 [r16] = r1
-    adds r17 = VMM_VCPU_GP_OFFSET, r21
-    ;;
-    ld8 r1 = [r17]
-    cmp.eq p6,p0=EVENT_MOV_FROM_AR,r24
-    cmp.eq p7,p0=EVENT_MOV_FROM_RR,r24
-    cmp.eq p8,p0=EVENT_MOV_TO_RR,r24
-    cmp.eq p9,p0=EVENT_RSM,r24
-    cmp.eq p10,p0=EVENT_SSM,r24
-    cmp.eq p11,p0=EVENT_MOV_TO_PSR,r24
-    cmp.eq p12,p0=EVENT_THASH,r24
-    (p6) br.dptk.many kvm_asm_mov_from_ar
-    (p7) br.dptk.many kvm_asm_mov_from_rr
-    (p8) br.dptk.many kvm_asm_mov_to_rr
-    (p9) br.dptk.many kvm_asm_rsm
-    (p10) br.dptk.many kvm_asm_ssm
-    (p11) br.dptk.many kvm_asm_mov_to_psr
-    (p12) br.dptk.many kvm_asm_thash
-    ;;
+	mov r31=pr
+	adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21
+	;;
+	st8 [r16] = r1
+	adds r17 = VMM_VCPU_GP_OFFSET, r21
+	;;
+	ld8 r1 = [r17]
+	cmp.eq p6,p0=EVENT_MOV_FROM_AR,r24
+	cmp.eq p7,p0=EVENT_MOV_FROM_RR,r24
+	cmp.eq p8,p0=EVENT_MOV_TO_RR,r24
+	cmp.eq p9,p0=EVENT_RSM,r24
+	cmp.eq p10,p0=EVENT_SSM,r24
+	cmp.eq p11,p0=EVENT_MOV_TO_PSR,r24
+	cmp.eq p12,p0=EVENT_THASH,r24
+(p6)	br.dptk.many kvm_asm_mov_from_ar
+(p7)	br.dptk.many kvm_asm_mov_from_rr
+(p8)	br.dptk.many kvm_asm_mov_to_rr
+(p9)	br.dptk.many kvm_asm_rsm
+(p10)	br.dptk.many kvm_asm_ssm
+(p11)	br.dptk.many kvm_asm_mov_to_psr
+(p12)	br.dptk.many kvm_asm_thash
+	;;
 kvm_virtualization_fault_back:
-    adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21
-    ;;
-    ld8 r1 = [r16]
-    ;;
-    mov r19=37
-    adds r16 = VMM_VCPU_CAUSE_OFFSET,r21
-    adds r17 = VMM_VCPU_OPCODE_OFFSET,r21
-    ;;
-    st8 [r16] = r24
-    st8 [r17] = r25
-    ;;
-    cmp.ne p6,p0=EVENT_RFI, r24
-    (p6) br.sptk kvm_dispatch_virtualization_fault
-    ;;
-    adds r18=VMM_VPD_BASE_OFFSET,r21
-    ;;
-    ld8 r18=[r18]
-    ;;
-    adds r18=VMM_VPD_VIFS_OFFSET,r18
-    ;;
-    ld8 r18=[r18]
-    ;;
-    tbit.z p6,p0=r18,63
-    (p6) br.sptk kvm_dispatch_virtualization_fault
-    ;;
-    //if vifs.v=1 desert current register frame
-    alloc r18=ar.pfs,0,0,0,0
-    br.sptk kvm_dispatch_virtualization_fault
+	adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21
+	;;
+	ld8 r1 = [r16]
+	;;
+	mov r19=37
+	adds r16 = VMM_VCPU_CAUSE_OFFSET,r21
+	adds r17 = VMM_VCPU_OPCODE_OFFSET,r21
+	;;
+	st8 [r16] = r24
+	st8 [r17] = r25
+	;;
+	cmp.ne p6,p0=EVENT_RFI, r24
+(p6)	br.sptk kvm_dispatch_virtualization_fault
+	;;
+	adds r18=VMM_VPD_BASE_OFFSET,r21
+	;;
+	ld8 r18=[r18]
+	;;
+	adds r18=VMM_VPD_VIFS_OFFSET,r18
+	;;
+	ld8 r18=[r18]
+	;;
+	tbit.z p6,p0=r18,63
+(p6)	br.sptk kvm_dispatch_virtualization_fault
+	;;
+// if vifs.v=1, discard the current register frame
+	alloc r18=ar.pfs,0,0,0,0
+	br.sptk kvm_dispatch_virtualization_fault
 END(kvm_virtualization_fault)
 
     .org kvm_ia64_ivt+0x6200
 //////////////////////////////////////////////////////////////
 // 0x6200 Entry 38 (size 16 bundles) Reserved
-    KVM_FAULT(38)
+	KVM_FAULT(38)
 
     .org kvm_ia64_ivt+0x6300
 /////////////////////////////////////////////////////////////////
 // 0x6300 Entry 39 (size 16 bundles) Reserved
-    KVM_FAULT(39)
+	KVM_FAULT(39)
 
     .org kvm_ia64_ivt+0x6400
 /////////////////////////////////////////////////////////////////
 // 0x6400 Entry 40 (size 16 bundles) Reserved
-    KVM_FAULT(40)
+	KVM_FAULT(40)
 
     .org kvm_ia64_ivt+0x6500
 //////////////////////////////////////////////////////////////////
 // 0x6500 Entry 41 (size 16 bundles) Reserved
-    KVM_FAULT(41)
+	KVM_FAULT(41)
 
     .org kvm_ia64_ivt+0x6600
 //////////////////////////////////////////////////////////////////
 // 0x6600 Entry 42 (size 16 bundles) Reserved
-    KVM_FAULT(42)
+	KVM_FAULT(42)
 
     .org kvm_ia64_ivt+0x6700
 //////////////////////////////////////////////////////////////////
 // 0x6700 Entry 43 (size 16 bundles) Reserved
-    KVM_FAULT(43)
+	KVM_FAULT(43)
 
     .org kvm_ia64_ivt+0x6800
 //////////////////////////////////////////////////////////////////
 // 0x6800 Entry 44 (size 16 bundles) Reserved
-    KVM_FAULT(44)
+	KVM_FAULT(44)
 
     .org kvm_ia64_ivt+0x6900
 ///////////////////////////////////////////////////////////////////
 // 0x6900 Entry 45 (size 16 bundles) IA-32 Exeception
 //(17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77)
 ENTRY(kvm_ia32_exception)
-    KVM_FAULT(45)
+	KVM_FAULT(45)
 END(kvm_ia32_exception)
 
     .org kvm_ia64_ivt+0x6a00
 ////////////////////////////////////////////////////////////////////
 // 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept  (30,31,59,70,71)
 ENTRY(kvm_ia32_intercept)
-    KVM_FAULT(47)
+	KVM_FAULT(47)
 END(kvm_ia32_intercept)
 
     .org kvm_ia64_ivt+0x6c00
 /////////////////////////////////////////////////////////////////////
 // 0x6c00 Entry 48 (size 16 bundles) Reserved
-    KVM_FAULT(48)
+	KVM_FAULT(48)
 
     .org kvm_ia64_ivt+0x6d00
 //////////////////////////////////////////////////////////////////////
 // 0x6d00 Entry 49 (size 16 bundles) Reserved
-    KVM_FAULT(49)
+	KVM_FAULT(49)
 
     .org kvm_ia64_ivt+0x6e00
 //////////////////////////////////////////////////////////////////////
 // 0x6e00 Entry 50 (size 16 bundles) Reserved
-    KVM_FAULT(50)
+	KVM_FAULT(50)
 
     .org kvm_ia64_ivt+0x6f00
 /////////////////////////////////////////////////////////////////////
 // 0x6f00 Entry 51 (size 16 bundles) Reserved
-    KVM_FAULT(52)
+	KVM_FAULT(52)
 
     .org kvm_ia64_ivt+0x7100
 ////////////////////////////////////////////////////////////////////
 // 0x7100 Entry 53 (size 16 bundles) Reserved
-    KVM_FAULT(53)
+	KVM_FAULT(53)
 
     .org kvm_ia64_ivt+0x7200
 /////////////////////////////////////////////////////////////////////
 // 0x7200 Entry 54 (size 16 bundles) Reserved
-    KVM_FAULT(54)
+	KVM_FAULT(54)
 
     .org kvm_ia64_ivt+0x7300
 ////////////////////////////////////////////////////////////////////
 // 0x7300 Entry 55 (size 16 bundles) Reserved
-    KVM_FAULT(55)
+	KVM_FAULT(55)
 
     .org kvm_ia64_ivt+0x7400
 ////////////////////////////////////////////////////////////////////
 // 0x7400 Entry 56 (size 16 bundles) Reserved
-    KVM_FAULT(56)
+	KVM_FAULT(56)
 
     .org kvm_ia64_ivt+0x7500
 /////////////////////////////////////////////////////////////////////
 // 0x7500 Entry 57 (size 16 bundles) Reserved
-    KVM_FAULT(57)
+	KVM_FAULT(57)
 
     .org kvm_ia64_ivt+0x7600
 /////////////////////////////////////////////////////////////////////
 // 0x7600 Entry 58 (size 16 bundles) Reserved
-    KVM_FAULT(58)
+	KVM_FAULT(58)
 
     .org kvm_ia64_ivt+0x7700
 ////////////////////////////////////////////////////////////////////
 // 0x7700 Entry 59 (size 16 bundles) Reserved
-    KVM_FAULT(59)
+	KVM_FAULT(59)
 
     .org kvm_ia64_ivt+0x7800
 ////////////////////////////////////////////////////////////////////
 // 0x7800 Entry 60 (size 16 bundles) Reserved
-    KVM_FAULT(60)
+	KVM_FAULT(60)
 
     .org kvm_ia64_ivt+0x7900
 /////////////////////////////////////////////////////////////////////
 // 0x7900 Entry 61 (size 16 bundles) Reserved
-    KVM_FAULT(61)
+	KVM_FAULT(61)
 
     .org kvm_ia64_ivt+0x7a00
 /////////////////////////////////////////////////////////////////////
 // 0x7a00 Entry 62 (size 16 bundles) Reserved
-    KVM_FAULT(62)
+	KVM_FAULT(62)
 
     .org kvm_ia64_ivt+0x7b00
 /////////////////////////////////////////////////////////////////////
 // 0x7b00 Entry 63 (size 16 bundles) Reserved
-    KVM_FAULT(63)
+	KVM_FAULT(63)
 
     .org kvm_ia64_ivt+0x7c00
 ////////////////////////////////////////////////////////////////////
 // 0x7c00 Entry 64 (size 16 bundles) Reserved
-    KVM_FAULT(64)
+	KVM_FAULT(64)
 
     .org kvm_ia64_ivt+0x7d00
 /////////////////////////////////////////////////////////////////////
 // 0x7d00 Entry 65 (size 16 bundles) Reserved
-    KVM_FAULT(65)
+	KVM_FAULT(65)
 
     .org kvm_ia64_ivt+0x7e00
 /////////////////////////////////////////////////////////////////////
 // 0x7e00 Entry 66 (size 16 bundles) Reserved
-    KVM_FAULT(66)
+	KVM_FAULT(66)
 
     .org kvm_ia64_ivt+0x7f00
 ////////////////////////////////////////////////////////////////////
 // 0x7f00 Entry 67 (size 16 bundles) Reserved
-    KVM_FAULT(67)
+	KVM_FAULT(67)
 
     .org kvm_ia64_ivt+0x8000
 // There is no particular reason for this code to be here, other than that
@@ -804,132 +808,128 @@
 
 
 ENTRY(kvm_dtlb_miss_dispatch)
-    mov r19 = 2
-    KVM_SAVE_MIN_WITH_COVER_R19
-    alloc r14=ar.pfs,0,0,3,0
-    mov out0=cr.ifa
-    mov out1=r15
-    adds r3=8,r2                // set up second base pointer
-    ;;
-    ssm psr.ic
-    ;;
-    srlz.i                  // guarantee that interruption collection is on
-    ;;
-    //(p15) ssm psr.i               // restore psr.i
-    addl r14=@gprel(ia64_leave_hypervisor_prepare),gp
-    ;;
-    KVM_SAVE_REST
-    KVM_SAVE_EXTRA
-    mov rp=r14
-    ;;
-    adds out2=16,r12
-    br.call.sptk.many b6=kvm_page_fault
+	mov r19 = 2
+	KVM_SAVE_MIN_WITH_COVER_R19
+	alloc r14=ar.pfs,0,0,3,0
+	mov out0=cr.ifa
+	mov out1=r15
+	adds r3=8,r2                // set up second base pointer
+	;;
+	ssm psr.ic
+	;;
+	srlz.i     // guarantee that interruption collection is on
+	;;
+	//(p15) ssm psr.i               // restore psr.i
+	addl r14=@gprel(ia64_leave_hypervisor_prepare),gp
+	;;
+	KVM_SAVE_REST
+	KVM_SAVE_EXTRA
+	mov rp=r14
+	;;
+	adds out2=16,r12
+	br.call.sptk.many b6=kvm_page_fault
 END(kvm_dtlb_miss_dispatch)
 
 ENTRY(kvm_itlb_miss_dispatch)
 
-    KVM_SAVE_MIN_WITH_COVER_R19
-    alloc r14=ar.pfs,0,0,3,0
-    mov out0=cr.ifa
-    mov out1=r15
-    adds r3=8,r2                // set up second base pointer
-    ;;
-    ssm psr.ic
-    ;;
-    srlz.i                  // guarantee that interruption collection is on
-    ;;
-    //(p15) ssm psr.i               // restore psr.i
-    addl r14=@gprel(ia64_leave_hypervisor),gp
-    ;;
-    KVM_SAVE_REST
-    mov rp=r14
-    ;;
-    adds out2=16,r12
-    br.call.sptk.many b6=kvm_page_fault
+	KVM_SAVE_MIN_WITH_COVER_R19
+	alloc r14=ar.pfs,0,0,3,0
+	mov out0=cr.ifa
+	mov out1=r15
+	adds r3=8,r2                // set up second base pointer
+	;;
+	ssm psr.ic
+	;;
+	srlz.i   // guarantee that interruption collection is on
+	;;
+	//(p15) ssm psr.i               // restore psr.i
+	addl r14=@gprel(ia64_leave_hypervisor),gp
+	;;
+	KVM_SAVE_REST
+	mov rp=r14
+	;;
+	adds out2=16,r12
+	br.call.sptk.many b6=kvm_page_fault
 END(kvm_itlb_miss_dispatch)
 
 ENTRY(kvm_dispatch_reflection)
-    /*
-     * Input:
-     *  psr.ic: off
-     *  r19:    intr type (offset into ivt, see ia64_int.h)
-     *  r31:    contains saved predicates (pr)
-     */
-    KVM_SAVE_MIN_WITH_COVER_R19
-    alloc r14=ar.pfs,0,0,5,0
-    mov out0=cr.ifa
-    mov out1=cr.isr
-    mov out2=cr.iim
-    mov out3=r15
-    adds r3=8,r2                // set up second base pointer
-    ;;
-    ssm psr.ic
-    ;;
-    srlz.i                  // guarantee that interruption collection is on
-    ;;
-    //(p15) ssm psr.i               // restore psr.i
-    addl r14=@gprel(ia64_leave_hypervisor),gp
-    ;;
-    KVM_SAVE_REST
-    mov rp=r14
-    ;;
-    adds out4=16,r12
-    br.call.sptk.many b6=reflect_interruption
+/*
+ * Input:
+ *  psr.ic: off
+ *  r19:    intr type (offset into ivt, see ia64_int.h)
+ *  r31:    contains saved predicates (pr)
+ */
+	KVM_SAVE_MIN_WITH_COVER_R19
+	alloc r14=ar.pfs,0,0,5,0
+	mov out0=cr.ifa
+	mov out1=cr.isr
+	mov out2=cr.iim
+	mov out3=r15
+	adds r3=8,r2                // set up second base pointer
+	;;
+	ssm psr.ic
+	;;
+	srlz.i   // guarantee that interruption collection is on
+	;;
+	//(p15) ssm psr.i               // restore psr.i
+	addl r14=@gprel(ia64_leave_hypervisor),gp
+	;;
+	KVM_SAVE_REST
+	mov rp=r14
+	;;
+	adds out4=16,r12
+	br.call.sptk.many b6=reflect_interruption
 END(kvm_dispatch_reflection)
 
 ENTRY(kvm_dispatch_virtualization_fault)
-    adds r16 = VMM_VCPU_CAUSE_OFFSET,r21
-    adds r17 = VMM_VCPU_OPCODE_OFFSET,r21
-    ;;
-    st8 [r16] = r24
-    st8 [r17] = r25
-    ;;
-    KVM_SAVE_MIN_WITH_COVER_R19
-    ;;
-    alloc r14=ar.pfs,0,0,2,0 // now it's safe (must be first in insn group!)
-    mov out0=r13        //vcpu
-    adds r3=8,r2                // set up second base pointer
-    ;;
-    ssm psr.ic
-    ;;
-    srlz.i                  // guarantee that interruption collection is on
-    ;;
-    //(p15) ssm psr.i               // restore psr.i
-    addl r14=@gprel(ia64_leave_hypervisor_prepare),gp
-    ;;
-    KVM_SAVE_REST
-    KVM_SAVE_EXTRA
-    mov rp=r14
-    ;;
-    adds out1=16,sp         //regs
-    br.call.sptk.many b6=kvm_emulate
+	adds r16 = VMM_VCPU_CAUSE_OFFSET,r21
+	adds r17 = VMM_VCPU_OPCODE_OFFSET,r21
+	;;
+	st8 [r16] = r24
+	st8 [r17] = r25
+	;;
+	KVM_SAVE_MIN_WITH_COVER_R19
+	;;
+	alloc r14=ar.pfs,0,0,2,0 // (must be first in insn group!)
+	mov out0=r13        //vcpu
+	adds r3=8,r2                // set up second base pointer
+	;;
+	ssm psr.ic
+	;;
+	srlz.i    // guarantee that interruption collection is on
+	;;
+	//(p15) ssm psr.i               // restore psr.i
+	addl r14=@gprel(ia64_leave_hypervisor_prepare),gp
+	;;
+	KVM_SAVE_REST
+	KVM_SAVE_EXTRA
+	mov rp=r14
+	;;
+	adds out1=16,sp         //regs
+	br.call.sptk.many b6=kvm_emulate
 END(kvm_dispatch_virtualization_fault)
 
 
 ENTRY(kvm_dispatch_interrupt)
-    KVM_SAVE_MIN_WITH_COVER_R19	// uses r31; defines r2 and r3
-    ;;
-    alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group
-    //mov out0=cr.ivr		// pass cr.ivr as first arg
-    adds r3=8,r2		// set up second base pointer for SAVE_REST
-    ;;
-    ssm psr.ic
-    ;;
-    srlz.i
-    ;;
-    //(p15) ssm psr.i
-    addl r14=@gprel(ia64_leave_hypervisor),gp
-    ;;
-    KVM_SAVE_REST
-    mov rp=r14
-    ;;
-    mov out0=r13		// pass pointer to pt_regs as second arg
-    br.call.sptk.many b6=kvm_ia64_handle_irq
+	KVM_SAVE_MIN_WITH_COVER_R19	// uses r31; defines r2 and r3
+	;;
+	alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group
+	adds r3=8,r2		// set up second base pointer for SAVE_REST
+	;;
+	ssm psr.ic
+	;;
+	srlz.i
+	;;
+	//(p15) ssm psr.i
+	addl r14=@gprel(ia64_leave_hypervisor),gp
+	;;
+	KVM_SAVE_REST
+	mov rp=r14
+	;;
+	mov out0=r13		// pass pointer to pt_regs as second arg
+	br.call.sptk.many b6=kvm_ia64_handle_irq
 END(kvm_dispatch_interrupt)
 
-
-
-
 GLOBAL_ENTRY(ia64_leave_nested)
 	rsm psr.i
 	;;
@@ -1008,7 +1008,7 @@
 	;;
 	ldf.fill f11=[r2]
 //	mov r18=r13
-//    mov r21=r13
+//	mov r21=r13
 	adds r16=PT(CR_IPSR)+16,r12
 	adds r17=PT(CR_IIP)+16,r12
 	;;
@@ -1058,138 +1058,135 @@
 	rfi
 END(ia64_leave_nested)
 
-
-
 GLOBAL_ENTRY(ia64_leave_hypervisor_prepare)
-    /*
-     * work.need_resched etc. mustn't get changed
-     *by this CPU before it returns to
-    ;;
-     * user- or fsys-mode, hence we disable interrupts early on:
-     */
-    adds r2 = PT(R4)+16,r12
-    adds r3 = PT(R5)+16,r12
-    adds r8 = PT(EML_UNAT)+16,r12
-    ;;
-    ld8 r8 = [r8]
-    ;;
-    mov ar.unat=r8
-    ;;
-    ld8.fill r4=[r2],16    //load r4
-    ld8.fill r5=[r3],16    //load r5
-    ;;
-    ld8.fill r6=[r2]    //load r6
-    ld8.fill r7=[r3]    //load r7
-    ;;
+/*
+ * work.need_resched etc. mustn't get changed
+ * by this CPU before it returns to
+ * user- or fsys-mode, hence we disable interrupts early on:
+ */
+	adds r2 = PT(R4)+16,r12
+	adds r3 = PT(R5)+16,r12
+	adds r8 = PT(EML_UNAT)+16,r12
+	;;
+	ld8 r8 = [r8]
+	;;
+	mov ar.unat=r8
+	;;
+	ld8.fill r4=[r2],16    //load r4
+	ld8.fill r5=[r3],16    //load r5
+	;;
+	ld8.fill r6=[r2]    //load r6
+	ld8.fill r7=[r3]    //load r7
+	;;
 END(ia64_leave_hypervisor_prepare)
 //fall through
 GLOBAL_ENTRY(ia64_leave_hypervisor)
-    rsm psr.i
-    ;;
-    br.call.sptk.many b0=leave_hypervisor_tail
-    ;;
-    adds r20=PT(PR)+16,r12
-    adds r8=PT(EML_UNAT)+16,r12
-    ;;
-    ld8 r8=[r8]
-    ;;
-    mov ar.unat=r8
-    ;;
-    lfetch [r20],PT(CR_IPSR)-PT(PR)
-    adds r2 = PT(B6)+16,r12
-    adds r3 = PT(B7)+16,r12
-    ;;
-    lfetch [r20]
-    ;;
-    ld8 r24=[r2],16        /* B6 */
-    ld8 r25=[r3],16        /* B7 */
-    ;;
-    ld8 r26=[r2],16        /* ar_csd */
-    ld8 r27=[r3],16        /* ar_ssd */
-    mov b6 = r24
-    ;;
-    ld8.fill r8=[r2],16
-    ld8.fill r9=[r3],16
-    mov b7 = r25
-    ;;
-    mov ar.csd = r26
-    mov ar.ssd = r27
-    ;;
-    ld8.fill r10=[r2],PT(R15)-PT(R10)
-    ld8.fill r11=[r3],PT(R14)-PT(R11)
-    ;;
-    ld8.fill r15=[r2],PT(R16)-PT(R15)
-    ld8.fill r14=[r3],PT(R17)-PT(R14)
-    ;;
-    ld8.fill r16=[r2],16
-    ld8.fill r17=[r3],16
-    ;;
-    ld8.fill r18=[r2],16
-    ld8.fill r19=[r3],16
-    ;;
-    ld8.fill r20=[r2],16
-    ld8.fill r21=[r3],16
-    ;;
-    ld8.fill r22=[r2],16
-    ld8.fill r23=[r3],16
-    ;;
-    ld8.fill r24=[r2],16
-    ld8.fill r25=[r3],16
-    ;;
-    ld8.fill r26=[r2],16
-    ld8.fill r27=[r3],16
-    ;;
-    ld8.fill r28=[r2],16
-    ld8.fill r29=[r3],16
-    ;;
-    ld8.fill r30=[r2],PT(F6)-PT(R30)
-    ld8.fill r31=[r3],PT(F7)-PT(R31)
-    ;;
-    rsm psr.i | psr.ic
-    // initiate turning off of interrupt and interruption collection
-    invala          // invalidate ALAT
-    ;;
-    srlz.i          // ensure interruption collection is off
-    ;;
-    bsw.0
-    ;;
-    adds r16 = PT(CR_IPSR)+16,r12
-    adds r17 = PT(CR_IIP)+16,r12
-    mov r21=r13		// get current
-    ;;
-    ld8 r31=[r16],16    // load cr.ipsr
-    ld8 r30=[r17],16    // load cr.iip
-    ;;
-    ld8 r29=[r16],16    // load cr.ifs
-    ld8 r28=[r17],16    // load ar.unat
-    ;;
-    ld8 r27=[r16],16    // load ar.pfs
-    ld8 r26=[r17],16    // load ar.rsc
-    ;;
-    ld8 r25=[r16],16    // load ar.rnat
-    ld8 r24=[r17],16    // load ar.bspstore
-    ;;
-    ld8 r23=[r16],16    // load predicates
-    ld8 r22=[r17],16    // load b0
-    ;;
-    ld8 r20=[r16],16    // load ar.rsc value for "loadrs"
-    ld8.fill r1=[r17],16    //load r1
-    ;;
-    ld8.fill r12=[r16],16    //load r12
-    ld8.fill r13=[r17],PT(R2)-PT(R13)    //load r13
-    ;;
-    ld8 r19=[r16],PT(R3)-PT(AR_FPSR)    //load ar_fpsr
-    ld8.fill r2=[r17],PT(AR_CCV)-PT(R2)    //load r2
-    ;;
-    ld8.fill r3=[r16]	//load r3
-    ld8 r18=[r17]	//load ar_ccv
-    ;;
-    mov ar.fpsr=r19
-    mov ar.ccv=r18
-    shr.u r18=r20,16
-    ;;
+	rsm psr.i
+	;;
+	br.call.sptk.many b0=leave_hypervisor_tail
+	;;
+	adds r20=PT(PR)+16,r12
+	adds r8=PT(EML_UNAT)+16,r12
+	;;
+	ld8 r8=[r8]
+	;;
+	mov ar.unat=r8
+	;;
+	lfetch [r20],PT(CR_IPSR)-PT(PR)
+	adds r2 = PT(B6)+16,r12
+	adds r3 = PT(B7)+16,r12
+	;;
+	lfetch [r20]
+	;;
+	ld8 r24=[r2],16        /* B6 */
+	ld8 r25=[r3],16        /* B7 */
+	;;
+	ld8 r26=[r2],16        /* ar_csd */
+	ld8 r27=[r3],16        /* ar_ssd */
+	mov b6 = r24
+	;;
+	ld8.fill r8=[r2],16
+	ld8.fill r9=[r3],16
+	mov b7 = r25
+	;;
+	mov ar.csd = r26
+	mov ar.ssd = r27
+	;;
+	ld8.fill r10=[r2],PT(R15)-PT(R10)
+	ld8.fill r11=[r3],PT(R14)-PT(R11)
+	;;
+	ld8.fill r15=[r2],PT(R16)-PT(R15)
+	ld8.fill r14=[r3],PT(R17)-PT(R14)
+	;;
+	ld8.fill r16=[r2],16
+	ld8.fill r17=[r3],16
+	;;
+	ld8.fill r18=[r2],16
+	ld8.fill r19=[r3],16
+	;;
+	ld8.fill r20=[r2],16
+	ld8.fill r21=[r3],16
+	;;
+	ld8.fill r22=[r2],16
+	ld8.fill r23=[r3],16
+	;;
+	ld8.fill r24=[r2],16
+	ld8.fill r25=[r3],16
+	;;
+	ld8.fill r26=[r2],16
+	ld8.fill r27=[r3],16
+	;;
+	ld8.fill r28=[r2],16
+	ld8.fill r29=[r3],16
+	;;
+	ld8.fill r30=[r2],PT(F6)-PT(R30)
+	ld8.fill r31=[r3],PT(F7)-PT(R31)
+	;;
+	rsm psr.i | psr.ic
+	// initiate turning off of interrupt and interruption collection
+	invala          // invalidate ALAT
+	;;
+	srlz.i          // ensure interruption collection is off
+	;;
+	bsw.0
+	;;
+	adds r16 = PT(CR_IPSR)+16,r12
+	adds r17 = PT(CR_IIP)+16,r12
+	mov r21=r13		// get current
+	;;
+	ld8 r31=[r16],16    // load cr.ipsr
+	ld8 r30=[r17],16    // load cr.iip
+	;;
+	ld8 r29=[r16],16    // load cr.ifs
+	ld8 r28=[r17],16    // load ar.unat
+	;;
+	ld8 r27=[r16],16    // load ar.pfs
+	ld8 r26=[r17],16    // load ar.rsc
+	;;
+	ld8 r25=[r16],16    // load ar.rnat
+	ld8 r24=[r17],16    // load ar.bspstore
+	;;
+	ld8 r23=[r16],16    // load predicates
+	ld8 r22=[r17],16    // load b0
+	;;
+	ld8 r20=[r16],16    // load ar.rsc value for "loadrs"
+	ld8.fill r1=[r17],16    //load r1
+	;;
+	ld8.fill r12=[r16],16    //load r12
+	ld8.fill r13=[r17],PT(R2)-PT(R13)    //load r13
+	;;
+	ld8 r19=[r16],PT(R3)-PT(AR_FPSR)    //load ar_fpsr
+	ld8.fill r2=[r17],PT(AR_CCV)-PT(R2)    //load r2
+	;;
+	ld8.fill r3=[r16]	//load r3
+	ld8 r18=[r17]	//load ar_ccv
+	;;
+	mov ar.fpsr=r19
+	mov ar.ccv=r18
+	shr.u r18=r20,16
+	;;
 kvm_rbs_switch:
-    mov r19=96
+	mov r19=96
 
 kvm_dont_preserve_current_frame:
 /*
@@ -1201,76 +1198,76 @@
 #   define pReturn	p7
 #   define Nregs	14
 
-    alloc loc0=ar.pfs,2,Nregs-2,2,0
-    shr.u loc1=r18,9		// RNaTslots <= floor(dirtySize / (64*8))
-    sub r19=r19,r18		// r19 = (physStackedSize + 8) - dirtySize
-    ;;
-    mov ar.rsc=r20		// load ar.rsc to be used for "loadrs"
-    shladd in0=loc1,3,r19
-    mov in1=0
-    ;;
-    TEXT_ALIGN(32)
+	alloc loc0=ar.pfs,2,Nregs-2,2,0
+	shr.u loc1=r18,9	// RNaTslots <= floor(dirtySize / (64*8))
+	sub r19=r19,r18		// r19 = (physStackedSize + 8) - dirtySize
+	;;
+	mov ar.rsc=r20		// load ar.rsc to be used for "loadrs"
+	shladd in0=loc1,3,r19
+	mov in1=0
+	;;
+	TEXT_ALIGN(32)
 kvm_rse_clear_invalid:
-    alloc loc0=ar.pfs,2,Nregs-2,2,0
-    cmp.lt pRecurse,p0=Nregs*8,in0
-    // if more than Nregs regs left to clear, (re)curse
-    add out0=-Nregs*8,in0
-    add out1=1,in1		// increment recursion count
-    mov loc1=0
-    mov loc2=0
-    ;;
-    mov loc3=0
-    mov loc4=0
-    mov loc5=0
-    mov loc6=0
-    mov loc7=0
+	alloc loc0=ar.pfs,2,Nregs-2,2,0
+	cmp.lt pRecurse,p0=Nregs*8,in0
+	// if more than Nregs regs left to clear, (re)curse
+	add out0=-Nregs*8,in0
+	add out1=1,in1		// increment recursion count
+	mov loc1=0
+	mov loc2=0
+	;;
+	mov loc3=0
+	mov loc4=0
+	mov loc5=0
+	mov loc6=0
+	mov loc7=0
 (pRecurse) br.call.dptk.few b0=kvm_rse_clear_invalid
-    ;;
-    mov loc8=0
-    mov loc9=0
-    cmp.ne pReturn,p0=r0,in1
-    // if recursion count != 0, we need to do a br.ret
-    mov loc10=0
-    mov loc11=0
+	;;
+	mov loc8=0
+	mov loc9=0
+	cmp.ne pReturn,p0=r0,in1
+	// if recursion count != 0, we need to do a br.ret
+	mov loc10=0
+	mov loc11=0
 (pReturn) br.ret.dptk.many b0
 
 #	undef pRecurse
 #	undef pReturn
 
 // loadrs has already been shifted
-    alloc r16=ar.pfs,0,0,0,0    // drop current register frame
-    ;;
-    loadrs
-    ;;
-    mov ar.bspstore=r24
-    ;;
-    mov ar.unat=r28
-    mov ar.rnat=r25
-    mov ar.rsc=r26
-    ;;
-    mov cr.ipsr=r31
-    mov cr.iip=r30
-    mov cr.ifs=r29
-    mov ar.pfs=r27
-    adds r18=VMM_VPD_BASE_OFFSET,r21
-    ;;
-    ld8 r18=[r18]   //vpd
-    adds r17=VMM_VCPU_ISR_OFFSET,r21
-    ;;
-    ld8 r17=[r17]
-    adds r19=VMM_VPD_VPSR_OFFSET,r18
-    ;;
-    ld8 r19=[r19]        //vpsr
-    mov r25=r18
-    adds r16= VMM_VCPU_GP_OFFSET,r21
-    ;;
-    ld8 r16= [r16] // Put gp in r24
-    movl r24=@gprel(ia64_vmm_entry)  // calculate return address
-    ;;
-    add  r24=r24,r16
-    ;;
-    br.sptk.many  kvm_vps_sync_write       // call the service
-    ;;
+	alloc r16=ar.pfs,0,0,0,0    // drop current register frame
+	;;
+	loadrs
+	;;
+	mov ar.bspstore=r24
+	;;
+	mov ar.unat=r28
+	mov ar.rnat=r25
+	mov ar.rsc=r26
+	;;
+	mov cr.ipsr=r31
+	mov cr.iip=r30
+	mov cr.ifs=r29
+	mov ar.pfs=r27
+	adds r18=VMM_VPD_BASE_OFFSET,r21
+	;;
+	ld8 r18=[r18]   //vpd
+	adds r17=VMM_VCPU_ISR_OFFSET,r21
+	;;
+	ld8 r17=[r17]
+	adds r19=VMM_VPD_VPSR_OFFSET,r18
+	;;
+	ld8 r19=[r19]        //vpsr
+	mov r25=r18
+	adds r16= VMM_VCPU_GP_OFFSET,r21
+	;;
+	ld8 r16= [r16] // Put gp in r24
+	movl r24=@gprel(ia64_vmm_entry)  // calculate return address
+	;;
+	add  r24=r24,r16
+	;;
+	br.sptk.many  kvm_vps_sync_write       // call the service
+	;;
 END(ia64_leave_hypervisor)
 // fall through
 GLOBAL_ENTRY(ia64_vmm_entry)
@@ -1283,16 +1280,14 @@
  *  r22:b0
  *  r23:predicate
  */
-    mov r24=r22
-    mov r25=r18
-    tbit.nz p1,p2 = r19,IA64_PSR_IC_BIT        // p1=vpsr.ic
-    (p1) br.cond.sptk.few kvm_vps_resume_normal
-    (p2) br.cond.sptk.many kvm_vps_resume_handler
-    ;;
+	mov r24=r22
+	mov r25=r18
+	tbit.nz p1,p2 = r19,IA64_PSR_IC_BIT        // p1=vpsr.ic
+(p1)	br.cond.sptk.few kvm_vps_resume_normal
+(p2)	br.cond.sptk.many kvm_vps_resume_handler
+	;;
 END(ia64_vmm_entry)
 
-
-
 /*
  * extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2,
  *                  u64 arg3, u64 arg4, u64 arg5,
@@ -1310,88 +1305,88 @@
 entry   =   loc3
 hostret =   r24
 
-    alloc   pfssave=ar.pfs,4,4,0,0
-    mov rpsave=rp
-    adds entry=VMM_VCPU_VSA_BASE_OFFSET, r13
-    ;;
-    ld8 entry=[entry]
-1:  mov hostret=ip
-    mov r25=in1         // copy arguments
-    mov r26=in2
-    mov r27=in3
-    mov psrsave=psr
-    ;;
-    tbit.nz p6,p0=psrsave,14    // IA64_PSR_I
-    tbit.nz p7,p0=psrsave,13    // IA64_PSR_IC
-    ;;
-    add hostret=2f-1b,hostret   // calculate return address
-    add entry=entry,in0
-    ;;
-    rsm psr.i | psr.ic
-    ;;
-    srlz.i
-    mov b6=entry
-    br.cond.sptk b6         // call the service
+	alloc   pfssave=ar.pfs,4,4,0,0
+	mov rpsave=rp
+	adds entry=VMM_VCPU_VSA_BASE_OFFSET, r13
+	;;
+	ld8 entry=[entry]
+1:	mov hostret=ip
+	mov r25=in1         // copy arguments
+	mov r26=in2
+	mov r27=in3
+	mov psrsave=psr
+	;;
+	tbit.nz p6,p0=psrsave,14    // IA64_PSR_I
+	tbit.nz p7,p0=psrsave,13    // IA64_PSR_IC
+	;;
+	add hostret=2f-1b,hostret   // calculate return address
+	add entry=entry,in0
+	;;
+	rsm psr.i | psr.ic
+	;;
+	srlz.i
+	mov b6=entry
+	br.cond.sptk b6         // call the service
 2:
-    // Architectural sequence for enabling interrupts if necessary
+// Architectural sequence for enabling interrupts if necessary
 (p7)    ssm psr.ic
-    ;;
+	;;
 (p7)    srlz.i
-    ;;
+	;;
 //(p6)    ssm psr.i
-    ;;
-    mov rp=rpsave
-    mov ar.pfs=pfssave
-    mov r8=r31
-    ;;
-    srlz.d
-    br.ret.sptk rp
+	;;
+	mov rp=rpsave
+	mov ar.pfs=pfssave
+	mov r8=r31
+	;;
+	srlz.d
+	br.ret.sptk rp
 
 END(ia64_call_vsa)
 
 #define  INIT_BSPSTORE  ((4<<30)-(12<<20)-0x100)
 
 GLOBAL_ENTRY(vmm_reset_entry)
-    //set up ipsr, iip, vpd.vpsr, dcr
-    // For IPSR: it/dt/rt=1, i/ic=1, si=1, vm/bn=1
-    // For DCR: all bits 0
-    bsw.0
-    ;;
-    mov r21 =r13
-    adds r14=-VMM_PT_REGS_SIZE, r12
-    ;;
-    movl r6=0x501008826000      // IPSR dt/rt/it:1;i/ic:1, si:1, vm/bn:1
-    movl r10=0x8000000000000000
-    adds r16=PT(CR_IIP), r14
-    adds r20=PT(R1), r14
-    ;;
-    rsm psr.ic | psr.i
-    ;;
-    srlz.i
-    ;;
-    mov ar.rsc = 0
-    ;;
-    flushrs
-    ;;
-    mov ar.bspstore = 0
-    // clear BSPSTORE
-    ;;
-    mov cr.ipsr=r6
-    mov cr.ifs=r10
-    ld8 r4 = [r16] // Set init iip for first run.
-    ld8 r1 = [r20]
-    ;;
-    mov cr.iip=r4
-    adds r16=VMM_VPD_BASE_OFFSET,r13
-    ;;
-    ld8 r18=[r16]
-    ;;
-    adds r19=VMM_VPD_VPSR_OFFSET,r18
-    ;;
-    ld8 r19=[r19]
-    mov r17=r0
-    mov r22=r0
-    mov r23=r0
-    br.cond.sptk ia64_vmm_entry
-    br.ret.sptk  b0
+	//set up ipsr, iip, vpd.vpsr, dcr
+	// For IPSR: it/dt/rt=1, i/ic=1, si=1, vm/bn=1
+	// For DCR: all bits 0
+	bsw.0
+	;;
+	mov r21 =r13
+	adds r14=-VMM_PT_REGS_SIZE, r12
+	;;
+	movl r6=0x501008826000      // IPSR dt/rt/it:1;i/ic:1, si:1, vm/bn:1
+	movl r10=0x8000000000000000
+	adds r16=PT(CR_IIP), r14
+	adds r20=PT(R1), r14
+	;;
+	rsm psr.ic | psr.i
+	;;
+	srlz.i
+	;;
+	mov ar.rsc = 0
+	;;
+	flushrs
+	;;
+	mov ar.bspstore = 0
+	// clear BSPSTORE
+	;;
+	mov cr.ipsr=r6
+	mov cr.ifs=r10
+	ld8 r4 = [r16] // Set init iip for first run.
+	ld8 r1 = [r20]
+	;;
+	mov cr.iip=r4
+	adds r16=VMM_VPD_BASE_OFFSET,r13
+	;;
+	ld8 r18=[r16]
+	;;
+	adds r19=VMM_VPD_VPSR_OFFSET,r18
+	;;
+	ld8 r19=[r19]
+	mov r17=r0
+	mov r22=r0
+	mov r23=r0
+	br.cond.sptk ia64_vmm_entry
+	br.ret.sptk  b0
 END(vmm_reset_entry)
diff --git a/arch/ia64/kvm/vtlb.c b/arch/ia64/kvm/vtlb.c
index e22b933..6b6307a 100644
--- a/arch/ia64/kvm/vtlb.c
+++ b/arch/ia64/kvm/vtlb.c
@@ -183,8 +183,8 @@
 	u64 i, dirty_pages = 1;
 	u64 base_gfn = (pte&_PAGE_PPN_MASK) >> PAGE_SHIFT;
 	spinlock_t *lock = __kvm_va(v->arch.dirty_log_lock_pa);
-	void *dirty_bitmap = (void *)v - (KVM_VCPU_OFS + v->vcpu_id * VCPU_SIZE)
-						+ KVM_MEM_DIRTY_LOG_OFS;
+	void *dirty_bitmap = (void *)KVM_MEM_DIRTY_LOG_BASE;
+
 	dirty_pages <<= ps <= PAGE_SHIFT ? 0 : ps - PAGE_SHIFT;
 
 	vmm_spin_lock(lock);
diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c
index 0c66dbd..66fd705 100644
--- a/arch/ia64/sn/kernel/irq.c
+++ b/arch/ia64/sn/kernel/irq.c
@@ -227,14 +227,14 @@
 	return new_irq_info;
 }
 
-static void sn_set_affinity_irq(unsigned int irq, cpumask_t mask)
+static void sn_set_affinity_irq(unsigned int irq, const struct cpumask *mask)
 {
 	struct sn_irq_info *sn_irq_info, *sn_irq_info_safe;
 	nasid_t nasid;
 	int slice;
 
-	nasid = cpuid_to_nasid(first_cpu(mask));
-	slice = cpuid_to_slice(first_cpu(mask));
+	nasid = cpuid_to_nasid(cpumask_first(mask));
+	slice = cpuid_to_slice(cpumask_first(mask));
 
 	list_for_each_entry_safe(sn_irq_info, sn_irq_info_safe,
 				 sn_irq_lh[irq], list)
diff --git a/arch/ia64/sn/kernel/msi_sn.c b/arch/ia64/sn/kernel/msi_sn.c
index 83f190f..ca553b0 100644
--- a/arch/ia64/sn/kernel/msi_sn.c
+++ b/arch/ia64/sn/kernel/msi_sn.c
@@ -151,7 +151,8 @@
 }
 
 #ifdef CONFIG_SMP
-static void sn_set_msi_irq_affinity(unsigned int irq, cpumask_t cpu_mask)
+static void sn_set_msi_irq_affinity(unsigned int irq,
+				    const struct cpumask *cpu_mask)
 {
 	struct msi_msg msg;
 	int slice;
@@ -164,7 +165,7 @@
 	struct sn_pcibus_provider *provider;
 	unsigned int cpu;
 
-	cpu = first_cpu(cpu_mask);
+	cpu = cpumask_first(cpu_mask);
 	sn_irq_info = sn_msi_info[irq].sn_irq_info;
 	if (sn_irq_info == NULL || sn_irq_info->irq_int_bit >= 0)
 		return;
@@ -204,7 +205,7 @@
 	msg.address_lo = (u32)(bus_addr & 0x00000000ffffffff);
 
 	write_msi_msg(irq, &msg);
-	irq_desc[irq].affinity = cpu_mask;
+	irq_desc[irq].affinity = *cpu_mask;
 }
 #endif /* CONFIG_SMP */
 
diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig
index 29047d5..cabba33 100644
--- a/arch/m32r/Kconfig
+++ b/arch/m32r/Kconfig
@@ -10,6 +10,7 @@
 	default y
 	select HAVE_IDE
 	select HAVE_OPROFILE
+	select INIT_ALL_POSSIBLE
 
 config SBUS
 	bool
diff --git a/arch/m32r/kernel/smpboot.c b/arch/m32r/kernel/smpboot.c
index 39cb6da..0f06b37 100644
--- a/arch/m32r/kernel/smpboot.c
+++ b/arch/m32r/kernel/smpboot.c
@@ -73,17 +73,11 @@
 /* Bitmask of physically existing CPUs */
 physid_mask_t phys_cpu_present_map;
 
-/* Bitmask of currently online CPUs */
-cpumask_t cpu_online_map;
-EXPORT_SYMBOL(cpu_online_map);
-
 cpumask_t cpu_bootout_map;
 cpumask_t cpu_bootin_map;
 static cpumask_t cpu_callin_map;
 cpumask_t cpu_callout_map;
 EXPORT_SYMBOL(cpu_callout_map);
-cpumask_t cpu_possible_map = CPU_MASK_ALL;
-EXPORT_SYMBOL(cpu_possible_map);
 
 /* Per CPU bogomips and other parameters */
 struct cpuinfo_m32r cpu_data[NR_CPUS] __cacheline_aligned;
diff --git a/arch/m68knommu/platform/coldfire/pit.c b/arch/m68knommu/platform/coldfire/pit.c
index c5b9167..2a12e7f 100644
--- a/arch/m68knommu/platform/coldfire/pit.c
+++ b/arch/m68knommu/platform/coldfire/pit.c
@@ -156,7 +156,7 @@
 {
 	u32 imr;
 
-	cf_pit_clockevent.cpumask = cpumask_of_cpu(smp_processor_id());
+	cf_pit_clockevent.cpumask = cpumask_of(smp_processor_id());
 	cf_pit_clockevent.mult = div_sc(FREQ, NSEC_PER_SEC, 32);
 	cf_pit_clockevent.max_delta_ns =
 		clockevent_delta2ns(0xFFFF, &cf_pit_clockevent);
diff --git a/arch/mips/include/asm/irq.h b/arch/mips/include/asm/irq.h
index a58f0ee..abc62aa 100644
--- a/arch/mips/include/asm/irq.h
+++ b/arch/mips/include/asm/irq.h
@@ -49,7 +49,8 @@
 #ifdef CONFIG_MIPS_MT_SMTC_IRQAFF
 #include <linux/cpumask.h>
 
-extern void plat_set_irq_affinity(unsigned int irq, cpumask_t affinity);
+extern void plat_set_irq_affinity(unsigned int irq,
+				  const struct cpumask *affinity);
 extern void smtc_forward_irq(unsigned int irq);
 
 /*
diff --git a/arch/mips/include/asm/mach-ip27/topology.h b/arch/mips/include/asm/mach-ip27/topology.h
index 7785bec..1fb959f 100644
--- a/arch/mips/include/asm/mach-ip27/topology.h
+++ b/arch/mips/include/asm/mach-ip27/topology.h
@@ -37,7 +37,6 @@
 
 /* sched_domains SD_NODE_INIT for SGI IP27 machines */
 #define SD_NODE_INIT (struct sched_domain) {		\
-	.span			= CPU_MASK_NONE,	\
 	.parent			= NULL,			\
 	.child			= NULL,			\
 	.groups			= NULL,			\
diff --git a/arch/mips/include/asm/smp.h b/arch/mips/include/asm/smp.h
index 0ff5b52..86557b5 100644
--- a/arch/mips/include/asm/smp.h
+++ b/arch/mips/include/asm/smp.h
@@ -38,9 +38,6 @@
 #define SMP_RESCHEDULE_YOURSELF	0x1	/* XXX braindead */
 #define SMP_CALL_FUNCTION	0x2
 
-extern cpumask_t phys_cpu_present_map;
-#define cpu_possible_map	phys_cpu_present_map
-
 extern void asmlinkage smp_bootstrap(void);
 
 /*
diff --git a/arch/mips/jazz/irq.c b/arch/mips/jazz/irq.c
index d7f8a78..03965cb 100644
--- a/arch/mips/jazz/irq.c
+++ b/arch/mips/jazz/irq.c
@@ -146,7 +146,7 @@
 
 	BUG_ON(HZ != 100);
 
-	cd->cpumask             = cpumask_of_cpu(cpu);
+	cd->cpumask             = cpumask_of(cpu);
 	clockevents_register_device(cd);
 	action->dev_id = cd;
 	setup_irq(JAZZ_TIMER_IRQ, action);
diff --git a/arch/mips/kernel/cevt-bcm1480.c b/arch/mips/kernel/cevt-bcm1480.c
index 0a57f86..b820661 100644
--- a/arch/mips/kernel/cevt-bcm1480.c
+++ b/arch/mips/kernel/cevt-bcm1480.c
@@ -126,7 +126,7 @@
 	cd->min_delta_ns	= clockevent_delta2ns(2, cd);
 	cd->rating		= 200;
 	cd->irq			= irq;
-	cd->cpumask		= cpumask_of_cpu(cpu);
+	cd->cpumask		= cpumask_of(cpu);
 	cd->set_next_event	= sibyte_next_event;
 	cd->set_mode		= sibyte_set_mode;
 	clockevents_register_device(cd);
@@ -148,6 +148,6 @@
 	action->name	= name;
 	action->dev_id	= cd;
 
-	irq_set_affinity(irq, cpumask_of_cpu(cpu));
+	irq_set_affinity(irq, cpumask_of(cpu));
 	setup_irq(irq, action);
 }
diff --git a/arch/mips/kernel/cevt-ds1287.c b/arch/mips/kernel/cevt-ds1287.c
index df4acb6..1ada45e 100644
--- a/arch/mips/kernel/cevt-ds1287.c
+++ b/arch/mips/kernel/cevt-ds1287.c
@@ -88,7 +88,6 @@
 static struct clock_event_device ds1287_clockevent = {
 	.name		= "ds1287",
 	.features	= CLOCK_EVT_FEAT_PERIODIC,
-	.cpumask	= CPU_MASK_CPU0,
 	.set_next_event	= ds1287_set_next_event,
 	.set_mode	= ds1287_set_mode,
 	.event_handler	= ds1287_event_handler,
@@ -122,6 +121,7 @@
 	clockevent_set_clock(cd, 32768);
 	cd->max_delta_ns = clockevent_delta2ns(0x7fffffff, cd);
 	cd->min_delta_ns = clockevent_delta2ns(0x300, cd);
+	cd->cpumask = cpumask_of(0);
 
 	clockevents_register_device(&ds1287_clockevent);
 
diff --git a/arch/mips/kernel/cevt-gt641xx.c b/arch/mips/kernel/cevt-gt641xx.c
index 6e2f585..e9b787f 100644
--- a/arch/mips/kernel/cevt-gt641xx.c
+++ b/arch/mips/kernel/cevt-gt641xx.c
@@ -96,7 +96,6 @@
 static struct clock_event_device gt641xx_timer0_clockevent = {
 	.name		= "gt641xx-timer0",
 	.features	= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
-	.cpumask	= CPU_MASK_CPU0,
 	.irq		= GT641XX_TIMER0_IRQ,
 	.set_next_event	= gt641xx_timer0_set_next_event,
 	.set_mode	= gt641xx_timer0_set_mode,
@@ -132,6 +131,7 @@
 	clockevent_set_clock(cd, gt641xx_base_clock);
 	cd->max_delta_ns = clockevent_delta2ns(0x7fffffff, cd);
 	cd->min_delta_ns = clockevent_delta2ns(0x300, cd);
+	cd->cpumask = cpumask_of(0);
 
 	clockevents_register_device(&gt641xx_timer0_clockevent);
 
diff --git a/arch/mips/kernel/cevt-r4k.c b/arch/mips/kernel/cevt-r4k.c
index 4a4c59f..e1ec83b 100644
--- a/arch/mips/kernel/cevt-r4k.c
+++ b/arch/mips/kernel/cevt-r4k.c
@@ -195,7 +195,7 @@
 
 	cd->rating		= 300;
 	cd->irq			= irq;
-	cd->cpumask		= cpumask_of_cpu(cpu);
+	cd->cpumask		= cpumask_of(cpu);
 	cd->set_next_event	= mips_next_event;
 	cd->set_mode		= mips_set_clock_mode;
 	cd->event_handler	= mips_event_handler;
diff --git a/arch/mips/kernel/cevt-sb1250.c b/arch/mips/kernel/cevt-sb1250.c
index 63ac3ad..a2eebaa 100644
--- a/arch/mips/kernel/cevt-sb1250.c
+++ b/arch/mips/kernel/cevt-sb1250.c
@@ -125,7 +125,7 @@
 	cd->min_delta_ns	= clockevent_delta2ns(2, cd);
 	cd->rating		= 200;
 	cd->irq			= irq;
-	cd->cpumask		= cpumask_of_cpu(cpu);
+	cd->cpumask		= cpumask_of(cpu);
 	cd->set_next_event	= sibyte_next_event;
 	cd->set_mode		= sibyte_set_mode;
 	clockevents_register_device(cd);
@@ -147,6 +147,6 @@
 	action->name	= name;
 	action->dev_id	= cd;
 
-	irq_set_affinity(irq, cpumask_of_cpu(cpu));
+	irq_set_affinity(irq, cpumask_of(cpu));
 	setup_irq(irq, action);
 }
diff --git a/arch/mips/kernel/cevt-smtc.c b/arch/mips/kernel/cevt-smtc.c
index 5162fe4..6d45e24 100644
--- a/arch/mips/kernel/cevt-smtc.c
+++ b/arch/mips/kernel/cevt-smtc.c
@@ -292,7 +292,7 @@
 
 	cd->rating		= 300;
 	cd->irq			= irq;
-	cd->cpumask		= cpumask_of_cpu(cpu);
+	cd->cpumask		= cpumask_of(cpu);
 	cd->set_next_event	= mips_next_event;
 	cd->set_mode		= mips_set_clock_mode;
 	cd->event_handler	= mips_event_handler;
diff --git a/arch/mips/kernel/cevt-txx9.c b/arch/mips/kernel/cevt-txx9.c
index b5fc4eb..eccf7d6 100644
--- a/arch/mips/kernel/cevt-txx9.c
+++ b/arch/mips/kernel/cevt-txx9.c
@@ -112,7 +112,6 @@
 	.name		= "TXx9",
 	.features	= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
 	.rating		= 200,
-	.cpumask	= CPU_MASK_CPU0,
 	.set_mode	= txx9tmr_set_mode,
 	.set_next_event	= txx9tmr_set_next_event,
 };
@@ -150,6 +149,7 @@
 		clockevent_delta2ns(0xffffffff >> (32 - TXX9_TIMER_BITS), cd);
 	cd->min_delta_ns = clockevent_delta2ns(0xf, cd);
 	cd->irq = irq;
+	cd->cpumask = cpumask_of(0);
 	clockevents_register_device(cd);
 	setup_irq(irq, &txx9tmr_irq);
 	printk(KERN_INFO "TXx9: clockevent device at 0x%lx, irq %d\n",
diff --git a/arch/mips/kernel/i8253.c b/arch/mips/kernel/i8253.c
index b6ac551..f4d1878 100644
--- a/arch/mips/kernel/i8253.c
+++ b/arch/mips/kernel/i8253.c
@@ -115,7 +115,7 @@
 	 * Start pit with the boot cpu mask and make it global after the
 	 * IO_APIC has been initialized.
 	 */
-	cd->cpumask = cpumask_of_cpu(cpu);
+	cd->cpumask = cpumask_of(cpu);
 	clockevent_set_clock(cd, CLOCK_TICK_RATE);
 	cd->max_delta_ns = clockevent_delta2ns(0x7FFF, cd);
 	cd->min_delta_ns = clockevent_delta2ns(0xF, cd);
diff --git a/arch/mips/kernel/irq-gic.c b/arch/mips/kernel/irq-gic.c
index f0a4bb1..494a49a 100644
--- a/arch/mips/kernel/irq-gic.c
+++ b/arch/mips/kernel/irq-gic.c
@@ -155,7 +155,7 @@
 
 static DEFINE_SPINLOCK(gic_lock);
 
-static void gic_set_affinity(unsigned int irq, cpumask_t cpumask)
+static void gic_set_affinity(unsigned int irq, const struct cpumask *cpumask)
 {
 	cpumask_t	tmp = CPU_MASK_NONE;
 	unsigned long	flags;
@@ -164,7 +164,7 @@
 	pr_debug(KERN_DEBUG "%s called\n", __func__);
 	irq -= _irqbase;
 
-	cpus_and(tmp, cpumask, cpu_online_map);
+	cpumask_and(&tmp, cpumask, cpu_online_mask);
 	if (cpus_empty(tmp))
 		return;
 
@@ -187,7 +187,7 @@
 		set_bit(irq, pcpu_masks[first_cpu(tmp)].pcpu_mask);
 
 	}
-	irq_desc[irq].affinity = cpumask;
+	irq_desc[irq].affinity = *cpumask;
 	spin_unlock_irqrestore(&gic_lock, flags);
 
 }
diff --git a/arch/mips/kernel/smp-cmp.c b/arch/mips/kernel/smp-cmp.c
index ca476c4..f27beca 100644
--- a/arch/mips/kernel/smp-cmp.c
+++ b/arch/mips/kernel/smp-cmp.c
@@ -51,10 +51,10 @@
 	int len;
 
 	cpus_clear(cpu_allow_map);
-	if (cpulist_parse(str, cpu_allow_map) == 0) {
+	if (cpulist_parse(str, &cpu_allow_map) == 0) {
 		cpu_set(0, cpu_allow_map);
 		cpus_and(cpu_possible_map, cpu_possible_map, cpu_allow_map);
-		len = cpulist_scnprintf(buf, sizeof(buf)-1, cpu_possible_map);
+		len = cpulist_scnprintf(buf, sizeof(buf)-1, &cpu_possible_map);
 		buf[len] = '\0';
 		pr_debug("Allowable CPUs: %s\n", buf);
 		return 1;
@@ -226,7 +226,7 @@
 
 	for (i = 1; i < NR_CPUS; i++) {
 		if (amon_cpu_avail(i)) {
-			cpu_set(i, phys_cpu_present_map);
+			cpu_set(i, cpu_possible_map);
 			__cpu_number_map[i]	= ++ncpu;
 			__cpu_logical_map[ncpu]	= i;
 		}
diff --git a/arch/mips/kernel/smp-mt.c b/arch/mips/kernel/smp-mt.c
index 87a1816..6f7ee5a 100644
--- a/arch/mips/kernel/smp-mt.c
+++ b/arch/mips/kernel/smp-mt.c
@@ -70,7 +70,7 @@
 		write_vpe_c0_vpeconf0(tmp);
 
 		/* Record this as available CPU */
-		cpu_set(tc, phys_cpu_present_map);
+		cpu_set(tc, cpu_possible_map);
 		__cpu_number_map[tc]	= ++ncpu;
 		__cpu_logical_map[ncpu]	= tc;
 	}
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index 8bf88faf..3da9470 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -44,15 +44,10 @@
 #include <asm/mipsmtregs.h>
 #endif /* CONFIG_MIPS_MT_SMTC */
 
-cpumask_t phys_cpu_present_map;		/* Bitmask of available CPUs */
 volatile cpumask_t cpu_callin_map;	/* Bitmask of started secondaries */
-cpumask_t cpu_online_map;		/* Bitmask of currently online CPUs */
 int __cpu_number_map[NR_CPUS];		/* Map physical to logical */
 int __cpu_logical_map[NR_CPUS];		/* Map logical to physical */
 
-EXPORT_SYMBOL(phys_cpu_present_map);
-EXPORT_SYMBOL(cpu_online_map);
-
 extern void cpu_idle(void);
 
 /* Number of TCs (or siblings in Intel speak) per CPU core */
@@ -195,7 +190,7 @@
 /* preload SMP state for boot cpu */
 void __devinit smp_prepare_boot_cpu(void)
 {
-	cpu_set(0, phys_cpu_present_map);
+	cpu_set(0, cpu_possible_map);
 	cpu_set(0, cpu_online_map);
 	cpu_set(0, cpu_callin_map);
 }
diff --git a/arch/mips/kernel/smtc.c b/arch/mips/kernel/smtc.c
index 897fb2b..b6cca01 100644
--- a/arch/mips/kernel/smtc.c
+++ b/arch/mips/kernel/smtc.c
@@ -290,7 +290,7 @@
  * possibly leave some TCs/VPEs as "slave" processors.
  *
  * Use c0_MVPConf0 to find out how many TCs are available, setting up
- * phys_cpu_present_map and the logical/physical mappings.
+ * cpu_possible_map and the logical/physical mappings.
  */
 
 int __init smtc_build_cpu_map(int start_cpu_slot)
@@ -304,7 +304,7 @@
 	 */
 	ntcs = ((read_c0_mvpconf0() & MVPCONF0_PTC) >> MVPCONF0_PTC_SHIFT) + 1;
 	for (i=start_cpu_slot; i<NR_CPUS && i<ntcs; i++) {
-		cpu_set(i, phys_cpu_present_map);
+		cpu_set(i, cpu_possible_map);
 		__cpu_number_map[i] = i;
 		__cpu_logical_map[i] = i;
 	}
@@ -521,7 +521,7 @@
 	 * Pull any physically present but unused TCs out of circulation.
 	 */
 	while (tc < (((val & MVPCONF0_PTC) >> MVPCONF0_PTC_SHIFT) + 1)) {
-		cpu_clear(tc, phys_cpu_present_map);
+		cpu_clear(tc, cpu_possible_map);
 		cpu_clear(tc, cpu_present_map);
 		tc++;
 	}
diff --git a/arch/mips/mti-malta/malta-smtc.c b/arch/mips/mti-malta/malta-smtc.c
index f84a46a..aabd727 100644
--- a/arch/mips/mti-malta/malta-smtc.c
+++ b/arch/mips/mti-malta/malta-smtc.c
@@ -114,9 +114,9 @@
  */
 
 
-void plat_set_irq_affinity(unsigned int irq, cpumask_t affinity)
+void plat_set_irq_affinity(unsigned int irq, const struct cpumask *affinity)
 {
-	cpumask_t tmask = affinity;
+	cpumask_t tmask = *affinity;
 	int cpu = 0;
 	void smtc_set_irq_affinity(unsigned int irq, cpumask_t aff);
 
@@ -139,7 +139,7 @@
 	 * be made to forward to an offline "CPU".
 	 */
 
-	for_each_cpu_mask(cpu, affinity) {
+	for_each_cpu(cpu, affinity) {
 		if ((cpu_data[cpu].vpe_id != 0) || !cpu_online(cpu))
 			cpu_clear(cpu, tmask);
 	}
diff --git a/arch/mips/nxp/pnx8550/common/time.c b/arch/mips/nxp/pnx8550/common/time.c
index 62f495b..cf293b2 100644
--- a/arch/mips/nxp/pnx8550/common/time.c
+++ b/arch/mips/nxp/pnx8550/common/time.c
@@ -102,6 +102,7 @@
 	unsigned int p;
 	unsigned int pow2p;
 
+	pnx8xxx_clockevent.cpumask = cpu_none_mask;
 	clockevents_register_device(&pnx8xxx_clockevent);
 	clocksource_register(&pnx_clocksource);
 
diff --git a/arch/mips/pmc-sierra/yosemite/smp.c b/arch/mips/pmc-sierra/yosemite/smp.c
index 3a7df64..f78c29b 100644
--- a/arch/mips/pmc-sierra/yosemite/smp.c
+++ b/arch/mips/pmc-sierra/yosemite/smp.c
@@ -141,7 +141,7 @@
 }
 
 /*
- * Detect available CPUs, populate phys_cpu_present_map before smp_init
+ * Detect available CPUs, populate cpu_possible_map before smp_init
  *
  * We don't want to start the secondary CPU yet nor do we have a nice probing
  * feature in PMON so we just assume presence of the secondary core.
@@ -150,10 +150,10 @@
 {
 	int i;
 
-	cpus_clear(phys_cpu_present_map);
+	cpus_clear(cpu_possible_map);
 
 	for (i = 0; i < 2; i++) {
-		cpu_set(i, phys_cpu_present_map);
+		cpu_set(i, cpu_possible_map);
 		__cpu_number_map[i]	= i;
 		__cpu_logical_map[i]	= i;
 	}
diff --git a/arch/mips/sgi-ip27/ip27-smp.c b/arch/mips/sgi-ip27/ip27-smp.c
index ba5cdeb..5b47d6b 100644
--- a/arch/mips/sgi-ip27/ip27-smp.c
+++ b/arch/mips/sgi-ip27/ip27-smp.c
@@ -76,7 +76,7 @@
 			/* Only let it join in if it's marked enabled */
 			if ((acpu->cpu_info.flags & KLINFO_ENABLE) &&
 			    (tot_cpus_found != NR_CPUS)) {
-				cpu_set(cpuid, phys_cpu_present_map);
+				cpu_set(cpuid, cpu_possible_map);
 				alloc_cpupda(cpuid, tot_cpus_found);
 				cpus_found++;
 				tot_cpus_found++;
diff --git a/arch/mips/sgi-ip27/ip27-timer.c b/arch/mips/sgi-ip27/ip27-timer.c
index 1327c27..f024057 100644
--- a/arch/mips/sgi-ip27/ip27-timer.c
+++ b/arch/mips/sgi-ip27/ip27-timer.c
@@ -134,7 +134,7 @@
 	cd->min_delta_ns        = clockevent_delta2ns(0x300, cd);
 	cd->rating		= 200;
 	cd->irq			= irq;
-	cd->cpumask		= cpumask_of_cpu(cpu);
+	cd->cpumask		= cpumask_of(cpu);
 	cd->set_next_event	= rt_next_event;
 	cd->set_mode		= rt_set_mode;
 	clockevents_register_device(cd);
diff --git a/arch/mips/sibyte/bcm1480/irq.c b/arch/mips/sibyte/bcm1480/irq.c
index a35818e..12b465d 100644
--- a/arch/mips/sibyte/bcm1480/irq.c
+++ b/arch/mips/sibyte/bcm1480/irq.c
@@ -50,7 +50,7 @@
 static void disable_bcm1480_irq(unsigned int irq);
 static void ack_bcm1480_irq(unsigned int irq);
 #ifdef CONFIG_SMP
-static void bcm1480_set_affinity(unsigned int irq, cpumask_t mask);
+static void bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask);
 #endif
 
 #ifdef CONFIG_PCI
@@ -109,7 +109,7 @@
 }
 
 #ifdef CONFIG_SMP
-static void bcm1480_set_affinity(unsigned int irq, cpumask_t mask)
+static void bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	int i = 0, old_cpu, cpu, int_on, k;
 	u64 cur_ints;
@@ -117,11 +117,11 @@
 	unsigned long flags;
 	unsigned int irq_dirty;
 
-	if (cpus_weight(mask) != 1) {
+	if (cpumask_weight(mask) != 1) {
 		printk("attempted to set irq affinity for irq %d to multiple CPUs\n", irq);
 		return;
 	}
-	i = first_cpu(mask);
+	i = cpumask_first(mask);
 
 	/* Convert logical CPU to physical CPU */
 	cpu = cpu_logical_map(i);
diff --git a/arch/mips/sibyte/bcm1480/smp.c b/arch/mips/sibyte/bcm1480/smp.c
index bd9eeb4..dddfda8 100644
--- a/arch/mips/sibyte/bcm1480/smp.c
+++ b/arch/mips/sibyte/bcm1480/smp.c
@@ -136,7 +136,7 @@
 
 /*
  * Use CFE to find out how many CPUs are available, setting up
- * phys_cpu_present_map and the logical/physical mappings.
+ * cpu_possible_map and the logical/physical mappings.
  * XXXKW will the boot CPU ever not be physical 0?
  *
  * Common setup before any secondaries are started
@@ -145,14 +145,14 @@
 {
 	int i, num;
 
-	cpus_clear(phys_cpu_present_map);
-	cpu_set(0, phys_cpu_present_map);
+	cpus_clear(cpu_possible_map);
+	cpu_set(0, cpu_possible_map);
 	__cpu_number_map[0] = 0;
 	__cpu_logical_map[0] = 0;
 
 	for (i = 1, num = 0; i < NR_CPUS; i++) {
 		if (cfe_cpu_stop(i) == 0) {
-			cpu_set(i, phys_cpu_present_map);
+			cpu_set(i, cpu_possible_map);
 			__cpu_number_map[i] = ++num;
 			__cpu_logical_map[num] = i;
 		}
diff --git a/arch/mips/sibyte/sb1250/irq.c b/arch/mips/sibyte/sb1250/irq.c
index a515848..808ac29 100644
--- a/arch/mips/sibyte/sb1250/irq.c
+++ b/arch/mips/sibyte/sb1250/irq.c
@@ -50,7 +50,7 @@
 static void disable_sb1250_irq(unsigned int irq);
 static void ack_sb1250_irq(unsigned int irq);
 #ifdef CONFIG_SMP
-static void sb1250_set_affinity(unsigned int irq, cpumask_t mask);
+static void sb1250_set_affinity(unsigned int irq, const struct cpumask *mask);
 #endif
 
 #ifdef CONFIG_SIBYTE_HAS_LDT
@@ -103,16 +103,16 @@
 }
 
 #ifdef CONFIG_SMP
-static void sb1250_set_affinity(unsigned int irq, cpumask_t mask)
+static void sb1250_set_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	int i = 0, old_cpu, cpu, int_on;
 	u64 cur_ints;
 	struct irq_desc *desc = irq_desc + irq;
 	unsigned long flags;
 
-	i = first_cpu(mask);
+	i = cpumask_first(mask);
 
-	if (cpus_weight(mask) > 1) {
+	if (cpumask_weight(mask) > 1) {
 		printk("attempted to set irq affinity for irq %d to multiple CPUs\n", irq);
 		return;
 	}
diff --git a/arch/mips/sibyte/sb1250/smp.c b/arch/mips/sibyte/sb1250/smp.c
index 0734b93..5950a288 100644
--- a/arch/mips/sibyte/sb1250/smp.c
+++ b/arch/mips/sibyte/sb1250/smp.c
@@ -124,7 +124,7 @@
 
 /*
  * Use CFE to find out how many CPUs are available, setting up
- * phys_cpu_present_map and the logical/physical mappings.
+ * cpu_possible_map and the logical/physical mappings.
  * XXXKW will the boot CPU ever not be physical 0?
  *
  * Common setup before any secondaries are started
@@ -133,14 +133,14 @@
 {
 	int i, num;
 
-	cpus_clear(phys_cpu_present_map);
-	cpu_set(0, phys_cpu_present_map);
+	cpus_clear(cpu_possible_map);
+	cpu_set(0, cpu_possible_map);
 	__cpu_number_map[0] = 0;
 	__cpu_logical_map[0] = 0;
 
 	for (i = 1, num = 0; i < NR_CPUS; i++) {
 		if (cfe_cpu_stop(i) == 0) {
-			cpu_set(i, phys_cpu_present_map);
+			cpu_set(i, cpu_possible_map);
 			__cpu_number_map[i] = ++num;
 			__cpu_logical_map[num] = i;
 		}
diff --git a/arch/mips/sni/time.c b/arch/mips/sni/time.c
index 796e3ce..69f5f88 100644
--- a/arch/mips/sni/time.c
+++ b/arch/mips/sni/time.c
@@ -80,7 +80,7 @@
 	struct irqaction *action = &a20r_irqaction;
 	unsigned int cpu = smp_processor_id();
 
-	cd->cpumask             = cpumask_of_cpu(cpu);
+	cd->cpumask             = cpumask_of(cpu);
 	clockevents_register_device(cd);
 	action->dev_id = cd;
 	setup_irq(SNI_A20R_IRQ_TIMER, &a20r_irqaction);
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 644a70b..aacf11d 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -11,6 +11,7 @@
 	select HAVE_OPROFILE
 	select RTC_CLASS
 	select RTC_DRV_PARISC
+	select INIT_ALL_POSSIBLE
 	help
 	  The PA-RISC microprocessor is designed by Hewlett-Packard and used
 	  in many of their workstations & servers (HP9000 700 and 800 series,
diff --git a/arch/parisc/kernel/irq.c b/arch/parisc/kernel/irq.c
index 23ef950..4cea935 100644
--- a/arch/parisc/kernel/irq.c
+++ b/arch/parisc/kernel/irq.c
@@ -131,12 +131,12 @@
 	return 0;
 }
 
-static void cpu_set_affinity_irq(unsigned int irq, cpumask_t dest)
+static void cpu_set_affinity_irq(unsigned int irq, const struct cpumask *dest)
 {
-	if (cpu_check_affinity(irq, &dest))
+	if (cpu_check_affinity(irq, dest))
 		return;
 
-	irq_desc[irq].affinity = dest;
+	irq_desc[irq].affinity = *dest;
 }
 #endif
 
diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c
index d47f397..80bc000 100644
--- a/arch/parisc/kernel/smp.c
+++ b/arch/parisc/kernel/smp.c
@@ -67,21 +67,6 @@
 
 static int parisc_max_cpus __read_mostly = 1;
 
-/* online cpus are ones that we've managed to bring up completely
- * possible cpus are all valid cpu 
- * present cpus are all detected cpu
- *
- * On startup we bring up the "possible" cpus. Since we discover
- * CPUs later, we add them as hotplug, so the possible cpu mask is
- * empty in the beginning.
- */
-
-cpumask_t cpu_online_map   __read_mostly = CPU_MASK_NONE;	/* Bitmap of online CPUs */
-cpumask_t cpu_possible_map __read_mostly = CPU_MASK_ALL;	/* Bitmap of Present CPUs */
-
-EXPORT_SYMBOL(cpu_online_map);
-EXPORT_SYMBOL(cpu_possible_map);
-
 DEFINE_PER_CPU(spinlock_t, ipi_lock) = SPIN_LOCK_UNLOCKED;
 
 enum ipi_message_type {
diff --git a/arch/powerpc/include/asm/disassemble.h b/arch/powerpc/include/asm/disassemble.h
new file mode 100644
index 0000000..9b198d1
--- /dev/null
+++ b/arch/powerpc/include/asm/disassemble.h
@@ -0,0 +1,80 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#ifndef __ASM_PPC_DISASSEMBLE_H__
+#define __ASM_PPC_DISASSEMBLE_H__
+
+#include <linux/types.h>
+
+static inline unsigned int get_op(u32 inst)
+{
+	return inst >> 26;
+}
+
+static inline unsigned int get_xop(u32 inst)
+{
+	return (inst >> 1) & 0x3ff;
+}
+
+static inline unsigned int get_sprn(u32 inst)
+{
+	return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0);
+}
+
+static inline unsigned int get_dcrn(u32 inst)
+{
+	return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0);
+}
+
+static inline unsigned int get_rt(u32 inst)
+{
+	return (inst >> 21) & 0x1f;
+}
+
+static inline unsigned int get_rs(u32 inst)
+{
+	return (inst >> 21) & 0x1f;
+}
+
+static inline unsigned int get_ra(u32 inst)
+{
+	return (inst >> 16) & 0x1f;
+}
+
+static inline unsigned int get_rb(u32 inst)
+{
+	return (inst >> 11) & 0x1f;
+}
+
+static inline unsigned int get_rc(u32 inst)
+{
+	return inst & 0x1;
+}
+
+static inline unsigned int get_ws(u32 inst)
+{
+	return (inst >> 11) & 0x1f;
+}
+
+static inline unsigned int get_d(u32 inst)
+{
+	return inst & 0xffff;
+}
+
+#endif /* __ASM_PPC_DISASSEMBLE_H__ */
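The helpers in disassemble.h above are plain bit-field extractions from the 32-bit PowerPC instruction word: primary opcode in the top six bits, register operands at fixed offsets below it, the extended opcode of OP-31 instructions in get_xop(). A minimal stand-alone sketch of decoding a D-form instruction with the same extractions; the accessors are repeated so the snippet compiles on its own, and the encoded word and expected output are illustrative only:

#include <stdio.h>
#include <stdint.h>

/* Same bit extractions as disassemble.h, repeated for a self-contained build. */
static inline unsigned int get_op(uint32_t inst) { return inst >> 26; }
static inline unsigned int get_rt(uint32_t inst) { return (inst >> 21) & 0x1f; }
static inline unsigned int get_ra(uint32_t inst) { return (inst >> 16) & 0x1f; }
static inline unsigned int get_d(uint32_t inst)  { return inst & 0xffff; }

int main(void)
{
	uint32_t inst = 0x38640010;	/* addi r3, r4, 0x10 */

	/* Prints: op=14 rt=3 ra=4 d=0x0010 */
	printf("op=%u rt=%u ra=%u d=0x%04x\n",
	       get_op(inst), get_rt(inst), get_ra(inst), get_d(inst));
	return 0;
}

The emulation code in 44x_emulate.c below dispatches on exactly these fields, using get_xop() for the extended opcode once get_op() has identified an OP-31 instruction.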
diff --git a/arch/powerpc/include/asm/kvm_44x.h b/arch/powerpc/include/asm/kvm_44x.h
new file mode 100644
index 0000000..f49031b
--- /dev/null
+++ b/arch/powerpc/include/asm/kvm_44x.h
@@ -0,0 +1,61 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#ifndef __ASM_44X_H__
+#define __ASM_44X_H__
+
+#include <linux/kvm_host.h>
+
+#define PPC44x_TLB_SIZE 64
+
+/* If the guest is expecting it, this can be as large as we like; we'd just
+ * need to find some way of advertising it. */
+#define KVM44x_GUEST_TLB_SIZE 64
+
+struct kvmppc_44x_shadow_ref {
+	struct page *page;
+	u16 gtlb_index;
+	u8 writeable;
+	u8 tid;
+};
+
+struct kvmppc_vcpu_44x {
+	/* Unmodified copy of the guest's TLB. */
+	struct kvmppc_44x_tlbe guest_tlb[KVM44x_GUEST_TLB_SIZE];
+
+	/* References to guest pages in the hardware TLB. */
+	struct kvmppc_44x_shadow_ref shadow_refs[PPC44x_TLB_SIZE];
+
+	/* State of the shadow TLB at guest context switch time. */
+	struct kvmppc_44x_tlbe shadow_tlb[PPC44x_TLB_SIZE];
+	u8 shadow_tlb_mod[PPC44x_TLB_SIZE];
+
+	struct kvm_vcpu vcpu;
+};
+
+static inline struct kvmppc_vcpu_44x *to_44x(struct kvm_vcpu *vcpu)
+{
+	return container_of(vcpu, struct kvmppc_vcpu_44x, vcpu);
+}
+
+void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid);
+void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu);
+void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu);
+
+#endif /* __ASM_44X_H__ */
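struct kvmppc_vcpu_44x embeds the generic struct kvm_vcpu and to_44x() recovers the wrapper with container_of(), so common KVM code keeps passing struct kvm_vcpu * while the 440-specific code reaches its shadow-TLB state without an extra pointer. A stand-alone sketch of that embedding pattern, using toy types rather than the kernel structures:

#include <stdio.h>
#include <stddef.h>

/* Toy stand-ins for struct kvm_vcpu / struct kvmppc_vcpu_44x. */
struct vcpu { int id; };

struct vcpu_44x {
	int shadow_state;
	struct vcpu vcpu;	/* embedded generic part */
};

/* Same idea as to_44x(): walk back from the member to its container. */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

static struct vcpu_44x *to_44x(struct vcpu *v)
{
	return container_of(v, struct vcpu_44x, vcpu);
}

int main(void)
{
	struct vcpu_44x wrapper = { .shadow_state = 7, .vcpu = { .id = 1 } };
	struct vcpu *generic = &wrapper.vcpu;	/* what core code passes around */

	printf("%d\n", to_44x(generic)->shadow_state);	/* prints 7 */
	return 0;
}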
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 34b52b7..c1e436f 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -64,27 +64,58 @@
 	u32 halt_wakeup;
 };
 
-struct tlbe {
+struct kvmppc_44x_tlbe {
 	u32 tid; /* Only the low 8 bits are used. */
 	u32 word0;
 	u32 word1;
 	u32 word2;
 };
 
+enum kvm_exit_types {
+	MMIO_EXITS,
+	DCR_EXITS,
+	SIGNAL_EXITS,
+	ITLB_REAL_MISS_EXITS,
+	ITLB_VIRT_MISS_EXITS,
+	DTLB_REAL_MISS_EXITS,
+	DTLB_VIRT_MISS_EXITS,
+	SYSCALL_EXITS,
+	ISI_EXITS,
+	DSI_EXITS,
+	EMULATED_INST_EXITS,
+	EMULATED_MTMSRWE_EXITS,
+	EMULATED_WRTEE_EXITS,
+	EMULATED_MTSPR_EXITS,
+	EMULATED_MFSPR_EXITS,
+	EMULATED_MTMSR_EXITS,
+	EMULATED_MFMSR_EXITS,
+	EMULATED_TLBSX_EXITS,
+	EMULATED_TLBWE_EXITS,
+	EMULATED_RFI_EXITS,
+	DEC_EXITS,
+	EXT_INTR_EXITS,
+	HALT_WAKEUP,
+	USR_PR_INST,
+	FP_UNAVAIL,
+	DEBUG_EXITS,
+	TIMEINGUEST,
+	__NUMBER_OF_KVM_EXIT_TYPES
+};
+
+/* allow access to big endian 32bit upper/lower parts and 64bit var */
+struct kvmppc_exit_timing {
+	union {
+		u64 tv64;
+		struct {
+			u32 tbu, tbl;
+		} tv32;
+	};
+};
+
 struct kvm_arch {
 };
 
 struct kvm_vcpu_arch {
-	/* Unmodified copy of the guest's TLB. */
-	struct tlbe guest_tlb[PPC44x_TLB_SIZE];
-	/* TLB that's actually used when the guest is running. */
-	struct tlbe shadow_tlb[PPC44x_TLB_SIZE];
-	/* Pages which are referenced in the shadow TLB. */
-	struct page *shadow_pages[PPC44x_TLB_SIZE];
-
-	/* Track which TLB entries we've modified in the current exit. */
-	u8 shadow_tlb_mod[PPC44x_TLB_SIZE];
-
 	u32 host_stack;
 	u32 host_pid;
 	u32 host_dbcr0;
@@ -94,32 +125,32 @@
 	u32 host_msr;
 
 	u64 fpr[32];
-	u32 gpr[32];
+	ulong gpr[32];
 
-	u32 pc;
+	ulong pc;
 	u32 cr;
-	u32 ctr;
-	u32 lr;
-	u32 xer;
+	ulong ctr;
+	ulong lr;
+	ulong xer;
 
-	u32 msr;
+	ulong msr;
 	u32 mmucr;
-	u32 sprg0;
-	u32 sprg1;
-	u32 sprg2;
-	u32 sprg3;
-	u32 sprg4;
-	u32 sprg5;
-	u32 sprg6;
-	u32 sprg7;
-	u32 srr0;
-	u32 srr1;
-	u32 csrr0;
-	u32 csrr1;
-	u32 dsrr0;
-	u32 dsrr1;
-	u32 dear;
-	u32 esr;
+	ulong sprg0;
+	ulong sprg1;
+	ulong sprg2;
+	ulong sprg3;
+	ulong sprg4;
+	ulong sprg5;
+	ulong sprg6;
+	ulong sprg7;
+	ulong srr0;
+	ulong srr1;
+	ulong csrr0;
+	ulong csrr1;
+	ulong dsrr0;
+	ulong dsrr1;
+	ulong dear;
+	ulong esr;
 	u32 dec;
 	u32 decar;
 	u32 tbl;
@@ -127,7 +158,7 @@
 	u32 tcr;
 	u32 tsr;
 	u32 ivor[16];
-	u32 ivpr;
+	ulong ivpr;
 	u32 pir;
 
 	u32 shadow_pid;
@@ -140,9 +171,22 @@
 	u32 dbcr0;
 	u32 dbcr1;
 
+#ifdef CONFIG_KVM_EXIT_TIMING
+	struct kvmppc_exit_timing timing_exit;
+	struct kvmppc_exit_timing timing_last_enter;
+	u32 last_exit_type;
+	u32 timing_count_type[__NUMBER_OF_KVM_EXIT_TYPES];
+	u64 timing_sum_duration[__NUMBER_OF_KVM_EXIT_TYPES];
+	u64 timing_sum_quad_duration[__NUMBER_OF_KVM_EXIT_TYPES];
+	u64 timing_min_duration[__NUMBER_OF_KVM_EXIT_TYPES];
+	u64 timing_max_duration[__NUMBER_OF_KVM_EXIT_TYPES];
+	u64 timing_last_exit;
+	struct dentry *debugfs_exit_timing;
+#endif
+
 	u32 last_inst;
-	u32 fault_dear;
-	u32 fault_esr;
+	ulong fault_dear;
+	ulong fault_esr;
 	gpa_t paddr_accessed;
 
 	u8 io_gpr; /* GPR used as IO source/target */
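The kvmppc_exit_timing union added above lets the exit-timing code fill in the timebase as two 32-bit halves (tbu/tbl, convenient from assembly) while C code reads the same storage as one 64-bit value; the tbu-first layout only lines up with tv64 on a big-endian target such as ppc32. A small sketch of that overlay, with the endianness assumption stated in the comments:

#include <stdio.h>
#include <stdint.h>

/* Mirrors kvmppc_exit_timing: one 64-bit value overlaid with its halves.
 * Putting tbu first matches tv64 only on a big-endian machine. */
union exit_timing {
	uint64_t tv64;
	struct {
		uint32_t tbu, tbl;
	} tv32;
};

int main(void)
{
	union exit_timing t;

	t.tv32.tbu = 0x00000001;	/* upper timebase word */
	t.tv32.tbl = 0x00000002;	/* lower timebase word */

	/* On big-endian this prints tv64=0x100000002; on little-endian it
	 * would not, which is why the layout is tied to the target. */
	printf("tv64=0x%llx\n", (unsigned long long)t.tv64);
	return 0;
}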
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index bb62ad8..36d2a50 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -29,11 +29,6 @@
 #include <linux/kvm_types.h>
 #include <linux/kvm_host.h>
 
-struct kvm_tlb {
-	struct tlbe guest_tlb[PPC44x_TLB_SIZE];
-	struct tlbe shadow_tlb[PPC44x_TLB_SIZE];
-};
-
 enum emulation_result {
 	EMULATE_DONE,         /* no further processing */
 	EMULATE_DO_MMIO,      /* kvm_run filled with MMIO request */
@@ -41,9 +36,6 @@
 	EMULATE_FAIL,         /* can't emulate this instruction */
 };
 
-extern const unsigned char exception_priority[];
-extern const unsigned char priority_exception[];
-
 extern int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
 extern char kvmppc_handlers_start[];
 extern unsigned long kvmppc_handler_len;
@@ -58,51 +50,44 @@
 extern int kvmppc_emulate_instruction(struct kvm_run *run,
                                       struct kvm_vcpu *vcpu);
 extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu);
+extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
 
-extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn,
-                           u64 asid, u32 flags);
-extern void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, gva_t eaddr,
-                                  gva_t eend, u32 asid);
+extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr,
+                           u64 asid, u32 flags, u32 max_bytes,
+                           unsigned int gtlb_idx);
 extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode);
 extern void kvmppc_mmu_switch_pid(struct kvm_vcpu *vcpu, u32 pid);
 
-/* XXX Book E specific */
-extern void kvmppc_tlbe_set_modified(struct kvm_vcpu *vcpu, unsigned int i);
+/* Core-specific hooks */
 
-extern void kvmppc_check_and_deliver_interrupts(struct kvm_vcpu *vcpu);
+extern struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm,
+                                                unsigned int id);
+extern void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu);
+extern int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu);
+extern int kvmppc_core_check_processor_compat(void);
+extern int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
+                                      struct kvm_translation *tr);
 
-static inline void kvmppc_queue_exception(struct kvm_vcpu *vcpu, int exception)
-{
-	unsigned int priority = exception_priority[exception];
-	set_bit(priority, &vcpu->arch.pending_exceptions);
-}
+extern void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
+extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu);
 
-static inline void kvmppc_clear_exception(struct kvm_vcpu *vcpu, int exception)
-{
-	unsigned int priority = exception_priority[exception];
-	clear_bit(priority, &vcpu->arch.pending_exceptions);
-}
+extern void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu);
+extern void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu);
 
-/* Helper function for "full" MSR writes. No need to call this if only EE is
- * changing. */
-static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
-{
-	if ((new_msr & MSR_PR) != (vcpu->arch.msr & MSR_PR))
-		kvmppc_mmu_priv_switch(vcpu, new_msr & MSR_PR);
+extern void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu);
+extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu);
+extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu);
+extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu);
+extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
+                                       struct kvm_interrupt *irq);
 
-	vcpu->arch.msr = new_msr;
+extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
+                                  unsigned int op, int *advance);
+extern int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs);
+extern int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt);
 
-	if (vcpu->arch.msr & MSR_WE)
-		kvm_vcpu_block(vcpu);
-}
-
-static inline void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid)
-{
-	if (vcpu->arch.pid != new_pid) {
-		vcpu->arch.pid = new_pid;
-		vcpu->arch.swap_pid = 1;
-	}
-}
+extern int kvmppc_booke_init(void);
+extern void kvmppc_booke_exit(void);
 
 extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu);
 
diff --git a/arch/powerpc/include/asm/mmu-44x.h b/arch/powerpc/include/asm/mmu-44x.h
index 8a97cfb..27cc6fd 100644
--- a/arch/powerpc/include/asm/mmu-44x.h
+++ b/arch/powerpc/include/asm/mmu-44x.h
@@ -56,6 +56,7 @@
 #ifndef __ASSEMBLY__
 
 extern unsigned int tlb_44x_hwater;
+extern unsigned int tlb_44x_index;
 
 typedef struct {
 	unsigned int	id;
diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index c32da6f..373fca3 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -48,7 +48,6 @@
 
 /* sched_domains SD_NODE_INIT for PPC64 machines */
 #define SD_NODE_INIT (struct sched_domain) {		\
-	.span			= CPU_MASK_NONE,	\
 	.parent			= NULL,			\
 	.child			= NULL,			\
 	.groups			= NULL,			\
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 661d07d..9937fe4 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -23,9 +23,6 @@
 #include <linux/mm.h>
 #include <linux/suspend.h>
 #include <linux/hrtimer.h>
-#ifdef CONFIG_KVM
-#include <linux/kvm_host.h>
-#endif
 #ifdef CONFIG_PPC64
 #include <linux/time.h>
 #include <linux/hardirq.h>
@@ -51,6 +48,9 @@
 #ifdef CONFIG_PPC_ISERIES
 #include <asm/iseries/alpaca.h>
 #endif
+#ifdef CONFIG_KVM
+#include <asm/kvm_44x.h>
+#endif
 
 #if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
 #include "head_booke.h"
@@ -357,12 +357,10 @@
 	DEFINE(PTE_SIZE, sizeof(pte_t));
 
 #ifdef CONFIG_KVM
-	DEFINE(TLBE_BYTES, sizeof(struct tlbe));
+	DEFINE(TLBE_BYTES, sizeof(struct kvmppc_44x_tlbe));
 
 	DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack));
 	DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid));
-	DEFINE(VCPU_SHADOW_TLB, offsetof(struct kvm_vcpu, arch.shadow_tlb));
-	DEFINE(VCPU_SHADOW_MOD, offsetof(struct kvm_vcpu, arch.shadow_tlb_mod));
 	DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr));
 	DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr));
 	DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr));
@@ -385,5 +383,16 @@
 	DEFINE(PTE_T_LOG2, PTE_T_LOG2);
 #endif
 
+#ifdef CONFIG_KVM_EXIT_TIMING
+	DEFINE(VCPU_TIMING_EXIT_TBU, offsetof(struct kvm_vcpu,
+						arch.timing_exit.tv32.tbu));
+	DEFINE(VCPU_TIMING_EXIT_TBL, offsetof(struct kvm_vcpu,
+						arch.timing_exit.tv32.tbl));
+	DEFINE(VCPU_TIMING_LAST_ENTER_TBU, offsetof(struct kvm_vcpu,
+					arch.timing_last_enter.tv32.tbu));
+	DEFINE(VCPU_TIMING_LAST_ENTER_TBL, offsetof(struct kvm_vcpu,
+					arch.timing_last_enter.tv32.tbl));
+#endif
+
 	return 0;
 }
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index ac222d0..23b8b5e 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -237,7 +237,7 @@
 			mask = map;
 		}
 		if (irq_desc[irq].chip->set_affinity)
-			irq_desc[irq].chip->set_affinity(irq, mask);
+			irq_desc[irq].chip->set_affinity(irq, &mask);
 		else if (irq_desc[irq].action && !(warned++))
 			printk("Cannot set affinity for irq %i\n", irq);
 	}
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 8ac3f72..65484b2 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -59,13 +59,9 @@
 
 struct thread_info *secondary_ti;
 
-cpumask_t cpu_possible_map = CPU_MASK_NONE;
-cpumask_t cpu_online_map = CPU_MASK_NONE;
 DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE;
 DEFINE_PER_CPU(cpumask_t, cpu_core_map) = CPU_MASK_NONE;
 
-EXPORT_SYMBOL(cpu_online_map);
-EXPORT_SYMBOL(cpu_possible_map);
 EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
 EXPORT_PER_CPU_SYMBOL(cpu_core_map);
 
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index e1f3a51..99f1ddd 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -844,7 +844,7 @@
 	struct clock_event_device *dec = &per_cpu(decrementers, cpu).event;
 
 	*dec = decrementer_clockevent;
-	dec->cpumask = cpumask_of_cpu(cpu);
+	dec->cpumask = cpumask_of(cpu);
 
 	printk(KERN_DEBUG "clockevent: %s mult[%lx] shift[%d] cpu[%d]\n",
 	       dec->name, dec->mult, dec->shift, cpu);
diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c
new file mode 100644
index 0000000..a66bec5
--- /dev/null
+++ b/arch/powerpc/kvm/44x.c
@@ -0,0 +1,228 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/err.h>
+
+#include <asm/reg.h>
+#include <asm/cputable.h>
+#include <asm/tlbflush.h>
+#include <asm/kvm_44x.h>
+#include <asm/kvm_ppc.h>
+
+#include "44x_tlb.h"
+
+/* Note: clearing MSR[DE] just means that the debug interrupt will not be
+ * delivered *immediately*. Instead, it simply sets the appropriate DBSR bits.
+ * If those DBSR bits are still set when MSR[DE] is re-enabled, the interrupt
+ * will be delivered as an "imprecise debug event" (which is indicated by
+ * DBSR[IDE]).
+ */
+static void kvm44x_disable_debug_interrupts(void)
+{
+	mtmsr(mfmsr() & ~MSR_DE);
+}
+
+void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu)
+{
+	kvm44x_disable_debug_interrupts();
+
+	mtspr(SPRN_IAC1, vcpu->arch.host_iac[0]);
+	mtspr(SPRN_IAC2, vcpu->arch.host_iac[1]);
+	mtspr(SPRN_IAC3, vcpu->arch.host_iac[2]);
+	mtspr(SPRN_IAC4, vcpu->arch.host_iac[3]);
+	mtspr(SPRN_DBCR1, vcpu->arch.host_dbcr1);
+	mtspr(SPRN_DBCR2, vcpu->arch.host_dbcr2);
+	mtspr(SPRN_DBCR0, vcpu->arch.host_dbcr0);
+	mtmsr(vcpu->arch.host_msr);
+}
+
+void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
+{
+	struct kvm_guest_debug *dbg = &vcpu->guest_debug;
+	u32 dbcr0 = 0;
+
+	vcpu->arch.host_msr = mfmsr();
+	kvm44x_disable_debug_interrupts();
+
+	/* Save host debug register state. */
+	vcpu->arch.host_iac[0] = mfspr(SPRN_IAC1);
+	vcpu->arch.host_iac[1] = mfspr(SPRN_IAC2);
+	vcpu->arch.host_iac[2] = mfspr(SPRN_IAC3);
+	vcpu->arch.host_iac[3] = mfspr(SPRN_IAC4);
+	vcpu->arch.host_dbcr0 = mfspr(SPRN_DBCR0);
+	vcpu->arch.host_dbcr1 = mfspr(SPRN_DBCR1);
+	vcpu->arch.host_dbcr2 = mfspr(SPRN_DBCR2);
+
+	/* set registers up for guest */
+
+	if (dbg->bp[0]) {
+		mtspr(SPRN_IAC1, dbg->bp[0]);
+		dbcr0 |= DBCR0_IAC1 | DBCR0_IDM;
+	}
+	if (dbg->bp[1]) {
+		mtspr(SPRN_IAC2, dbg->bp[1]);
+		dbcr0 |= DBCR0_IAC2 | DBCR0_IDM;
+	}
+	if (dbg->bp[2]) {
+		mtspr(SPRN_IAC3, dbg->bp[2]);
+		dbcr0 |= DBCR0_IAC3 | DBCR0_IDM;
+	}
+	if (dbg->bp[3]) {
+		mtspr(SPRN_IAC4, dbg->bp[3]);
+		dbcr0 |= DBCR0_IAC4 | DBCR0_IDM;
+	}
+
+	mtspr(SPRN_DBCR0, dbcr0);
+	mtspr(SPRN_DBCR1, 0);
+	mtspr(SPRN_DBCR2, 0);
+}
+
+void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+	kvmppc_44x_tlb_load(vcpu);
+}
+
+void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
+{
+	kvmppc_44x_tlb_put(vcpu);
+}
+
+int kvmppc_core_check_processor_compat(void)
+{
+	int r;
+
+	if (strcmp(cur_cpu_spec->platform, "ppc440") == 0)
+		r = 0;
+	else
+		r = -ENOTSUPP;
+
+	return r;
+}
+
+int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+	struct kvmppc_44x_tlbe *tlbe = &vcpu_44x->guest_tlb[0];
+	int i;
+
+	tlbe->tid = 0;
+	tlbe->word0 = PPC44x_TLB_16M | PPC44x_TLB_VALID;
+	tlbe->word1 = 0;
+	tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR;
+
+	tlbe++;
+	tlbe->tid = 0;
+	tlbe->word0 = 0xef600000 | PPC44x_TLB_4K | PPC44x_TLB_VALID;
+	tlbe->word1 = 0xef600000;
+	tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR
+	              | PPC44x_TLB_I | PPC44x_TLB_G;
+
+	/* Since the guest can directly access the timebase, it must know the
+	 * real timebase frequency. Accordingly, it must see the state of
+	 * CCR1[TCS]. */
+	vcpu->arch.ccr1 = mfspr(SPRN_CCR1);
+
+	for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++)
+		vcpu_44x->shadow_refs[i].gtlb_index = -1;
+
+	return 0;
+}
+
+/* 'linear_address' is actually an encoding of AS|PID|EADDR. */
+int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
+                               struct kvm_translation *tr)
+{
+	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+	struct kvmppc_44x_tlbe *gtlbe;
+	int index;
+	gva_t eaddr;
+	u8 pid;
+	u8 as;
+
+	eaddr = tr->linear_address;
+	pid = (tr->linear_address >> 32) & 0xff;
+	as = (tr->linear_address >> 40) & 0x1;
+
+	index = kvmppc_44x_tlb_index(vcpu, eaddr, pid, as);
+	if (index == -1) {
+		tr->valid = 0;
+		return 0;
+	}
+
+	gtlbe = &vcpu_44x->guest_tlb[index];
+
+	tr->physical_address = tlb_xlate(gtlbe, eaddr);
+	/* XXX what do "writeable" and "usermode" even mean? */
+	tr->valid = 1;
+
+	return 0;
+}
+
+struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
+{
+	struct kvmppc_vcpu_44x *vcpu_44x;
+	struct kvm_vcpu *vcpu;
+	int err;
+
+	vcpu_44x = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
+	if (!vcpu_44x) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	vcpu = &vcpu_44x->vcpu;
+	err = kvm_vcpu_init(vcpu, kvm, id);
+	if (err)
+		goto free_vcpu;
+
+	return vcpu;
+
+free_vcpu:
+	kmem_cache_free(kvm_vcpu_cache, vcpu_44x);
+out:
+	return ERR_PTR(err);
+}
+
+void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+
+	kvm_vcpu_uninit(vcpu);
+	kmem_cache_free(kvm_vcpu_cache, vcpu_44x);
+}
+
+static int kvmppc_44x_init(void)
+{
+	int r;
+
+	r = kvmppc_booke_init();
+	if (r)
+		return r;
+
+	return kvm_init(NULL, sizeof(struct kvmppc_vcpu_44x), THIS_MODULE);
+}
+
+static void kvmppc_44x_exit(void)
+{
+	kvmppc_booke_exit();
+}
+
+module_init(kvmppc_44x_init);
+module_exit(kvmppc_44x_exit);
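kvmppc_core_vcpu_translate() above unpacks kvm_translation.linear_address as AS in bit 40, PID in bits 39..32 and the 32-bit effective address in the low word. A hypothetical userspace-side helper that packs a request the same way; the field layout is taken from the decoding above, while the helper itself is only a sketch and not part of this patch:

#include <stdio.h>
#include <stdint.h>

static uint64_t pack_linear_address(unsigned int as, unsigned int pid,
				    uint32_t eaddr)
{
	/* bit 40 = address space, bits 39..32 = PID, bits 31..0 = eaddr,
	 * mirroring how kvmppc_core_vcpu_translate() pulls them back out. */
	return ((uint64_t)(as & 0x1) << 40) |
	       ((uint64_t)(pid & 0xff) << 32) |
	       eaddr;
}

int main(void)
{
	/* Prints 0x2aef600000: as=0, pid=0x2a, eaddr=0xef600000. */
	printf("0x%llx\n",
	       (unsigned long long)pack_linear_address(0, 0x2a, 0xef600000));
	return 0;
}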
diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c
new file mode 100644
index 0000000..82489a7
--- /dev/null
+++ b/arch/powerpc/kvm/44x_emulate.c
@@ -0,0 +1,371 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#include <asm/kvm_ppc.h>
+#include <asm/dcr.h>
+#include <asm/dcr-regs.h>
+#include <asm/disassemble.h>
+#include <asm/kvm_44x.h>
+#include "timing.h"
+
+#include "booke.h"
+#include "44x_tlb.h"
+
+#define OP_RFI      19
+
+#define XOP_RFI     50
+#define XOP_MFMSR   83
+#define XOP_WRTEE   131
+#define XOP_MTMSR   146
+#define XOP_WRTEEI  163
+#define XOP_MFDCR   323
+#define XOP_MTDCR   451
+#define XOP_TLBSX   914
+#define XOP_ICCCI   966
+#define XOP_TLBWE   978
+
+static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.pc = vcpu->arch.srr0;
+	kvmppc_set_msr(vcpu, vcpu->arch.srr1);
+}
+
+int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
+                           unsigned int inst, int *advance)
+{
+	int emulated = EMULATE_DONE;
+	int dcrn;
+	int ra;
+	int rb;
+	int rc;
+	int rs;
+	int rt;
+	int ws;
+
+	switch (get_op(inst)) {
+	case OP_RFI:
+		switch (get_xop(inst)) {
+		case XOP_RFI:
+			kvmppc_emul_rfi(vcpu);
+			kvmppc_set_exit_type(vcpu, EMULATED_RFI_EXITS);
+			*advance = 0;
+			break;
+
+		default:
+			emulated = EMULATE_FAIL;
+			break;
+		}
+		break;
+
+	case 31:
+		switch (get_xop(inst)) {
+
+		case XOP_MFMSR:
+			rt = get_rt(inst);
+			vcpu->arch.gpr[rt] = vcpu->arch.msr;
+			kvmppc_set_exit_type(vcpu, EMULATED_MFMSR_EXITS);
+			break;
+
+		case XOP_MTMSR:
+			rs = get_rs(inst);
+			kvmppc_set_exit_type(vcpu, EMULATED_MTMSR_EXITS);
+			kvmppc_set_msr(vcpu, vcpu->arch.gpr[rs]);
+			break;
+
+		case XOP_WRTEE:
+			rs = get_rs(inst);
+			vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
+							 | (vcpu->arch.gpr[rs] & MSR_EE);
+			kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS);
+			break;
+
+		case XOP_WRTEEI:
+			vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
+							 | (inst & MSR_EE);
+			kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS);
+			break;
+
+		case XOP_MFDCR:
+			dcrn = get_dcrn(inst);
+			rt = get_rt(inst);
+
+			/* The guest may access CPR0 registers to determine the timebase
+			 * frequency, and it must know the real host frequency because it
+			 * can directly access the timebase registers.
+			 *
+			 * It would be possible to emulate those accesses in userspace,
+			 * but userspace can really only figure out the end frequency.
+			 * We could decompose that into the factors that compute it, but
+			 * that's tricky math, and it's easier to just report the real
+			 * CPR0 values.
+			 */
+			switch (dcrn) {
+			case DCRN_CPR0_CONFIG_ADDR:
+				vcpu->arch.gpr[rt] = vcpu->arch.cpr0_cfgaddr;
+				break;
+			case DCRN_CPR0_CONFIG_DATA:
+				local_irq_disable();
+				mtdcr(DCRN_CPR0_CONFIG_ADDR,
+					  vcpu->arch.cpr0_cfgaddr);
+				vcpu->arch.gpr[rt] = mfdcr(DCRN_CPR0_CONFIG_DATA);
+				local_irq_enable();
+				break;
+			default:
+				run->dcr.dcrn = dcrn;
+				run->dcr.data =  0;
+				run->dcr.is_write = 0;
+				vcpu->arch.io_gpr = rt;
+				vcpu->arch.dcr_needed = 1;
+				kvmppc_account_exit(vcpu, DCR_EXITS);
+				emulated = EMULATE_DO_DCR;
+			}
+
+			break;
+
+		case XOP_MTDCR:
+			dcrn = get_dcrn(inst);
+			rs = get_rs(inst);
+
+			/* emulate some access in kernel */
+			switch (dcrn) {
+			case DCRN_CPR0_CONFIG_ADDR:
+				vcpu->arch.cpr0_cfgaddr = vcpu->arch.gpr[rs];
+				break;
+			default:
+				run->dcr.dcrn = dcrn;
+				run->dcr.data = vcpu->arch.gpr[rs];
+				run->dcr.is_write = 1;
+				vcpu->arch.dcr_needed = 1;
+				kvmppc_account_exit(vcpu, DCR_EXITS);
+				emulated = EMULATE_DO_DCR;
+			}
+
+			break;
+
+		case XOP_TLBWE:
+			ra = get_ra(inst);
+			rs = get_rs(inst);
+			ws = get_ws(inst);
+			emulated = kvmppc_44x_emul_tlbwe(vcpu, ra, rs, ws);
+			break;
+
+		case XOP_TLBSX:
+			rt = get_rt(inst);
+			ra = get_ra(inst);
+			rb = get_rb(inst);
+			rc = get_rc(inst);
+			emulated = kvmppc_44x_emul_tlbsx(vcpu, rt, ra, rb, rc);
+			break;
+
+		case XOP_ICCCI:
+			break;
+
+		default:
+			emulated = EMULATE_FAIL;
+		}
+
+		break;
+
+	default:
+		emulated = EMULATE_FAIL;
+	}
+
+	return emulated;
+}
+
+int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
+{
+	switch (sprn) {
+	case SPRN_MMUCR:
+		vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break;
+	case SPRN_PID:
+		kvmppc_set_pid(vcpu, vcpu->arch.gpr[rs]); break;
+	case SPRN_CCR0:
+		vcpu->arch.ccr0 = vcpu->arch.gpr[rs]; break;
+	case SPRN_CCR1:
+		vcpu->arch.ccr1 = vcpu->arch.gpr[rs]; break;
+	case SPRN_DEAR:
+		vcpu->arch.dear = vcpu->arch.gpr[rs]; break;
+	case SPRN_ESR:
+		vcpu->arch.esr = vcpu->arch.gpr[rs]; break;
+	case SPRN_DBCR0:
+		vcpu->arch.dbcr0 = vcpu->arch.gpr[rs]; break;
+	case SPRN_DBCR1:
+		vcpu->arch.dbcr1 = vcpu->arch.gpr[rs]; break;
+	case SPRN_TSR:
+		vcpu->arch.tsr &= ~vcpu->arch.gpr[rs]; break;
+	case SPRN_TCR:
+		vcpu->arch.tcr = vcpu->arch.gpr[rs];
+		kvmppc_emulate_dec(vcpu);
+		break;
+
+	/* Note: SPRG4-7 are user-readable. These values are
+	 * loaded into the real SPRGs when resuming the
+	 * guest. */
+	case SPRN_SPRG4:
+		vcpu->arch.sprg4 = vcpu->arch.gpr[rs]; break;
+	case SPRN_SPRG5:
+		vcpu->arch.sprg5 = vcpu->arch.gpr[rs]; break;
+	case SPRN_SPRG6:
+		vcpu->arch.sprg6 = vcpu->arch.gpr[rs]; break;
+	case SPRN_SPRG7:
+		vcpu->arch.sprg7 = vcpu->arch.gpr[rs]; break;
+
+	case SPRN_IVPR:
+		vcpu->arch.ivpr = vcpu->arch.gpr[rs];
+		break;
+	case SPRN_IVOR0:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = vcpu->arch.gpr[rs];
+		break;
+	case SPRN_IVOR1:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = vcpu->arch.gpr[rs];
+		break;
+	case SPRN_IVOR2:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = vcpu->arch.gpr[rs];
+		break;
+	case SPRN_IVOR3:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = vcpu->arch.gpr[rs];
+		break;
+	case SPRN_IVOR4:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = vcpu->arch.gpr[rs];
+		break;
+	case SPRN_IVOR5:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = vcpu->arch.gpr[rs];
+		break;
+	case SPRN_IVOR6:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = vcpu->arch.gpr[rs];
+		break;
+	case SPRN_IVOR7:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = vcpu->arch.gpr[rs];
+		break;
+	case SPRN_IVOR8:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = vcpu->arch.gpr[rs];
+		break;
+	case SPRN_IVOR9:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = vcpu->arch.gpr[rs];
+		break;
+	case SPRN_IVOR10:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = vcpu->arch.gpr[rs];
+		break;
+	case SPRN_IVOR11:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = vcpu->arch.gpr[rs];
+		break;
+	case SPRN_IVOR12:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = vcpu->arch.gpr[rs];
+		break;
+	case SPRN_IVOR13:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = vcpu->arch.gpr[rs];
+		break;
+	case SPRN_IVOR14:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = vcpu->arch.gpr[rs];
+		break;
+	case SPRN_IVOR15:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = vcpu->arch.gpr[rs];
+		break;
+
+	default:
+		return EMULATE_FAIL;
+	}
+
+	kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS);
+	return EMULATE_DONE;
+}
+
+int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
+{
+	switch (sprn) {
+	/* 440 */
+	case SPRN_MMUCR:
+		vcpu->arch.gpr[rt] = vcpu->arch.mmucr; break;
+	case SPRN_CCR0:
+		vcpu->arch.gpr[rt] = vcpu->arch.ccr0; break;
+	case SPRN_CCR1:
+		vcpu->arch.gpr[rt] = vcpu->arch.ccr1; break;
+
+	/* Book E */
+	case SPRN_PID:
+		vcpu->arch.gpr[rt] = vcpu->arch.pid; break;
+	case SPRN_IVPR:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivpr; break;
+	case SPRN_DEAR:
+		vcpu->arch.gpr[rt] = vcpu->arch.dear; break;
+	case SPRN_ESR:
+		vcpu->arch.gpr[rt] = vcpu->arch.esr; break;
+	case SPRN_DBCR0:
+		vcpu->arch.gpr[rt] = vcpu->arch.dbcr0; break;
+	case SPRN_DBCR1:
+		vcpu->arch.gpr[rt] = vcpu->arch.dbcr1; break;
+
+	case SPRN_IVOR0:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL];
+		break;
+	case SPRN_IVOR1:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK];
+		break;
+	case SPRN_IVOR2:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE];
+		break;
+	case SPRN_IVOR3:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE];
+		break;
+	case SPRN_IVOR4:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL];
+		break;
+	case SPRN_IVOR5:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT];
+		break;
+	case SPRN_IVOR6:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM];
+		break;
+	case SPRN_IVOR7:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL];
+		break;
+	case SPRN_IVOR8:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL];
+		break;
+	case SPRN_IVOR9:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL];
+		break;
+	case SPRN_IVOR10:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER];
+		break;
+	case SPRN_IVOR11:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FIT];
+		break;
+	case SPRN_IVOR12:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG];
+		break;
+	case SPRN_IVOR13:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS];
+		break;
+	case SPRN_IVOR14:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS];
+		break;
+	case SPRN_IVOR15:
+		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG];
+		break;
+
+	default:
+		return EMULATE_FAIL;
+	}
+
+	kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS);
+	return EMULATE_DONE;
+}
+
diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c
index ad72c6f..9a34b8e 100644
--- a/arch/powerpc/kvm/44x_tlb.c
+++ b/arch/powerpc/kvm/44x_tlb.c
@@ -22,20 +22,103 @@
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>
 #include <linux/highmem.h>
+
+#include <asm/tlbflush.h>
 #include <asm/mmu-44x.h>
 #include <asm/kvm_ppc.h>
+#include <asm/kvm_44x.h>
+#include "timing.h"
 
 #include "44x_tlb.h"
 
+#ifndef PPC44x_TLBE_SIZE
+#define PPC44x_TLBE_SIZE	PPC44x_TLB_4K
+#endif
+
+#define PAGE_SIZE_4K (1<<12)
+#define PAGE_MASK_4K (~(PAGE_SIZE_4K - 1))
+
+#define PPC44x_TLB_UATTR_MASK \
+	(PPC44x_TLB_U0|PPC44x_TLB_U1|PPC44x_TLB_U2|PPC44x_TLB_U3)
 #define PPC44x_TLB_USER_PERM_MASK (PPC44x_TLB_UX|PPC44x_TLB_UR|PPC44x_TLB_UW)
 #define PPC44x_TLB_SUPER_PERM_MASK (PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW)
 
-static unsigned int kvmppc_tlb_44x_pos;
+#ifdef DEBUG
+void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+	struct kvmppc_44x_tlbe *tlbe;
+	int i;
+
+	printk("vcpu %d TLB dump:\n", vcpu->vcpu_id);
+	printk("| %2s | %3s | %8s | %8s | %8s |\n",
+			"nr", "tid", "word0", "word1", "word2");
+
+	for (i = 0; i < ARRAY_SIZE(vcpu_44x->guest_tlb); i++) {
+		tlbe = &vcpu_44x->guest_tlb[i];
+		if (tlbe->word0 & PPC44x_TLB_VALID)
+			printk(" G%2d |  %02X | %08X | %08X | %08X |\n",
+			       i, tlbe->tid, tlbe->word0, tlbe->word1,
+			       tlbe->word2);
+	}
+}
+#endif
+
+static inline void kvmppc_44x_tlbie(unsigned int index)
+{
+	/* 0 <= index < 64, so the V bit is clear and we can use the index as
+	 * word0. */
+	asm volatile(
+		"tlbwe %[index], %[index], 0\n"
+	:
+	: [index] "r"(index)
+	);
+}
+
+static inline void kvmppc_44x_tlbre(unsigned int index,
+                                    struct kvmppc_44x_tlbe *tlbe)
+{
+	asm volatile(
+		"tlbre %[word0], %[index], 0\n"
+		"mfspr %[tid], %[sprn_mmucr]\n"
+		"andi. %[tid], %[tid], 0xff\n"
+		"tlbre %[word1], %[index], 1\n"
+		"tlbre %[word2], %[index], 2\n"
+		: [word0] "=r"(tlbe->word0),
+		  [word1] "=r"(tlbe->word1),
+		  [word2] "=r"(tlbe->word2),
+		  [tid]   "=r"(tlbe->tid)
+		: [index] "r"(index),
+		  [sprn_mmucr] "i"(SPRN_MMUCR)
+		: "cc"
+	);
+}
+
+static inline void kvmppc_44x_tlbwe(unsigned int index,
+                                    struct kvmppc_44x_tlbe *stlbe)
+{
+	unsigned long tmp;
+
+	asm volatile(
+		"mfspr %[tmp], %[sprn_mmucr]\n"
+		"rlwimi %[tmp], %[tid], 0, 0xff\n"
+		"mtspr %[sprn_mmucr], %[tmp]\n"
+		"tlbwe %[word0], %[index], 0\n"
+		"tlbwe %[word1], %[index], 1\n"
+		"tlbwe %[word2], %[index], 2\n"
+		: [tmp]   "=&r"(tmp)
+		: [word0] "r"(stlbe->word0),
+		  [word1] "r"(stlbe->word1),
+		  [word2] "r"(stlbe->word2),
+		  [tid]   "r"(stlbe->tid),
+		  [index] "r"(index),
+		  [sprn_mmucr] "i"(SPRN_MMUCR)
+	);
+}
 
 static u32 kvmppc_44x_tlb_shadow_attrib(u32 attrib, int usermode)
 {
-	/* Mask off reserved bits. */
-	attrib &= PPC44x_TLB_PERM_MASK|PPC44x_TLB_ATTR_MASK;
+	/* We only care about the guest's permission and user bits. */
+	attrib &= PPC44x_TLB_PERM_MASK|PPC44x_TLB_UATTR_MASK;
 
 	if (!usermode) {
 		/* Guest is in supervisor mode, so we need to translate guest
@@ -47,18 +130,60 @@
 	/* Make sure host can always access this memory. */
 	attrib |= PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW;
 
+	/* WIMGE = 0b00100 */
+	attrib |= PPC44x_TLB_M;
+
 	return attrib;
 }
 
+/* Load shadow TLB back into hardware. */
+void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+	int i;
+
+	for (i = 0; i <= tlb_44x_hwater; i++) {
+		struct kvmppc_44x_tlbe *stlbe = &vcpu_44x->shadow_tlb[i];
+
+		if (get_tlb_v(stlbe) && get_tlb_ts(stlbe))
+			kvmppc_44x_tlbwe(i, stlbe);
+	}
+}
+
+static void kvmppc_44x_tlbe_set_modified(struct kvmppc_vcpu_44x *vcpu_44x,
+                                         unsigned int i)
+{
+	vcpu_44x->shadow_tlb_mod[i] = 1;
+}
+
+/* Save hardware TLB to the vcpu, and invalidate all guest mappings. */
+void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+	int i;
+
+	for (i = 0; i <= tlb_44x_hwater; i++) {
+		struct kvmppc_44x_tlbe *stlbe = &vcpu_44x->shadow_tlb[i];
+
+		if (vcpu_44x->shadow_tlb_mod[i])
+			kvmppc_44x_tlbre(i, stlbe);
+
+		if (get_tlb_v(stlbe) && get_tlb_ts(stlbe))
+			kvmppc_44x_tlbie(i);
+	}
+}
+
+
 /* Search the guest TLB for a matching entry. */
 int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid,
                          unsigned int as)
 {
+	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
 	int i;
 
 	/* XXX Replace loop with fancy data structures. */
-	for (i = 0; i < PPC44x_TLB_SIZE; i++) {
-		struct tlbe *tlbe = &vcpu->arch.guest_tlb[i];
+	for (i = 0; i < ARRAY_SIZE(vcpu_44x->guest_tlb); i++) {
+		struct kvmppc_44x_tlbe *tlbe = &vcpu_44x->guest_tlb[i];
 		unsigned int tid;
 
 		if (eaddr < get_tlb_eaddr(tlbe))
@@ -83,78 +208,89 @@
 	return -1;
 }
 
-struct tlbe *kvmppc_44x_itlb_search(struct kvm_vcpu *vcpu, gva_t eaddr)
+int kvmppc_44x_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
 {
 	unsigned int as = !!(vcpu->arch.msr & MSR_IS);
-	unsigned int index;
 
-	index = kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as);
-	if (index == -1)
-		return NULL;
-	return &vcpu->arch.guest_tlb[index];
+	return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as);
 }
 
-struct tlbe *kvmppc_44x_dtlb_search(struct kvm_vcpu *vcpu, gva_t eaddr)
+int kvmppc_44x_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
 {
 	unsigned int as = !!(vcpu->arch.msr & MSR_DS);
-	unsigned int index;
 
-	index = kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as);
-	if (index == -1)
-		return NULL;
-	return &vcpu->arch.guest_tlb[index];
+	return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as);
 }
 
-static int kvmppc_44x_tlbe_is_writable(struct tlbe *tlbe)
+static void kvmppc_44x_shadow_release(struct kvmppc_vcpu_44x *vcpu_44x,
+                                      unsigned int stlb_index)
 {
-	return tlbe->word2 & (PPC44x_TLB_SW|PPC44x_TLB_UW);
-}
+	struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[stlb_index];
 
-static void kvmppc_44x_shadow_release(struct kvm_vcpu *vcpu,
-                                      unsigned int index)
-{
-	struct tlbe *stlbe = &vcpu->arch.shadow_tlb[index];
-	struct page *page = vcpu->arch.shadow_pages[index];
+	if (!ref->page)
+		return;
 
-	if (get_tlb_v(stlbe)) {
-		if (kvmppc_44x_tlbe_is_writable(stlbe))
-			kvm_release_page_dirty(page);
-		else
-			kvm_release_page_clean(page);
-	}
+	/* Discard from the TLB. */
+	/* Note: we could actually invalidate a host mapping, if the host overwrote
+	 * this TLB entry since we inserted a guest mapping. */
+	kvmppc_44x_tlbie(stlb_index);
+
+	/* Now release the page. */
+	if (ref->writeable)
+		kvm_release_page_dirty(ref->page);
+	else
+		kvm_release_page_clean(ref->page);
+
+	ref->page = NULL;
+
+	/* XXX set tlb_44x_index to stlb_index? */
+
+	KVMTRACE_1D(STLB_INVAL, &vcpu_44x->vcpu, stlb_index, handler);
 }
 
 void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu)
 {
+	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
 	int i;
 
 	for (i = 0; i <= tlb_44x_hwater; i++)
-		kvmppc_44x_shadow_release(vcpu, i);
+		kvmppc_44x_shadow_release(vcpu_44x, i);
 }
 
-void kvmppc_tlbe_set_modified(struct kvm_vcpu *vcpu, unsigned int i)
+/**
+ * kvmppc_mmu_map -- create a host mapping for guest memory
+ *
+ * If the guest wanted a larger page than the host supports, only the first
+ * host page is mapped here and the rest are demand faulted.
+ *
+ * If the guest wanted a smaller page than the host page size, we map only the
+ * guest-size page (i.e. not a full host page mapping).
+ *
+ * Caller must ensure that the specified guest TLB entry is safe to insert into
+ * the shadow TLB.
+ */
+void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, u64 asid,
+                    u32 flags, u32 max_bytes, unsigned int gtlb_index)
 {
-    vcpu->arch.shadow_tlb_mod[i] = 1;
-}
-
-/* Caller must ensure that the specified guest TLB entry is safe to insert into
- * the shadow TLB. */
-void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid,
-                    u32 flags)
-{
+	struct kvmppc_44x_tlbe stlbe;
+	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+	struct kvmppc_44x_shadow_ref *ref;
 	struct page *new_page;
-	struct tlbe *stlbe;
 	hpa_t hpaddr;
+	gfn_t gfn;
 	unsigned int victim;
 
-	/* Future optimization: don't overwrite the TLB entry containing the
-	 * current PC (or stack?). */
-	victim = kvmppc_tlb_44x_pos++;
-	if (kvmppc_tlb_44x_pos > tlb_44x_hwater)
-		kvmppc_tlb_44x_pos = 0;
-	stlbe = &vcpu->arch.shadow_tlb[victim];
+	/* Select TLB entry to clobber. Indirectly guard against races with the TLB
+	 * miss handler by disabling interrupts. */
+	local_irq_disable();
+	victim = ++tlb_44x_index;
+	if (victim > tlb_44x_hwater)
+		victim = 0;
+	tlb_44x_index = victim;
+	local_irq_enable();
 
 	/* Get reference to new page. */
+	gfn = gpaddr >> PAGE_SHIFT;
 	new_page = gfn_to_page(vcpu->kvm, gfn);
 	if (is_error_page(new_page)) {
 		printk(KERN_ERR "Couldn't get guest page for gfn %lx!\n", gfn);
@@ -163,10 +299,8 @@
 	}
 	hpaddr = page_to_phys(new_page);
 
-	/* Drop reference to old page. */
-	kvmppc_44x_shadow_release(vcpu, victim);
-
-	vcpu->arch.shadow_pages[victim] = new_page;
+	/* Invalidate any previous shadow mappings. */
+	kvmppc_44x_shadow_release(vcpu_44x, victim);
 
 	/* XXX Make sure (va, size) doesn't overlap any other
 	 * entries. 440x6 user manual says the result would be
@@ -174,78 +308,193 @@
 
 	/* XXX what about AS? */
 
-	stlbe->tid = !(asid & 0xff);
-
 	/* Force TS=1 for all guest mappings. */
-	/* For now we hardcode 4KB mappings, but it will be important to
-	 * use host large pages in the future. */
-	stlbe->word0 = (gvaddr & PAGE_MASK) | PPC44x_TLB_VALID | PPC44x_TLB_TS
-	               | PPC44x_TLB_4K;
-	stlbe->word1 = (hpaddr & 0xfffffc00) | ((hpaddr >> 32) & 0xf);
-	stlbe->word2 = kvmppc_44x_tlb_shadow_attrib(flags,
-	                                            vcpu->arch.msr & MSR_PR);
-	kvmppc_tlbe_set_modified(vcpu, victim);
+	stlbe.word0 = PPC44x_TLB_VALID | PPC44x_TLB_TS;
 
-	KVMTRACE_5D(STLB_WRITE, vcpu, victim,
-			stlbe->tid, stlbe->word0, stlbe->word1, stlbe->word2,
-			handler);
+	if (max_bytes >= PAGE_SIZE) {
+		/* Guest mapping is larger than or equal to host page size. We can use
+		 * a "native" host mapping. */
+		stlbe.word0 |= (gvaddr & PAGE_MASK) | PPC44x_TLBE_SIZE;
+	} else {
+		/* Guest mapping is smaller than host page size. We must restrict the
+		 * size of the mapping to be at most the smaller of the two, but for
+		 * simplicity we fall back to a 4K mapping (this is probably what the
+		 * guest is using anyway). */
+		stlbe.word0 |= (gvaddr & PAGE_MASK_4K) | PPC44x_TLB_4K;
+
+		/* 'hpaddr' is a host page, which is larger than the mapping we're
+		 * inserting here. To compensate, we must add the in-page offset to the
+		 * sub-page. */
+		hpaddr |= gpaddr & (PAGE_MASK ^ PAGE_MASK_4K);
+	}
+
+	stlbe.word1 = (hpaddr & 0xfffffc00) | ((hpaddr >> 32) & 0xf);
+	stlbe.word2 = kvmppc_44x_tlb_shadow_attrib(flags,
+	                                            vcpu->arch.msr & MSR_PR);
+	stlbe.tid = !(asid & 0xff);
+
+	/* Keep track of the reference so we can properly release it later. */
+	ref = &vcpu_44x->shadow_refs[victim];
+	ref->page = new_page;
+	ref->gtlb_index = gtlb_index;
+	ref->writeable = !!(stlbe.word2 & PPC44x_TLB_UW);
+	ref->tid = stlbe.tid;
+
+	/* Insert shadow mapping into hardware TLB. */
+	kvmppc_44x_tlbe_set_modified(vcpu_44x, victim);
+	kvmppc_44x_tlbwe(victim, &stlbe);
+	KVMTRACE_5D(STLB_WRITE, vcpu, victim, stlbe.tid, stlbe.word0, stlbe.word1,
+	            stlbe.word2, handler);
 }
 
-void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, gva_t eaddr,
-                           gva_t eend, u32 asid)
+/* For a particular guest TLB entry, invalidate the corresponding host TLB
+ * mappings and release the host pages. */
+static void kvmppc_44x_invalidate(struct kvm_vcpu *vcpu,
+                                  unsigned int gtlb_index)
 {
-	unsigned int pid = !(asid & 0xff);
+	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
 	int i;
 
-	/* XXX Replace loop with fancy data structures. */
-	for (i = 0; i <= tlb_44x_hwater; i++) {
-		struct tlbe *stlbe = &vcpu->arch.shadow_tlb[i];
-		unsigned int tid;
-
-		if (!get_tlb_v(stlbe))
-			continue;
-
-		if (eend < get_tlb_eaddr(stlbe))
-			continue;
-
-		if (eaddr > get_tlb_end(stlbe))
-			continue;
-
-		tid = get_tlb_tid(stlbe);
-		if (tid && (tid != pid))
-			continue;
-
-		kvmppc_44x_shadow_release(vcpu, i);
-		stlbe->word0 = 0;
-		kvmppc_tlbe_set_modified(vcpu, i);
-		KVMTRACE_5D(STLB_INVAL, vcpu, i,
-				stlbe->tid, stlbe->word0, stlbe->word1,
-				stlbe->word2, handler);
+	for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) {
+		struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[i];
+		if (ref->gtlb_index == gtlb_index)
+			kvmppc_44x_shadow_release(vcpu_44x, i);
 	}
 }
 
-/* Invalidate all mappings on the privilege switch after PID has been changed.
- * The guest always runs with PID=1, so we must clear the entire TLB when
- * switching address spaces. */
 void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode)
 {
+	vcpu->arch.shadow_pid = !usermode;
+}
+
+void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid)
+{
+	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
 	int i;
 
-	if (vcpu->arch.swap_pid) {
-		/* XXX Replace loop with fancy data structures. */
-		for (i = 0; i <= tlb_44x_hwater; i++) {
-			struct tlbe *stlbe = &vcpu->arch.shadow_tlb[i];
+	if (unlikely(vcpu->arch.pid == new_pid))
+		return;
 
-			/* Future optimization: clear only userspace mappings. */
-			kvmppc_44x_shadow_release(vcpu, i);
-			stlbe->word0 = 0;
-			kvmppc_tlbe_set_modified(vcpu, i);
-			KVMTRACE_5D(STLB_INVAL, vcpu, i,
-			            stlbe->tid, stlbe->word0, stlbe->word1,
-			            stlbe->word2, handler);
-		}
-		vcpu->arch.swap_pid = 0;
+	vcpu->arch.pid = new_pid;
+
+	/* Guest userspace runs with TID=0 mappings and PID=0, to make sure it
+	 * can't access guest kernel mappings (TID=1). When we switch to a new
+	 * guest PID, which will also use host PID=0, we must discard the old guest
+	 * userspace mappings. */
+	for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) {
+		struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[i];
+
+		if (ref->tid == 0)
+			kvmppc_44x_shadow_release(vcpu_44x, i);
+	}
+}
+
+static int tlbe_is_host_safe(const struct kvm_vcpu *vcpu,
+                             const struct kvmppc_44x_tlbe *tlbe)
+{
+	gpa_t gpa;
+
+	if (!get_tlb_v(tlbe))
+		return 0;
+
+	/* Does it match current guest AS? */
+	/* XXX what about IS != DS? */
+	if (get_tlb_ts(tlbe) != !!(vcpu->arch.msr & MSR_IS))
+		return 0;
+
+	gpa = get_tlb_raddr(tlbe);
+	if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT))
+		/* Mapping is not for RAM. */
+		return 0;
+
+	return 1;
+}
+
+int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws)
+{
+	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+	struct kvmppc_44x_tlbe *tlbe;
+	unsigned int gtlb_index;
+
+	gtlb_index = vcpu->arch.gpr[ra];
+	if (gtlb_index >= KVM44x_GUEST_TLB_SIZE) {
+		printk("%s: index %d\n", __func__, gtlb_index);
+		kvmppc_dump_vcpu(vcpu);
+		return EMULATE_FAIL;
 	}
 
-	vcpu->arch.shadow_pid = !usermode;
+	tlbe = &vcpu_44x->guest_tlb[gtlb_index];
+
+	/* Invalidate shadow mappings for the about-to-be-clobbered TLB entry. */
+	if (tlbe->word0 & PPC44x_TLB_VALID)
+		kvmppc_44x_invalidate(vcpu, gtlb_index);
+
+	switch (ws) {
+	case PPC44x_TLB_PAGEID:
+		tlbe->tid = get_mmucr_stid(vcpu);
+		tlbe->word0 = vcpu->arch.gpr[rs];
+		break;
+
+	case PPC44x_TLB_XLAT:
+		tlbe->word1 = vcpu->arch.gpr[rs];
+		break;
+
+	case PPC44x_TLB_ATTRIB:
+		tlbe->word2 = vcpu->arch.gpr[rs];
+		break;
+
+	default:
+		return EMULATE_FAIL;
+	}
+
+	if (tlbe_is_host_safe(vcpu, tlbe)) {
+		u64 asid;
+		gva_t eaddr;
+		gpa_t gpaddr;
+		u32 flags;
+		u32 bytes;
+
+		eaddr = get_tlb_eaddr(tlbe);
+		gpaddr = get_tlb_raddr(tlbe);
+
+		/* Use the advertised page size to mask effective and real addrs. */
+		bytes = get_tlb_bytes(tlbe);
+		eaddr &= ~(bytes - 1);
+		gpaddr &= ~(bytes - 1);
+
+		asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid;
+		flags = tlbe->word2 & 0xffff;
+
+		kvmppc_mmu_map(vcpu, eaddr, gpaddr, asid, flags, bytes, gtlb_index);
+	}
+
+	KVMTRACE_5D(GTLB_WRITE, vcpu, gtlb_index, tlbe->tid, tlbe->word0,
+	            tlbe->word1, tlbe->word2, handler);
+
+	kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS);
+	return EMULATE_DONE;
+}
+
+int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb, u8 rc)
+{
+	u32 ea;
+	int gtlb_index;
+	unsigned int as = get_mmucr_sts(vcpu);
+	unsigned int pid = get_mmucr_stid(vcpu);
+
+	ea = vcpu->arch.gpr[rb];
+	if (ra)
+		ea += vcpu->arch.gpr[ra];
+
+	gtlb_index = kvmppc_44x_tlb_index(vcpu, ea, pid, as);
+	if (rc) {
+		if (gtlb_index < 0)
+			vcpu->arch.cr &= ~0x20000000;
+		else
+			vcpu->arch.cr |= 0x20000000;
+	}
+	vcpu->arch.gpr[rt] = gtlb_index;
+
+	kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS);
+	return EMULATE_DONE;
 }
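The guest-TLB lookup implemented by kvmppc_44x_tlb_index() above boils down
to a linear scan with four tests per entry: the valid bit, the effective
address range, the TID (where TID 0 acts as a wildcard matching every PID),
and the TS bit against the current address space. A minimal user-space
sketch of that matching rule, with simplified field layouts and made-up
example mappings, is:

/* Stand-alone sketch of the lookup rule; fields are simplified stand-ins
 * for the packed word0/word1/word2 layout used by the real entries. */
#include <stdint.h>
#include <stdio.h>

struct toy_tlbe {
	int      valid;
	uint32_t eaddr;		/* start of mapped effective range */
	uint32_t bytes;		/* size of mapped range */
	uint8_t  tid;		/* 0 = global (matches any PID) */
	int      ts;		/* translation space (0 or 1) */
};

static int tlb_index(const struct toy_tlbe *tlb, int n,
		     uint32_t eaddr, uint8_t pid, int as)
{
	for (int i = 0; i < n; i++) {
		const struct toy_tlbe *e = &tlb[i];

		if (!e->valid)
			continue;
		if (eaddr < e->eaddr || eaddr >= e->eaddr + e->bytes)
			continue;
		if (e->tid && e->tid != pid)
			continue;
		if (e->ts != as)
			continue;
		return i;
	}
	return -1;		/* miss */
}

int main(void)
{
	struct toy_tlbe tlb[2] = {
		{ 1, 0x00000000, 1 << 24, 0, 0 },	/* 16MB global mapping */
		{ 1, 0xef600000, 1 << 12, 5, 0 },	/* 4KB mapping, PID 5 */
	};

	printf("%d\n", tlb_index(tlb, 2, 0x00100000, 3, 0));	/* 0: global hit */
	printf("%d\n", tlb_index(tlb, 2, 0xef600100, 3, 0));	/* -1: wrong PID */
	return 0;
}

The real code packs these fields into word0/word1/word2 (see the helpers in
44x_tlb.h below), and the XXX comment in the source already flags the linear
scan as a candidate for a better data structure.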
diff --git a/arch/powerpc/kvm/44x_tlb.h b/arch/powerpc/kvm/44x_tlb.h
index 2ccd46b..772191f 100644
--- a/arch/powerpc/kvm/44x_tlb.h
+++ b/arch/powerpc/kvm/44x_tlb.h
@@ -25,48 +25,52 @@
 
 extern int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr,
                                 unsigned int pid, unsigned int as);
-extern struct tlbe *kvmppc_44x_dtlb_search(struct kvm_vcpu *vcpu, gva_t eaddr);
-extern struct tlbe *kvmppc_44x_itlb_search(struct kvm_vcpu *vcpu, gva_t eaddr);
+extern int kvmppc_44x_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr);
+extern int kvmppc_44x_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr);
+
+extern int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb,
+                                 u8 rc);
+extern int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws);
 
 /* TLB helper functions */
-static inline unsigned int get_tlb_size(const struct tlbe *tlbe)
+static inline unsigned int get_tlb_size(const struct kvmppc_44x_tlbe *tlbe)
 {
 	return (tlbe->word0 >> 4) & 0xf;
 }
 
-static inline gva_t get_tlb_eaddr(const struct tlbe *tlbe)
+static inline gva_t get_tlb_eaddr(const struct kvmppc_44x_tlbe *tlbe)
 {
 	return tlbe->word0 & 0xfffffc00;
 }
 
-static inline gva_t get_tlb_bytes(const struct tlbe *tlbe)
+static inline gva_t get_tlb_bytes(const struct kvmppc_44x_tlbe *tlbe)
 {
 	unsigned int pgsize = get_tlb_size(tlbe);
 	return 1 << 10 << (pgsize << 1);
 }
 
-static inline gva_t get_tlb_end(const struct tlbe *tlbe)
+static inline gva_t get_tlb_end(const struct kvmppc_44x_tlbe *tlbe)
 {
 	return get_tlb_eaddr(tlbe) + get_tlb_bytes(tlbe) - 1;
 }
 
-static inline u64 get_tlb_raddr(const struct tlbe *tlbe)
+static inline u64 get_tlb_raddr(const struct kvmppc_44x_tlbe *tlbe)
 {
 	u64 word1 = tlbe->word1;
 	return ((word1 & 0xf) << 32) | (word1 & 0xfffffc00);
 }
 
-static inline unsigned int get_tlb_tid(const struct tlbe *tlbe)
+static inline unsigned int get_tlb_tid(const struct kvmppc_44x_tlbe *tlbe)
 {
 	return tlbe->tid & 0xff;
 }
 
-static inline unsigned int get_tlb_ts(const struct tlbe *tlbe)
+static inline unsigned int get_tlb_ts(const struct kvmppc_44x_tlbe *tlbe)
 {
 	return (tlbe->word0 >> 8) & 0x1;
 }
 
-static inline unsigned int get_tlb_v(const struct tlbe *tlbe)
+static inline unsigned int get_tlb_v(const struct kvmppc_44x_tlbe *tlbe)
 {
 	return (tlbe->word0 >> 9) & 0x1;
 }
@@ -81,7 +85,7 @@
 	return (vcpu->arch.mmucr >> 16) & 0x1;
 }
 
-static inline gpa_t tlb_xlate(struct tlbe *tlbe, gva_t eaddr)
+static inline gpa_t tlb_xlate(struct kvmppc_44x_tlbe *tlbe, gva_t eaddr)
 {
 	unsigned int pgmask = get_tlb_bytes(tlbe) - 1;
 
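As a quick check of the size decode used by get_tlb_bytes() above: the 4-bit
SIZE field selects a page size of 1KB * 4^SIZE, which is exactly what
1 << 10 << (pgsize << 1) computes. The stand-alone snippet below (not kernel
code) prints the progression; note that not every encoding in 0..9 is
implemented by 440 hardware, this only illustrates the arithmetic.

/* Stand-alone check of the get_tlb_bytes() arithmetic: 1KB * 4^SIZE. */
#include <stdio.h>

int main(void)
{
	for (unsigned int size = 0; size <= 9; size++) {
		unsigned long bytes = 1UL << 10 << (size << 1);

		printf("SIZE=%u -> %lu KB\n", size, bytes >> 10);
	}
	return 0;
}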
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 53aaa66..6dbdc48 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -15,27 +15,33 @@
 if VIRTUALIZATION
 
 config KVM
-	bool "Kernel-based Virtual Machine (KVM) support"
-	depends on 44x && EXPERIMENTAL
+	bool
 	select PREEMPT_NOTIFIERS
 	select ANON_INODES
-	# We can only run on Book E hosts so far
-	select KVM_BOOKE_HOST
+
+config KVM_440
+	bool "KVM support for PowerPC 440 processors"
+	depends on EXPERIMENTAL && 44x
+	select KVM
 	---help---
-	  Support hosting virtualized guest machines. You will also
-	  need to select one or more of the processor modules below.
+	  Support running unmodified 440 guest kernels in virtual machines on
+	  440 host processors.
 
 	  This module provides access to the hardware capabilities through
 	  a character device node named /dev/kvm.
 
 	  If unsure, say N.
 
-config KVM_BOOKE_HOST
-	bool "KVM host support for Book E PowerPC processors"
-	depends on KVM && 44x
+config KVM_EXIT_TIMING
+	bool "Detailed exit timing"
+	depends on KVM
 	---help---
-	  Provides host support for KVM on Book E PowerPC processors. Currently
-	  this works on 440 processors only.
+	  Calculate elapsed time for every exit/enter cycle. A per-vcpu
+	  report is available in debugfs kvm/vm#_vcpu#_timing.
+	  The overhead is relatively small; however, it is not recommended
+	  for production environments.
+
+	  If unsure, say N.
 
 config KVM_TRACE
 	bool "KVM trace support"
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 2a5d439..df7ba59 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -8,10 +8,16 @@
 
 common-objs-$(CONFIG_KVM_TRACE)  += $(addprefix ../../../virt/kvm/, kvm_trace.o)
 
-kvm-objs := $(common-objs-y) powerpc.o emulate.o booke_guest.o
+kvm-objs := $(common-objs-y) powerpc.o emulate.o
+obj-$(CONFIG_KVM_EXIT_TIMING) += timing.o
 obj-$(CONFIG_KVM) += kvm.o
 
 AFLAGS_booke_interrupts.o := -I$(obj)
 
-kvm-booke-host-objs := booke_host.o booke_interrupts.o 44x_tlb.o
-obj-$(CONFIG_KVM_BOOKE_HOST) += kvm-booke-host.o
+kvm-440-objs := \
+	booke.o \
+	booke_interrupts.o \
+	44x.o \
+	44x_tlb.o \
+	44x_emulate.o
+obj-$(CONFIG_KVM_440) += kvm-440.o
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
new file mode 100644
index 0000000..35485dd
--- /dev/null
+++ b/arch/powerpc/kvm/booke.c
@@ -0,0 +1,565 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2007
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ *          Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kvm_host.h>
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/fs.h>
+
+#include <asm/cputable.h>
+#include <asm/uaccess.h>
+#include <asm/kvm_ppc.h>
+#include "timing.h"
+#include <asm/cacheflush.h>
+#include <asm/kvm_44x.h>
+
+#include "booke.h"
+#include "44x_tlb.h"
+
+unsigned long kvmppc_booke_handlers;
+
+#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
+#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
+
+struct kvm_stats_debugfs_item debugfs_entries[] = {
+	{ "mmio",       VCPU_STAT(mmio_exits) },
+	{ "dcr",        VCPU_STAT(dcr_exits) },
+	{ "sig",        VCPU_STAT(signal_exits) },
+	{ "itlb_r",     VCPU_STAT(itlb_real_miss_exits) },
+	{ "itlb_v",     VCPU_STAT(itlb_virt_miss_exits) },
+	{ "dtlb_r",     VCPU_STAT(dtlb_real_miss_exits) },
+	{ "dtlb_v",     VCPU_STAT(dtlb_virt_miss_exits) },
+	{ "sysc",       VCPU_STAT(syscall_exits) },
+	{ "isi",        VCPU_STAT(isi_exits) },
+	{ "dsi",        VCPU_STAT(dsi_exits) },
+	{ "inst_emu",   VCPU_STAT(emulated_inst_exits) },
+	{ "dec",        VCPU_STAT(dec_exits) },
+	{ "ext_intr",   VCPU_STAT(ext_intr_exits) },
+	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
+	{ NULL }
+};
+
+/* TODO: use vcpu_printf() */
+void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu)
+{
+	int i;
+
+	printk("pc:   %08lx msr:  %08lx\n", vcpu->arch.pc, vcpu->arch.msr);
+	printk("lr:   %08lx ctr:  %08lx\n", vcpu->arch.lr, vcpu->arch.ctr);
+	printk("srr0: %08lx srr1: %08lx\n", vcpu->arch.srr0, vcpu->arch.srr1);
+
+	printk("exceptions: %08lx\n", vcpu->arch.pending_exceptions);
+
+	for (i = 0; i < 32; i += 4) {
+		printk("gpr%02d: %08lx %08lx %08lx %08lx\n", i,
+		       vcpu->arch.gpr[i],
+		       vcpu->arch.gpr[i+1],
+		       vcpu->arch.gpr[i+2],
+		       vcpu->arch.gpr[i+3]);
+	}
+}
+
+static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu,
+                                       unsigned int priority)
+{
+	set_bit(priority, &vcpu->arch.pending_exceptions);
+}
+
+void kvmppc_core_queue_program(struct kvm_vcpu *vcpu)
+{
+	kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM);
+}
+
+void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu)
+{
+	kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DECREMENTER);
+}
+
+int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu)
+{
+	return test_bit(BOOKE_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions);
+}
+
+void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
+                                struct kvm_interrupt *irq)
+{
+	kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_EXTERNAL);
+}
+
+/* Deliver the interrupt of the corresponding priority, if possible. */
+static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
+                                        unsigned int priority)
+{
+	int allowed = 0;
+	ulong msr_mask;
+
+	switch (priority) {
+	case BOOKE_IRQPRIO_PROGRAM:
+	case BOOKE_IRQPRIO_DTLB_MISS:
+	case BOOKE_IRQPRIO_ITLB_MISS:
+	case BOOKE_IRQPRIO_SYSCALL:
+	case BOOKE_IRQPRIO_DATA_STORAGE:
+	case BOOKE_IRQPRIO_INST_STORAGE:
+	case BOOKE_IRQPRIO_FP_UNAVAIL:
+	case BOOKE_IRQPRIO_AP_UNAVAIL:
+	case BOOKE_IRQPRIO_ALIGNMENT:
+		allowed = 1;
+		msr_mask = MSR_CE|MSR_ME|MSR_DE;
+		break;
+	case BOOKE_IRQPRIO_CRITICAL:
+	case BOOKE_IRQPRIO_WATCHDOG:
+		allowed = vcpu->arch.msr & MSR_CE;
+		msr_mask = MSR_ME;
+		break;
+	case BOOKE_IRQPRIO_MACHINE_CHECK:
+		allowed = vcpu->arch.msr & MSR_ME;
+		msr_mask = 0;
+		break;
+	case BOOKE_IRQPRIO_EXTERNAL:
+	case BOOKE_IRQPRIO_DECREMENTER:
+	case BOOKE_IRQPRIO_FIT:
+		allowed = vcpu->arch.msr & MSR_EE;
+		msr_mask = MSR_CE|MSR_ME|MSR_DE;
+		break;
+	case BOOKE_IRQPRIO_DEBUG:
+		allowed = vcpu->arch.msr & MSR_DE;
+		msr_mask = MSR_ME;
+		break;
+	}
+
+	if (allowed) {
+		vcpu->arch.srr0 = vcpu->arch.pc;
+		vcpu->arch.srr1 = vcpu->arch.msr;
+		vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority];
+		kvmppc_set_msr(vcpu, vcpu->arch.msr & msr_mask);
+
+		clear_bit(priority, &vcpu->arch.pending_exceptions);
+	}
+
+	return allowed;
+}
+
+/* Check pending exceptions and deliver one, if possible. */
+void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu)
+{
+	unsigned long *pending = &vcpu->arch.pending_exceptions;
+	unsigned int priority;
+
+	priority = __ffs(*pending);
+	while (priority <= BOOKE_MAX_INTERRUPT) {
+		if (kvmppc_booke_irqprio_deliver(vcpu, priority))
+			break;
+
+		priority = find_next_bit(pending,
+		                         BITS_PER_BYTE * sizeof(*pending),
+		                         priority + 1);
+	}
+}
+
+/**
+ * kvmppc_handle_exit
+ *
+ * Return value is in the form (errcode<<2 | RESUME_FLAG_HOST | RESUME_FLAG_NV)
+ */
+int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
+                       unsigned int exit_nr)
+{
+	enum emulation_result er;
+	int r = RESUME_HOST;
+
+	/* update before a new last_exit_type is rewritten */
+	kvmppc_update_timing_stats(vcpu);
+
+	local_irq_enable();
+
+	run->exit_reason = KVM_EXIT_UNKNOWN;
+	run->ready_for_interrupt_injection = 1;
+
+	switch (exit_nr) {
+	case BOOKE_INTERRUPT_MACHINE_CHECK:
+		printk("MACHINE CHECK: %lx\n", mfspr(SPRN_MCSR));
+		kvmppc_dump_vcpu(vcpu);
+		r = RESUME_HOST;
+		break;
+
+	case BOOKE_INTERRUPT_EXTERNAL:
+		kvmppc_account_exit(vcpu, EXT_INTR_EXITS);
+		if (need_resched())
+			cond_resched();
+		r = RESUME_GUEST;
+		break;
+
+	case BOOKE_INTERRUPT_DECREMENTER:
+		/* Since we switched IVPR back to the host's value, the host
+		 * handled this interrupt the moment we enabled interrupts.
+		 * Now we just offer it a chance to reschedule the guest. */
+		kvmppc_account_exit(vcpu, DEC_EXITS);
+		if (need_resched())
+			cond_resched();
+		r = RESUME_GUEST;
+		break;
+
+	case BOOKE_INTERRUPT_PROGRAM:
+		if (vcpu->arch.msr & MSR_PR) {
+			/* Program traps generated by user-level software must be handled
+			 * by the guest kernel. */
+			vcpu->arch.esr = vcpu->arch.fault_esr;
+			kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM);
+			r = RESUME_GUEST;
+			kvmppc_account_exit(vcpu, USR_PR_INST);
+			break;
+		}
+
+		er = kvmppc_emulate_instruction(run, vcpu);
+		switch (er) {
+		case EMULATE_DONE:
+			/* don't overwrite subtypes, just account kvm_stats */
+			kvmppc_account_exit_stat(vcpu, EMULATED_INST_EXITS);
+			/* Future optimization: only reload non-volatiles if
+			 * they were actually modified by emulation. */
+			r = RESUME_GUEST_NV;
+			break;
+		case EMULATE_DO_DCR:
+			run->exit_reason = KVM_EXIT_DCR;
+			r = RESUME_HOST;
+			break;
+		case EMULATE_FAIL:
+			/* XXX Deliver Program interrupt to guest. */
+			printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
+			       __func__, vcpu->arch.pc, vcpu->arch.last_inst);
+			/* For debugging, encode the failing instruction and
+			 * report it to userspace. */
+			run->hw.hardware_exit_reason = ~0ULL << 32;
+			run->hw.hardware_exit_reason |= vcpu->arch.last_inst;
+			r = RESUME_HOST;
+			break;
+		default:
+			BUG();
+		}
+		break;
+
+	case BOOKE_INTERRUPT_FP_UNAVAIL:
+		kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_FP_UNAVAIL);
+		kvmppc_account_exit(vcpu, FP_UNAVAIL);
+		r = RESUME_GUEST;
+		break;
+
+	case BOOKE_INTERRUPT_DATA_STORAGE:
+		vcpu->arch.dear = vcpu->arch.fault_dear;
+		vcpu->arch.esr = vcpu->arch.fault_esr;
+		kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DATA_STORAGE);
+		kvmppc_account_exit(vcpu, DSI_EXITS);
+		r = RESUME_GUEST;
+		break;
+
+	case BOOKE_INTERRUPT_INST_STORAGE:
+		vcpu->arch.esr = vcpu->arch.fault_esr;
+		kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_INST_STORAGE);
+		kvmppc_account_exit(vcpu, ISI_EXITS);
+		r = RESUME_GUEST;
+		break;
+
+	case BOOKE_INTERRUPT_SYSCALL:
+		kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SYSCALL);
+		kvmppc_account_exit(vcpu, SYSCALL_EXITS);
+		r = RESUME_GUEST;
+		break;
+
+	/* XXX move to a 440-specific file. */
+	case BOOKE_INTERRUPT_DTLB_MISS: {
+		struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+		struct kvmppc_44x_tlbe *gtlbe;
+		unsigned long eaddr = vcpu->arch.fault_dear;
+		int gtlb_index;
+		gfn_t gfn;
+
+		/* Check the guest TLB. */
+		gtlb_index = kvmppc_44x_dtlb_index(vcpu, eaddr);
+		if (gtlb_index < 0) {
+			/* The guest didn't have a mapping for it. */
+			kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DTLB_MISS);
+			vcpu->arch.dear = vcpu->arch.fault_dear;
+			vcpu->arch.esr = vcpu->arch.fault_esr;
+			kvmppc_account_exit(vcpu, DTLB_REAL_MISS_EXITS);
+			r = RESUME_GUEST;
+			break;
+		}
+
+		gtlbe = &vcpu_44x->guest_tlb[gtlb_index];
+		vcpu->arch.paddr_accessed = tlb_xlate(gtlbe, eaddr);
+		gfn = vcpu->arch.paddr_accessed >> PAGE_SHIFT;
+
+		if (kvm_is_visible_gfn(vcpu->kvm, gfn)) {
+			/* The guest TLB had a mapping, but the shadow TLB
+			 * didn't, and it is RAM. This could be because:
+			 * a) the entry is mapping the host kernel, or
+			 * b) the guest used a large mapping which we're faking
+			 * Either way, we need to satisfy the fault without
+			 * invoking the guest. */
+			kvmppc_mmu_map(vcpu, eaddr, vcpu->arch.paddr_accessed, gtlbe->tid,
+			               gtlbe->word2, get_tlb_bytes(gtlbe), gtlb_index);
+			kvmppc_account_exit(vcpu, DTLB_VIRT_MISS_EXITS);
+			r = RESUME_GUEST;
+		} else {
+			/* Guest has mapped and accessed a page which is not
+			 * actually RAM. */
+			r = kvmppc_emulate_mmio(run, vcpu);
+			kvmppc_account_exit(vcpu, MMIO_EXITS);
+		}
+
+		break;
+	}
+
+	/* XXX move to a 440-specific file. */
+	case BOOKE_INTERRUPT_ITLB_MISS: {
+		struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+		struct kvmppc_44x_tlbe *gtlbe;
+		unsigned long eaddr = vcpu->arch.pc;
+		gpa_t gpaddr;
+		gfn_t gfn;
+		int gtlb_index;
+
+		r = RESUME_GUEST;
+
+		/* Check the guest TLB. */
+		gtlb_index = kvmppc_44x_itlb_index(vcpu, eaddr);
+		if (gtlb_index < 0) {
+			/* The guest didn't have a mapping for it. */
+			kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ITLB_MISS);
+			kvmppc_account_exit(vcpu, ITLB_REAL_MISS_EXITS);
+			break;
+		}
+
+		kvmppc_account_exit(vcpu, ITLB_VIRT_MISS_EXITS);
+
+		gtlbe = &vcpu_44x->guest_tlb[gtlb_index];
+		gpaddr = tlb_xlate(gtlbe, eaddr);
+		gfn = gpaddr >> PAGE_SHIFT;
+
+		if (kvm_is_visible_gfn(vcpu->kvm, gfn)) {
+			/* The guest TLB had a mapping, but the shadow TLB
+			 * didn't. This could be because:
+			 * a) the entry is mapping the host kernel, or
+			 * b) the guest used a large mapping which we're faking
+			 * Either way, we need to satisfy the fault without
+			 * invoking the guest. */
+			kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlbe->tid,
+			               gtlbe->word2, get_tlb_bytes(gtlbe), gtlb_index);
+		} else {
+			/* Guest mapped and leaped at non-RAM! */
+			kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_MACHINE_CHECK);
+		}
+
+		break;
+	}
+
+	case BOOKE_INTERRUPT_DEBUG: {
+		u32 dbsr;
+
+		vcpu->arch.pc = mfspr(SPRN_CSRR0);
+
+		/* clear IAC events in DBSR register */
+		dbsr = mfspr(SPRN_DBSR);
+		dbsr &= DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4;
+		mtspr(SPRN_DBSR, dbsr);
+
+		run->exit_reason = KVM_EXIT_DEBUG;
+		kvmppc_account_exit(vcpu, DEBUG_EXITS);
+		r = RESUME_HOST;
+		break;
+	}
+
+	default:
+		printk(KERN_EMERG "exit_nr %d\n", exit_nr);
+		BUG();
+	}
+
+	local_irq_disable();
+
+	kvmppc_core_deliver_interrupts(vcpu);
+
+	if (!(r & RESUME_HOST)) {
+		/* To avoid clobbering exit_reason, only check for signals if
+		 * we aren't already exiting to userspace for some other
+		 * reason. */
+		if (signal_pending(current)) {
+			run->exit_reason = KVM_EXIT_INTR;
+			r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV);
+			kvmppc_account_exit(vcpu, SIGNAL_EXITS);
+		}
+	}
+
+	return r;
+}
+
+/* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */
+int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.pc = 0;
+	vcpu->arch.msr = 0;
+	vcpu->arch.gpr[1] = (16<<20) - 8; /* -8 for the callee-save LR slot */
+
+	vcpu->arch.shadow_pid = 1;
+
+	/* Eye-catching number so we know if the guest takes an interrupt
+	 * before it's programmed its own IVPR. */
+	vcpu->arch.ivpr = 0x55550000;
+
+	kvmppc_init_timing_stats(vcpu);
+
+	return kvmppc_core_vcpu_setup(vcpu);
+}
+
+int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+	int i;
+
+	regs->pc = vcpu->arch.pc;
+	regs->cr = vcpu->arch.cr;
+	regs->ctr = vcpu->arch.ctr;
+	regs->lr = vcpu->arch.lr;
+	regs->xer = vcpu->arch.xer;
+	regs->msr = vcpu->arch.msr;
+	regs->srr0 = vcpu->arch.srr0;
+	regs->srr1 = vcpu->arch.srr1;
+	regs->pid = vcpu->arch.pid;
+	regs->sprg0 = vcpu->arch.sprg0;
+	regs->sprg1 = vcpu->arch.sprg1;
+	regs->sprg2 = vcpu->arch.sprg2;
+	regs->sprg3 = vcpu->arch.sprg3;
+	regs->sprg5 = vcpu->arch.sprg4;
+	regs->sprg6 = vcpu->arch.sprg5;
+	regs->sprg7 = vcpu->arch.sprg6;
+
+	for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
+		regs->gpr[i] = vcpu->arch.gpr[i];
+
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+	int i;
+
+	vcpu->arch.pc = regs->pc;
+	vcpu->arch.cr = regs->cr;
+	vcpu->arch.ctr = regs->ctr;
+	vcpu->arch.lr = regs->lr;
+	vcpu->arch.xer = regs->xer;
+	kvmppc_set_msr(vcpu, regs->msr);
+	vcpu->arch.srr0 = regs->srr0;
+	vcpu->arch.srr1 = regs->srr1;
+	vcpu->arch.sprg0 = regs->sprg0;
+	vcpu->arch.sprg1 = regs->sprg1;
+	vcpu->arch.sprg2 = regs->sprg2;
+	vcpu->arch.sprg3 = regs->sprg3;
+	vcpu->arch.sprg5 = regs->sprg4;
+	vcpu->arch.sprg6 = regs->sprg5;
+	vcpu->arch.sprg7 = regs->sprg6;
+
+	for (i = 0; i < ARRAY_SIZE(vcpu->arch.gpr); i++)
+		vcpu->arch.gpr[i] = regs->gpr[i];
+
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
+                                  struct kvm_sregs *sregs)
+{
+	return -ENOTSUPP;
+}
+
+int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
+                                  struct kvm_sregs *sregs)
+{
+	return -ENOTSUPP;
+}
+
+int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+	return -ENOTSUPP;
+}
+
+int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+	return -ENOTSUPP;
+}
+
+int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
+                                  struct kvm_translation *tr)
+{
+	return kvmppc_core_vcpu_translate(vcpu, tr);
+}
+
+int kvmppc_booke_init(void)
+{
+	unsigned long ivor[16];
+	unsigned long max_ivor = 0;
+	int i;
+
+	/* We install our own exception handlers by hijacking IVPR. IVPR holds
+	 * only the upper 16 bits of the handler address, so the handlers must
+	 * live in a 64KB-aligned block; hence the 64KB allocation. */
+	kvmppc_booke_handlers = __get_free_pages(GFP_KERNEL | __GFP_ZERO,
+	                                         VCPU_SIZE_ORDER);
+	if (!kvmppc_booke_handlers)
+		return -ENOMEM;
+
+	/* XXX make sure our handlers are smaller than Linux's */
+
+	/* Copy our interrupt handlers to match host IVORs. That way we don't
+	 * have to swap the IVORs on every guest/host transition. */
+	ivor[0] = mfspr(SPRN_IVOR0);
+	ivor[1] = mfspr(SPRN_IVOR1);
+	ivor[2] = mfspr(SPRN_IVOR2);
+	ivor[3] = mfspr(SPRN_IVOR3);
+	ivor[4] = mfspr(SPRN_IVOR4);
+	ivor[5] = mfspr(SPRN_IVOR5);
+	ivor[6] = mfspr(SPRN_IVOR6);
+	ivor[7] = mfspr(SPRN_IVOR7);
+	ivor[8] = mfspr(SPRN_IVOR8);
+	ivor[9] = mfspr(SPRN_IVOR9);
+	ivor[10] = mfspr(SPRN_IVOR10);
+	ivor[11] = mfspr(SPRN_IVOR11);
+	ivor[12] = mfspr(SPRN_IVOR12);
+	ivor[13] = mfspr(SPRN_IVOR13);
+	ivor[14] = mfspr(SPRN_IVOR14);
+	ivor[15] = mfspr(SPRN_IVOR15);
+
+	for (i = 0; i < 16; i++) {
+		if (ivor[i] > max_ivor)
+			max_ivor = ivor[i];
+
+		memcpy((void *)kvmppc_booke_handlers + ivor[i],
+		       kvmppc_handlers_start + i * kvmppc_handler_len,
+		       kvmppc_handler_len);
+	}
+	flush_icache_range(kvmppc_booke_handlers,
+	                   kvmppc_booke_handlers + max_ivor + kvmppc_handler_len);
+
+	return 0;
+}
+
+void __exit kvmppc_booke_exit(void)
+{
+	free_pages(kvmppc_booke_handlers, VCPU_SIZE_ORDER);
+	kvm_exit();
+}
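The interrupt-delivery loop in kvmppc_core_deliver_interrupts() above walks
the pending_exceptions bitmap in priority order (lower bit number means
higher priority) and delivers the first interrupt the current MSR allows,
leaving masked ones pending. The user-space sketch below mirrors that
control flow; the single "ee" flag is a stand-in for the per-priority
MSR_EE/MSR_CE/MSR_ME/MSR_DE checks done by kvmppc_booke_irqprio_deliver().

/* Stand-alone sketch of the delivery loop; the masking rule is illustrative
 * only and does not reproduce the real per-priority MSR gates. */
#include <stdio.h>

#define MAX_PRIO 16

/* Illustrative rule: pretend priorities 8 and up are gated by "ee". */
static int can_deliver(unsigned int prio, int ee)
{
	return prio < 8 || ee;
}

static int deliver_one(unsigned long *pending, int ee)
{
	for (unsigned int prio = 0; prio < MAX_PRIO; prio++) {
		if (!(*pending & (1UL << prio)))
			continue;
		if (!can_deliver(prio, ee))
			continue;	/* leave it pending, try lower priorities */
		*pending &= ~(1UL << prio);
		return prio;		/* delivered: cleared from the bitmap */
	}
	return -1;			/* nothing deliverable right now */
}

int main(void)
{
	unsigned long pending = (1UL << 3) | (1UL << 10);

	printf("%d\n", deliver_one(&pending, 0));	/* 3  */
	printf("%d\n", deliver_one(&pending, 0));	/* -1: priority 10 masked */
	printf("%d\n", deliver_one(&pending, 1));	/* 10 */
	return 0;
}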
diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h
new file mode 100644
index 0000000..cf7c94c
--- /dev/null
+++ b/arch/powerpc/kvm/booke.h
@@ -0,0 +1,60 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#ifndef __KVM_BOOKE_H__
+#define __KVM_BOOKE_H__
+
+#include <linux/types.h>
+#include <linux/kvm_host.h>
+#include "timing.h"
+
+/* interrupt priority ordering */
+#define BOOKE_IRQPRIO_DATA_STORAGE 0
+#define BOOKE_IRQPRIO_INST_STORAGE 1
+#define BOOKE_IRQPRIO_ALIGNMENT 2
+#define BOOKE_IRQPRIO_PROGRAM 3
+#define BOOKE_IRQPRIO_FP_UNAVAIL 4
+#define BOOKE_IRQPRIO_SYSCALL 5
+#define BOOKE_IRQPRIO_AP_UNAVAIL 6
+#define BOOKE_IRQPRIO_DTLB_MISS 7
+#define BOOKE_IRQPRIO_ITLB_MISS 8
+#define BOOKE_IRQPRIO_MACHINE_CHECK 9
+#define BOOKE_IRQPRIO_DEBUG 10
+#define BOOKE_IRQPRIO_CRITICAL 11
+#define BOOKE_IRQPRIO_WATCHDOG 12
+#define BOOKE_IRQPRIO_EXTERNAL 13
+#define BOOKE_IRQPRIO_FIT 14
+#define BOOKE_IRQPRIO_DECREMENTER 15
+
+/* Helper function for "full" MSR writes. No need to call this if only EE is
+ * changing. */
+static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
+{
+	if ((new_msr & MSR_PR) != (vcpu->arch.msr & MSR_PR))
+		kvmppc_mmu_priv_switch(vcpu, new_msr & MSR_PR);
+
+	vcpu->arch.msr = new_msr;
+
+	if (vcpu->arch.msr & MSR_WE) {
+		kvm_vcpu_block(vcpu);
+		kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS);
+	}
+}
+
+#endif /* __KVM_BOOKE_H__ */
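kvmppc_set_msr() in booke.h above does two things on a full MSR write: it
performs the shadow-PID privilege switch only when the PR bit actually
changes, and it blocks the vcpu when the guest sets MSR[WE] (the wait
state). A small stand-alone sketch of that pattern follows; the bit
positions and the toy_* names are stand-ins, not the real MSR_* values or
kernel APIs.

/* Stand-alone sketch of the full-MSR-write pattern; bit positions are
 * illustrative, not the architected MSR layout. */
#include <stdio.h>
#include <stdint.h>

#define TOY_MSR_PR (1u << 14)	/* problem state (user mode), stand-in bit */
#define TOY_MSR_WE (1u << 18)	/* wait enable, stand-in bit */

struct toy_vcpu {
	uint32_t msr;
	int usermode_mappings;	/* stands in for the shadow-PID switch */
	int blocked;		/* stands in for kvm_vcpu_block() */
};

static void toy_set_msr(struct toy_vcpu *vcpu, uint32_t new_msr)
{
	/* Privilege switch only on an actual PR transition. */
	if ((new_msr & TOY_MSR_PR) != (vcpu->msr & TOY_MSR_PR))
		vcpu->usermode_mappings = !!(new_msr & TOY_MSR_PR);

	vcpu->msr = new_msr;

	/* Guest executed "wait": nothing to run until an interrupt arrives. */
	if (vcpu->msr & TOY_MSR_WE)
		vcpu->blocked = 1;
}

int main(void)
{
	struct toy_vcpu vcpu = { 0, 0, 0 };

	toy_set_msr(&vcpu, TOY_MSR_PR);			/* enter user mode */
	toy_set_msr(&vcpu, TOY_MSR_PR | TOY_MSR_WE);	/* guest idles */
	printf("usermode=%d blocked=%d\n", vcpu.usermode_mappings, vcpu.blocked);
	return 0;
}

Gating the switch on an actual PR transition avoids redundant calls to
kvmppc_mmu_priv_switch() for MSR writes that only toggle other bits, which is
what the "No need to call this if only EE is changing" comment refers to.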
diff --git a/arch/powerpc/kvm/booke_guest.c b/arch/powerpc/kvm/booke_guest.c
deleted file mode 100644
index 7b2591e..0000000
--- a/arch/powerpc/kvm/booke_guest.c
+++ /dev/null
@@ -1,605 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
- *
- * Copyright IBM Corp. 2007
- *
- * Authors: Hollis Blanchard <hollisb@us.ibm.com>
- *          Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
- */
-
-#include <linux/errno.h>
-#include <linux/err.h>
-#include <linux/kvm_host.h>
-#include <linux/module.h>
-#include <linux/vmalloc.h>
-#include <linux/fs.h>
-#include <asm/cputable.h>
-#include <asm/uaccess.h>
-#include <asm/kvm_ppc.h>
-
-#include "44x_tlb.h"
-
-#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
-#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
-
-struct kvm_stats_debugfs_item debugfs_entries[] = {
-	{ "exits",      VCPU_STAT(sum_exits) },
-	{ "mmio",       VCPU_STAT(mmio_exits) },
-	{ "dcr",        VCPU_STAT(dcr_exits) },
-	{ "sig",        VCPU_STAT(signal_exits) },
-	{ "light",      VCPU_STAT(light_exits) },
-	{ "itlb_r",     VCPU_STAT(itlb_real_miss_exits) },
-	{ "itlb_v",     VCPU_STAT(itlb_virt_miss_exits) },
-	{ "dtlb_r",     VCPU_STAT(dtlb_real_miss_exits) },
-	{ "dtlb_v",     VCPU_STAT(dtlb_virt_miss_exits) },
-	{ "sysc",       VCPU_STAT(syscall_exits) },
-	{ "isi",        VCPU_STAT(isi_exits) },
-	{ "dsi",        VCPU_STAT(dsi_exits) },
-	{ "inst_emu",   VCPU_STAT(emulated_inst_exits) },
-	{ "dec",        VCPU_STAT(dec_exits) },
-	{ "ext_intr",   VCPU_STAT(ext_intr_exits) },
-	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
-	{ NULL }
-};
-
-static const u32 interrupt_msr_mask[16] = {
-	[BOOKE_INTERRUPT_CRITICAL]      = MSR_ME,
-	[BOOKE_INTERRUPT_MACHINE_CHECK] = 0,
-	[BOOKE_INTERRUPT_DATA_STORAGE]  = MSR_CE|MSR_ME|MSR_DE,
-	[BOOKE_INTERRUPT_INST_STORAGE]  = MSR_CE|MSR_ME|MSR_DE,
-	[BOOKE_INTERRUPT_EXTERNAL]      = MSR_CE|MSR_ME|MSR_DE,
-	[BOOKE_INTERRUPT_ALIGNMENT]     = MSR_CE|MSR_ME|MSR_DE,
-	[BOOKE_INTERRUPT_PROGRAM]       = MSR_CE|MSR_ME|MSR_DE,
-	[BOOKE_INTERRUPT_FP_UNAVAIL]    = MSR_CE|MSR_ME|MSR_DE,
-	[BOOKE_INTERRUPT_SYSCALL]       = MSR_CE|MSR_ME|MSR_DE,
-	[BOOKE_INTERRUPT_AP_UNAVAIL]    = MSR_CE|MSR_ME|MSR_DE,
-	[BOOKE_INTERRUPT_DECREMENTER]   = MSR_CE|MSR_ME|MSR_DE,
-	[BOOKE_INTERRUPT_FIT]           = MSR_CE|MSR_ME|MSR_DE,
-	[BOOKE_INTERRUPT_WATCHDOG]      = MSR_ME,
-	[BOOKE_INTERRUPT_DTLB_MISS]     = MSR_CE|MSR_ME|MSR_DE,
-	[BOOKE_INTERRUPT_ITLB_MISS]     = MSR_CE|MSR_ME|MSR_DE,
-	[BOOKE_INTERRUPT_DEBUG]         = MSR_ME,
-};
-
-const unsigned char exception_priority[] = {
-	[BOOKE_INTERRUPT_DATA_STORAGE] = 0,
-	[BOOKE_INTERRUPT_INST_STORAGE] = 1,
-	[BOOKE_INTERRUPT_ALIGNMENT] = 2,
-	[BOOKE_INTERRUPT_PROGRAM] = 3,
-	[BOOKE_INTERRUPT_FP_UNAVAIL] = 4,
-	[BOOKE_INTERRUPT_SYSCALL] = 5,
-	[BOOKE_INTERRUPT_AP_UNAVAIL] = 6,
-	[BOOKE_INTERRUPT_DTLB_MISS] = 7,
-	[BOOKE_INTERRUPT_ITLB_MISS] = 8,
-	[BOOKE_INTERRUPT_MACHINE_CHECK] = 9,
-	[BOOKE_INTERRUPT_DEBUG] = 10,
-	[BOOKE_INTERRUPT_CRITICAL] = 11,
-	[BOOKE_INTERRUPT_WATCHDOG] = 12,
-	[BOOKE_INTERRUPT_EXTERNAL] = 13,
-	[BOOKE_INTERRUPT_FIT] = 14,
-	[BOOKE_INTERRUPT_DECREMENTER] = 15,
-};
-
-const unsigned char priority_exception[] = {
-	BOOKE_INTERRUPT_DATA_STORAGE,
-	BOOKE_INTERRUPT_INST_STORAGE,
-	BOOKE_INTERRUPT_ALIGNMENT,
-	BOOKE_INTERRUPT_PROGRAM,
-	BOOKE_INTERRUPT_FP_UNAVAIL,
-	BOOKE_INTERRUPT_SYSCALL,
-	BOOKE_INTERRUPT_AP_UNAVAIL,
-	BOOKE_INTERRUPT_DTLB_MISS,
-	BOOKE_INTERRUPT_ITLB_MISS,
-	BOOKE_INTERRUPT_MACHINE_CHECK,
-	BOOKE_INTERRUPT_DEBUG,
-	BOOKE_INTERRUPT_CRITICAL,
-	BOOKE_INTERRUPT_WATCHDOG,
-	BOOKE_INTERRUPT_EXTERNAL,
-	BOOKE_INTERRUPT_FIT,
-	BOOKE_INTERRUPT_DECREMENTER,
-};
-
-
-void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu)
-{
-	struct tlbe *tlbe;
-	int i;
-
-	printk("vcpu %d TLB dump:\n", vcpu->vcpu_id);
-	printk("| %2s | %3s | %8s | %8s | %8s |\n",
-			"nr", "tid", "word0", "word1", "word2");
-
-	for (i = 0; i < PPC44x_TLB_SIZE; i++) {
-		tlbe = &vcpu->arch.guest_tlb[i];
-		if (tlbe->word0 & PPC44x_TLB_VALID)
-			printk(" G%2d |  %02X | %08X | %08X | %08X |\n",
-			       i, tlbe->tid, tlbe->word0, tlbe->word1,
-			       tlbe->word2);
-	}
-
-	for (i = 0; i < PPC44x_TLB_SIZE; i++) {
-		tlbe = &vcpu->arch.shadow_tlb[i];
-		if (tlbe->word0 & PPC44x_TLB_VALID)
-			printk(" S%2d | %02X | %08X | %08X | %08X |\n",
-			       i, tlbe->tid, tlbe->word0, tlbe->word1,
-			       tlbe->word2);
-	}
-}
-
-/* TODO: use vcpu_printf() */
-void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu)
-{
-	int i;
-
-	printk("pc:   %08x msr:  %08x\n", vcpu->arch.pc, vcpu->arch.msr);
-	printk("lr:   %08x ctr:  %08x\n", vcpu->arch.lr, vcpu->arch.ctr);
-	printk("srr0: %08x srr1: %08x\n", vcpu->arch.srr0, vcpu->arch.srr1);
-
-	printk("exceptions: %08lx\n", vcpu->arch.pending_exceptions);
-
-	for (i = 0; i < 32; i += 4) {
-		printk("gpr%02d: %08x %08x %08x %08x\n", i,
-		       vcpu->arch.gpr[i],
-		       vcpu->arch.gpr[i+1],
-		       vcpu->arch.gpr[i+2],
-		       vcpu->arch.gpr[i+3]);
-	}
-}
-
-/* Check if we are ready to deliver the interrupt */
-static int kvmppc_can_deliver_interrupt(struct kvm_vcpu *vcpu, int interrupt)
-{
-	int r;
-
-	switch (interrupt) {
-	case BOOKE_INTERRUPT_CRITICAL:
-		r = vcpu->arch.msr & MSR_CE;
-		break;
-	case BOOKE_INTERRUPT_MACHINE_CHECK:
-		r = vcpu->arch.msr & MSR_ME;
-		break;
-	case BOOKE_INTERRUPT_EXTERNAL:
-		r = vcpu->arch.msr & MSR_EE;
-		break;
-	case BOOKE_INTERRUPT_DECREMENTER:
-		r = vcpu->arch.msr & MSR_EE;
-		break;
-	case BOOKE_INTERRUPT_FIT:
-		r = vcpu->arch.msr & MSR_EE;
-		break;
-	case BOOKE_INTERRUPT_WATCHDOG:
-		r = vcpu->arch.msr & MSR_CE;
-		break;
-	case BOOKE_INTERRUPT_DEBUG:
-		r = vcpu->arch.msr & MSR_DE;
-		break;
-	default:
-		r = 1;
-	}
-
-	return r;
-}
-
-static void kvmppc_deliver_interrupt(struct kvm_vcpu *vcpu, int interrupt)
-{
-	switch (interrupt) {
-	case BOOKE_INTERRUPT_DECREMENTER:
-		vcpu->arch.tsr |= TSR_DIS;
-		break;
-	}
-
-	vcpu->arch.srr0 = vcpu->arch.pc;
-	vcpu->arch.srr1 = vcpu->arch.msr;
-	vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[interrupt];
-	kvmppc_set_msr(vcpu, vcpu->arch.msr & interrupt_msr_mask[interrupt]);
-}
-
-/* Check pending exceptions and deliver one, if possible. */
-void kvmppc_check_and_deliver_interrupts(struct kvm_vcpu *vcpu)
-{
-	unsigned long *pending = &vcpu->arch.pending_exceptions;
-	unsigned int exception;
-	unsigned int priority;
-
-	priority = find_first_bit(pending, BITS_PER_BYTE * sizeof(*pending));
-	while (priority <= BOOKE_MAX_INTERRUPT) {
-		exception = priority_exception[priority];
-		if (kvmppc_can_deliver_interrupt(vcpu, exception)) {
-			kvmppc_clear_exception(vcpu, exception);
-			kvmppc_deliver_interrupt(vcpu, exception);
-			break;
-		}
-
-		priority = find_next_bit(pending,
-		                         BITS_PER_BYTE * sizeof(*pending),
-		                         priority + 1);
-	}
-}
-
-/**
- * kvmppc_handle_exit
- *
- * Return value is in the form (errcode<<2 | RESUME_FLAG_HOST | RESUME_FLAG_NV)
- */
-int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
-                       unsigned int exit_nr)
-{
-	enum emulation_result er;
-	int r = RESUME_HOST;
-
-	local_irq_enable();
-
-	run->exit_reason = KVM_EXIT_UNKNOWN;
-	run->ready_for_interrupt_injection = 1;
-
-	switch (exit_nr) {
-	case BOOKE_INTERRUPT_MACHINE_CHECK:
-		printk("MACHINE CHECK: %lx\n", mfspr(SPRN_MCSR));
-		kvmppc_dump_vcpu(vcpu);
-		r = RESUME_HOST;
-		break;
-
-	case BOOKE_INTERRUPT_EXTERNAL:
-	case BOOKE_INTERRUPT_DECREMENTER:
-		/* Since we switched IVPR back to the host's value, the host
-		 * handled this interrupt the moment we enabled interrupts.
-		 * Now we just offer it a chance to reschedule the guest. */
-
-		/* XXX At this point the TLB still holds our shadow TLB, so if
-		 * we do reschedule the host will fault over it. Perhaps we
-		 * should politely restore the host's entries to minimize
-		 * misses before ceding control. */
-		if (need_resched())
-			cond_resched();
-		if (exit_nr == BOOKE_INTERRUPT_DECREMENTER)
-			vcpu->stat.dec_exits++;
-		else
-			vcpu->stat.ext_intr_exits++;
-		r = RESUME_GUEST;
-		break;
-
-	case BOOKE_INTERRUPT_PROGRAM:
-		if (vcpu->arch.msr & MSR_PR) {
-			/* Program traps generated by user-level software must be handled
-			 * by the guest kernel. */
-			vcpu->arch.esr = vcpu->arch.fault_esr;
-			kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_PROGRAM);
-			r = RESUME_GUEST;
-			break;
-		}
-
-		er = kvmppc_emulate_instruction(run, vcpu);
-		switch (er) {
-		case EMULATE_DONE:
-			/* Future optimization: only reload non-volatiles if
-			 * they were actually modified by emulation. */
-			vcpu->stat.emulated_inst_exits++;
-			r = RESUME_GUEST_NV;
-			break;
-		case EMULATE_DO_DCR:
-			run->exit_reason = KVM_EXIT_DCR;
-			r = RESUME_HOST;
-			break;
-		case EMULATE_FAIL:
-			/* XXX Deliver Program interrupt to guest. */
-			printk(KERN_CRIT "%s: emulation at %x failed (%08x)\n",
-			       __func__, vcpu->arch.pc, vcpu->arch.last_inst);
-			/* For debugging, encode the failing instruction and
-			 * report it to userspace. */
-			run->hw.hardware_exit_reason = ~0ULL << 32;
-			run->hw.hardware_exit_reason |= vcpu->arch.last_inst;
-			r = RESUME_HOST;
-			break;
-		default:
-			BUG();
-		}
-		break;
-
-	case BOOKE_INTERRUPT_FP_UNAVAIL:
-		kvmppc_queue_exception(vcpu, exit_nr);
-		r = RESUME_GUEST;
-		break;
-
-	case BOOKE_INTERRUPT_DATA_STORAGE:
-		vcpu->arch.dear = vcpu->arch.fault_dear;
-		vcpu->arch.esr = vcpu->arch.fault_esr;
-		kvmppc_queue_exception(vcpu, exit_nr);
-		vcpu->stat.dsi_exits++;
-		r = RESUME_GUEST;
-		break;
-
-	case BOOKE_INTERRUPT_INST_STORAGE:
-		vcpu->arch.esr = vcpu->arch.fault_esr;
-		kvmppc_queue_exception(vcpu, exit_nr);
-		vcpu->stat.isi_exits++;
-		r = RESUME_GUEST;
-		break;
-
-	case BOOKE_INTERRUPT_SYSCALL:
-		kvmppc_queue_exception(vcpu, exit_nr);
-		vcpu->stat.syscall_exits++;
-		r = RESUME_GUEST;
-		break;
-
-	case BOOKE_INTERRUPT_DTLB_MISS: {
-		struct tlbe *gtlbe;
-		unsigned long eaddr = vcpu->arch.fault_dear;
-		gfn_t gfn;
-
-		/* Check the guest TLB. */
-		gtlbe = kvmppc_44x_dtlb_search(vcpu, eaddr);
-		if (!gtlbe) {
-			/* The guest didn't have a mapping for it. */
-			kvmppc_queue_exception(vcpu, exit_nr);
-			vcpu->arch.dear = vcpu->arch.fault_dear;
-			vcpu->arch.esr = vcpu->arch.fault_esr;
-			vcpu->stat.dtlb_real_miss_exits++;
-			r = RESUME_GUEST;
-			break;
-		}
-
-		vcpu->arch.paddr_accessed = tlb_xlate(gtlbe, eaddr);
-		gfn = vcpu->arch.paddr_accessed >> PAGE_SHIFT;
-
-		if (kvm_is_visible_gfn(vcpu->kvm, gfn)) {
-			/* The guest TLB had a mapping, but the shadow TLB
-			 * didn't, and it is RAM. This could be because:
-			 * a) the entry is mapping the host kernel, or
-			 * b) the guest used a large mapping which we're faking
-			 * Either way, we need to satisfy the fault without
-			 * invoking the guest. */
-			kvmppc_mmu_map(vcpu, eaddr, gfn, gtlbe->tid,
-			               gtlbe->word2);
-			vcpu->stat.dtlb_virt_miss_exits++;
-			r = RESUME_GUEST;
-		} else {
-			/* Guest has mapped and accessed a page which is not
-			 * actually RAM. */
-			r = kvmppc_emulate_mmio(run, vcpu);
-		}
-
-		break;
-	}
-
-	case BOOKE_INTERRUPT_ITLB_MISS: {
-		struct tlbe *gtlbe;
-		unsigned long eaddr = vcpu->arch.pc;
-		gfn_t gfn;
-
-		r = RESUME_GUEST;
-
-		/* Check the guest TLB. */
-		gtlbe = kvmppc_44x_itlb_search(vcpu, eaddr);
-		if (!gtlbe) {
-			/* The guest didn't have a mapping for it. */
-			kvmppc_queue_exception(vcpu, exit_nr);
-			vcpu->stat.itlb_real_miss_exits++;
-			break;
-		}
-
-		vcpu->stat.itlb_virt_miss_exits++;
-
-		gfn = tlb_xlate(gtlbe, eaddr) >> PAGE_SHIFT;
-
-		if (kvm_is_visible_gfn(vcpu->kvm, gfn)) {
-			/* The guest TLB had a mapping, but the shadow TLB
-			 * didn't. This could be because:
-			 * a) the entry is mapping the host kernel, or
-			 * b) the guest used a large mapping which we're faking
-			 * Either way, we need to satisfy the fault without
-			 * invoking the guest. */
-			kvmppc_mmu_map(vcpu, eaddr, gfn, gtlbe->tid,
-			               gtlbe->word2);
-		} else {
-			/* Guest mapped and leaped at non-RAM! */
-			kvmppc_queue_exception(vcpu,
-			                       BOOKE_INTERRUPT_MACHINE_CHECK);
-		}
-
-		break;
-	}
-
-	case BOOKE_INTERRUPT_DEBUG: {
-		u32 dbsr;
-
-		vcpu->arch.pc = mfspr(SPRN_CSRR0);
-
-		/* clear IAC events in DBSR register */
-		dbsr = mfspr(SPRN_DBSR);
-		dbsr &= DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4;
-		mtspr(SPRN_DBSR, dbsr);
-
-		run->exit_reason = KVM_EXIT_DEBUG;
-		r = RESUME_HOST;
-		break;
-	}
-
-	default:
-		printk(KERN_EMERG "exit_nr %d\n", exit_nr);
-		BUG();
-	}
-
-	local_irq_disable();
-
-	kvmppc_check_and_deliver_interrupts(vcpu);
-
-	/* Do some exit accounting. */
-	vcpu->stat.sum_exits++;
-	if (!(r & RESUME_HOST)) {
-		/* To avoid clobbering exit_reason, only check for signals if
-		 * we aren't already exiting to userspace for some other
-		 * reason. */
-		if (signal_pending(current)) {
-			run->exit_reason = KVM_EXIT_INTR;
-			r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV);
-
-			vcpu->stat.signal_exits++;
-		} else {
-			vcpu->stat.light_exits++;
-		}
-	} else {
-		switch (run->exit_reason) {
-		case KVM_EXIT_MMIO:
-			vcpu->stat.mmio_exits++;
-			break;
-		case KVM_EXIT_DCR:
-			vcpu->stat.dcr_exits++;
-			break;
-		case KVM_EXIT_INTR:
-			vcpu->stat.signal_exits++;
-			break;
-		}
-	}
-
-	return r;
-}
-
-/* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */
-int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
-{
-	struct tlbe *tlbe = &vcpu->arch.guest_tlb[0];
-
-	tlbe->tid = 0;
-	tlbe->word0 = PPC44x_TLB_16M | PPC44x_TLB_VALID;
-	tlbe->word1 = 0;
-	tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR;
-
-	tlbe++;
-	tlbe->tid = 0;
-	tlbe->word0 = 0xef600000 | PPC44x_TLB_4K | PPC44x_TLB_VALID;
-	tlbe->word1 = 0xef600000;
-	tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR
-	              | PPC44x_TLB_I | PPC44x_TLB_G;
-
-	vcpu->arch.pc = 0;
-	vcpu->arch.msr = 0;
-	vcpu->arch.gpr[1] = (16<<20) - 8; /* -8 for the callee-save LR slot */
-
-	vcpu->arch.shadow_pid = 1;
-
-	/* Eye-catching number so we know if the guest takes an interrupt
-	 * before it's programmed its own IVPR. */
-	vcpu->arch.ivpr = 0x55550000;
-
-	/* Since the guest can directly access the timebase, it must know the
-	 * real timebase frequency. Accordingly, it must see the state of
-	 * CCR1[TCS]. */
-	vcpu->arch.ccr1 = mfspr(SPRN_CCR1);
-
-	return 0;
-}
-
-int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
-{
-	int i;
-
-	regs->pc = vcpu->arch.pc;
-	regs->cr = vcpu->arch.cr;
-	regs->ctr = vcpu->arch.ctr;
-	regs->lr = vcpu->arch.lr;
-	regs->xer = vcpu->arch.xer;
-	regs->msr = vcpu->arch.msr;
-	regs->srr0 = vcpu->arch.srr0;
-	regs->srr1 = vcpu->arch.srr1;
-	regs->pid = vcpu->arch.pid;
-	regs->sprg0 = vcpu->arch.sprg0;
-	regs->sprg1 = vcpu->arch.sprg1;
-	regs->sprg2 = vcpu->arch.sprg2;
-	regs->sprg3 = vcpu->arch.sprg3;
-	regs->sprg5 = vcpu->arch.sprg4;
-	regs->sprg6 = vcpu->arch.sprg5;
-	regs->sprg7 = vcpu->arch.sprg6;
-
-	for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
-		regs->gpr[i] = vcpu->arch.gpr[i];
-
-	return 0;
-}
-
-int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
-{
-	int i;
-
-	vcpu->arch.pc = regs->pc;
-	vcpu->arch.cr = regs->cr;
-	vcpu->arch.ctr = regs->ctr;
-	vcpu->arch.lr = regs->lr;
-	vcpu->arch.xer = regs->xer;
-	vcpu->arch.msr = regs->msr;
-	vcpu->arch.srr0 = regs->srr0;
-	vcpu->arch.srr1 = regs->srr1;
-	vcpu->arch.sprg0 = regs->sprg0;
-	vcpu->arch.sprg1 = regs->sprg1;
-	vcpu->arch.sprg2 = regs->sprg2;
-	vcpu->arch.sprg3 = regs->sprg3;
-	vcpu->arch.sprg5 = regs->sprg4;
-	vcpu->arch.sprg6 = regs->sprg5;
-	vcpu->arch.sprg7 = regs->sprg6;
-
-	for (i = 0; i < ARRAY_SIZE(vcpu->arch.gpr); i++)
-		vcpu->arch.gpr[i] = regs->gpr[i];
-
-	return 0;
-}
-
-int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
-                                  struct kvm_sregs *sregs)
-{
-	return -ENOTSUPP;
-}
-
-int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
-                                  struct kvm_sregs *sregs)
-{
-	return -ENOTSUPP;
-}
-
-int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
-{
-	return -ENOTSUPP;
-}
-
-int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
-{
-	return -ENOTSUPP;
-}
-
-/* 'linear_address' is actually an encoding of AS|PID|EADDR . */
-int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
-                                  struct kvm_translation *tr)
-{
-	struct tlbe *gtlbe;
-	int index;
-	gva_t eaddr;
-	u8 pid;
-	u8 as;
-
-	eaddr = tr->linear_address;
-	pid = (tr->linear_address >> 32) & 0xff;
-	as = (tr->linear_address >> 40) & 0x1;
-
-	index = kvmppc_44x_tlb_index(vcpu, eaddr, pid, as);
-	if (index == -1) {
-		tr->valid = 0;
-		return 0;
-	}
-
-	gtlbe = &vcpu->arch.guest_tlb[index];
-
-	tr->physical_address = tlb_xlate(gtlbe, eaddr);
-	/* XXX what does "writeable" and "usermode" even mean? */
-	tr->valid = 1;
-
-	return 0;
-}
diff --git a/arch/powerpc/kvm/booke_host.c b/arch/powerpc/kvm/booke_host.c
deleted file mode 100644
index b480341..0000000
--- a/arch/powerpc/kvm/booke_host.c
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
- *
- * Copyright IBM Corp. 2008
- *
- * Authors: Hollis Blanchard <hollisb@us.ibm.com>
- */
-
-#include <linux/errno.h>
-#include <linux/kvm_host.h>
-#include <linux/module.h>
-#include <asm/cacheflush.h>
-#include <asm/kvm_ppc.h>
-
-unsigned long kvmppc_booke_handlers;
-
-static int kvmppc_booke_init(void)
-{
-	unsigned long ivor[16];
-	unsigned long max_ivor = 0;
-	int i;
-
-	/* We install our own exception handlers by hijacking IVPR. IVPR must
-	 * be 16-bit aligned, so we need a 64KB allocation. */
-	kvmppc_booke_handlers = __get_free_pages(GFP_KERNEL | __GFP_ZERO,
-	                                         VCPU_SIZE_ORDER);
-	if (!kvmppc_booke_handlers)
-		return -ENOMEM;
-
-	/* XXX make sure our handlers are smaller than Linux's */
-
-	/* Copy our interrupt handlers to match host IVORs. That way we don't
-	 * have to swap the IVORs on every guest/host transition. */
-	ivor[0] = mfspr(SPRN_IVOR0);
-	ivor[1] = mfspr(SPRN_IVOR1);
-	ivor[2] = mfspr(SPRN_IVOR2);
-	ivor[3] = mfspr(SPRN_IVOR3);
-	ivor[4] = mfspr(SPRN_IVOR4);
-	ivor[5] = mfspr(SPRN_IVOR5);
-	ivor[6] = mfspr(SPRN_IVOR6);
-	ivor[7] = mfspr(SPRN_IVOR7);
-	ivor[8] = mfspr(SPRN_IVOR8);
-	ivor[9] = mfspr(SPRN_IVOR9);
-	ivor[10] = mfspr(SPRN_IVOR10);
-	ivor[11] = mfspr(SPRN_IVOR11);
-	ivor[12] = mfspr(SPRN_IVOR12);
-	ivor[13] = mfspr(SPRN_IVOR13);
-	ivor[14] = mfspr(SPRN_IVOR14);
-	ivor[15] = mfspr(SPRN_IVOR15);
-
-	for (i = 0; i < 16; i++) {
-		if (ivor[i] > max_ivor)
-			max_ivor = ivor[i];
-
-		memcpy((void *)kvmppc_booke_handlers + ivor[i],
-		       kvmppc_handlers_start + i * kvmppc_handler_len,
-		       kvmppc_handler_len);
-	}
-	flush_icache_range(kvmppc_booke_handlers,
-	                   kvmppc_booke_handlers + max_ivor + kvmppc_handler_len);
-
-	return kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE);
-}
-
-static void __exit kvmppc_booke_exit(void)
-{
-	free_pages(kvmppc_booke_handlers, VCPU_SIZE_ORDER);
-	kvm_exit();
-}
-
-module_init(kvmppc_booke_init)
-module_exit(kvmppc_booke_exit)
diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S
index 95e165b..084ebcd 100644
--- a/arch/powerpc/kvm/booke_interrupts.S
+++ b/arch/powerpc/kvm/booke_interrupts.S
@@ -107,6 +107,18 @@
 	li	r6, 1
 	slw	r6, r6, r5
 
+#ifdef CONFIG_KVM_EXIT_TIMING
+	/* save exit time */
+1:
+	mfspr	r7, SPRN_TBRU
+	mfspr	r8, SPRN_TBRL
+	mfspr	r9, SPRN_TBRU
+	cmpw	r9, r7
+	bne	1b
+	stw	r8, VCPU_TIMING_EXIT_TBL(r4)
+	stw	r9, VCPU_TIMING_EXIT_TBU(r4)
+#endif
+
 	/* Save the faulting instruction and all GPRs for emulation. */
 	andi.	r7, r6, NEED_INST_MASK
 	beq	..skip_inst_copy
@@ -335,54 +347,6 @@
 	lwz	r3, VCPU_SHADOW_PID(r4)
 	mtspr	SPRN_PID, r3
 
-	/* Prevent all asynchronous TLB updates. */
-	mfmsr	r5
-	lis	r6, (MSR_EE|MSR_CE|MSR_ME|MSR_DE)@h
-	ori	r6, r6, (MSR_EE|MSR_CE|MSR_ME|MSR_DE)@l
-	andc	r6, r5, r6
-	mtmsr	r6
-
-	/* Load the guest mappings, leaving the host's "pinned" kernel mappings
-	 * in place. */
-	mfspr	r10, SPRN_MMUCR			/* Save host MMUCR. */
-	li	r5, PPC44x_TLB_SIZE
-	lis	r5, tlb_44x_hwater@ha
-	lwz	r5, tlb_44x_hwater@l(r5)
-	mtctr	r5
-	addi	r9, r4, VCPU_SHADOW_TLB
-	addi	r5, r4, VCPU_SHADOW_MOD
-	li	r3, 0
-1:
-	lbzx	r7, r3, r5
-	cmpwi	r7, 0
-	beq	3f
-
-	/* Load guest entry. */
-	mulli	r11, r3, TLBE_BYTES
-	add	r11, r11, r9
-	lwz	r7, 0(r11)
-	mtspr	SPRN_MMUCR, r7
-	lwz	r7, 4(r11)
-	tlbwe	r7, r3, PPC44x_TLB_PAGEID
-	lwz	r7, 8(r11)
-	tlbwe	r7, r3, PPC44x_TLB_XLAT
-	lwz	r7, 12(r11)
-	tlbwe	r7, r3, PPC44x_TLB_ATTRIB
-3:
-	addi	r3, r3, 1                       /* Increment index. */
-	bdnz	1b
-
-	mtspr	SPRN_MMUCR, r10			/* Restore host MMUCR. */
-
-	/* Clear bitmap of modified TLB entries */
-	li	r5, PPC44x_TLB_SIZE>>2
-	mtctr	r5
-	addi	r5, r4, VCPU_SHADOW_MOD - 4
-	li	r6, 0
-1:
-	stwu	r6, 4(r5)
-	bdnz	1b
-
 	iccci	0, 0 /* XXX hack */
 
 	/* Load some guest volatiles. */
@@ -423,6 +387,18 @@
 	lwz	r3, VCPU_SPRG7(r4)
 	mtspr	SPRN_SPRG7, r3
 
+#ifdef CONFIG_KVM_EXIT_TIMING
+	/* save enter time */
+1:
+	mfspr	r6, SPRN_TBRU
+	mfspr	r7, SPRN_TBRL
+	mfspr	r8, SPRN_TBRU
+	cmpw	r8, r6
+	bne	1b
+	stw	r7, VCPU_TIMING_LAST_ENTER_TBL(r4)
+	stw	r8, VCPU_TIMING_LAST_ENTER_TBU(r4)
+#endif
+
 	/* Finish loading guest volatiles and jump to guest. */
 	lwz	r3, VCPU_CTR(r4)
 	mtctr	r3
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index 0fce4fb..d1d38da 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -23,161 +23,14 @@
 #include <linux/string.h>
 #include <linux/kvm_host.h>
 
-#include <asm/dcr.h>
-#include <asm/dcr-regs.h>
+#include <asm/reg.h>
 #include <asm/time.h>
 #include <asm/byteorder.h>
 #include <asm/kvm_ppc.h>
+#include <asm/disassemble.h>
+#include "timing.h"
 
-#include "44x_tlb.h"
-
-/* Instruction decoding */
-static inline unsigned int get_op(u32 inst)
-{
-	return inst >> 26;
-}
-
-static inline unsigned int get_xop(u32 inst)
-{
-	return (inst >> 1) & 0x3ff;
-}
-
-static inline unsigned int get_sprn(u32 inst)
-{
-	return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0);
-}
-
-static inline unsigned int get_dcrn(u32 inst)
-{
-	return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0);
-}
-
-static inline unsigned int get_rt(u32 inst)
-{
-	return (inst >> 21) & 0x1f;
-}
-
-static inline unsigned int get_rs(u32 inst)
-{
-	return (inst >> 21) & 0x1f;
-}
-
-static inline unsigned int get_ra(u32 inst)
-{
-	return (inst >> 16) & 0x1f;
-}
-
-static inline unsigned int get_rb(u32 inst)
-{
-	return (inst >> 11) & 0x1f;
-}
-
-static inline unsigned int get_rc(u32 inst)
-{
-	return inst & 0x1;
-}
-
-static inline unsigned int get_ws(u32 inst)
-{
-	return (inst >> 11) & 0x1f;
-}
-
-static inline unsigned int get_d(u32 inst)
-{
-	return inst & 0xffff;
-}
-
-static int tlbe_is_host_safe(const struct kvm_vcpu *vcpu,
-                             const struct tlbe *tlbe)
-{
-	gpa_t gpa;
-
-	if (!get_tlb_v(tlbe))
-		return 0;
-
-	/* Does it match current guest AS? */
-	/* XXX what about IS != DS? */
-	if (get_tlb_ts(tlbe) != !!(vcpu->arch.msr & MSR_IS))
-		return 0;
-
-	gpa = get_tlb_raddr(tlbe);
-	if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT))
-		/* Mapping is not for RAM. */
-		return 0;
-
-	return 1;
-}
-
-static int kvmppc_emul_tlbwe(struct kvm_vcpu *vcpu, u32 inst)
-{
-	u64 eaddr;
-	u64 raddr;
-	u64 asid;
-	u32 flags;
-	struct tlbe *tlbe;
-	unsigned int ra;
-	unsigned int rs;
-	unsigned int ws;
-	unsigned int index;
-
-	ra = get_ra(inst);
-	rs = get_rs(inst);
-	ws = get_ws(inst);
-
-	index = vcpu->arch.gpr[ra];
-	if (index > PPC44x_TLB_SIZE) {
-		printk("%s: index %d\n", __func__, index);
-		kvmppc_dump_vcpu(vcpu);
-		return EMULATE_FAIL;
-	}
-
-	tlbe = &vcpu->arch.guest_tlb[index];
-
-	/* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */
-	if (tlbe->word0 & PPC44x_TLB_VALID) {
-		eaddr = get_tlb_eaddr(tlbe);
-		asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid;
-		kvmppc_mmu_invalidate(vcpu, eaddr, get_tlb_end(tlbe), asid);
-	}
-
-	switch (ws) {
-	case PPC44x_TLB_PAGEID:
-		tlbe->tid = vcpu->arch.mmucr & 0xff;
-		tlbe->word0 = vcpu->arch.gpr[rs];
-		break;
-
-	case PPC44x_TLB_XLAT:
-		tlbe->word1 = vcpu->arch.gpr[rs];
-		break;
-
-	case PPC44x_TLB_ATTRIB:
-		tlbe->word2 = vcpu->arch.gpr[rs];
-		break;
-
-	default:
-		return EMULATE_FAIL;
-	}
-
-	if (tlbe_is_host_safe(vcpu, tlbe)) {
-		eaddr = get_tlb_eaddr(tlbe);
-		raddr = get_tlb_raddr(tlbe);
-		asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid;
-		flags = tlbe->word2 & 0xffff;
-
-		/* Create a 4KB mapping on the host. If the guest wanted a
-		 * large page, only the first 4KB is mapped here and the rest
-		 * are mapped on the fly. */
-		kvmppc_mmu_map(vcpu, eaddr, raddr >> PAGE_SHIFT, asid, flags);
-	}
-
-	KVMTRACE_5D(GTLB_WRITE, vcpu, index,
-			tlbe->tid, tlbe->word0, tlbe->word1, tlbe->word2,
-			handler);
-
-	return EMULATE_DONE;
-}
-
-static void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
+void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
 {
 	if (vcpu->arch.tcr & TCR_DIE) {
 		/* The decrementer ticks at the same rate as the timebase, so
@@ -193,12 +46,6 @@
 	}
 }
 
-static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu)
-{
-	vcpu->arch.pc = vcpu->arch.srr0;
-	kvmppc_set_msr(vcpu, vcpu->arch.srr1);
-}
-
 /* XXX to do:
  * lhax
  * lhaux
@@ -213,40 +60,30 @@
  *
  * XXX is_bigendian should depend on MMU mapping or MSR[LE]
  */
+/* XXX Should probably auto-generate instruction decoding for a particular core
+ * from opcode tables in the future. */
 int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 {
 	u32 inst = vcpu->arch.last_inst;
 	u32 ea;
 	int ra;
 	int rb;
-	int rc;
 	int rs;
 	int rt;
 	int sprn;
-	int dcrn;
 	enum emulation_result emulated = EMULATE_DONE;
 	int advance = 1;
 
+	/* this default type might be overwritten by subcategories */
+	kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS);
+
 	switch (get_op(inst)) {
-	case 3:                                                 /* trap */
-		printk("trap!\n");
-		kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_PROGRAM);
+	case 3:                                             /* trap */
+		vcpu->arch.esr |= ESR_PTR;
+		kvmppc_core_queue_program(vcpu);
 		advance = 0;
 		break;
 
-	case 19:
-		switch (get_xop(inst)) {
-		case 50:                                        /* rfi */
-			kvmppc_emul_rfi(vcpu);
-			advance = 0;
-			break;
-
-		default:
-			emulated = EMULATE_FAIL;
-			break;
-		}
-		break;
-
 	case 31:
 		switch (get_xop(inst)) {
 
@@ -255,27 +92,11 @@
 			emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
 			break;
 
-		case 83:                                        /* mfmsr */
-			rt = get_rt(inst);
-			vcpu->arch.gpr[rt] = vcpu->arch.msr;
-			break;
-
 		case 87:                                        /* lbzx */
 			rt = get_rt(inst);
 			emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
 			break;
 
-		case 131:                                       /* wrtee */
-			rs = get_rs(inst);
-			vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
-			                 | (vcpu->arch.gpr[rs] & MSR_EE);
-			break;
-
-		case 146:                                       /* mtmsr */
-			rs = get_rs(inst);
-			kvmppc_set_msr(vcpu, vcpu->arch.gpr[rs]);
-			break;
-
 		case 151:                                       /* stwx */
 			rs = get_rs(inst);
 			emulated = kvmppc_handle_store(run, vcpu,
@@ -283,11 +104,6 @@
 			                               4, 1);
 			break;
 
-		case 163:                                       /* wrteei */
-			vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
-			                 | (inst & MSR_EE);
-			break;
-
 		case 215:                                       /* stbx */
 			rs = get_rs(inst);
 			emulated = kvmppc_handle_store(run, vcpu,
@@ -328,42 +144,6 @@
 			vcpu->arch.gpr[ra] = ea;
 			break;
 
-		case 323:                                       /* mfdcr */
-			dcrn = get_dcrn(inst);
-			rt = get_rt(inst);
-
-			/* The guest may access CPR0 registers to determine the timebase
-			 * frequency, and it must know the real host frequency because it
-			 * can directly access the timebase registers.
-			 *
-			 * It would be possible to emulate those accesses in userspace,
-			 * but userspace can really only figure out the end frequency.
-			 * We could decompose that into the factors that compute it, but
-			 * that's tricky math, and it's easier to just report the real
-			 * CPR0 values.
-			 */
-			switch (dcrn) {
-			case DCRN_CPR0_CONFIG_ADDR:
-				vcpu->arch.gpr[rt] = vcpu->arch.cpr0_cfgaddr;
-				break;
-			case DCRN_CPR0_CONFIG_DATA:
-				local_irq_disable();
-				mtdcr(DCRN_CPR0_CONFIG_ADDR,
-				      vcpu->arch.cpr0_cfgaddr);
-				vcpu->arch.gpr[rt] = mfdcr(DCRN_CPR0_CONFIG_DATA);
-				local_irq_enable();
-				break;
-			default:
-				run->dcr.dcrn = dcrn;
-				run->dcr.data =  0;
-				run->dcr.is_write = 0;
-				vcpu->arch.io_gpr = rt;
-				vcpu->arch.dcr_needed = 1;
-				emulated = EMULATE_DO_DCR;
-			}
-
-			break;
-
 		case 339:                                       /* mfspr */
 			sprn = get_sprn(inst);
 			rt = get_rt(inst);
@@ -373,26 +153,8 @@
 				vcpu->arch.gpr[rt] = vcpu->arch.srr0; break;
 			case SPRN_SRR1:
 				vcpu->arch.gpr[rt] = vcpu->arch.srr1; break;
-			case SPRN_MMUCR:
-				vcpu->arch.gpr[rt] = vcpu->arch.mmucr; break;
-			case SPRN_PID:
-				vcpu->arch.gpr[rt] = vcpu->arch.pid; break;
-			case SPRN_IVPR:
-				vcpu->arch.gpr[rt] = vcpu->arch.ivpr; break;
-			case SPRN_CCR0:
-				vcpu->arch.gpr[rt] = vcpu->arch.ccr0; break;
-			case SPRN_CCR1:
-				vcpu->arch.gpr[rt] = vcpu->arch.ccr1; break;
 			case SPRN_PVR:
 				vcpu->arch.gpr[rt] = vcpu->arch.pvr; break;
-			case SPRN_DEAR:
-				vcpu->arch.gpr[rt] = vcpu->arch.dear; break;
-			case SPRN_ESR:
-				vcpu->arch.gpr[rt] = vcpu->arch.esr; break;
-			case SPRN_DBCR0:
-				vcpu->arch.gpr[rt] = vcpu->arch.dbcr0; break;
-			case SPRN_DBCR1:
-				vcpu->arch.gpr[rt] = vcpu->arch.dbcr1; break;
 
 			/* Note: mftb and TBRL/TBWL are user-accessible, so
 			 * the guest can always access the real TB anyways.
@@ -413,42 +175,12 @@
 			/* Note: SPRG4-7 are user-readable, so we don't get
 			 * a trap. */
 
-			case SPRN_IVOR0:
-				vcpu->arch.gpr[rt] = vcpu->arch.ivor[0]; break;
-			case SPRN_IVOR1:
-				vcpu->arch.gpr[rt] = vcpu->arch.ivor[1]; break;
-			case SPRN_IVOR2:
-				vcpu->arch.gpr[rt] = vcpu->arch.ivor[2]; break;
-			case SPRN_IVOR3:
-				vcpu->arch.gpr[rt] = vcpu->arch.ivor[3]; break;
-			case SPRN_IVOR4:
-				vcpu->arch.gpr[rt] = vcpu->arch.ivor[4]; break;
-			case SPRN_IVOR5:
-				vcpu->arch.gpr[rt] = vcpu->arch.ivor[5]; break;
-			case SPRN_IVOR6:
-				vcpu->arch.gpr[rt] = vcpu->arch.ivor[6]; break;
-			case SPRN_IVOR7:
-				vcpu->arch.gpr[rt] = vcpu->arch.ivor[7]; break;
-			case SPRN_IVOR8:
-				vcpu->arch.gpr[rt] = vcpu->arch.ivor[8]; break;
-			case SPRN_IVOR9:
-				vcpu->arch.gpr[rt] = vcpu->arch.ivor[9]; break;
-			case SPRN_IVOR10:
-				vcpu->arch.gpr[rt] = vcpu->arch.ivor[10]; break;
-			case SPRN_IVOR11:
-				vcpu->arch.gpr[rt] = vcpu->arch.ivor[11]; break;
-			case SPRN_IVOR12:
-				vcpu->arch.gpr[rt] = vcpu->arch.ivor[12]; break;
-			case SPRN_IVOR13:
-				vcpu->arch.gpr[rt] = vcpu->arch.ivor[13]; break;
-			case SPRN_IVOR14:
-				vcpu->arch.gpr[rt] = vcpu->arch.ivor[14]; break;
-			case SPRN_IVOR15:
-				vcpu->arch.gpr[rt] = vcpu->arch.ivor[15]; break;
-
 			default:
-				printk("mfspr: unknown spr %x\n", sprn);
-				vcpu->arch.gpr[rt] = 0;
+				emulated = kvmppc_core_emulate_mfspr(vcpu, sprn, rt);
+				if (emulated == EMULATE_FAIL) {
+					printk("mfspr: unknown spr %x\n", sprn);
+					vcpu->arch.gpr[rt] = 0;
+				}
 				break;
 			}
 			break;
@@ -478,25 +210,6 @@
 			vcpu->arch.gpr[ra] = ea;
 			break;
 
-		case 451:                                       /* mtdcr */
-			dcrn = get_dcrn(inst);
-			rs = get_rs(inst);
-
-			/* emulate some access in kernel */
-			switch (dcrn) {
-			case DCRN_CPR0_CONFIG_ADDR:
-				vcpu->arch.cpr0_cfgaddr = vcpu->arch.gpr[rs];
-				break;
-			default:
-				run->dcr.dcrn = dcrn;
-				run->dcr.data = vcpu->arch.gpr[rs];
-				run->dcr.is_write = 1;
-				vcpu->arch.dcr_needed = 1;
-				emulated = EMULATE_DO_DCR;
-			}
-
-			break;
-
 		case 467:                                       /* mtspr */
 			sprn = get_sprn(inst);
 			rs = get_rs(inst);
@@ -505,22 +218,6 @@
 				vcpu->arch.srr0 = vcpu->arch.gpr[rs]; break;
 			case SPRN_SRR1:
 				vcpu->arch.srr1 = vcpu->arch.gpr[rs]; break;
-			case SPRN_MMUCR:
-				vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break;
-			case SPRN_PID:
-				kvmppc_set_pid(vcpu, vcpu->arch.gpr[rs]); break;
-			case SPRN_CCR0:
-				vcpu->arch.ccr0 = vcpu->arch.gpr[rs]; break;
-			case SPRN_CCR1:
-				vcpu->arch.ccr1 = vcpu->arch.gpr[rs]; break;
-			case SPRN_DEAR:
-				vcpu->arch.dear = vcpu->arch.gpr[rs]; break;
-			case SPRN_ESR:
-				vcpu->arch.esr = vcpu->arch.gpr[rs]; break;
-			case SPRN_DBCR0:
-				vcpu->arch.dbcr0 = vcpu->arch.gpr[rs]; break;
-			case SPRN_DBCR1:
-				vcpu->arch.dbcr1 = vcpu->arch.gpr[rs]; break;
 
 			/* XXX We need to context-switch the timebase for
 			 * watchdog and FIT. */
@@ -532,14 +229,6 @@
 				kvmppc_emulate_dec(vcpu);
 				break;
 
-			case SPRN_TSR:
-				vcpu->arch.tsr &= ~vcpu->arch.gpr[rs]; break;
-
-			case SPRN_TCR:
-				vcpu->arch.tcr = vcpu->arch.gpr[rs];
-				kvmppc_emulate_dec(vcpu);
-				break;
-
 			case SPRN_SPRG0:
 				vcpu->arch.sprg0 = vcpu->arch.gpr[rs]; break;
 			case SPRN_SPRG1:
@@ -549,56 +238,10 @@
 			case SPRN_SPRG3:
 				vcpu->arch.sprg3 = vcpu->arch.gpr[rs]; break;
 
-			/* Note: SPRG4-7 are user-readable. These values are
-			 * loaded into the real SPRGs when resuming the
-			 * guest. */
-			case SPRN_SPRG4:
-				vcpu->arch.sprg4 = vcpu->arch.gpr[rs]; break;
-			case SPRN_SPRG5:
-				vcpu->arch.sprg5 = vcpu->arch.gpr[rs]; break;
-			case SPRN_SPRG6:
-				vcpu->arch.sprg6 = vcpu->arch.gpr[rs]; break;
-			case SPRN_SPRG7:
-				vcpu->arch.sprg7 = vcpu->arch.gpr[rs]; break;
-
-			case SPRN_IVPR:
-				vcpu->arch.ivpr = vcpu->arch.gpr[rs]; break;
-			case SPRN_IVOR0:
-				vcpu->arch.ivor[0] = vcpu->arch.gpr[rs]; break;
-			case SPRN_IVOR1:
-				vcpu->arch.ivor[1] = vcpu->arch.gpr[rs]; break;
-			case SPRN_IVOR2:
-				vcpu->arch.ivor[2] = vcpu->arch.gpr[rs]; break;
-			case SPRN_IVOR3:
-				vcpu->arch.ivor[3] = vcpu->arch.gpr[rs]; break;
-			case SPRN_IVOR4:
-				vcpu->arch.ivor[4] = vcpu->arch.gpr[rs]; break;
-			case SPRN_IVOR5:
-				vcpu->arch.ivor[5] = vcpu->arch.gpr[rs]; break;
-			case SPRN_IVOR6:
-				vcpu->arch.ivor[6] = vcpu->arch.gpr[rs]; break;
-			case SPRN_IVOR7:
-				vcpu->arch.ivor[7] = vcpu->arch.gpr[rs]; break;
-			case SPRN_IVOR8:
-				vcpu->arch.ivor[8] = vcpu->arch.gpr[rs]; break;
-			case SPRN_IVOR9:
-				vcpu->arch.ivor[9] = vcpu->arch.gpr[rs]; break;
-			case SPRN_IVOR10:
-				vcpu->arch.ivor[10] = vcpu->arch.gpr[rs]; break;
-			case SPRN_IVOR11:
-				vcpu->arch.ivor[11] = vcpu->arch.gpr[rs]; break;
-			case SPRN_IVOR12:
-				vcpu->arch.ivor[12] = vcpu->arch.gpr[rs]; break;
-			case SPRN_IVOR13:
-				vcpu->arch.ivor[13] = vcpu->arch.gpr[rs]; break;
-			case SPRN_IVOR14:
-				vcpu->arch.ivor[14] = vcpu->arch.gpr[rs]; break;
-			case SPRN_IVOR15:
-				vcpu->arch.ivor[15] = vcpu->arch.gpr[rs]; break;
-
 			default:
-				printk("mtspr: unknown spr %x\n", sprn);
-				emulated = EMULATE_FAIL;
+				emulated = kvmppc_core_emulate_mtspr(vcpu, sprn, rs);
+				if (emulated == EMULATE_FAIL)
+					printk("mtspr: unknown spr %x\n", sprn);
 				break;
 			}
 			break;
@@ -629,36 +272,6 @@
 			                               4, 0);
 			break;
 
-		case 978:                                       /* tlbwe */
-			emulated = kvmppc_emul_tlbwe(vcpu, inst);
-			break;
-
-		case 914:       {                               /* tlbsx */
-			int index;
-			unsigned int as = get_mmucr_sts(vcpu);
-			unsigned int pid = get_mmucr_stid(vcpu);
-
-			rt = get_rt(inst);
-			ra = get_ra(inst);
-			rb = get_rb(inst);
-			rc = get_rc(inst);
-
-			ea = vcpu->arch.gpr[rb];
-			if (ra)
-				ea += vcpu->arch.gpr[ra];
-
-			index = kvmppc_44x_tlb_index(vcpu, ea, pid, as);
-			if (rc) {
-				if (index < 0)
-					vcpu->arch.cr &= ~0x20000000;
-				else
-					vcpu->arch.cr |= 0x20000000;
-			}
-			vcpu->arch.gpr[rt] = index;
-
-			}
-			break;
-
 		case 790:                                       /* lhbrx */
 			rt = get_rt(inst);
 			emulated = kvmppc_handle_load(run, vcpu, rt, 2, 0);
@@ -674,14 +287,9 @@
 			                               2, 0);
 			break;
 
-		case 966:                                       /* iccci */
-			break;
-
 		default:
-			printk("unknown: op %d xop %d\n", get_op(inst),
-				get_xop(inst));
+			/* Attempt core-specific emulation below. */
 			emulated = EMULATE_FAIL;
-			break;
 		}
 		break;
 
@@ -764,12 +372,19 @@
 		break;
 
 	default:
-		printk("unknown op %d\n", get_op(inst));
 		emulated = EMULATE_FAIL;
-		break;
 	}
 
-	KVMTRACE_3D(PPC_INSTR, vcpu, inst, vcpu->arch.pc, emulated, entryexit);
+	if (emulated == EMULATE_FAIL) {
+		emulated = kvmppc_core_emulate_op(run, vcpu, inst, &advance);
+		if (emulated == EMULATE_FAIL) {
+			advance = 0;
+			printk(KERN_ERR "Couldn't emulate instruction 0x%08x "
+			       "(op %d xop %d)\n", inst, get_op(inst), get_xop(inst));
+		}
+	}
+
+	KVMTRACE_3D(PPC_INSTR, vcpu, inst, (int)vcpu->arch.pc, emulated, entryexit);
 
 	if (advance)
 		vcpu->arch.pc += 4; /* Advance past emulated instruction. */
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 8bef0ef..2822c8c 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -28,9 +28,9 @@
 #include <asm/uaccess.h>
 #include <asm/kvm_ppc.h>
 #include <asm/tlbflush.h>
+#include "timing.h"
 #include "../mm/mmu_decl.h"
 
-
 gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
 {
 	return gfn;
@@ -99,14 +99,7 @@
 
 void kvm_arch_check_processor_compat(void *rtn)
 {
-	int r;
-
-	if (strcmp(cur_cpu_spec->platform, "ppc440") == 0)
-		r = 0;
-	else
-		r = -ENOTSUPP;
-
-	*(int *)rtn = r;
+	*(int *)rtn = kvmppc_core_check_processor_compat();
 }
 
 struct kvm *kvm_arch_create_vm(void)
@@ -144,9 +137,6 @@
 	int r;
 
 	switch (ext) {
-	case KVM_CAP_USER_MEMORY:
-		r = 1;
-		break;
 	case KVM_CAP_COALESCED_MMIO:
 		r = KVM_COALESCED_MMIO_PAGE_OFFSET;
 		break;
@@ -179,30 +169,15 @@
 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
 {
 	struct kvm_vcpu *vcpu;
-	int err;
-
-	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
-	if (!vcpu) {
-		err = -ENOMEM;
-		goto out;
-	}
-
-	err = kvm_vcpu_init(vcpu, kvm, id);
-	if (err)
-		goto free_vcpu;
-
+	vcpu = kvmppc_core_vcpu_create(kvm, id);
+	kvmppc_create_vcpu_debugfs(vcpu, id);
 	return vcpu;
-
-free_vcpu:
-	kmem_cache_free(kvm_vcpu_cache, vcpu);
-out:
-	return ERR_PTR(err);
 }
 
 void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
 {
-	kvm_vcpu_uninit(vcpu);
-	kmem_cache_free(kvm_vcpu_cache, vcpu);
+	kvmppc_remove_vcpu_debugfs(vcpu);
+	kvmppc_core_vcpu_free(vcpu);
 }
 
 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
@@ -212,16 +187,14 @@
 
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 {
-	unsigned int priority = exception_priority[BOOKE_INTERRUPT_DECREMENTER];
-
-	return test_bit(priority, &vcpu->arch.pending_exceptions);
+	return kvmppc_core_pending_dec(vcpu);
 }
 
 static void kvmppc_decrementer_func(unsigned long data)
 {
 	struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
 
-	kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_DECREMENTER);
+	kvmppc_core_queue_dec(vcpu);
 
 	if (waitqueue_active(&vcpu->wq)) {
 		wake_up_interruptible(&vcpu->wq);
@@ -242,96 +215,25 @@
 	kvmppc_core_destroy_mmu(vcpu);
 }
 
-/* Note: clearing MSR[DE] just means that the debug interrupt will not be
- * delivered *immediately*. Instead, it simply sets the appropriate DBSR bits.
- * If those DBSR bits are still set when MSR[DE] is re-enabled, the interrupt
- * will be delivered as an "imprecise debug event" (which is indicated by
- * DBSR[IDE].
- */
-static void kvmppc_disable_debug_interrupts(void)
-{
-	mtmsr(mfmsr() & ~MSR_DE);
-}
-
-static void kvmppc_restore_host_debug_state(struct kvm_vcpu *vcpu)
-{
-	kvmppc_disable_debug_interrupts();
-
-	mtspr(SPRN_IAC1, vcpu->arch.host_iac[0]);
-	mtspr(SPRN_IAC2, vcpu->arch.host_iac[1]);
-	mtspr(SPRN_IAC3, vcpu->arch.host_iac[2]);
-	mtspr(SPRN_IAC4, vcpu->arch.host_iac[3]);
-	mtspr(SPRN_DBCR1, vcpu->arch.host_dbcr1);
-	mtspr(SPRN_DBCR2, vcpu->arch.host_dbcr2);
-	mtspr(SPRN_DBCR0, vcpu->arch.host_dbcr0);
-	mtmsr(vcpu->arch.host_msr);
-}
-
-static void kvmppc_load_guest_debug_registers(struct kvm_vcpu *vcpu)
-{
-	struct kvm_guest_debug *dbg = &vcpu->guest_debug;
-	u32 dbcr0 = 0;
-
-	vcpu->arch.host_msr = mfmsr();
-	kvmppc_disable_debug_interrupts();
-
-	/* Save host debug register state. */
-	vcpu->arch.host_iac[0] = mfspr(SPRN_IAC1);
-	vcpu->arch.host_iac[1] = mfspr(SPRN_IAC2);
-	vcpu->arch.host_iac[2] = mfspr(SPRN_IAC3);
-	vcpu->arch.host_iac[3] = mfspr(SPRN_IAC4);
-	vcpu->arch.host_dbcr0 = mfspr(SPRN_DBCR0);
-	vcpu->arch.host_dbcr1 = mfspr(SPRN_DBCR1);
-	vcpu->arch.host_dbcr2 = mfspr(SPRN_DBCR2);
-
-	/* set registers up for guest */
-
-	if (dbg->bp[0]) {
-		mtspr(SPRN_IAC1, dbg->bp[0]);
-		dbcr0 |= DBCR0_IAC1 | DBCR0_IDM;
-	}
-	if (dbg->bp[1]) {
-		mtspr(SPRN_IAC2, dbg->bp[1]);
-		dbcr0 |= DBCR0_IAC2 | DBCR0_IDM;
-	}
-	if (dbg->bp[2]) {
-		mtspr(SPRN_IAC3, dbg->bp[2]);
-		dbcr0 |= DBCR0_IAC3 | DBCR0_IDM;
-	}
-	if (dbg->bp[3]) {
-		mtspr(SPRN_IAC4, dbg->bp[3]);
-		dbcr0 |= DBCR0_IAC4 | DBCR0_IDM;
-	}
-
-	mtspr(SPRN_DBCR0, dbcr0);
-	mtspr(SPRN_DBCR1, 0);
-	mtspr(SPRN_DBCR2, 0);
-}
-
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
-	int i;
-
 	if (vcpu->guest_debug.enabled)
-		kvmppc_load_guest_debug_registers(vcpu);
+		kvmppc_core_load_guest_debugstate(vcpu);
 
-	/* Mark every guest entry in the shadow TLB entry modified, so that they
-	 * will all be reloaded on the next vcpu run (instead of being
-	 * demand-faulted). */
-	for (i = 0; i <= tlb_44x_hwater; i++)
-		kvmppc_tlbe_set_modified(vcpu, i);
+	kvmppc_core_vcpu_load(vcpu, cpu);
 }
 
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 {
 	if (vcpu->guest_debug.enabled)
-		kvmppc_restore_host_debug_state(vcpu);
+		kvmppc_core_load_host_debugstate(vcpu);
 
 	/* Don't leave guest TLB entries resident when being de-scheduled. */
 	/* XXX It would be nice to differentiate between heavyweight exit and
 	 * sched_out here, since we could avoid the TLB flush for heavyweight
 	 * exits. */
 	_tlbil_all();
+	kvmppc_core_vcpu_put(vcpu);
 }
 
 int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
@@ -355,14 +257,14 @@
 static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu,
                                      struct kvm_run *run)
 {
-	u32 *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr];
+	ulong *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr];
 	*gpr = run->dcr.data;
 }
 
 static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
                                       struct kvm_run *run)
 {
-	u32 *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr];
+	ulong *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr];
 
 	if (run->mmio.len > sizeof(*gpr)) {
 		printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len);
@@ -460,7 +362,7 @@
 		vcpu->arch.dcr_needed = 0;
 	}
 
-	kvmppc_check_and_deliver_interrupts(vcpu);
+	kvmppc_core_deliver_interrupts(vcpu);
 
 	local_irq_disable();
 	kvm_guest_enter();
@@ -478,7 +380,7 @@
 
 int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
 {
-	kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_EXTERNAL);
+	kvmppc_core_queue_external(vcpu, irq);
 
 	if (waitqueue_active(&vcpu->wq)) {
 		wake_up_interruptible(&vcpu->wq);
diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c
new file mode 100644
index 0000000..47ee603
--- /dev/null
+++ b/arch/powerpc/kvm/timing.c
@@ -0,0 +1,239 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ *          Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/fs.h>
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
+#include <linux/uaccess.h>
+
+#include <asm/time.h>
+#include <asm-generic/div64.h>
+
+#include "timing.h"
+
+void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu)
+{
+	int i;
+
+	/* pause guest execution to avoid concurrent updates */
+	local_irq_disable();
+	mutex_lock(&vcpu->mutex);
+
+	vcpu->arch.last_exit_type = 0xDEAD;
+	for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) {
+		vcpu->arch.timing_count_type[i] = 0;
+		vcpu->arch.timing_max_duration[i] = 0;
+		vcpu->arch.timing_min_duration[i] = 0xFFFFFFFF;
+		vcpu->arch.timing_sum_duration[i] = 0;
+		vcpu->arch.timing_sum_quad_duration[i] = 0;
+	}
+	vcpu->arch.timing_last_exit = 0;
+	vcpu->arch.timing_exit.tv64 = 0;
+	vcpu->arch.timing_last_enter.tv64 = 0;
+
+	mutex_unlock(&vcpu->mutex);
+	local_irq_enable();
+}
+
+static void add_exit_timing(struct kvm_vcpu *vcpu, u64 duration, int type)
+{
+	u64 old;
+
+	do_div(duration, tb_ticks_per_usec);
+	if (unlikely(duration > 0xFFFFFFFF)) {
+		printk(KERN_ERR"%s - duration too big -> overflow"
+			" duration %lld type %d exit #%d\n",
+			__func__, duration, type,
+			vcpu->arch.timing_count_type[type]);
+		return;
+	}
+
+	vcpu->arch.timing_count_type[type]++;
+
+	/* sum */
+	old = vcpu->arch.timing_sum_duration[type];
+	vcpu->arch.timing_sum_duration[type] += duration;
+	if (unlikely(old > vcpu->arch.timing_sum_duration[type])) {
+		printk(KERN_ERR"%s - wrap adding sum of durations"
+			" old %lld new %lld type %d exit # of type %d\n",
+			__func__, old, vcpu->arch.timing_sum_duration[type],
+			type, vcpu->arch.timing_count_type[type]);
+	}
+
+	/* square sum */
+	old = vcpu->arch.timing_sum_quad_duration[type];
+	vcpu->arch.timing_sum_quad_duration[type] += (duration*duration);
+	if (unlikely(old > vcpu->arch.timing_sum_quad_duration[type])) {
+		printk(KERN_ERR"%s - wrap adding sum of squared durations"
+			" old %lld new %lld type %d exit # of type %d\n",
+			__func__, old,
+			vcpu->arch.timing_sum_quad_duration[type],
+			type, vcpu->arch.timing_count_type[type]);
+	}
+
+	/* set min/max */
+	if (unlikely(duration < vcpu->arch.timing_min_duration[type]))
+		vcpu->arch.timing_min_duration[type] = duration;
+	if (unlikely(duration > vcpu->arch.timing_max_duration[type]))
+		vcpu->arch.timing_max_duration[type] = duration;
+}
+
+void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu)
+{
+	u64 exit = vcpu->arch.timing_last_exit;
+	u64 enter = vcpu->arch.timing_last_enter.tv64;
+
+	/* save exit time, used next exit when the reenter time is known */
+	vcpu->arch.timing_last_exit = vcpu->arch.timing_exit.tv64;
+
+	if (unlikely(vcpu->arch.last_exit_type == 0xDEAD || exit == 0))
+		return; /* skip incomplete cycle (e.g. after reset) */
+
+	/* update statistics for average and standard deviation */
+	add_exit_timing(vcpu, (enter - exit), vcpu->arch.last_exit_type);
+	/* enter -> timing_last_exit is time spent in guest - log this too */
+	add_exit_timing(vcpu, (vcpu->arch.timing_last_exit - enter),
+			TIMEINGUEST);
+}
+
+static const char *kvm_exit_names[__NUMBER_OF_KVM_EXIT_TYPES] = {
+	[MMIO_EXITS] =              "MMIO",
+	[DCR_EXITS] =               "DCR",
+	[SIGNAL_EXITS] =            "SIGNAL",
+	[ITLB_REAL_MISS_EXITS] =    "ITLBREAL",
+	[ITLB_VIRT_MISS_EXITS] =    "ITLBVIRT",
+	[DTLB_REAL_MISS_EXITS] =    "DTLBREAL",
+	[DTLB_VIRT_MISS_EXITS] =    "DTLBVIRT",
+	[SYSCALL_EXITS] =           "SYSCALL",
+	[ISI_EXITS] =               "ISI",
+	[DSI_EXITS] =               "DSI",
+	[EMULATED_INST_EXITS] =     "EMULINST",
+	[EMULATED_MTMSRWE_EXITS] =  "EMUL_WAIT",
+	[EMULATED_WRTEE_EXITS] =    "EMUL_WRTEE",
+	[EMULATED_MTSPR_EXITS] =    "EMUL_MTSPR",
+	[EMULATED_MFSPR_EXITS] =    "EMUL_MFSPR",
+	[EMULATED_MTMSR_EXITS] =    "EMUL_MTMSR",
+	[EMULATED_MFMSR_EXITS] =    "EMUL_MFMSR",
+	[EMULATED_TLBSX_EXITS] =    "EMUL_TLBSX",
+	[EMULATED_TLBWE_EXITS] =    "EMUL_TLBWE",
+	[EMULATED_RFI_EXITS] =      "EMUL_RFI",
+	[DEC_EXITS] =               "DEC",
+	[EXT_INTR_EXITS] =          "EXTINT",
+	[HALT_WAKEUP] =             "HALT",
+	[USR_PR_INST] =             "USR_PR_INST",
+	[FP_UNAVAIL] =              "FP_UNAVAIL",
+	[DEBUG_EXITS] =             "DEBUG",
+	[TIMEINGUEST] =             "TIMEINGUEST"
+};
+
+static int kvmppc_exit_timing_show(struct seq_file *m, void *private)
+{
+	struct kvm_vcpu *vcpu = m->private;
+	int i;
+
+	seq_printf(m, "%s", "type	count	min	max	sum	sum_squared\n");
+
+	for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) {
+		seq_printf(m, "%12s	%10d	%10lld	%10lld	%20lld	%20lld\n",
+			kvm_exit_names[i],
+			vcpu->arch.timing_count_type[i],
+			vcpu->arch.timing_min_duration[i],
+			vcpu->arch.timing_max_duration[i],
+			vcpu->arch.timing_sum_duration[i],
+			vcpu->arch.timing_sum_quad_duration[i]);
+	}
+	return 0;
+}
+
+/* Write 'c' to clear the timing statistics. */
+static ssize_t kvmppc_exit_timing_write(struct file *file,
+				       const char __user *user_buf,
+				       size_t count, loff_t *ppos)
+{
+	int err = -EINVAL;
+	char c;
+
+	if (count > 1) {
+		goto done;
+	}
+
+	if (get_user(c, user_buf)) {
+		err = -EFAULT;
+		goto done;
+	}
+
+	if (c == 'c') {
+		struct seq_file *seqf = (struct seq_file *)file->private_data;
+		struct kvm_vcpu *vcpu = seqf->private;
+		/* Write does not affect the buffers previously generated by
+		 * show. The seq_file is locked here to prevent the stats reset
+		 * from racing with a concurrent show call. */
+		mutex_lock(&seqf->lock);
+		kvmppc_init_timing_stats(vcpu);
+		mutex_unlock(&seqf->lock);
+		err = count;
+	}
+
+done:
+	return err;
+}
+
+static int kvmppc_exit_timing_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, kvmppc_exit_timing_show, inode->i_private);
+}
+
+static struct file_operations kvmppc_exit_timing_fops = {
+	.owner   = THIS_MODULE,
+	.open    = kvmppc_exit_timing_open,
+	.read    = seq_read,
+	.write   = kvmppc_exit_timing_write,
+	.llseek  = seq_lseek,
+	.release = single_release,
+};
+
+void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, unsigned int id)
+{
+	static char dbg_fname[50];
+	struct dentry *debugfs_file;
+
+	snprintf(dbg_fname, sizeof(dbg_fname), "vm%u_vcpu%u_timing",
+		 current->pid, id);
+	debugfs_file = debugfs_create_file(dbg_fname, 0666,
+					kvm_debugfs_dir, vcpu,
+					&kvmppc_exit_timing_fops);
+
+	if (!debugfs_file) {
+		printk(KERN_ERR"%s: error creating debugfs file %s\n",
+			__func__, dbg_fname);
+		return;
+	}
+
+	vcpu->arch.debugfs_exit_timing = debugfs_file;
+}
+
+void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu)
+{
+	if (vcpu->arch.debugfs_exit_timing) {
+		debugfs_remove(vcpu->arch.debugfs_exit_timing);
+		vcpu->arch.debugfs_exit_timing = NULL;
+	}
+}
diff --git a/arch/powerpc/kvm/timing.h b/arch/powerpc/kvm/timing.h
new file mode 100644
index 0000000..bb13b1f
--- /dev/null
+++ b/arch/powerpc/kvm/timing.h
@@ -0,0 +1,102 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * Authors: Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
+ */
+
+#ifndef __POWERPC_KVM_EXITTIMING_H__
+#define __POWERPC_KVM_EXITTIMING_H__
+
+#include <linux/kvm_host.h>
+#include <asm/kvm_host.h>
+
+#ifdef CONFIG_KVM_EXIT_TIMING
+void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu);
+void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu);
+void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, unsigned int id);
+void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu);
+
+static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type)
+{
+	vcpu->arch.last_exit_type = type;
+}
+
+#else
+/* if exit timing is not configured there is no need to build the c file */
+static inline void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu) {}
+static inline void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu) {}
+static inline void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu,
+						unsigned int id) {}
+static inline void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu) {}
+static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type) {}
+#endif /* CONFIG_KVM_EXIT_TIMING */
+
+/* account the exit in kvm_stats */
+static inline void kvmppc_account_exit_stat(struct kvm_vcpu *vcpu, int type)
+{
+	/* type has to be known at build time for optimization */
+	BUILD_BUG_ON(__builtin_constant_p(type));
+	switch (type) {
+	case EXT_INTR_EXITS:
+		vcpu->stat.ext_intr_exits++;
+		break;
+	case DEC_EXITS:
+		vcpu->stat.dec_exits++;
+		break;
+	case EMULATED_INST_EXITS:
+		vcpu->stat.emulated_inst_exits++;
+		break;
+	case DCR_EXITS:
+		vcpu->stat.dcr_exits++;
+		break;
+	case DSI_EXITS:
+		vcpu->stat.dsi_exits++;
+		break;
+	case ISI_EXITS:
+		vcpu->stat.isi_exits++;
+		break;
+	case SYSCALL_EXITS:
+		vcpu->stat.syscall_exits++;
+		break;
+	case DTLB_REAL_MISS_EXITS:
+		vcpu->stat.dtlb_real_miss_exits++;
+		break;
+	case DTLB_VIRT_MISS_EXITS:
+		vcpu->stat.dtlb_virt_miss_exits++;
+		break;
+	case MMIO_EXITS:
+		vcpu->stat.mmio_exits++;
+		break;
+	case ITLB_REAL_MISS_EXITS:
+		vcpu->stat.itlb_real_miss_exits++;
+		break;
+	case ITLB_VIRT_MISS_EXITS:
+		vcpu->stat.itlb_virt_miss_exits++;
+		break;
+	case SIGNAL_EXITS:
+		vcpu->stat.signal_exits++;
+		break;
+	}
+}
+
+/* wrapper to set exit time and account for it in kvm_stats */
+static inline void kvmppc_account_exit(struct kvm_vcpu *vcpu, int type)
+{
+	kvmppc_set_exit_type(vcpu, type);
+	kvmppc_account_exit_stat(vcpu, type);
+}
+
+#endif /* __POWERPC_KVM_EXITTIMING_H__ */
diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c
index f7a6902..84e058f 100644
--- a/arch/powerpc/platforms/pseries/xics.c
+++ b/arch/powerpc/platforms/pseries/xics.c
@@ -332,7 +332,7 @@
 	lpar_xirr_info_set((0xff << 24) | irq);
 }
 
-static void xics_set_affinity(unsigned int virq, cpumask_t cpumask)
+static void xics_set_affinity(unsigned int virq, const struct cpumask *cpumask)
 {
 	unsigned int irq;
 	int status;
@@ -870,7 +870,7 @@
 
 		/* Reset affinity to all cpus */
 		irq_desc[virq].affinity = CPU_MASK_ALL;
-		desc->chip->set_affinity(virq, CPU_MASK_ALL);
+		desc->chip->set_affinity(virq, cpu_all_mask);
 unlock:
 		spin_unlock_irqrestore(&desc->lock, flags);
 	}
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index c82babb..3e0d89d 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -806,7 +806,7 @@
 
 #endif /* CONFIG_SMP */
 
-void mpic_set_affinity(unsigned int irq, cpumask_t cpumask)
+void mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask)
 {
 	struct mpic *mpic = mpic_from_irq(irq);
 	unsigned int src = mpic_irq_to_hw(irq);
@@ -818,7 +818,7 @@
 	} else {
 		cpumask_t tmp;
 
-		cpus_and(tmp, cpumask, cpu_online_map);
+		cpumask_and(&tmp, cpumask, cpu_online_mask);
 
 		mpic_irq_write(src, MPIC_INFO(IRQ_DESTINATION),
 			       mpic_physmask(cpus_addr(tmp)[0]));
diff --git a/arch/powerpc/sysdev/mpic.h b/arch/powerpc/sysdev/mpic.h
index 6209c62..3cef2af 100644
--- a/arch/powerpc/sysdev/mpic.h
+++ b/arch/powerpc/sysdev/mpic.h
@@ -36,6 +36,6 @@
 
 extern int mpic_set_irq_type(unsigned int virq, unsigned int flow_type);
 extern void mpic_set_vector(unsigned int virq, unsigned int vector);
-extern void mpic_set_affinity(unsigned int irq, cpumask_t cpumask);
+extern void mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask);
 
 #endif /* _POWERPC_SYSDEV_MPIC_H */
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 8152fef..19577ae 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -83,6 +83,7 @@
 	select HAVE_KRETPROBES
 	select HAVE_KVM if 64BIT
 	select HAVE_ARCH_TRACEHOOK
+	select INIT_ALL_POSSIBLE
 
 source "init/Kconfig"
 
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 6fc7854..3ed5c7a 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -55,12 +55,6 @@
 struct _lowcore *lowcore_ptr[NR_CPUS];
 EXPORT_SYMBOL(lowcore_ptr);
 
-cpumask_t cpu_online_map = CPU_MASK_NONE;
-EXPORT_SYMBOL(cpu_online_map);
-
-cpumask_t cpu_possible_map = CPU_MASK_ALL;
-EXPORT_SYMBOL(cpu_possible_map);
-
 static struct task_struct *current_set[NR_CPUS];
 
 static u8 smp_cpu_type;
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 5be981a..d649600 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -160,7 +160,7 @@
 	cd->min_delta_ns	= 1;
 	cd->max_delta_ns	= LONG_MAX;
 	cd->rating		= 400;
-	cd->cpumask		= cpumask_of_cpu(cpu);
+	cd->cpumask		= cpumask_of(cpu);
 	cd->set_next_event	= s390_next_event;
 	cd->set_mode		= s390_set_mode;
 
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 8b00eb2..be84971 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -113,8 +113,6 @@
 int kvm_dev_ioctl_check_extension(long ext)
 {
 	switch (ext) {
-	case KVM_CAP_USER_MEMORY:
-		return 1;
 	default:
 		return 0;
 	}
@@ -185,8 +183,6 @@
 	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
 	VM_EVENT(kvm, 3, "%s", "vm created");
 
-	try_module_get(THIS_MODULE);
-
 	return kvm;
 out_nodbf:
 	free_page((unsigned long)(kvm->arch.sca));
@@ -196,13 +192,33 @@
 	return ERR_PTR(rc);
 }
 
+void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
+{
+	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
+	free_page((unsigned long)(vcpu->arch.sie_block));
+	kvm_vcpu_uninit(vcpu);
+	kfree(vcpu);
+}
+
+static void kvm_free_vcpus(struct kvm *kvm)
+{
+	unsigned int i;
+
+	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+		if (kvm->vcpus[i]) {
+			kvm_arch_vcpu_destroy(kvm->vcpus[i]);
+			kvm->vcpus[i] = NULL;
+		}
+	}
+}
+
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
-	debug_unregister(kvm->arch.dbf);
+	kvm_free_vcpus(kvm);
 	kvm_free_physmem(kvm);
 	free_page((unsigned long)(kvm->arch.sca));
+	debug_unregister(kvm->arch.dbf);
 	kfree(kvm);
-	module_put(THIS_MODULE);
 }
 
 /* Section: vcpu related */
@@ -213,8 +229,7 @@
 
 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 {
-	/* kvm common code refers to this, but does'nt call it */
-	BUG();
+	/* Nothing to do */
 }
 
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@@ -308,8 +323,6 @@
 	VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
 		 vcpu->arch.sie_block);
 
-	try_module_get(THIS_MODULE);
-
 	return vcpu;
 out_free_cpu:
 	kfree(vcpu);
@@ -317,14 +330,6 @@
 	return ERR_PTR(rc);
 }
 
-void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
-{
-	VCPU_EVENT(vcpu, 3, "%s", "destroy cpu");
-	free_page((unsigned long)(vcpu->arch.sie_block));
-	kfree(vcpu);
-	module_put(THIS_MODULE);
-}
-
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 {
 	/* kvm common code refers to this, but never calls it */
diff --git a/arch/sh/include/asm/smp.h b/arch/sh/include/asm/smp.h
index 85b660c..c24e9c6 100644
--- a/arch/sh/include/asm/smp.h
+++ b/arch/sh/include/asm/smp.h
@@ -31,7 +31,7 @@
 };
 
 void smp_message_recv(unsigned int msg);
-void smp_timer_broadcast(cpumask_t mask);
+void smp_timer_broadcast(const struct cpumask *mask);
 
 void local_timer_interrupt(void);
 void local_timer_setup(unsigned int cpu);
diff --git a/arch/sh/include/asm/topology.h b/arch/sh/include/asm/topology.h
index 95f0085..279d9cc 100644
--- a/arch/sh/include/asm/topology.h
+++ b/arch/sh/include/asm/topology.h
@@ -5,7 +5,6 @@
 
 /* sched_domains SD_NODE_INIT for sh machines */
 #define SD_NODE_INIT (struct sched_domain) {		\
-	.span			= CPU_MASK_NONE,	\
 	.parent			= NULL,			\
 	.child			= NULL,			\
 	.groups			= NULL,			\
diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c
index 3c5ad16..8f40274 100644
--- a/arch/sh/kernel/smp.c
+++ b/arch/sh/kernel/smp.c
@@ -31,12 +31,6 @@
 int __cpu_number_map[NR_CPUS];		/* Map physical to logical */
 int __cpu_logical_map[NR_CPUS];		/* Map logical to physical */
 
-cpumask_t cpu_possible_map;
-EXPORT_SYMBOL(cpu_possible_map);
-
-cpumask_t cpu_online_map;
-EXPORT_SYMBOL(cpu_online_map);
-
 static inline void __init smp_store_cpu_info(unsigned int cpu)
 {
 	struct sh_cpuinfo *c = cpu_data + cpu;
@@ -190,11 +184,11 @@
 	plat_send_ipi(cpu, SMP_MSG_FUNCTION_SINGLE);
 }
 
-void smp_timer_broadcast(cpumask_t mask)
+void smp_timer_broadcast(const struct cpumask *mask)
 {
 	int cpu;
 
-	for_each_cpu_mask(cpu, mask)
+	for_each_cpu(cpu, mask)
 		plat_send_ipi(cpu, SMP_MSG_TIMER);
 }
 
diff --git a/arch/sh/kernel/timers/timer-broadcast.c b/arch/sh/kernel/timers/timer-broadcast.c
index c231763..96e8eae 100644
--- a/arch/sh/kernel/timers/timer-broadcast.c
+++ b/arch/sh/kernel/timers/timer-broadcast.c
@@ -51,7 +51,7 @@
 	clk->mult		= 1;
 	clk->set_mode		= dummy_timer_set_mode;
 	clk->broadcast		= smp_timer_broadcast;
-	clk->cpumask		= cpumask_of_cpu(cpu);
+	clk->cpumask		= cpumask_of(cpu);
 
 	clockevents_register_device(clk);
 }
diff --git a/arch/sh/kernel/timers/timer-tmu.c b/arch/sh/kernel/timers/timer-tmu.c
index 3c61ddd..0db3f95 100644
--- a/arch/sh/kernel/timers/timer-tmu.c
+++ b/arch/sh/kernel/timers/timer-tmu.c
@@ -263,7 +263,7 @@
 	tmu0_clockevent.min_delta_ns =
 			clockevent_delta2ns(1, &tmu0_clockevent);
 
-	tmu0_clockevent.cpumask = cpumask_of_cpu(0);
+	tmu0_clockevent.cpumask = cpumask_of(0);
 
 	clockevents_register_device(&tmu0_clockevent);
 
diff --git a/arch/sparc/include/asm/smp_32.h b/arch/sparc/include/asm/smp_32.h
index a8180e5..8408d9d 100644
--- a/arch/sparc/include/asm/smp_32.h
+++ b/arch/sparc/include/asm/smp_32.h
@@ -29,8 +29,6 @@
  */
 
 extern unsigned char boot_cpu_id;
-extern cpumask_t phys_cpu_present_map;
-#define cpu_possible_map phys_cpu_present_map
 
 typedef void (*smpfunc_t)(unsigned long, unsigned long, unsigned long,
 		       unsigned long, unsigned long);
diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c
index a3ea2bc..cab8e028 100644
--- a/arch/sparc/kernel/irq_64.c
+++ b/arch/sparc/kernel/irq_64.c
@@ -312,7 +312,8 @@
 	}
 }
 
-static void sun4u_set_affinity(unsigned int virt_irq, cpumask_t mask)
+static void sun4u_set_affinity(unsigned int virt_irq,
+			       const struct cpumask *mask)
 {
 	sun4u_irq_enable(virt_irq);
 }
@@ -362,7 +363,8 @@
 		       ino, err);
 }
 
-static void sun4v_set_affinity(unsigned int virt_irq, cpumask_t mask)
+static void sun4v_set_affinity(unsigned int virt_irq,
+			       const struct cpumask *mask)
 {
 	unsigned int ino = virt_irq_table[virt_irq].dev_ino;
 	unsigned long cpuid = irq_choose_cpu(virt_irq);
@@ -429,7 +431,8 @@
 		       dev_handle, dev_ino, err);
 }
 
-static void sun4v_virt_set_affinity(unsigned int virt_irq, cpumask_t mask)
+static void sun4v_virt_set_affinity(unsigned int virt_irq,
+				    const struct cpumask *mask)
 {
 	unsigned long cpuid, dev_handle, dev_ino;
 	int err;
@@ -851,7 +854,7 @@
 		    !(irq_desc[irq].status & IRQ_PER_CPU)) {
 			if (irq_desc[irq].chip->set_affinity)
 				irq_desc[irq].chip->set_affinity(irq,
-					irq_desc[irq].affinity);
+					&irq_desc[irq].affinity);
 		}
 		spin_unlock_irqrestore(&irq_desc[irq].lock, flags);
 	}
diff --git a/arch/sparc/kernel/of_device_64.c b/arch/sparc/kernel/of_device_64.c
index 46e231f..322046c 100644
--- a/arch/sparc/kernel/of_device_64.c
+++ b/arch/sparc/kernel/of_device_64.c
@@ -780,7 +780,7 @@
 	if (nid != -1) {
 		cpumask_t numa_mask = node_to_cpumask(nid);
 
-		irq_set_affinity(irq, numa_mask);
+		irq_set_affinity(irq, &numa_mask);
 	}
 
 	return irq;
diff --git a/arch/sparc/kernel/pci_msi.c b/arch/sparc/kernel/pci_msi.c
index 2e680f3..0d0cd81 100644
--- a/arch/sparc/kernel/pci_msi.c
+++ b/arch/sparc/kernel/pci_msi.c
@@ -288,7 +288,7 @@
 	if (nid != -1) {
 		cpumask_t numa_mask = node_to_cpumask(nid);
 
-		irq_set_affinity(irq, numa_mask);
+		irq_set_affinity(irq, &numa_mask);
 	}
 	err = request_irq(irq, sparc64_msiq_interrupt, 0,
 			  "MSIQ",
diff --git a/arch/sparc/kernel/smp_32.c b/arch/sparc/kernel/smp_32.c
index e396c1f..1e5ac4e2 100644
--- a/arch/sparc/kernel/smp_32.c
+++ b/arch/sparc/kernel/smp_32.c
@@ -39,8 +39,6 @@
 unsigned char boot_cpu_id = 0;
 unsigned char boot_cpu_id4 = 0; /* boot_cpu_id << 2 */
 
-cpumask_t cpu_online_map = CPU_MASK_NONE;
-cpumask_t phys_cpu_present_map = CPU_MASK_NONE;
 cpumask_t smp_commenced_mask = CPU_MASK_NONE;
 
 /* The only guaranteed locking primitive available on all Sparc
@@ -334,7 +332,7 @@
 	instance = 0;
 	while (!cpu_find_by_instance(instance, NULL, &mid)) {
 		if (mid < NR_CPUS) {
-			cpu_set(mid, phys_cpu_present_map);
+			cpu_set(mid, cpu_possible_map);
 			cpu_set(mid, cpu_present_map);
 		}
 		instance++;
@@ -354,7 +352,7 @@
 
 	current_thread_info()->cpu = cpuid;
 	cpu_set(cpuid, cpu_online_map);
-	cpu_set(cpuid, phys_cpu_present_map);
+	cpu_set(cpuid, cpu_possible_map);
 }
 
 int __cpuinit __cpu_up(unsigned int cpu)
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index bfe99d8..4632979 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -49,14 +49,10 @@
 
 int sparc64_multi_core __read_mostly;
 
-cpumask_t cpu_possible_map __read_mostly = CPU_MASK_NONE;
-cpumask_t cpu_online_map __read_mostly = CPU_MASK_NONE;
 DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE;
 cpumask_t cpu_core_map[NR_CPUS] __read_mostly =
 	{ [0 ... NR_CPUS-1] = CPU_MASK_NONE };
 
-EXPORT_SYMBOL(cpu_possible_map);
-EXPORT_SYMBOL(cpu_online_map);
 EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
 EXPORT_SYMBOL(cpu_core_map);
 
diff --git a/arch/sparc/kernel/sparc_ksyms_32.c b/arch/sparc/kernel/sparc_ksyms_32.c
index a4d45fc..e1e9763 100644
--- a/arch/sparc/kernel/sparc_ksyms_32.c
+++ b/arch/sparc/kernel/sparc_ksyms_32.c
@@ -112,10 +112,6 @@
 #ifdef CONFIG_SMP
 /* IRQ implementation. */
 EXPORT_SYMBOL(synchronize_irq);
-
-/* CPU online map and active count. */
-EXPORT_SYMBOL(cpu_online_map);
-EXPORT_SYMBOL(phys_cpu_present_map);
 #endif
 
 EXPORT_SYMBOL(__udelay);
diff --git a/arch/sparc/kernel/time_64.c b/arch/sparc/kernel/time_64.c
index 141da37..9df8f09 100644
--- a/arch/sparc/kernel/time_64.c
+++ b/arch/sparc/kernel/time_64.c
@@ -763,7 +763,7 @@
 	sevt = &__get_cpu_var(sparc64_events);
 
 	memcpy(sevt, &sparc64_clockevent, sizeof(*sevt));
-	sevt->cpumask = cpumask_of_cpu(smp_processor_id());
+	sevt->cpumask = cpumask_of(smp_processor_id());
 
 	clockevents_register_device(sevt);
 }
diff --git a/arch/um/kernel/smp.c b/arch/um/kernel/smp.c
index 0457721..98351c7 100644
--- a/arch/um/kernel/smp.c
+++ b/arch/um/kernel/smp.c
@@ -25,13 +25,6 @@
 #include "irq_user.h"
 #include "os.h"
 
-/* CPU online map, set by smp_boot_cpus */
-cpumask_t cpu_online_map = CPU_MASK_NONE;
-cpumask_t cpu_possible_map = CPU_MASK_NONE;
-
-EXPORT_SYMBOL(cpu_online_map);
-EXPORT_SYMBOL(cpu_possible_map);
-
 /* Per CPU bogomips and other parameters
  * The only piece used here is the ipi pipe, which is set before SMP is
  * started and never changed.
diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c
index 47f04f4..b13a87a 100644
--- a/arch/um/kernel/time.c
+++ b/arch/um/kernel/time.c
@@ -50,7 +50,7 @@
 static struct clock_event_device itimer_clockevent = {
 	.name		= "itimer",
 	.rating		= 250,
-	.cpumask	= CPU_MASK_ALL,
+	.cpumask	= cpu_all_mask,
 	.features	= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
 	.set_mode	= itimer_set_mode,
 	.set_next_event = itimer_next_event,
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 0f44add..249d1e0 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -601,19 +601,20 @@
 
 config MAXSMP
 	bool "Configure Maximum number of SMP Processors and NUMA Nodes"
-	depends on X86_64 && SMP && BROKEN
+	depends on X86_64 && SMP && DEBUG_KERNEL && EXPERIMENTAL
+	select CPUMASK_OFFSTACK
 	default n
 	help
 	  Configure maximum number of CPUS and NUMA Nodes for this architecture.
 	  If unsure, say N.
 
 config NR_CPUS
-	int "Maximum number of CPUs (2-512)" if !MAXSMP
-	range 2 512
-	depends on SMP
+	int "Maximum number of CPUs" if SMP && !MAXSMP
+	range 2 512 if SMP && !MAXSMP
+	default "1" if !SMP
 	default "4096" if MAXSMP
-	default "32" if X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000
-	default "8"
+	default "32" if SMP && (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000)
+	default "8" if SMP
 	help
 	  This allows you to specify the maximum number of CPUs which this
 	  kernel will support.  The maximum supported value is 512 and the
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index b195f85..9dabd00 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -24,15 +24,14 @@
 #include <asm/ucontext.h>
 #include <asm/uaccess.h>
 #include <asm/i387.h>
-#include <asm/ia32.h>
 #include <asm/ptrace.h>
 #include <asm/ia32_unistd.h>
 #include <asm/user32.h>
 #include <asm/sigcontext32.h>
 #include <asm/proto.h>
 #include <asm/vdso.h>
-
 #include <asm/sigframe.h>
+#include <asm/sys_ia32.h>
 
 #define DEBUG_SIG 0
 
diff --git a/arch/x86/ia32/ipc32.c b/arch/x86/ia32/ipc32.c
index d21991c..29cdcd0 100644
--- a/arch/x86/ia32/ipc32.c
+++ b/arch/x86/ia32/ipc32.c
@@ -8,6 +8,7 @@
 #include <linux/shm.h>
 #include <linux/ipc.h>
 #include <linux/compat.h>
+#include <asm/sys_ia32.h>
 
 asmlinkage long sys32_ipc(u32 call, int first, int second, int third,
 			  compat_uptr_t ptr, u32 fifth)
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index 2e09dcd..6c0d7f6 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -44,8 +44,8 @@
 #include <asm/types.h>
 #include <asm/uaccess.h>
 #include <asm/atomic.h>
-#include <asm/ia32.h>
 #include <asm/vgtod.h>
+#include <asm/sys_ia32.h>
 
 #define AA(__x)		((unsigned long)(__x))
 
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 25caa07..ab1d51a 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -54,7 +54,6 @@
 extern int is_vsmp_box(void);
 extern void xapic_wait_icr_idle(void);
 extern u32 safe_xapic_wait_icr_idle(void);
-extern u64 xapic_icr_read(void);
 extern void xapic_icr_write(u32, u32);
 extern int setup_profiling_timer(unsigned int);
 
@@ -93,7 +92,7 @@
 }
 
 #ifndef CONFIG_X86_32
-extern int x2apic, x2apic_preenabled;
+extern int x2apic;
 extern void check_x2apic(void);
 extern void enable_x2apic(void);
 extern void enable_IR_x2apic(void);
diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h
index ce547f2..d8dd9f5 100644
--- a/arch/x86/include/asm/bigsmp/apic.h
+++ b/arch/x86/include/asm/bigsmp/apic.h
@@ -9,12 +9,12 @@
 	return (1);
 }
 
-static inline cpumask_t target_cpus(void)
+static inline const cpumask_t *target_cpus(void)
 {
 #ifdef CONFIG_SMP
-        return cpu_online_map;
+	return &cpu_online_map;
 #else
-        return cpumask_of_cpu(0);
+	return &cpumask_of_cpu(0);
 #endif
 }
 
@@ -79,7 +79,7 @@
 
 static inline int cpu_present_to_apicid(int mps_cpu)
 {
-	if (mps_cpu < NR_CPUS)
+	if (mps_cpu < nr_cpu_ids)
 		return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu);
 
 	return BAD_APICID;
@@ -94,7 +94,7 @@
 /* Mapping from cpu number to logical apicid */
 static inline int cpu_to_logical_apicid(int cpu)
 {
-	if (cpu >= NR_CPUS)
+	if (cpu >= nr_cpu_ids)
 		return BAD_APICID;
 	return cpu_physical_id(cpu);
 }
@@ -119,16 +119,34 @@
 }
 
 /* As we are using single CPU as destination, pick only one CPU here */
-static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
+static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
 {
 	int cpu;
 	int apicid;	
 
-	cpu = first_cpu(cpumask);
+	cpu = first_cpu(*cpumask);
 	apicid = cpu_to_logical_apicid(cpu);
 	return apicid;
 }
 
+static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+						  const struct cpumask *andmask)
+{
+	int cpu;
+
+	/*
+	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
+	 * May as well be the first.
+	 */
+	for_each_cpu_and(cpu, cpumask, andmask)
+		if (cpumask_test_cpu(cpu, cpu_online_mask))
+			break;
+	if (cpu < nr_cpu_ids)
+		return cpu_to_logical_apicid(cpu);
+
+	return BAD_APICID;
+}
+
 static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb)
 {
 	return cpuid_apic >> index_msb;
diff --git a/arch/x86/include/asm/bigsmp/ipi.h b/arch/x86/include/asm/bigsmp/ipi.h
index 9404c53..27fcd01b 100644
--- a/arch/x86/include/asm/bigsmp/ipi.h
+++ b/arch/x86/include/asm/bigsmp/ipi.h
@@ -1,25 +1,22 @@
 #ifndef __ASM_MACH_IPI_H
 #define __ASM_MACH_IPI_H
 
-void send_IPI_mask_sequence(cpumask_t mask, int vector);
+void send_IPI_mask_sequence(const struct cpumask *mask, int vector);
+void send_IPI_mask_allbutself(const struct cpumask *mask, int vector);
 
-static inline void send_IPI_mask(cpumask_t mask, int vector)
+static inline void send_IPI_mask(const struct cpumask *mask, int vector)
 {
 	send_IPI_mask_sequence(mask, vector);
 }
 
 static inline void send_IPI_allbutself(int vector)
 {
-	cpumask_t mask = cpu_online_map;
-	cpu_clear(smp_processor_id(), mask);
-
-	if (!cpus_empty(mask))
-		send_IPI_mask(mask, vector);
+	send_IPI_mask_allbutself(cpu_online_mask, vector);
 }
 
 static inline void send_IPI_all(int vector)
 {
-	send_IPI_mask(cpu_online_map, vector);
+	send_IPI_mask(cpu_online_mask, vector);
 }
 
 #endif /* __ASM_MACH_IPI_H */
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index e6b82b17..dc27705 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -320,16 +320,14 @@
 	_set_gate(n, GATE_INTERRUPT, addr, 0, 0, __KERNEL_CS);
 }
 
-#define SYS_VECTOR_FREE		0
-#define SYS_VECTOR_ALLOCED	1
-
 extern int first_system_vector;
-extern char system_vectors[];
+/* used_vectors is a bitmap of IRQs not managed by the percpu vector_irq */
+extern unsigned long used_vectors[];
 
 static inline void alloc_system_vector(int vector)
 {
-	if (system_vectors[vector] == SYS_VECTOR_FREE) {
-		system_vectors[vector] = SYS_VECTOR_ALLOCED;
+	if (!test_bit(vector, used_vectors)) {
+		set_bit(vector, used_vectors);
 		if (first_system_vector > vector)
 			first_system_vector = vector;
 	} else
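
A minimal sketch (not part of the patch) of the bitmap bookkeeping that replaces the old SYS_VECTOR_FREE/SYS_VECTOR_ALLOCED char array: one bit per vector, queried with test_bit() and claimed with set_bit(), the same pattern alloc_system_vector() uses above. The demo_ names are illustrative only.

#include <linux/bitops.h>
#include <linux/errno.h>
#include <linux/types.h>

#define DEMO_NR_VECTORS 256

static DECLARE_BITMAP(demo_used_vectors, DEMO_NR_VECTORS);

/* Check with test_bit(), claim with set_bit(). */
static int demo_claim_vector(int vector)
{
	if (test_bit(vector, demo_used_vectors))
		return -EBUSY;		/* already allocated */
	set_bit(vector, demo_used_vectors);
	return 0;
}
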
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index a2e545c..ca5ffb2 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -90,6 +90,7 @@
 
 #endif /* CONFIG_X86_32 */
 
+extern int add_efi_memmap;
 extern void efi_reserve_early(void);
 extern void efi_call_phys_prelog(void);
 extern void efi_call_phys_epilog(void);
diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h
index e24ef87..51ac123 100644
--- a/arch/x86/include/asm/es7000/apic.h
+++ b/arch/x86/include/asm/es7000/apic.h
@@ -9,14 +9,14 @@
 	        return (1);
 }
 
-static inline cpumask_t target_cpus_cluster(void)
+static inline const cpumask_t *target_cpus_cluster(void)
 {
-	return CPU_MASK_ALL;
+	return &CPU_MASK_ALL;
 }
 
-static inline cpumask_t target_cpus(void)
+static inline const cpumask_t *target_cpus(void)
 {
-	return cpumask_of_cpu(smp_processor_id());
+	return &cpumask_of_cpu(smp_processor_id());
 }
 
 #define APIC_DFR_VALUE_CLUSTER		(APIC_DFR_CLUSTER)
@@ -80,9 +80,10 @@
 static inline void setup_apic_routing(void)
 {
 	int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id());
-	printk("Enabling APIC mode:  %s.  Using %d I/O APICs, target cpus %lx\n",
+	printk("Enabling APIC mode:  %s. Using %d I/O APICs, target cpus %lx\n",
 		(apic_version[apic] == 0x14) ?
-		"Physical Cluster" : "Logical Cluster", nr_ioapics, cpus_addr(target_cpus())[0]);
+			"Physical Cluster" : "Logical Cluster",
+			nr_ioapics, cpus_addr(*target_cpus())[0]);
 }
 
 static inline int multi_timer_check(int apic, int irq)
@@ -100,7 +101,7 @@
 {
 	if (!mps_cpu)
 		return boot_cpu_physical_apicid;
-	else if (mps_cpu < NR_CPUS)
+	else if (mps_cpu < nr_cpu_ids)
 		return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu);
 	else
 		return BAD_APICID;
@@ -120,9 +121,9 @@
 static inline int cpu_to_logical_apicid(int cpu)
 {
 #ifdef CONFIG_SMP
-       if (cpu >= NR_CPUS)
-	       return BAD_APICID;
-       return (int)cpu_2_logical_apicid[cpu];
+	if (cpu >= nr_cpu_ids)
+		return BAD_APICID;
+	return (int)cpu_2_logical_apicid[cpu];
 #else
 	return logical_smp_processor_id();
 #endif
@@ -146,14 +147,15 @@
 	return (1);
 }
 
-static inline unsigned int cpu_mask_to_apicid_cluster(cpumask_t cpumask)
+static inline unsigned int
+cpu_mask_to_apicid_cluster(const struct cpumask *cpumask)
 {
 	int num_bits_set;
 	int cpus_found = 0;
 	int cpu;
 	int apicid;
 
-	num_bits_set = cpus_weight(cpumask);
+	num_bits_set = cpumask_weight(cpumask);
 	/* Return id to all */
 	if (num_bits_set == NR_CPUS)
 		return 0xFF;
@@ -161,10 +163,10 @@
 	 * The cpus in the mask must all be on the apic cluster.  If are not
 	 * on the same apicid cluster return default value of TARGET_CPUS.
 	 */
-	cpu = first_cpu(cpumask);
+	cpu = cpumask_first(cpumask);
 	apicid = cpu_to_logical_apicid(cpu);
 	while (cpus_found < num_bits_set) {
-		if (cpu_isset(cpu, cpumask)) {
+		if (cpumask_test_cpu(cpu, cpumask)) {
 			int new_apicid = cpu_to_logical_apicid(cpu);
 			if (apicid_cluster(apicid) !=
 					apicid_cluster(new_apicid)){
@@ -179,14 +181,14 @@
 	return apicid;
 }
 
-static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
+static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
 {
 	int num_bits_set;
 	int cpus_found = 0;
 	int cpu;
 	int apicid;
 
-	num_bits_set = cpus_weight(cpumask);
+	num_bits_set = cpus_weight(*cpumask);
 	/* Return id to all */
 	if (num_bits_set == NR_CPUS)
 		return cpu_to_logical_apicid(0);
@@ -194,10 +196,10 @@
 	 * The cpus in the mask must all be on the apic cluster.  If are not
 	 * on the same apicid cluster return default value of TARGET_CPUS.
 	 */
-	cpu = first_cpu(cpumask);
+	cpu = first_cpu(*cpumask);
 	apicid = cpu_to_logical_apicid(cpu);
 	while (cpus_found < num_bits_set) {
-		if (cpu_isset(cpu, cpumask)) {
+		if (cpu_isset(cpu, *cpumask)) {
 			int new_apicid = cpu_to_logical_apicid(cpu);
 			if (apicid_cluster(apicid) !=
 					apicid_cluster(new_apicid)){
@@ -212,6 +214,50 @@
 	return apicid;
 }
 
+
+static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask,
+						  const struct cpumask *andmask)
+{
+	int num_bits_set;
+	int cpus_found = 0;
+	int cpu;
+	int apicid = cpu_to_logical_apicid(0);
+	cpumask_var_t cpumask;
+
+	if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC))
+		return apicid;
+
+	cpumask_and(cpumask, inmask, andmask);
+	cpumask_and(cpumask, cpumask, cpu_online_mask);
+
+	num_bits_set = cpumask_weight(cpumask);
+	/* Return id to all */
+	if (num_bits_set == NR_CPUS)
+		goto exit;
+	/*
+	 * The cpus in the mask must all be on the apic cluster.  If they are
+	 * not on the same apicid cluster, return the default TARGET_CPUS value.
+	 */
+	cpu = cpumask_first(cpumask);
+	apicid = cpu_to_logical_apicid(cpu);
+	while (cpus_found < num_bits_set) {
+		if (cpumask_test_cpu(cpu, cpumask)) {
+			int new_apicid = cpu_to_logical_apicid(cpu);
+			if (apicid_cluster(apicid) !=
+					apicid_cluster(new_apicid)){
+				printk ("%s: Not a valid mask!\n", __func__);
+				return cpu_to_logical_apicid(0);
+			}
+			apicid = new_apicid;
+			cpus_found++;
+		}
+		cpu++;
+	}
+exit:
+	free_cpumask_var(cpumask);
+	return apicid;
+}
+
 static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb)
 {
 	return cpuid_apic >> index_msb;
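
A condensed sketch (illustrative, not from the patch) of the temporary-cpumask pattern used by cpu_mask_to_apicid_and() above: allocate with GFP_ATOMIC because the caller may not be able to sleep, AND the masks, and free the variable before returning. demo_first_cpu_and is a hypothetical helper.

#include <linux/cpumask.h>
#include <linux/gfp.h>

/* Return the first online cpu in (a & b), or nr_cpu_ids if there is none. */
static int demo_first_cpu_and(const struct cpumask *a, const struct cpumask *b)
{
	cpumask_var_t tmp;
	int cpu;

	if (!alloc_cpumask_var(&tmp, GFP_ATOMIC))
		return nr_cpu_ids;		/* allocation failed, give up */

	cpumask_and(tmp, a, b);
	cpumask_and(tmp, tmp, cpu_online_mask);
	cpu = cpumask_first(tmp);		/* nr_cpu_ids when the mask is empty */

	free_cpumask_var(tmp);
	return cpu;
}
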
diff --git a/arch/x86/include/asm/es7000/ipi.h b/arch/x86/include/asm/es7000/ipi.h
index 632a955..7e8ed24 100644
--- a/arch/x86/include/asm/es7000/ipi.h
+++ b/arch/x86/include/asm/es7000/ipi.h
@@ -1,24 +1,22 @@
 #ifndef __ASM_ES7000_IPI_H
 #define __ASM_ES7000_IPI_H
 
-void send_IPI_mask_sequence(cpumask_t mask, int vector);
+void send_IPI_mask_sequence(const struct cpumask *mask, int vector);
+void send_IPI_mask_allbutself(const struct cpumask *mask, int vector);
 
-static inline void send_IPI_mask(cpumask_t mask, int vector)
+static inline void send_IPI_mask(const struct cpumask *mask, int vector)
 {
 	send_IPI_mask_sequence(mask, vector);
 }
 
 static inline void send_IPI_allbutself(int vector)
 {
-	cpumask_t mask = cpu_online_map;
-	cpu_clear(smp_processor_id(), mask);
-	if (!cpus_empty(mask))
-		send_IPI_mask(mask, vector);
+	send_IPI_mask_allbutself(cpu_online_mask, vector);
 }
 
 static inline void send_IPI_all(int vector)
 {
-	send_IPI_mask(cpu_online_map, vector);
+	send_IPI_mask(cpu_online_mask, vector);
 }
 
 #endif /* __ASM_ES7000_IPI_H */
diff --git a/arch/x86/include/asm/genapic_32.h b/arch/x86/include/asm/genapic_32.h
index 0ac17d3..746f37a 100644
--- a/arch/x86/include/asm/genapic_32.h
+++ b/arch/x86/include/asm/genapic_32.h
@@ -24,7 +24,7 @@
 	int (*probe)(void);
 
 	int (*apic_id_registered)(void);
-	cpumask_t (*target_cpus)(void);
+	const struct cpumask *(*target_cpus)(void);
 	int int_delivery_mode;
 	int int_dest_mode;
 	int ESR_DISABLE;
@@ -57,12 +57,16 @@
 
 	unsigned (*get_apic_id)(unsigned long x);
 	unsigned long apic_id_mask;
-	unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask);
-	cpumask_t (*vector_allocation_domain)(int cpu);
+	unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask);
+	unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask,
+					       const struct cpumask *andmask);
+	void (*vector_allocation_domain)(int cpu, struct cpumask *retmask);
 
 #ifdef CONFIG_SMP
 	/* ipi */
-	void (*send_IPI_mask)(cpumask_t mask, int vector);
+	void (*send_IPI_mask)(const struct cpumask *mask, int vector);
+	void (*send_IPI_mask_allbutself)(const struct cpumask *mask,
+					 int vector);
 	void (*send_IPI_allbutself)(int vector);
 	void (*send_IPI_all)(int vector);
 #endif
@@ -114,6 +118,7 @@
 	APICFUNC(get_apic_id)				\
 	.apic_id_mask = APIC_ID_MASK,			\
 	APICFUNC(cpu_mask_to_apicid)			\
+	APICFUNC(cpu_mask_to_apicid_and)		\
 	APICFUNC(vector_allocation_domain)		\
 	APICFUNC(acpi_madt_oem_check)			\
 	IPIFUNC(send_IPI_mask)				\
diff --git a/arch/x86/include/asm/genapic_64.h b/arch/x86/include/asm/genapic_64.h
index 2cae011..adf32fb 100644
--- a/arch/x86/include/asm/genapic_64.h
+++ b/arch/x86/include/asm/genapic_64.h
@@ -1,6 +1,8 @@
 #ifndef _ASM_X86_GENAPIC_64_H
 #define _ASM_X86_GENAPIC_64_H
 
+#include <linux/cpumask.h>
+
 /*
  * Copyright 2004 James Cleverdon, IBM.
  * Subject to the GNU Public License, v.2
@@ -18,16 +20,20 @@
 	u32 int_delivery_mode;
 	u32 int_dest_mode;
 	int (*apic_id_registered)(void);
-	cpumask_t (*target_cpus)(void);
-	cpumask_t (*vector_allocation_domain)(int cpu);
+	const struct cpumask *(*target_cpus)(void);
+	void (*vector_allocation_domain)(int cpu, struct cpumask *retmask);
 	void (*init_apic_ldr)(void);
 	/* ipi */
-	void (*send_IPI_mask)(cpumask_t mask, int vector);
+	void (*send_IPI_mask)(const struct cpumask *mask, int vector);
+	void (*send_IPI_mask_allbutself)(const struct cpumask *mask,
+					 int vector);
 	void (*send_IPI_allbutself)(int vector);
 	void (*send_IPI_all)(int vector);
 	void (*send_IPI_self)(int vector);
 	/* */
-	unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask);
+	unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask);
+	unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask,
+					       const struct cpumask *andmask);
 	unsigned int (*phys_pkg_id)(int index_msb);
 	unsigned int (*get_apic_id)(unsigned long x);
 	unsigned long (*set_apic_id)(unsigned int id);
diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h
index f89dffb..c745a30 100644
--- a/arch/x86/include/asm/ipi.h
+++ b/arch/x86/include/asm/ipi.h
@@ -117,7 +117,8 @@
 	native_apic_mem_write(APIC_ICR, cfg);
 }
 
-static inline void send_IPI_mask_sequence(cpumask_t mask, int vector)
+static inline void send_IPI_mask_sequence(const struct cpumask *mask,
+					  int vector)
 {
 	unsigned long flags;
 	unsigned long query_cpu;
@@ -128,11 +129,29 @@
 	 * - mbligh
 	 */
 	local_irq_save(flags);
-	for_each_cpu_mask_nr(query_cpu, mask) {
+	for_each_cpu(query_cpu, mask) {
 		__send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, query_cpu),
 				      vector, APIC_DEST_PHYSICAL);
 	}
 	local_irq_restore(flags);
 }
 
+static inline void send_IPI_mask_allbutself(const struct cpumask *mask,
+					    int vector)
+{
+	unsigned long flags;
+	unsigned int query_cpu;
+	unsigned int this_cpu = smp_processor_id();
+
+	/* See Hack comment above */
+
+	local_irq_save(flags);
+	for_each_cpu(query_cpu, mask)
+		if (query_cpu != this_cpu)
+			__send_IPI_dest_field(
+				per_cpu(x86_cpu_to_apicid, query_cpu),
+				vector, APIC_DEST_PHYSICAL);
+	local_irq_restore(flags);
+}
+
 #endif /* _ASM_X86_IPI_H */
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index 28e409f..592688e 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -33,7 +33,7 @@
 
 #ifdef CONFIG_HOTPLUG_CPU
 #include <linux/cpumask.h>
-extern void fixup_irqs(cpumask_t map);
+extern void fixup_irqs(void);
 #endif
 
 extern unsigned int do_IRQ(struct pt_regs *regs);
@@ -42,5 +42,6 @@
 
 /* Interrupt vector management */
 extern DECLARE_BITMAP(used_vectors, NR_VECTORS);
+extern int vector_used_by_percpu_irq(unsigned int vector);
 
 #endif /* _ASM_X86_IRQ_H */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 8346be8..97215a4 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -21,6 +21,7 @@
 
 #include <asm/pvclock-abi.h>
 #include <asm/desc.h>
+#include <asm/mtrr.h>
 
 #define KVM_MAX_VCPUS 16
 #define KVM_MEMORY_SLOTS 32
@@ -86,6 +87,7 @@
 #define KVM_MIN_FREE_MMU_PAGES 5
 #define KVM_REFILL_PAGES 25
 #define KVM_MAX_CPUID_ENTRIES 40
+#define KVM_NR_FIXED_MTRR_REGION 88
 #define KVM_NR_VAR_MTRR 8
 
 extern spinlock_t kvm_lock;
@@ -180,6 +182,8 @@
 	struct list_head link;
 	struct hlist_node hash_link;
 
+	struct list_head oos_link;
+
 	/*
 	 * The following two entries are used to key the shadow page in the
 	 * hash table.
@@ -190,13 +194,16 @@
 	u64 *spt;
 	/* hold the gfn of each spte inside spt */
 	gfn_t *gfns;
-	unsigned long slot_bitmap; /* One bit set per slot which has memory
-				    * in this shadow page.
-				    */
+	/*
+	 * One bit set per slot which has memory
+	 * in this shadow page.
+	 */
+	DECLARE_BITMAP(slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS);
 	int multimapped;         /* More than one parent_pte? */
 	int root_count;          /* Currently serving as active root */
 	bool unsync;
-	bool unsync_children;
+	bool global;
+	unsigned int unsync_children;
 	union {
 		u64 *parent_pte;               /* !multimapped */
 		struct hlist_head parent_ptes; /* multimapped, kvm_pte_chain */
@@ -327,8 +334,10 @@
 
 	bool nmi_pending;
 	bool nmi_injected;
+	bool nmi_window_open;
 
-	u64 mtrr[0x100];
+	struct mtrr_state_type mtrr_state;
+	u32 pat;
 };
 
 struct kvm_mem_alias {
@@ -350,11 +359,13 @@
 	 */
 	struct list_head active_mmu_pages;
 	struct list_head assigned_dev_head;
+	struct list_head oos_global_pages;
 	struct dmar_domain *intel_iommu_domain;
 	struct kvm_pic *vpic;
 	struct kvm_ioapic *vioapic;
 	struct kvm_pit *vpit;
 	struct hlist_head irq_ack_notifier_list;
+	int vapics_in_nmi_mode;
 
 	int round_robin_prev_vcpu;
 	unsigned int tss_addr;
@@ -378,6 +389,7 @@
 	u32 mmu_recycled;
 	u32 mmu_cache_miss;
 	u32 mmu_unsync;
+	u32 mmu_unsync_global;
 	u32 remote_tlb_flush;
 	u32 lpages;
 };
@@ -397,6 +409,7 @@
 	u32 halt_exits;
 	u32 halt_wakeup;
 	u32 request_irq_exits;
+	u32 request_nmi_exits;
 	u32 irq_exits;
 	u32 host_state_reload;
 	u32 efer_reload;
@@ -405,6 +418,7 @@
 	u32 insn_emulation_fail;
 	u32 hypercalls;
 	u32 irq_injections;
+	u32 nmi_injections;
 };
 
 struct descriptor_table {
@@ -477,6 +491,7 @@
 
 	int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
 	int (*get_tdp_level)(void);
+	int (*get_mt_mask_shift)(void);
 };
 
 extern struct kvm_x86_ops *kvm_x86_ops;
@@ -490,7 +505,7 @@
 void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte);
 void kvm_mmu_set_base_ptes(u64 base_pte);
 void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
-		u64 dirty_mask, u64 nx_mask, u64 x_mask);
+		u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 mt_mask);
 
 int kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
 void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
@@ -587,12 +602,14 @@
 
 void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu);
 void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
-		       const u8 *new, int bytes);
+		       const u8 *new, int bytes,
+		       bool guest_initiated);
 int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva);
 void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
 int kvm_mmu_load(struct kvm_vcpu *vcpu);
 void kvm_mmu_unload(struct kvm_vcpu *vcpu);
 void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
+void kvm_mmu_sync_global(struct kvm_vcpu *vcpu);
 
 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
 
@@ -607,6 +624,8 @@
 int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3);
 int complete_pio(struct kvm_vcpu *vcpu);
 
+struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn);
+
 static inline struct kvm_mmu_page *page_header(hpa_t shadow_page)
 {
 	struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT);
@@ -702,18 +721,6 @@
 	kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
 }
 
-#define ASM_VMX_VMCLEAR_RAX       ".byte 0x66, 0x0f, 0xc7, 0x30"
-#define ASM_VMX_VMLAUNCH          ".byte 0x0f, 0x01, 0xc2"
-#define ASM_VMX_VMRESUME          ".byte 0x0f, 0x01, 0xc3"
-#define ASM_VMX_VMPTRLD_RAX       ".byte 0x0f, 0xc7, 0x30"
-#define ASM_VMX_VMREAD_RDX_RAX    ".byte 0x0f, 0x78, 0xd0"
-#define ASM_VMX_VMWRITE_RAX_RDX   ".byte 0x0f, 0x79, 0xd0"
-#define ASM_VMX_VMWRITE_RSP_RDX   ".byte 0x0f, 0x79, 0xd4"
-#define ASM_VMX_VMXOFF            ".byte 0x0f, 0x01, 0xc4"
-#define ASM_VMX_VMXON_RAX         ".byte 0xf3, 0x0f, 0xc7, 0x30"
-#define ASM_VMX_INVEPT		  ".byte 0x66, 0x0f, 0x38, 0x80, 0x08"
-#define ASM_VMX_INVVPID		  ".byte 0x66, 0x0f, 0x38, 0x81, 0x08"
-
 #define MSR_IA32_TIME_STAMP_COUNTER		0x010
 
 #define TSS_IOPB_BASE_OFFSET 0x66
diff --git a/arch/x86/include/asm/kvm_x86_emulate.h b/arch/x86/include/asm/kvm_x86_emulate.h
index 25179a2..6a15973 100644
--- a/arch/x86/include/asm/kvm_x86_emulate.h
+++ b/arch/x86/include/asm/kvm_x86_emulate.h
@@ -123,6 +123,7 @@
 	u8 ad_bytes;
 	u8 rex_prefix;
 	struct operand src;
+	struct operand src2;
 	struct operand dst;
 	bool has_seg_override;
 	u8 seg_override;
@@ -146,22 +147,18 @@
 	/* Register state before/after emulation. */
 	struct kvm_vcpu *vcpu;
 
-	/* Linear faulting address (if emulating a page-faulting instruction) */
 	unsigned long eflags;
-
 	/* Emulated execution mode, represented by an X86EMUL_MODE value. */
 	int mode;
-
 	u32 cs_base;
 
 	/* decode cache */
-
 	struct decode_cache decode;
 };
 
 /* Repeat String Operation Prefix */
-#define REPE_PREFIX  1
-#define REPNE_PREFIX    2
+#define REPE_PREFIX	1
+#define REPNE_PREFIX	2
 
 /* Execution mode, passed to the emulator. */
 #define X86EMUL_MODE_REAL     0	/* Real mode.             */
@@ -170,7 +167,7 @@
 #define X86EMUL_MODE_PROT64   8	/* 64-bit (long) mode.    */
 
 /* Host execution mode. */
-#if defined(__i386__)
+#if defined(CONFIG_X86_32)
 #define X86EMUL_MODE_HOST X86EMUL_MODE_PROT32
 #elif defined(CONFIG_X86_64)
 #define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64
diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h
index 6cb3a46..cc09cbb 100644
--- a/arch/x86/include/asm/mach-default/mach_apic.h
+++ b/arch/x86/include/asm/mach-default/mach_apic.h
@@ -8,12 +8,12 @@
 
 #define APIC_DFR_VALUE	(APIC_DFR_FLAT)
 
-static inline cpumask_t target_cpus(void)
+static inline const struct cpumask *target_cpus(void)
 { 
 #ifdef CONFIG_SMP
-	return cpu_online_map;
+	return cpu_online_mask;
 #else
-	return cpumask_of_cpu(0);
+	return cpumask_of(0);
 #endif
 } 
 
@@ -28,6 +28,7 @@
 #define apic_id_registered (genapic->apic_id_registered)
 #define init_apic_ldr (genapic->init_apic_ldr)
 #define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid)
+#define cpu_mask_to_apicid_and (genapic->cpu_mask_to_apicid_and)
 #define phys_pkg_id	(genapic->phys_pkg_id)
 #define vector_allocation_domain    (genapic->vector_allocation_domain)
 #define read_apic_id()  (GET_APIC_ID(apic_read(APIC_ID)))
@@ -61,9 +62,19 @@
 	return physid_isset(read_apic_id(), phys_cpu_present_map);
 }
 
-static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
+static inline unsigned int cpu_mask_to_apicid(const struct cpumask *cpumask)
 {
-	return cpus_addr(cpumask)[0];
+	return cpumask_bits(cpumask)[0];
+}
+
+static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+						  const struct cpumask *andmask)
+{
+	unsigned long mask1 = cpumask_bits(cpumask)[0];
+	unsigned long mask2 = cpumask_bits(andmask)[0];
+	unsigned long mask3 = cpumask_bits(cpu_online_mask)[0];
+
+	return (unsigned int)(mask1 & mask2 & mask3);
 }
 
 static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb)
@@ -88,7 +99,7 @@
 #endif
 }
 
-static inline cpumask_t vector_allocation_domain(int cpu)
+static inline void vector_allocation_domain(int cpu, struct cpumask *retmask)
 {
         /* Careful. Some cpus do not strictly honor the set of cpus
          * specified in the interrupt destination when using lowest
@@ -98,8 +109,7 @@
          * deliver interrupts to the wrong hyperthread when only one
          * hyperthread was specified in the interrupt destination.
          */
-        cpumask_t domain = { { [0] = APIC_ALL_CPUS, } };
-        return domain;
+	*retmask = (cpumask_t) { { [0] = APIC_ALL_CPUS } };
 }
 #endif
 
@@ -131,7 +141,7 @@
 
 static inline int cpu_present_to_apicid(int mps_cpu)
 {
-	if (mps_cpu < NR_CPUS && cpu_present(mps_cpu))
+	if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu))
 		return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu);
 	else
 		return BAD_APICID;
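
The flat-mode cpu_mask_to_apicid_and() above reduces to a bitwise AND of the low words of the two masks and cpu_online_mask, since in flat logical mode each cpu owns one bit of the logical destination. A standalone illustration of the arithmetic (the mask values are made up):

#include <stdio.h>

int main(void)
{
	unsigned long requested = 0x5;	/* cpus 0 and 2 */
	unsigned long domain    = 0xf;	/* cpus 0-3 in the vector domain */
	unsigned long online    = 0x7;	/* cpus 0-2 online */

	/* prints 0x5: both requested cpus are online and in the domain */
	printf("logical destination = 0x%lx\n", requested & domain & online);
	return 0;
}
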
diff --git a/arch/x86/include/asm/mach-default/mach_ipi.h b/arch/x86/include/asm/mach-default/mach_ipi.h
index fabca01..191312d 100644
--- a/arch/x86/include/asm/mach-default/mach_ipi.h
+++ b/arch/x86/include/asm/mach-default/mach_ipi.h
@@ -4,7 +4,8 @@
 /* Avoid include hell */
 #define NMI_VECTOR 0x02
 
-void send_IPI_mask_bitmask(cpumask_t mask, int vector);
+void send_IPI_mask_bitmask(const struct cpumask *mask, int vector);
+void send_IPI_mask_allbutself(const struct cpumask *mask, int vector);
 void __send_IPI_shortcut(unsigned int shortcut, int vector);
 
 extern int no_broadcast;
@@ -12,28 +13,27 @@
 #ifdef CONFIG_X86_64
 #include <asm/genapic.h>
 #define send_IPI_mask (genapic->send_IPI_mask)
+#define send_IPI_mask_allbutself (genapic->send_IPI_mask_allbutself)
 #else
-static inline void send_IPI_mask(cpumask_t mask, int vector)
+static inline void send_IPI_mask(const struct cpumask *mask, int vector)
 {
 	send_IPI_mask_bitmask(mask, vector);
 }
+void send_IPI_mask_allbutself(const struct cpumask *mask, int vector);
 #endif
 
 static inline void __local_send_IPI_allbutself(int vector)
 {
-	if (no_broadcast || vector == NMI_VECTOR) {
-		cpumask_t mask = cpu_online_map;
-
-		cpu_clear(smp_processor_id(), mask);
-		send_IPI_mask(mask, vector);
-	} else
+	if (no_broadcast || vector == NMI_VECTOR)
+		send_IPI_mask_allbutself(cpu_online_mask, vector);
+	else
 		__send_IPI_shortcut(APIC_DEST_ALLBUT, vector);
 }
 
 static inline void __local_send_IPI_all(int vector)
 {
 	if (no_broadcast || vector == NMI_VECTOR)
-		send_IPI_mask(cpu_online_map, vector);
+		send_IPI_mask(cpu_online_mask, vector);
 	else
 		__send_IPI_shortcut(APIC_DEST_ALLINC, vector);
 }
diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h
index e430f47..48553e9 100644
--- a/arch/x86/include/asm/mach-generic/mach_apic.h
+++ b/arch/x86/include/asm/mach-generic/mach_apic.h
@@ -24,6 +24,7 @@
 #define check_phys_apicid_present (genapic->check_phys_apicid_present)
 #define check_apicid_used (genapic->check_apicid_used)
 #define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid)
+#define cpu_mask_to_apicid_and (genapic->cpu_mask_to_apicid_and)
 #define vector_allocation_domain (genapic->vector_allocation_domain)
 #define enable_apic_mode (genapic->enable_apic_mode)
 #define phys_pkg_id (genapic->phys_pkg_id)
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index 91885c2..62d14ce 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -6,13 +6,13 @@
 #include <asm/mpspec_def.h>
 
 extern int apic_version[MAX_APICS];
+extern int pic_mode;
 
 #ifdef CONFIG_X86_32
 #include <mach_mpspec.h>
 
 extern unsigned int def_to_bigsmp;
 extern u8 apicid_2_node[];
-extern int pic_mode;
 
 #ifdef CONFIG_X86_NUMAQ
 extern int mp_bus_id_to_node[MAX_MP_BUSSES];
diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h
index 7c1e425..cb988aa 100644
--- a/arch/x86/include/asm/mtrr.h
+++ b/arch/x86/include/asm/mtrr.h
@@ -57,6 +57,31 @@
 };
 #endif /* !__i386__ */
 
+struct mtrr_var_range {
+	u32 base_lo;
+	u32 base_hi;
+	u32 mask_lo;
+	u32 mask_hi;
+};
+
+/* In the Intel processor's MTRR interface, the MTRR type is always held in
+   an 8 bit field: */
+typedef u8 mtrr_type;
+
+#define MTRR_NUM_FIXED_RANGES 88
+#define MTRR_MAX_VAR_RANGES 256
+
+struct mtrr_state_type {
+	struct mtrr_var_range var_ranges[MTRR_MAX_VAR_RANGES];
+	mtrr_type fixed_ranges[MTRR_NUM_FIXED_RANGES];
+	unsigned char enabled;
+	unsigned char have_fixed;
+	mtrr_type def_type;
+};
+
+#define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg))
+#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)
+
 /*  These are the various ioctls  */
 #define MTRRIOC_ADD_ENTRY        _IOW(MTRR_IOCTL_BASE,  0, struct mtrr_sentry)
 #define MTRRIOC_SET_ENTRY        _IOW(MTRR_IOCTL_BASE,  1, struct mtrr_sentry)
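
A small standalone illustration (not part of the patch) of the MSR numbering encoded by MTRRphysBase_MSR()/MTRRphysMask_MSR() above: variable range n is described by the MSR pair 0x200 + 2n and 0x200 + 2n + 1.

#include <stdio.h>

#define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg))
#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)

int main(void)
{
	int reg;

	/* prints 0x200/0x201, 0x202/0x203, 0x204/0x205 */
	for (reg = 0; reg < 3; reg++)
		printf("var range %d: base MSR 0x%x, mask MSR 0x%x\n",
		       reg, MTRRphysBase_MSR(reg), MTRRphysMask_MSR(reg));
	return 0;
}
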
diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h
index 0bf2a06..c80f00d 100644
--- a/arch/x86/include/asm/numaq/apic.h
+++ b/arch/x86/include/asm/numaq/apic.h
@@ -7,9 +7,9 @@
 
 #define APIC_DFR_VALUE	(APIC_DFR_CLUSTER)
 
-static inline cpumask_t target_cpus(void)
+static inline const cpumask_t *target_cpus(void)
 {
-	return CPU_MASK_ALL;
+	return &CPU_MASK_ALL;
 }
 
 #define NO_BALANCE_IRQ (1)
@@ -122,7 +122,13 @@
  * We use physical apicids here, not logical, so just return the default
  * physical broadcast to stop people from breaking us
  */
-static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
+static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
+{
+	return (int) 0xF;
+}
+
+static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+						  const struct cpumask *andmask)
 {
 	return (int) 0xF;
 }
diff --git a/arch/x86/include/asm/numaq/ipi.h b/arch/x86/include/asm/numaq/ipi.h
index 935588d2..a8374c6 100644
--- a/arch/x86/include/asm/numaq/ipi.h
+++ b/arch/x86/include/asm/numaq/ipi.h
@@ -1,25 +1,22 @@
 #ifndef __ASM_NUMAQ_IPI_H
 #define __ASM_NUMAQ_IPI_H
 
-void send_IPI_mask_sequence(cpumask_t, int vector);
+void send_IPI_mask_sequence(const struct cpumask *mask, int vector);
+void send_IPI_mask_allbutself(const struct cpumask *mask, int vector);
 
-static inline void send_IPI_mask(cpumask_t mask, int vector)
+static inline void send_IPI_mask(const struct cpumask *mask, int vector)
 {
 	send_IPI_mask_sequence(mask, vector);
 }
 
 static inline void send_IPI_allbutself(int vector)
 {
-	cpumask_t mask = cpu_online_map;
-	cpu_clear(smp_processor_id(), mask);
-
-	if (!cpus_empty(mask))
-		send_IPI_mask(mask, vector);
+	send_IPI_mask_allbutself(cpu_online_mask, vector);
 }
 
 static inline void send_IPI_all(int vector)
 {
-	send_IPI_mask(cpu_online_map, vector);
+	send_IPI_mask(cpu_online_mask, vector);
 }
 
 #endif /* __ASM_NUMAQ_IPI_H */
diff --git a/arch/x86/pci/pci.h b/arch/x86/include/asm/pci_x86.h
similarity index 88%
rename from arch/x86/pci/pci.h
rename to arch/x86/include/asm/pci_x86.h
index 1959018..e60fd3e 100644
--- a/arch/x86/pci/pci.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -57,7 +57,8 @@
 struct irq_info {
 	u8 bus, devfn;			/* Bus, device and function */
 	struct {
-		u8 link;		/* IRQ line ID, chipset dependent, 0=not routed */
+		u8 link;		/* IRQ line ID, chipset dependent,
+					   0 = not routed */
 		u16 bitmap;		/* Available IRQs */
 	} __attribute__((packed)) irq[4];
 	u8 slot;			/* Slot number, 0=onboard */
@@ -69,11 +70,13 @@
 	u16 version;			/* PIRQ_VERSION */
 	u16 size;			/* Table size in bytes */
 	u8 rtr_bus, rtr_devfn;		/* Where the interrupt router lies */
-	u16 exclusive_irqs;		/* IRQs devoted exclusively to PCI usage */
-	u16 rtr_vendor, rtr_device;	/* Vendor and device ID of interrupt router */
+	u16 exclusive_irqs;		/* IRQs devoted exclusively to
+					   PCI usage */
+	u16 rtr_vendor, rtr_device;	/* Vendor and device ID of
+					   interrupt router */
 	u32 miniport_data;		/* Crap */
 	u8 rfu[11];
-	u8 checksum;			/* Modulo 256 checksum must give zero */
+	u8 checksum;			/* Modulo 256 checksum must give 0 */
 	struct irq_info slots[0];
 } __attribute__((packed));
 
@@ -148,15 +151,15 @@
 
 static inline void mmio_config_writeb(void __iomem *pos, u8 val)
 {
-	asm volatile("movb %%al,(%1)" :: "a" (val), "r" (pos) : "memory");
+	asm volatile("movb %%al,(%1)" : : "a" (val), "r" (pos) : "memory");
 }
 
 static inline void mmio_config_writew(void __iomem *pos, u16 val)
 {
-	asm volatile("movw %%ax,(%1)" :: "a" (val), "r" (pos) : "memory");
+	asm volatile("movw %%ax,(%1)" : : "a" (val), "r" (pos) : "memory");
 }
 
 static inline void mmio_config_writel(void __iomem *pos, u32 val)
 {
-	asm volatile("movl %%eax,(%1)" :: "a" (val), "r" (pos) : "memory");
+	asm volatile("movl %%eax,(%1)" : : "a" (val), "r" (pos) : "memory");
 }
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index d12811c..830b9fc 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -60,7 +60,7 @@
 	void (*cpu_die)(unsigned int cpu);
 	void (*play_dead)(void);
 
-	void (*send_call_func_ipi)(cpumask_t mask);
+	void (*send_call_func_ipi)(const struct cpumask *mask);
 	void (*send_call_func_single_ipi)(int cpu);
 };
 
@@ -125,7 +125,7 @@
 
 static inline void arch_send_call_function_ipi(cpumask_t mask)
 {
-	smp_ops.send_call_func_ipi(mask);
+	smp_ops.send_call_func_ipi(&mask);
 }
 
 void cpu_disable_common(void);
@@ -138,7 +138,7 @@
 void native_play_dead(void);
 void play_dead_common(void);
 
-void native_send_call_func_ipi(cpumask_t mask);
+void native_send_call_func_ipi(const struct cpumask *mask);
 void native_send_call_func_single_ipi(int cpu);
 
 extern void prefill_possible_map(void);
diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h
index 9b3070f..99327d1 100644
--- a/arch/x86/include/asm/summit/apic.h
+++ b/arch/x86/include/asm/summit/apic.h
@@ -14,13 +14,13 @@
 
 #define APIC_DFR_VALUE	(APIC_DFR_CLUSTER)
 
-static inline cpumask_t target_cpus(void)
+static inline const cpumask_t *target_cpus(void)
 {
 	/* CPU_MASK_ALL (0xff) has undefined behaviour with
 	 * dest_LowestPrio mode logical clustered apic interrupt routing
 	 * Just start on cpu 0.  IRQ balancing will spread load
 	 */
-	return cpumask_of_cpu(0);
+	return &cpumask_of_cpu(0);
 }
 
 #define INT_DELIVERY_MODE (dest_LowestPrio)
@@ -137,14 +137,14 @@
 {
 }
 
-static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
+static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
 {
 	int num_bits_set;
 	int cpus_found = 0;
 	int cpu;
 	int apicid;
 
-	num_bits_set = cpus_weight(cpumask);
+	num_bits_set = cpus_weight(*cpumask);
 	/* Return id to all */
 	if (num_bits_set == NR_CPUS)
 		return (int) 0xFF;
@@ -152,10 +152,10 @@
 	 * The cpus in the mask must all be on the apic cluster.  If are not
 	 * on the same apicid cluster return default value of TARGET_CPUS.
 	 */
-	cpu = first_cpu(cpumask);
+	cpu = first_cpu(*cpumask);
 	apicid = cpu_to_logical_apicid(cpu);
 	while (cpus_found < num_bits_set) {
-		if (cpu_isset(cpu, cpumask)) {
+		if (cpu_isset(cpu, *cpumask)) {
 			int new_apicid = cpu_to_logical_apicid(cpu);
 			if (apicid_cluster(apicid) !=
 					apicid_cluster(new_apicid)){
@@ -170,6 +170,49 @@
 	return apicid;
 }
 
+static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask,
+						  const struct cpumask *andmask)
+{
+	int num_bits_set;
+	int cpus_found = 0;
+	int cpu;
+	int apicid = 0xFF;
+	cpumask_var_t cpumask;
+
+	if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC))
+		return (int) 0xFF;
+
+	cpumask_and(cpumask, inmask, andmask);
+	cpumask_and(cpumask, cpumask, cpu_online_mask);
+
+	num_bits_set = cpumask_weight(cpumask);
+	/* Return id to all */
+	if (num_bits_set == nr_cpu_ids)
+		goto exit;
+	/*
+	 * The cpus in the mask must all be on the apic cluster.  If they are
+	 * not on the same apicid cluster, return the default TARGET_CPUS value.
+	 */
+	cpu = cpumask_first(cpumask);
+	apicid = cpu_to_logical_apicid(cpu);
+	while (cpus_found < num_bits_set) {
+		if (cpumask_test_cpu(cpu, cpumask)) {
+			int new_apicid = cpu_to_logical_apicid(cpu);
+			if (apicid_cluster(apicid) !=
+					apicid_cluster(new_apicid)){
+				printk ("%s: Not a valid mask!\n", __func__);
+				return 0xFF;
+			}
+			apicid = apicid | new_apicid;
+			cpus_found++;
+		}
+		cpu++;
+	}
+exit:
+	free_cpumask_var(cpumask);
+	return apicid;
+}
+
 /* cpuid returns the value latched in the HW at reset, not the APIC ID
  * register's value.  For any box whose BIOS changes APIC IDs, like
  * clustered APIC systems, we must use hard_smp_processor_id.
diff --git a/arch/x86/include/asm/summit/ipi.h b/arch/x86/include/asm/summit/ipi.h
index 53bd1e7..a8a2c24 100644
--- a/arch/x86/include/asm/summit/ipi.h
+++ b/arch/x86/include/asm/summit/ipi.h
@@ -1,9 +1,10 @@
 #ifndef __ASM_SUMMIT_IPI_H
 #define __ASM_SUMMIT_IPI_H
 
-void send_IPI_mask_sequence(cpumask_t mask, int vector);
+void send_IPI_mask_sequence(const cpumask_t *mask, int vector);
+void send_IPI_mask_allbutself(const cpumask_t *mask, int vector);
 
-static inline void send_IPI_mask(cpumask_t mask, int vector)
+static inline void send_IPI_mask(const cpumask_t *mask, int vector)
 {
 	send_IPI_mask_sequence(mask, vector);
 }
@@ -14,12 +15,12 @@
 	cpu_clear(smp_processor_id(), mask);
 
 	if (!cpus_empty(mask))
-		send_IPI_mask(mask, vector);
+		send_IPI_mask(&mask, vector);
 }
 
 static inline void send_IPI_all(int vector)
 {
-	send_IPI_mask(cpu_online_map, vector);
+	send_IPI_mask(&cpu_online_map, vector);
 }
 
 #endif /* __ASM_SUMMIT_IPI_H */
diff --git a/arch/x86/kvm/svm.h b/arch/x86/include/asm/svm.h
similarity index 100%
rename from arch/x86/kvm/svm.h
rename to arch/x86/include/asm/svm.h
diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h
new file mode 100644
index 0000000..ffb08be
--- /dev/null
+++ b/arch/x86/include/asm/sys_ia32.h
@@ -0,0 +1,101 @@
+/*
+ * sys_ia32.h - Linux ia32 syscall interfaces
+ *
+ * Copyright (c) 2008 Jaswinder Singh Rajput
+ *
+ * This file is released under the GPLv2.
+ * See the file COPYING for more details.
+ */
+
+#ifndef _ASM_X86_SYS_IA32_H
+#define _ASM_X86_SYS_IA32_H
+
+#include <linux/compiler.h>
+#include <linux/linkage.h>
+#include <linux/types.h>
+#include <linux/signal.h>
+#include <asm/compat.h>
+#include <asm/ia32.h>
+
+/* ia32/sys_ia32.c */
+asmlinkage long sys32_truncate64(char __user *, unsigned long, unsigned long);
+asmlinkage long sys32_ftruncate64(unsigned int, unsigned long, unsigned long);
+
+asmlinkage long sys32_stat64(char __user *, struct stat64 __user *);
+asmlinkage long sys32_lstat64(char __user *, struct stat64 __user *);
+asmlinkage long sys32_fstat64(unsigned int, struct stat64 __user *);
+asmlinkage long sys32_fstatat(unsigned int, char __user *,
+			      struct stat64 __user *, int);
+struct mmap_arg_struct;
+asmlinkage long sys32_mmap(struct mmap_arg_struct __user *);
+asmlinkage long sys32_mprotect(unsigned long, size_t, unsigned long);
+
+asmlinkage long sys32_pipe(int __user *);
+struct sigaction32;
+struct old_sigaction32;
+asmlinkage long sys32_rt_sigaction(int, struct sigaction32 __user *,
+				   struct sigaction32 __user *, unsigned int);
+asmlinkage long sys32_sigaction(int, struct old_sigaction32 __user *,
+				struct old_sigaction32 __user *);
+asmlinkage long sys32_rt_sigprocmask(int, compat_sigset_t __user *,
+				     compat_sigset_t __user *, unsigned int);
+asmlinkage long sys32_alarm(unsigned int);
+
+struct sel_arg_struct;
+asmlinkage long sys32_old_select(struct sel_arg_struct __user *);
+asmlinkage long sys32_waitpid(compat_pid_t, unsigned int *, int);
+asmlinkage long sys32_sysfs(int, u32, u32);
+
+asmlinkage long sys32_sched_rr_get_interval(compat_pid_t,
+					    struct compat_timespec __user *);
+asmlinkage long sys32_rt_sigpending(compat_sigset_t __user *, compat_size_t);
+asmlinkage long sys32_rt_sigqueueinfo(int, int, compat_siginfo_t __user *);
+
+#ifdef CONFIG_SYSCTL_SYSCALL
+struct sysctl_ia32;
+asmlinkage long sys32_sysctl(struct sysctl_ia32 __user *);
+#endif
+
+asmlinkage long sys32_pread(unsigned int, char __user *, u32, u32, u32);
+asmlinkage long sys32_pwrite(unsigned int, char __user *, u32, u32, u32);
+
+asmlinkage long sys32_personality(unsigned long);
+asmlinkage long sys32_sendfile(int, int, compat_off_t __user *, s32);
+
+asmlinkage long sys32_mmap2(unsigned long, unsigned long, unsigned long,
+			    unsigned long, unsigned long, unsigned long);
+
+struct oldold_utsname;
+struct old_utsname;
+asmlinkage long sys32_olduname(struct oldold_utsname __user *);
+long sys32_uname(struct old_utsname __user *);
+
+long sys32_ustat(unsigned, struct ustat32 __user *);
+
+asmlinkage long sys32_execve(char __user *, compat_uptr_t __user *,
+			     compat_uptr_t __user *, struct pt_regs *);
+asmlinkage long sys32_clone(unsigned int, unsigned int, struct pt_regs *);
+
+long sys32_lseek(unsigned int, int, unsigned int);
+long sys32_kill(int, int);
+long sys32_fadvise64_64(int, __u32, __u32, __u32, __u32, int);
+long sys32_vm86_warning(void);
+long sys32_lookup_dcookie(u32, u32, char __user *, size_t);
+
+asmlinkage ssize_t sys32_readahead(int, unsigned, unsigned, size_t);
+asmlinkage long sys32_sync_file_range(int, unsigned, unsigned,
+				      unsigned, unsigned, int);
+asmlinkage long sys32_fadvise64(int, unsigned, unsigned, size_t, int);
+asmlinkage long sys32_fallocate(int, int, unsigned,
+				unsigned, unsigned, unsigned);
+
+/* ia32/ia32_signal.c */
+asmlinkage long sys32_sigsuspend(int, int, old_sigset_t);
+asmlinkage long sys32_sigaltstack(const stack_ia32_t __user *,
+				  stack_ia32_t __user *, struct pt_regs *);
+asmlinkage long sys32_sigreturn(struct pt_regs *);
+asmlinkage long sys32_rt_sigreturn(struct pt_regs *);
+
+/* ia32/ipc32.c */
+asmlinkage long sys32_ipc(u32, int, int, int, compat_uptr_t, u32);
+#endif /* _ASM_X86_SYS_IA32_H */
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index ff386ff..79e31e9 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -226,6 +226,8 @@
 #define topology_core_id(cpu)			(cpu_data(cpu).cpu_core_id)
 #define topology_core_siblings(cpu)		(per_cpu(cpu_core_map, cpu))
 #define topology_thread_siblings(cpu)		(per_cpu(cpu_sibling_map, cpu))
+#define topology_core_cpumask(cpu)		(&per_cpu(cpu_core_map, cpu))
+#define topology_thread_cpumask(cpu)		(&per_cpu(cpu_sibling_map, cpu))
 
 /* indicates that pointers to the topology cpumask_t maps are valid */
 #define arch_provides_topology_pointers		yes
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index e236325..50423c7 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -133,61 +133,61 @@
  * see table 4.2.3.0.1 in broadcast_assist spec.
  */
 struct bau_msg_header {
-	int dest_subnodeid:6;	/* must be zero */
+	unsigned int dest_subnodeid:6;	/* must be zero */
 	/* bits 5:0 */
-	int base_dest_nodeid:15; /* nasid>>1 (pnode) of first bit in node_map */
-	/* bits 20:6 */
-	int command:8;		/* message type */
+	unsigned int base_dest_nodeid:15; /* nasid>>1 (pnode) of */
+	/* bits 20:6 */			  /* first bit in node_map */
+	unsigned int command:8;	/* message type */
 	/* bits 28:21 */
 				/* 0x38: SN3net EndPoint Message */
-	int rsvd_1:3;		/* must be zero */
+	unsigned int rsvd_1:3;	/* must be zero */
 	/* bits 31:29 */
 				/* int will align on 32 bits */
-	int rsvd_2:9;		/* must be zero */
+	unsigned int rsvd_2:9;	/* must be zero */
 	/* bits 40:32 */
 				/* Suppl_A is 56-41 */
-	int payload_2a:8;	/* becomes byte 16 of msg */
+	unsigned int payload_2a:8;/* becomes byte 16 of msg */
 	/* bits 48:41 */	/* not currently using */
-	int payload_2b:8;	/* becomes byte 17 of msg */
+	unsigned int payload_2b:8;/* becomes byte 17 of msg */
 	/* bits 56:49 */	/* not currently using */
 				/* Address field (96:57) is never used as an
 				   address (these are address bits 42:3) */
-	int rsvd_3:1;		/* must be zero */
+	unsigned int rsvd_3:1;	/* must be zero */
 	/* bit 57 */
 				/* address bits 27:4 are payload */
 				/* these 24 bits become bytes 12-14 of msg */
-	int replied_to:1;	/* sent as 0 by the source to byte 12 */
+	unsigned int replied_to:1;/* sent as 0 by the source to byte 12 */
 	/* bit 58 */
 
-	int payload_1a:5;	/* not currently used */
+	unsigned int payload_1a:5;/* not currently used */
 	/* bits 63:59 */
-	int payload_1b:8;	/* not currently used */
+	unsigned int payload_1b:8;/* not currently used */
 	/* bits 71:64 */
-	int payload_1c:8;	/* not currently used */
+	unsigned int payload_1c:8;/* not currently used */
 	/* bits 79:72 */
-	int payload_1d:2;	/* not currently used */
+	unsigned int payload_1d:2;/* not currently used */
 	/* bits 81:80 */
 
-	int rsvd_4:7;		/* must be zero */
+	unsigned int rsvd_4:7;	/* must be zero */
 	/* bits 88:82 */
-	int sw_ack_flag:1;	/* software acknowledge flag */
+	unsigned int sw_ack_flag:1;/* software acknowledge flag */
 	/* bit 89 */
 				/* INTD transactions at destination are to
 				   wait for software acknowledge */
-	int rsvd_5:6;		/* must be zero */
+	unsigned int rsvd_5:6;	/* must be zero */
 	/* bits 95:90 */
-	int rsvd_6:5;		/* must be zero */
+	unsigned int rsvd_6:5;	/* must be zero */
 	/* bits 100:96 */
-	int int_both:1;		/* if 1, interrupt both sockets on the blade */
+	unsigned int int_both:1;/* if 1, interrupt both sockets on the blade */
 	/* bit 101*/
-	int fairness:3;		/* usually zero */
+	unsigned int fairness:3;/* usually zero */
 	/* bits 104:102 */
-	int multilevel:1;	/* multi-level multicast format */
+	unsigned int multilevel:1;	/* multi-level multicast format */
 	/* bit 105 */
 				/* 0 for TLB: endpoint multi-unicast messages */
-	int chaining:1;		/* next descriptor is part of this activation*/
+	unsigned int chaining:1;/* next descriptor is part of this activation*/
 	/* bit 106 */
-	int rsvd_7:21;		/* must be zero */
+	unsigned int rsvd_7:21;	/* must be zero */
 	/* bits 127:107 */
 };
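
The switch from int to unsigned int bitfields matters because a signed bitfield sign-extends on read (and the signedness of a plain int bitfield is implementation-defined), which is unwanted for fields that mirror a hardware message layout. A standalone illustration:

#include <stdio.h>

struct demo {
	signed int   s:3;	/* 3-bit signed field */
	unsigned int u:3;	/* 3-bit unsigned field */
};

int main(void)
{
	/* both fields hold the bit pattern 111 */
	struct demo d = { .s = -1, .u = 7 };

	printf("signed   111 reads back as %d\n", d.s);			/* -1 */
	printf("unsigned 111 reads back as %u\n", (unsigned int)d.u);	/*  7 */
	return 0;
}
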
 
diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h
new file mode 100644
index 0000000..5936362
--- /dev/null
+++ b/arch/x86/include/asm/virtext.h
@@ -0,0 +1,132 @@
+/* CPU virtualization extensions handling
+ *
+ * This should carry the code for handling CPU virtualization extensions
+ * that needs to live in the kernel core.
+ *
+ * Author: Eduardo Habkost <ehabkost@redhat.com>
+ *
+ * Copyright (C) 2008, Red Hat Inc.
+ *
+ * Contains code from KVM, Copyright (C) 2006 Qumranet, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+#ifndef _ASM_X86_VIRTEX_H
+#define _ASM_X86_VIRTEX_H
+
+#include <asm/processor.h>
+#include <asm/system.h>
+
+#include <asm/vmx.h>
+#include <asm/svm.h>
+
+/*
+ * VMX functions:
+ */
+
+static inline int cpu_has_vmx(void)
+{
+	unsigned long ecx = cpuid_ecx(1);
+	return test_bit(5, &ecx); /* CPUID.1:ECX.VMX[bit 5] -> VT */
+}
+
+
+/** Disable VMX on the current CPU
+ *
+ * vmxoff causes an undefined-opcode exception if vmxon was not run
+ * on the CPU previously. Only call this function if you know VMX
+ * is enabled.
+ */
+static inline void cpu_vmxoff(void)
+{
+	asm volatile (ASM_VMX_VMXOFF : : : "cc");
+	write_cr4(read_cr4() & ~X86_CR4_VMXE);
+}
+
+static inline int cpu_vmx_enabled(void)
+{
+	return read_cr4() & X86_CR4_VMXE;
+}
+
+/** Disable VMX if it is enabled on the current CPU
+ *
+ * You shouldn't call this if cpu_has_vmx() returns 0.
+ */
+static inline void __cpu_emergency_vmxoff(void)
+{
+	if (cpu_vmx_enabled())
+		cpu_vmxoff();
+}
+
+/** Disable VMX if it is supported and enabled on the current CPU
+ */
+static inline void cpu_emergency_vmxoff(void)
+{
+	if (cpu_has_vmx())
+		__cpu_emergency_vmxoff();
+}
+
+
+
+
+/*
+ * SVM functions:
+ */
+
+/** Check if the CPU has SVM support
+ *
+ * You can use the 'msg' arg to get a message describing the problem,
+ * if the function returns zero. Simply pass NULL if you are not interested
+ * in the messages; gcc should take care of not generating code for
+ * the messages in this case.
+ */
+static inline int cpu_has_svm(const char **msg)
+{
+	uint32_t eax, ebx, ecx, edx;
+
+	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
+		if (msg)
+			*msg = "not amd";
+		return 0;
+	}
+
+	cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
+	if (eax < SVM_CPUID_FUNC) {
+		if (msg)
+			*msg = "can't execute cpuid_8000000a";
+		return 0;
+	}
+
+	cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
+	if (!(ecx & (1 << SVM_CPUID_FEATURE_SHIFT))) {
+		if (msg)
+			*msg = "svm not available";
+		return 0;
+	}
+	return 1;
+}
+
+
+/** Disable SVM on the current CPU
+ *
+ * You should call this only if cpu_has_svm() returned true.
+ */
+static inline void cpu_svm_disable(void)
+{
+	uint64_t efer;
+
+	wrmsrl(MSR_VM_HSAVE_PA, 0);
+	rdmsrl(MSR_EFER, efer);
+	wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK);
+}
+
+/** Makes sure SVM is disabled, if it is supported on the CPU
+ */
+static inline void cpu_emergency_svm_disable(void)
+{
+	if (cpu_has_svm(NULL))
+		cpu_svm_disable();
+}
+
+#endif /* _ASM_X86_VIRTEX_H */
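
A hedged usage sketch (not part of the patch): a core-kernel path that has to hand the CPU over in a clean state, such as an emergency reboot or crash path, could combine the two helpers above. demo_disable_virt is an illustrative name.

#include <asm/virtext.h>

static void demo_disable_virt(void)
{
	/* Each helper checks support/enablement itself before acting. */
	cpu_emergency_vmxoff();
	cpu_emergency_svm_disable();
}
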
diff --git a/arch/x86/kvm/vmx.h b/arch/x86/include/asm/vmx.h
similarity index 92%
rename from arch/x86/kvm/vmx.h
rename to arch/x86/include/asm/vmx.h
index ec5edc3..d0238e6 100644
--- a/arch/x86/kvm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -63,10 +63,13 @@
 
 #define VM_EXIT_HOST_ADDR_SPACE_SIZE            0x00000200
 #define VM_EXIT_ACK_INTR_ON_EXIT                0x00008000
+#define VM_EXIT_SAVE_IA32_PAT			0x00040000
+#define VM_EXIT_LOAD_IA32_PAT			0x00080000
 
 #define VM_ENTRY_IA32E_MODE                     0x00000200
 #define VM_ENTRY_SMM                            0x00000400
 #define VM_ENTRY_DEACT_DUAL_MONITOR             0x00000800
+#define VM_ENTRY_LOAD_IA32_PAT			0x00004000
 
 /* VMCS Encodings */
 enum vmcs_field {
@@ -112,6 +115,8 @@
 	VMCS_LINK_POINTER_HIGH          = 0x00002801,
 	GUEST_IA32_DEBUGCTL             = 0x00002802,
 	GUEST_IA32_DEBUGCTL_HIGH        = 0x00002803,
+	GUEST_IA32_PAT			= 0x00002804,
+	GUEST_IA32_PAT_HIGH		= 0x00002805,
 	GUEST_PDPTR0                    = 0x0000280a,
 	GUEST_PDPTR0_HIGH               = 0x0000280b,
 	GUEST_PDPTR1                    = 0x0000280c,
@@ -120,6 +125,8 @@
 	GUEST_PDPTR2_HIGH               = 0x0000280f,
 	GUEST_PDPTR3                    = 0x00002810,
 	GUEST_PDPTR3_HIGH               = 0x00002811,
+	HOST_IA32_PAT			= 0x00002c00,
+	HOST_IA32_PAT_HIGH		= 0x00002c01,
 	PIN_BASED_VM_EXEC_CONTROL       = 0x00004000,
 	CPU_BASED_VM_EXEC_CONTROL       = 0x00004002,
 	EXCEPTION_BITMAP                = 0x00004004,
@@ -331,8 +338,9 @@
 
 #define AR_RESERVD_MASK 0xfffe0f00
 
-#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT	9
-#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT	10
+#define TSS_PRIVATE_MEMSLOT			(KVM_MEMORY_SLOTS + 0)
+#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT	(KVM_MEMORY_SLOTS + 1)
+#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT	(KVM_MEMORY_SLOTS + 2)
 
 #define VMX_NR_VPIDS				(1 << 16)
 #define VMX_VPID_EXTENT_SINGLE_CONTEXT		1
@@ -356,4 +364,19 @@
 
 #define VMX_EPT_IDENTITY_PAGETABLE_ADDR		0xfffbc000ul
 
+
+#define ASM_VMX_VMCLEAR_RAX       ".byte 0x66, 0x0f, 0xc7, 0x30"
+#define ASM_VMX_VMLAUNCH          ".byte 0x0f, 0x01, 0xc2"
+#define ASM_VMX_VMRESUME          ".byte 0x0f, 0x01, 0xc3"
+#define ASM_VMX_VMPTRLD_RAX       ".byte 0x0f, 0xc7, 0x30"
+#define ASM_VMX_VMREAD_RDX_RAX    ".byte 0x0f, 0x78, 0xd0"
+#define ASM_VMX_VMWRITE_RAX_RDX   ".byte 0x0f, 0x79, 0xd0"
+#define ASM_VMX_VMWRITE_RSP_RDX   ".byte 0x0f, 0x79, 0xd4"
+#define ASM_VMX_VMXOFF            ".byte 0x0f, 0x01, 0xc4"
+#define ASM_VMX_VMXON_RAX         ".byte 0xf3, 0x0f, 0xc7, 0x30"
+#define ASM_VMX_INVEPT		  ".byte 0x66, 0x0f, 0x38, 0x80, 0x08"
+#define ASM_VMX_INVVPID		  ".byte 0x66, 0x0f, 0x38, 0x81, 0x08"
+
+
+
 #endif
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 2e2da71..658e29e 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -1296,7 +1296,7 @@
  * we don't need to preallocate the protection domains anymore.
  * For now we have to.
  */
-void prealloc_protection_domains(void)
+static void prealloc_protection_domains(void)
 {
 	struct pci_dev *dev = NULL;
 	struct dma_ops_domain *dma_dom;
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index c625800..fb85e8d 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -243,7 +243,7 @@
 }
 
 /* Function to enable the hardware */
-void __init iommu_enable(struct amd_iommu *iommu)
+static void __init iommu_enable(struct amd_iommu *iommu)
 {
 	printk(KERN_INFO "AMD IOMMU: Enabling IOMMU "
 	       "at %02x:%02x.%x cap 0x%hx\n",
@@ -256,7 +256,7 @@
 }
 
 /* Function to enable IOMMU event logging and event interrupts */
-void __init iommu_enable_event_logging(struct amd_iommu *iommu)
+static void __init iommu_enable_event_logging(struct amd_iommu *iommu)
 {
 	iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
 	iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index b5229af..d652515 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -98,8 +98,8 @@
 #ifdef HAVE_X2APIC
 int x2apic;
 /* x2apic enabled before OS handover */
-int x2apic_preenabled;
-int disable_x2apic;
+static int x2apic_preenabled;
+static int disable_x2apic;
 static __init int setup_nox2apic(char *str)
 {
 	disable_x2apic = 1;
@@ -119,8 +119,6 @@
 
 int first_system_vector = 0xfe;
 
-char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE};
-
 /*
  * Debug level, exported for io_apic.c
  */
@@ -142,7 +140,7 @@
 			    struct clock_event_device *evt);
 static void lapic_timer_setup(enum clock_event_mode mode,
 			      struct clock_event_device *evt);
-static void lapic_timer_broadcast(cpumask_t mask);
+static void lapic_timer_broadcast(const cpumask_t *mask);
 static void apic_pm_activate(void);
 
 /*
@@ -228,7 +226,7 @@
 	apic_write(APIC_ICR, low);
 }
 
-u64 xapic_icr_read(void)
+static u64 xapic_icr_read(void)
 {
 	u32 icr1, icr2;
 
@@ -268,7 +266,7 @@
 	wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low);
 }
 
-u64 x2apic_icr_read(void)
+static u64 x2apic_icr_read(void)
 {
 	unsigned long val;
 
@@ -455,7 +453,7 @@
 /*
  * Local APIC timer broadcast function
  */
-static void lapic_timer_broadcast(cpumask_t mask)
+static void lapic_timer_broadcast(const cpumask_t *mask)
 {
 #ifdef CONFIG_SMP
 	send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
@@ -471,7 +469,7 @@
 	struct clock_event_device *levt = &__get_cpu_var(lapic_events);
 
 	memcpy(levt, &lapic_clockevent, sizeof(*levt));
-	levt->cpumask = cpumask_of_cpu(smp_processor_id());
+	levt->cpumask = cpumask_of(smp_processor_id());
 
 	clockevents_register_device(levt);
 }
@@ -1807,28 +1805,32 @@
 void __cpuinit generic_processor_info(int apicid, int version)
 {
 	int cpu;
-	cpumask_t tmp_map;
 
 	/*
 	 * Validate version
 	 */
 	if (version == 0x0) {
 		pr_warning("BIOS bug, APIC version is 0 for CPU#%d! "
-			"fixing up to 0x10. (tell your hw vendor)\n",
-			version);
+			   "fixing up to 0x10. (tell your hw vendor)\n",
+				version);
 		version = 0x10;
 	}
 	apic_version[apicid] = version;
 
-	if (num_processors >= NR_CPUS) {
-		pr_warning("WARNING: NR_CPUS limit of %i reached."
-			"  Processor ignored.\n", NR_CPUS);
+	if (num_processors >= nr_cpu_ids) {
+		int max = nr_cpu_ids;
+		int thiscpu = max + disabled_cpus;
+
+		pr_warning(
+			"ACPI: NR_CPUS/possible_cpus limit of %i reached."
+			"  Processor %d/0x%x ignored.\n", max, thiscpu, apicid);
+
+		disabled_cpus++;
 		return;
 	}
 
 	num_processors++;
-	cpus_complement(tmp_map, cpu_present_map);
-	cpu = first_cpu(tmp_map);
+	cpu = cpumask_next_zero(-1, cpu_present_mask);
 
 	physid_set(apicid, phys_cpu_present_map);
 	if (apicid == boot_cpu_physical_apicid) {
@@ -1878,8 +1880,8 @@
 	}
 #endif
 
-	cpu_set(cpu, cpu_possible_map);
-	cpu_set(cpu, cpu_present_map);
+	set_cpu_possible(cpu, true);
+	set_cpu_present(cpu, true);
 }
 
 #ifdef CONFIG_X86_64
@@ -2081,7 +2083,7 @@
 	bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
 	bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
 
-	for (i = 0; i < NR_CPUS; i++) {
+	for (i = 0; i < nr_cpu_ids; i++) {
 		/* are we being called early in kernel startup? */
 		if (bios_cpu_apicid) {
 			id = bios_cpu_apicid[i];
diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c
index 2a0a2a3..f638827 100644
--- a/arch/x86/kernel/bios_uv.c
+++ b/arch/x86/kernel/bios_uv.c
@@ -25,7 +25,7 @@
 #include <asm/uv/bios.h>
 #include <asm/uv/uv_hub.h>
 
-struct uv_systab uv_systab;
+static struct uv_systab uv_systab;
 
 s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5)
 {
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 68b5d86..c6ecda6 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -534,12 +534,29 @@
 	per_cpu(cpuid4_info, cpu) = NULL;
 }
 
+static void get_cpu_leaves(void *_retval)
+{
+	int j, *retval = _retval, cpu = smp_processor_id();
+
+	/* Do cpuid and store the results */
+	for (j = 0; j < num_cache_leaves; j++) {
+		struct _cpuid4_info *this_leaf;
+		this_leaf = CPUID4_INFO_IDX(cpu, j);
+		*retval = cpuid4_cache_lookup(j, this_leaf);
+		if (unlikely(*retval < 0)) {
+			int i;
+
+			for (i = 0; i < j; i++)
+				cache_remove_shared_cpu_map(cpu, i);
+			break;
+		}
+		cache_shared_cpu_map_setup(cpu, j);
+	}
+}
+
 static int __cpuinit detect_cache_attributes(unsigned int cpu)
 {
-	struct _cpuid4_info	*this_leaf;
-	unsigned long		j;
 	int			retval;
-	cpumask_t		oldmask;
 
 	if (num_cache_leaves == 0)
 		return -ENOENT;
@@ -549,27 +566,7 @@
 	if (per_cpu(cpuid4_info, cpu) == NULL)
 		return -ENOMEM;
 
-	oldmask = current->cpus_allowed;
-	retval = set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
-	if (retval)
-		goto out;
-
-	/* Do cpuid and store the results */
-	for (j = 0; j < num_cache_leaves; j++) {
-		this_leaf = CPUID4_INFO_IDX(cpu, j);
-		retval = cpuid4_cache_lookup(j, this_leaf);
-		if (unlikely(retval < 0)) {
-			int i;
-
-			for (i = 0; i < j; i++)
-				cache_remove_shared_cpu_map(cpu, i);
-			break;
-		}
-		cache_shared_cpu_map_setup(cpu, j);
-	}
-	set_cpus_allowed_ptr(current, &oldmask);
-
-out:
+	smp_call_function_single(cpu, get_cpu_leaves, &retval, true);
 	if (retval) {
 		kfree(per_cpu(cpuid4_info, cpu));
 		per_cpu(cpuid4_info, cpu) = NULL;
@@ -626,8 +623,8 @@
 		cpumask_t *mask = &this_leaf->shared_cpu_map;
 
 		n = type?
-			cpulist_scnprintf(buf, len-2, *mask):
-			cpumask_scnprintf(buf, len-2, *mask);
+			cpulist_scnprintf(buf, len-2, mask) :
+			cpumask_scnprintf(buf, len-2, mask);
 		buf[n++] = '\n';
 		buf[n] = '\0';
 	}
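
A minimal sketch (illustrative only) of the smp_call_function_single() pattern that replaces the set_cpus_allowed_ptr() affinity dance above: run a short function on the target CPU and pass the result back through the info pointer. The demo_ names are hypothetical.

#include <linux/smp.h>

static void demo_read_id(void *info)
{
	/* runs on the cpu passed to smp_call_function_single() */
	*(int *)info = smp_processor_id();
}

static int demo_cpu_id(int cpu)
{
	int id = -1;

	smp_call_function_single(cpu, demo_read_id, &id, 1 /* wait */);
	return id;
}
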
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
index 748c8f9..a5a5e05 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
@@ -83,34 +83,41 @@
  * CPU Initialization
  */
 
+struct thresh_restart {
+	struct threshold_block *b;
+	int reset;
+	u16 old_limit;
+};
+
 /* must be called with correct cpu affinity */
-static void threshold_restart_bank(struct threshold_block *b,
-				   int reset, u16 old_limit)
+static long threshold_restart_bank(void *_tr)
 {
+	struct thresh_restart *tr = _tr;
 	u32 mci_misc_hi, mci_misc_lo;
 
-	rdmsr(b->address, mci_misc_lo, mci_misc_hi);
+	rdmsr(tr->b->address, mci_misc_lo, mci_misc_hi);
 
-	if (b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX))
-		reset = 1;	/* limit cannot be lower than err count */
+	if (tr->b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX))
+		tr->reset = 1;	/* limit cannot be lower than err count */
 
-	if (reset) {		/* reset err count and overflow bit */
+	if (tr->reset) {		/* reset err count and overflow bit */
 		mci_misc_hi =
 		    (mci_misc_hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) |
-		    (THRESHOLD_MAX - b->threshold_limit);
-	} else if (old_limit) {	/* change limit w/o reset */
+		    (THRESHOLD_MAX - tr->b->threshold_limit);
+	} else if (tr->old_limit) {	/* change limit w/o reset */
 		int new_count = (mci_misc_hi & THRESHOLD_MAX) +
-		    (old_limit - b->threshold_limit);
+		    (tr->old_limit - tr->b->threshold_limit);
 		mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) |
 		    (new_count & THRESHOLD_MAX);
 	}
 
-	b->interrupt_enable ?
+	tr->b->interrupt_enable ?
 	    (mci_misc_hi = (mci_misc_hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) :
 	    (mci_misc_hi &= ~MASK_INT_TYPE_HI);
 
 	mci_misc_hi |= MASK_COUNT_EN_HI;
-	wrmsr(b->address, mci_misc_lo, mci_misc_hi);
+	wrmsr(tr->b->address, mci_misc_lo, mci_misc_hi);
+	return 0;
 }
 
 /* cpu init entry point, called from mce.c with preempt off */
@@ -120,6 +127,7 @@
 	unsigned int cpu = smp_processor_id();
 	u8 lvt_off;
 	u32 low = 0, high = 0, address = 0;
+	struct thresh_restart tr;
 
 	for (bank = 0; bank < NR_BANKS; ++bank) {
 		for (block = 0; block < NR_BLOCKS; ++block) {
@@ -162,7 +170,10 @@
 			wrmsr(address, low, high);
 
 			threshold_defaults.address = address;
-			threshold_restart_bank(&threshold_defaults, 0, 0);
+			tr.b = &threshold_defaults;
+			tr.reset = 0;
+			tr.old_limit = 0;
+			threshold_restart_bank(&tr);
 		}
 	}
 }
@@ -251,20 +262,6 @@
 	ssize_t(*store) (struct threshold_block *, const char *, size_t count);
 };
 
-static void affinity_set(unsigned int cpu, cpumask_t *oldmask,
-					   cpumask_t *newmask)
-{
-	*oldmask = current->cpus_allowed;
-	cpus_clear(*newmask);
-	cpu_set(cpu, *newmask);
-	set_cpus_allowed_ptr(current, newmask);
-}
-
-static void affinity_restore(const cpumask_t *oldmask)
-{
-	set_cpus_allowed_ptr(current, oldmask);
-}
-
 #define SHOW_FIELDS(name)                                           \
 static ssize_t show_ ## name(struct threshold_block * b, char *buf) \
 {                                                                   \
@@ -277,15 +274,16 @@
 				      const char *buf, size_t count)
 {
 	char *end;
-	cpumask_t oldmask, newmask;
+	struct thresh_restart tr;
 	unsigned long new = simple_strtoul(buf, &end, 0);
 	if (end == buf)
 		return -EINVAL;
 	b->interrupt_enable = !!new;
 
-	affinity_set(b->cpu, &oldmask, &newmask);
-	threshold_restart_bank(b, 0, 0);
-	affinity_restore(&oldmask);
+	tr.b = b;
+	tr.reset = 0;
+	tr.old_limit = 0;
+	work_on_cpu(b->cpu, threshold_restart_bank, &tr);
 
 	return end - buf;
 }
@@ -294,8 +292,7 @@
 				     const char *buf, size_t count)
 {
 	char *end;
-	cpumask_t oldmask, newmask;
-	u16 old;
+	struct thresh_restart tr;
 	unsigned long new = simple_strtoul(buf, &end, 0);
 	if (end == buf)
 		return -EINVAL;
@@ -303,34 +300,36 @@
 		new = THRESHOLD_MAX;
 	if (new < 1)
 		new = 1;
-	old = b->threshold_limit;
+	tr.old_limit = b->threshold_limit;
 	b->threshold_limit = new;
+	tr.b = b;
+	tr.reset = 0;
 
-	affinity_set(b->cpu, &oldmask, &newmask);
-	threshold_restart_bank(b, 0, old);
-	affinity_restore(&oldmask);
+	work_on_cpu(b->cpu, threshold_restart_bank, &tr);
 
 	return end - buf;
 }
 
+static long local_error_count(void *_b)
+{
+	struct threshold_block *b = _b;
+	u32 low, high;
+
+	rdmsr(b->address, low, high);
+	return (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit);
+}
+
 static ssize_t show_error_count(struct threshold_block *b, char *buf)
 {
-	u32 high, low;
-	cpumask_t oldmask, newmask;
-	affinity_set(b->cpu, &oldmask, &newmask);
-	rdmsr(b->address, low, high);
-	affinity_restore(&oldmask);
-	return sprintf(buf, "%x\n",
-		       (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit));
+	return sprintf(buf, "%lx\n", work_on_cpu(b->cpu, local_error_count, b));
 }
 
 static ssize_t store_error_count(struct threshold_block *b,
 				 const char *buf, size_t count)
 {
-	cpumask_t oldmask, newmask;
-	affinity_set(b->cpu, &oldmask, &newmask);
-	threshold_restart_bank(b, 1, 0);
-	affinity_restore(&oldmask);
+	struct thresh_restart tr = { .b = b, .reset = 1, .old_limit = 0 };
+
+	work_on_cpu(b->cpu, threshold_restart_bank, &tr);
 	return 1;
 }
 
@@ -463,12 +462,19 @@
 	return err;
 }
 
+static long local_allocate_threshold_blocks(void *_bank)
+{
+	unsigned int *bank = _bank;
+
+	return allocate_threshold_blocks(smp_processor_id(), *bank, 0,
+					 MSR_IA32_MC0_MISC + *bank * 4);
+}
+
 /* symlinks sibling shared banks to first core.  first core owns dir/files. */
 static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
 {
 	int i, err = 0;
 	struct threshold_bank *b = NULL;
-	cpumask_t oldmask, newmask;
 	char name[32];
 
 	sprintf(name, "threshold_bank%i", bank);
@@ -519,11 +525,7 @@
 
 	per_cpu(threshold_banks, cpu)[bank] = b;
 
-	affinity_set(cpu, &oldmask, &newmask);
-	err = allocate_threshold_blocks(cpu, bank, 0,
-					MSR_IA32_MC0_MISC + bank * 4);
-	affinity_restore(&oldmask);
-
+	err = work_on_cpu(cpu, local_allocate_threshold_blocks, &bank);
 	if (err)
 		goto out_free;
 
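The mce_amd_64.c conversion replaces affinity_set()/affinity_restore() with work_on_cpu(), which queues the callback on a workqueue bound to the target CPU and returns its long result; unlike smp_call_function_single(), the callback runs in process context and may sleep. A sketch of the calling convention (the struct and helper names are illustrative assumptions, not from the patch):

	#include <linux/workqueue.h>
	#include <asm/msr.h>

	struct msr_request {
		u32 msr;
		u64 value;
	};

	/* Runs in process context on the CPU passed to work_on_cpu(). */
	static long read_msr_local(void *_req)
	{
		struct msr_request *req = _req;

		rdmsrl(req->msr, req->value);
		return 0;
	}

	static u64 read_msr_on(unsigned int cpu, u32 msr)
	{
		struct msr_request req = { .msr = msr };

		work_on_cpu(cpu, read_msr_local, &req);
		return req.value;
	}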
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 4e8d77f..b59ddcc 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -14,14 +14,6 @@
 #include <asm/pat.h>
 #include "mtrr.h"
 
-struct mtrr_state {
-	struct mtrr_var_range var_ranges[MAX_VAR_RANGES];
-	mtrr_type fixed_ranges[NUM_FIXED_RANGES];
-	unsigned char enabled;
-	unsigned char have_fixed;
-	mtrr_type def_type;
-};
-
 struct fixed_range_block {
 	int base_msr; /* start address of an MTRR block */
 	int ranges;   /* number of MTRRs in this block  */
@@ -35,10 +27,12 @@
 };
 
 static unsigned long smp_changes_mask;
-static struct mtrr_state mtrr_state = {};
 static int mtrr_state_set;
 u64 mtrr_tom2;
 
+struct mtrr_state_type mtrr_state = {};
+EXPORT_SYMBOL_GPL(mtrr_state);
+
 #undef MODULE_PARAM_PREFIX
 #define MODULE_PARAM_PREFIX "mtrr."
 
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 1159e26..d259e5d 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -49,7 +49,7 @@
 
 u32 num_var_ranges = 0;
 
-unsigned int mtrr_usage_table[MAX_VAR_RANGES];
+unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
 static DEFINE_MUTEX(mtrr_mutex);
 
 u64 size_or_mask, size_and_mask;
@@ -574,7 +574,7 @@
 	unsigned long	lsize;
 };
 
-static struct mtrr_value mtrr_state[MAX_VAR_RANGES];
+static struct mtrr_value mtrr_state[MTRR_MAX_VAR_RANGES];
 
 static int mtrr_save(struct sys_device * sysdev, pm_message_t state)
 {
@@ -824,16 +824,14 @@
 
 static int __init disable_mtrr_cleanup_setup(char *str)
 {
-	if (enable_mtrr_cleanup != -1)
-		enable_mtrr_cleanup = 0;
+	enable_mtrr_cleanup = 0;
 	return 0;
 }
 early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);
 
 static int __init enable_mtrr_cleanup_setup(char *str)
 {
-	if (enable_mtrr_cleanup != -1)
-		enable_mtrr_cleanup = 1;
+	enable_mtrr_cleanup = 1;
 	return 0;
 }
 early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup);
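The simplified handlers above keep the usual early_param() shape: the callback runs while early boot options are parsed, and returning 0 tells the parser the option was consumed. A stripped-down sketch of that idiom (the option name is made up):

	#include <linux/init.h>

	static int example_cleanup_enabled;

	static int __init example_cleanup_setup(char *str)
	{
		example_cleanup_enabled = 1;
		return 0;
	}
	early_param("example_cleanup", example_cleanup_setup);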
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index 2dc4ec6..ffd6040 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -8,11 +8,6 @@
 #define MTRRcap_MSR     0x0fe
 #define MTRRdefType_MSR 0x2ff
 
-#define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg))
-#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)
-
-#define NUM_FIXED_RANGES 88
-#define MAX_VAR_RANGES 256
 #define MTRRfix64K_00000_MSR 0x250
 #define MTRRfix16K_80000_MSR 0x258
 #define MTRRfix16K_A0000_MSR 0x259
@@ -29,11 +24,7 @@
 #define MTRR_CHANGE_MASK_VARIABLE  0x02
 #define MTRR_CHANGE_MASK_DEFTYPE   0x04
 
-/* In the Intel processor's MTRR interface, the MTRR type is always held in
-   an 8 bit field: */
-typedef u8 mtrr_type;
-
-extern unsigned int mtrr_usage_table[MAX_VAR_RANGES];
+extern unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
 
 struct mtrr_ops {
 	u32	vendor;
@@ -70,13 +61,6 @@
 	u32 ccr3;
 };
 
-struct mtrr_var_range {
-	u32 base_lo;
-	u32 base_hi;
-	u32 mask_lo;
-	u32 mask_hi;
-};
-
 void set_mtrr_done(struct set_mtrr_context *ctxt);
 void set_mtrr_cache_disable(struct set_mtrr_context *ctxt);
 void set_mtrr_prepare_save(struct set_mtrr_context *ctxt);
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index 72cefd1..85d28d5 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -39,10 +39,10 @@
 #include <linux/device.h>
 #include <linux/cpu.h>
 #include <linux/notifier.h>
+#include <linux/uaccess.h>
 
 #include <asm/processor.h>
 #include <asm/msr.h>
-#include <asm/uaccess.h>
 #include <asm/system.h>
 
 static struct class *cpuid_class;
@@ -82,7 +82,7 @@
 }
 
 static ssize_t cpuid_read(struct file *file, char __user *buf,
-			  size_t count, loff_t * ppos)
+			  size_t count, loff_t *ppos)
 {
 	char __user *tmp = buf;
 	struct cpuid_regs cmd;
@@ -117,7 +117,7 @@
 	unsigned int cpu;
 	struct cpuinfo_x86 *c;
 	int ret = 0;
-	
+
 	lock_kernel();
 
 	cpu = iminor(file->f_path.dentry->d_inode);
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index d84a852..c689d19 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -26,6 +26,7 @@
 #include <linux/kdebug.h>
 #include <asm/smp.h>
 #include <asm/reboot.h>
+#include <asm/virtext.h>
 
 #include <mach_ipi.h>
 
@@ -49,6 +50,15 @@
 #endif
 	crash_save_cpu(regs, cpu);
 
+	/* Disable VMX or SVM if needed.
+	 *
+	 * We need to disable virtualization on all CPUs.
+	 * Having VMX or SVM enabled on any CPU may break rebooting
+	 * after the kdump kernel has finished its task.
+	 */
+	cpu_emergency_vmxoff();
+	cpu_emergency_svm_disable();
+
 	disable_local_APIC();
 }
 
@@ -80,6 +90,14 @@
 	local_irq_disable();
 
 	kdump_nmi_shootdown_cpus();
+
+	/* Booting kdump kernel with VMX or SVM enabled won't work,
+	 * because (among other limitations) we can't disable paging
+	 * with the virt flags.
+	 */
+	cpu_emergency_vmxoff();
+	cpu_emergency_svm_disable();
+
 	lapic_shutdown();
 #if defined(CONFIG_X86_IO_APIC)
 	disable_IO_APIC();
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index 23b138e..504ad19 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -886,7 +886,7 @@
 	va_list ap;
 
 	va_start(ap, fmt);
-	n = vscnprintf(buf, 512, fmt, ap);
+	n = vscnprintf(buf, sizeof(buf), fmt, ap);
 	early_console->write(early_console, buf, n);
 	va_end(ap);
 }
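Using sizeof(buf) instead of a repeated literal ties the bound to the buffer declaration, so the two cannot drift apart if the array is later resized. A small illustration of the idiom (names are made up):

	#include <linux/kernel.h>

	static char early_msg[128];

	static int format_early_msg(const char *fmt, ...)
	{
		va_list ap;
		int n;

		va_start(ap, fmt);
		/* bound follows the array size automatically */
		n = vscnprintf(early_msg, sizeof(early_msg), fmt, ap);
		va_end(ap);
		return n;
	}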
diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c
index c026279..3418548 100644
--- a/arch/x86/kernel/genapic_flat_64.c
+++ b/arch/x86/kernel/genapic_flat_64.c
@@ -30,12 +30,12 @@
 	return 1;
 }
 
-static cpumask_t flat_target_cpus(void)
+static const struct cpumask *flat_target_cpus(void)
 {
-	return cpu_online_map;
+	return cpu_online_mask;
 }
 
-static cpumask_t flat_vector_allocation_domain(int cpu)
+static void flat_vector_allocation_domain(int cpu, struct cpumask *retmask)
 {
 	/* Careful. Some cpus do not strictly honor the set of cpus
 	 * specified in the interrupt destination when using lowest
@@ -45,8 +45,8 @@
 	 * deliver interrupts to the wrong hyperthread when only one
 	 * hyperthread was specified in the interrupt destination.
PLACEHOLDER
 	 */
-	cpumask_t domain = { { [0] = APIC_ALL_CPUS, } };
-	return domain;
+	cpumask_clear(retmask);
+	cpumask_bits(retmask)[0] = APIC_ALL_CPUS;
 }
 
 /*
@@ -69,9 +69,8 @@
 	apic_write(APIC_LDR, val);
 }
 
-static void flat_send_IPI_mask(cpumask_t cpumask, int vector)
+static inline void _flat_send_IPI_mask(unsigned long mask, int vector)
 {
-	unsigned long mask = cpus_addr(cpumask)[0];
 	unsigned long flags;
 
 	local_irq_save(flags);
@@ -79,20 +78,41 @@
 	local_irq_restore(flags);
 }
 
+static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector)
+{
+	unsigned long mask = cpumask_bits(cpumask)[0];
+
+	_flat_send_IPI_mask(mask, vector);
+}
+
+static void flat_send_IPI_mask_allbutself(const struct cpumask *cpumask,
+					  int vector)
+{
+	unsigned long mask = cpumask_bits(cpumask)[0];
+	int cpu = smp_processor_id();
+
+	if (cpu < BITS_PER_LONG)
+		clear_bit(cpu, &mask);
+	_flat_send_IPI_mask(mask, vector);
+}
+
 static void flat_send_IPI_allbutself(int vector)
 {
+	int cpu = smp_processor_id();
 #ifdef	CONFIG_HOTPLUG_CPU
 	int hotplug = 1;
 #else
 	int hotplug = 0;
 #endif
 	if (hotplug || vector == NMI_VECTOR) {
-		cpumask_t allbutme = cpu_online_map;
+		if (!cpumask_equal(cpu_online_mask, cpumask_of(cpu))) {
+			unsigned long mask = cpumask_bits(cpu_online_mask)[0];
 
-		cpu_clear(smp_processor_id(), allbutme);
+			if (cpu < BITS_PER_LONG)
+				clear_bit(cpu, &mask);
 
-		if (!cpus_empty(allbutme))
-			flat_send_IPI_mask(allbutme, vector);
+			_flat_send_IPI_mask(mask, vector);
+		}
 	} else if (num_online_cpus() > 1) {
 		__send_IPI_shortcut(APIC_DEST_ALLBUT, vector,APIC_DEST_LOGICAL);
 	}
@@ -101,7 +121,7 @@
 static void flat_send_IPI_all(int vector)
 {
 	if (vector == NMI_VECTOR)
-		flat_send_IPI_mask(cpu_online_map, vector);
+		flat_send_IPI_mask(cpu_online_mask, vector);
 	else
 		__send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL);
 }
@@ -135,9 +155,18 @@
 	return physid_isset(read_xapic_id(), phys_cpu_present_map);
 }
 
-static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask)
+static unsigned int flat_cpu_mask_to_apicid(const struct cpumask *cpumask)
 {
-	return cpus_addr(cpumask)[0] & APIC_ALL_CPUS;
+	return cpumask_bits(cpumask)[0] & APIC_ALL_CPUS;
+}
+
+static unsigned int flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+						const struct cpumask *andmask)
+{
+	unsigned long mask1 = cpumask_bits(cpumask)[0] & APIC_ALL_CPUS;
+	unsigned long mask2 = cpumask_bits(andmask)[0] & APIC_ALL_CPUS;
+
+	return mask1 & mask2;
 }
 
 static unsigned int phys_pkg_id(int index_msb)
@@ -157,8 +186,10 @@
 	.send_IPI_all = flat_send_IPI_all,
 	.send_IPI_allbutself = flat_send_IPI_allbutself,
 	.send_IPI_mask = flat_send_IPI_mask,
+	.send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself,
 	.send_IPI_self = apic_send_IPI_self,
 	.cpu_mask_to_apicid = flat_cpu_mask_to_apicid,
+	.cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and,
 	.phys_pkg_id = phys_pkg_id,
 	.get_apic_id = get_apic_id,
 	.set_apic_id = set_apic_id,
@@ -188,35 +219,39 @@
 	return 0;
 }
 
-static cpumask_t physflat_target_cpus(void)
+static const struct cpumask *physflat_target_cpus(void)
 {
-	return cpu_online_map;
+	return cpu_online_mask;
 }
 
-static cpumask_t physflat_vector_allocation_domain(int cpu)
+static void physflat_vector_allocation_domain(int cpu, struct cpumask *retmask)
 {
-	return cpumask_of_cpu(cpu);
+	cpumask_clear(retmask);
+	cpumask_set_cpu(cpu, retmask);
 }
 
-static void physflat_send_IPI_mask(cpumask_t cpumask, int vector)
+static void physflat_send_IPI_mask(const struct cpumask *cpumask, int vector)
 {
 	send_IPI_mask_sequence(cpumask, vector);
 }
 
+static void physflat_send_IPI_mask_allbutself(const struct cpumask *cpumask,
+					      int vector)
+{
+	send_IPI_mask_allbutself(cpumask, vector);
+}
+
 static void physflat_send_IPI_allbutself(int vector)
 {
-	cpumask_t allbutme = cpu_online_map;
-
-	cpu_clear(smp_processor_id(), allbutme);
-	physflat_send_IPI_mask(allbutme, vector);
+	send_IPI_mask_allbutself(cpu_online_mask, vector);
 }
 
 static void physflat_send_IPI_all(int vector)
 {
-	physflat_send_IPI_mask(cpu_online_map, vector);
+	physflat_send_IPI_mask(cpu_online_mask, vector);
 }
 
-static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask)
+static unsigned int physflat_cpu_mask_to_apicid(const struct cpumask *cpumask)
 {
 	int cpu;
 
@@ -224,13 +259,31 @@
 	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
 	 * May as well be the first.
 	 */
-	cpu = first_cpu(cpumask);
+	cpu = cpumask_first(cpumask);
 	if ((unsigned)cpu < nr_cpu_ids)
 		return per_cpu(x86_cpu_to_apicid, cpu);
 	else
 		return BAD_APICID;
 }
 
+static unsigned int
+physflat_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+				const struct cpumask *andmask)
+{
+	int cpu;
+
+	/*
+	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
+	 * May as well be the first.
+	 */
+	for_each_cpu_and(cpu, cpumask, andmask)
+		if (cpumask_test_cpu(cpu, cpu_online_mask))
+			break;
+	if (cpu < nr_cpu_ids)
+		return per_cpu(x86_cpu_to_apicid, cpu);
+	return BAD_APICID;
+}
+
 struct genapic apic_physflat =  {
 	.name = "physical flat",
 	.acpi_madt_oem_check = physflat_acpi_madt_oem_check,
@@ -243,8 +296,10 @@
 	.send_IPI_all = physflat_send_IPI_all,
 	.send_IPI_allbutself = physflat_send_IPI_allbutself,
 	.send_IPI_mask = physflat_send_IPI_mask,
+	.send_IPI_mask_allbutself = physflat_send_IPI_mask_allbutself,
 	.send_IPI_self = apic_send_IPI_self,
 	.cpu_mask_to_apicid = physflat_cpu_mask_to_apicid,
+	.cpu_mask_to_apicid_and = physflat_cpu_mask_to_apicid_and,
 	.phys_pkg_id = phys_pkg_id,
 	.get_apic_id = get_apic_id,
 	.set_apic_id = set_apic_id,
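The genapic conversion above moves from passing cpumask_t by value to passing 'const struct cpumask *' and filling caller-supplied masks, so large NR_CPUS configurations no longer copy whole masks across the stack. Temporaries use cpumask_var_t, which is heap-allocated when CONFIG_CPUMASK_OFFSTACK=y. A sketch of that allocation pattern (pick_target_cpu() is illustrative, not part of the patch):

	#include <linux/cpumask.h>
	#include <linux/errno.h>
	#include <linux/gfp.h>

	static int pick_target_cpu(const struct cpumask *requested)
	{
		cpumask_var_t tmp;
		int cpu;

		if (!alloc_cpumask_var(&tmp, GFP_ATOMIC))
			return -ENOMEM;

		cpumask_and(tmp, requested, cpu_online_mask);
		cpu = cpumask_first(tmp);	/* == nr_cpu_ids if empty */
		free_cpumask_var(tmp);

		return cpu < nr_cpu_ids ? cpu : -ENODEV;
	}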
diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c
index f6a2c8e..6ce497c 100644
--- a/arch/x86/kernel/genx2apic_cluster.c
+++ b/arch/x86/kernel/genx2apic_cluster.c
@@ -22,19 +22,18 @@
 
 /* Start with all IRQs pointing to boot CPU.  IRQ balancing will shift them. */
 
-static cpumask_t x2apic_target_cpus(void)
+static const struct cpumask *x2apic_target_cpus(void)
 {
-	return cpumask_of_cpu(0);
+	return cpumask_of(0);
 }
 
 /*
  * for now each logical cpu is in its own vector allocation domain.
  */
-static cpumask_t x2apic_vector_allocation_domain(int cpu)
+static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask)
 {
-	cpumask_t domain = CPU_MASK_NONE;
-	cpu_set(cpu, domain);
-	return domain;
+	cpumask_clear(retmask);
+	cpumask_set_cpu(cpu, retmask);
 }
 
 static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
@@ -56,32 +55,53 @@
  * at once. We have 16 cpus in a cluster. This will minimize IPI register
  * writes.
  */
-static void x2apic_send_IPI_mask(cpumask_t mask, int vector)
+static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
 {
 	unsigned long flags;
 	unsigned long query_cpu;
 
 	local_irq_save(flags);
-	for_each_cpu_mask(query_cpu, mask) {
-		__x2apic_send_IPI_dest(per_cpu(x86_cpu_to_logical_apicid, query_cpu),
-				       vector, APIC_DEST_LOGICAL);
-	}
+	for_each_cpu(query_cpu, mask)
+		__x2apic_send_IPI_dest(
+			per_cpu(x86_cpu_to_logical_apicid, query_cpu),
+			vector, APIC_DEST_LOGICAL);
+	local_irq_restore(flags);
+}
+
+static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask,
+					    int vector)
+{
+	unsigned long flags;
+	unsigned long query_cpu;
+	unsigned long this_cpu = smp_processor_id();
+
+	local_irq_save(flags);
+	for_each_cpu(query_cpu, mask)
+		if (query_cpu != this_cpu)
+			__x2apic_send_IPI_dest(
+				per_cpu(x86_cpu_to_logical_apicid, query_cpu),
+				vector, APIC_DEST_LOGICAL);
 	local_irq_restore(flags);
 }
 
 static void x2apic_send_IPI_allbutself(int vector)
 {
-	cpumask_t mask = cpu_online_map;
+	unsigned long flags;
+	unsigned long query_cpu;
+	unsigned long this_cpu = smp_processor_id();
 
-	cpu_clear(smp_processor_id(), mask);
-
-	if (!cpus_empty(mask))
-		x2apic_send_IPI_mask(mask, vector);
+	local_irq_save(flags);
+	for_each_online_cpu(query_cpu)
+		if (query_cpu != this_cpu)
+			__x2apic_send_IPI_dest(
+				per_cpu(x86_cpu_to_logical_apicid, query_cpu),
+				vector, APIC_DEST_LOGICAL);
+	local_irq_restore(flags);
 }
 
 static void x2apic_send_IPI_all(int vector)
 {
-	x2apic_send_IPI_mask(cpu_online_map, vector);
+	x2apic_send_IPI_mask(cpu_online_mask, vector);
 }
 
 static int x2apic_apic_id_registered(void)
@@ -89,21 +109,38 @@
 	return 1;
 }
 
-static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask)
+static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask)
 {
 	int cpu;
 
 	/*
-	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
+	 * We're using fixed IRQ delivery, can only return one logical APIC ID.
 	 * May as well be the first.
 	 */
-	cpu = first_cpu(cpumask);
-	if ((unsigned)cpu < NR_CPUS)
+	cpu = cpumask_first(cpumask);
+	if ((unsigned)cpu < nr_cpu_ids)
 		return per_cpu(x86_cpu_to_logical_apicid, cpu);
 	else
 		return BAD_APICID;
 }
 
+static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+						  const struct cpumask *andmask)
+{
+	int cpu;
+
+	/*
+	 * We're using fixed IRQ delivery, can only return one logical APIC ID.
+	 * May as well be the first.
+	 */
+	for_each_cpu_and(cpu, cpumask, andmask)
+		if (cpumask_test_cpu(cpu, cpu_online_mask))
+			break;
+	if (cpu < nr_cpu_ids)
+		return per_cpu(x86_cpu_to_logical_apicid, cpu);
+	return BAD_APICID;
+}
+
 static unsigned int get_apic_id(unsigned long x)
 {
 	unsigned int id;
@@ -150,8 +187,10 @@
 	.send_IPI_all = x2apic_send_IPI_all,
 	.send_IPI_allbutself = x2apic_send_IPI_allbutself,
 	.send_IPI_mask = x2apic_send_IPI_mask,
+	.send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself,
 	.send_IPI_self = x2apic_send_IPI_self,
 	.cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
+	.cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and,
 	.phys_pkg_id = phys_pkg_id,
 	.get_apic_id = get_apic_id,
 	.set_apic_id = set_apic_id,
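The new cpu_mask_to_apicid_and() helpers pick the first online CPU present in both masks; for_each_cpu_and() does this in a single pass with no temporary cpumask. A condensed sketch of the selection logic (the helper name is illustrative):

	#include <linux/cpumask.h>

	static int first_online_in_both(const struct cpumask *a,
					const struct cpumask *b)
	{
		int cpu;

		for_each_cpu_and(cpu, a, b)
			if (cpumask_test_cpu(cpu, cpu_online_mask))
				return cpu;

		return nr_cpu_ids;	/* no suitable CPU */
	}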
diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c
index d042211..21bcc0e 100644
--- a/arch/x86/kernel/genx2apic_phys.c
+++ b/arch/x86/kernel/genx2apic_phys.c
@@ -29,16 +29,15 @@
 
 /* Start with all IRQs pointing to boot CPU.  IRQ balancing will shift them. */
 
-static cpumask_t x2apic_target_cpus(void)
+static const struct cpumask *x2apic_target_cpus(void)
 {
-	return cpumask_of_cpu(0);
+	return cpumask_of(0);
 }
 
-static cpumask_t x2apic_vector_allocation_domain(int cpu)
+static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask)
 {
-	cpumask_t domain = CPU_MASK_NONE;
-	cpu_set(cpu, domain);
-	return domain;
+	cpumask_clear(retmask);
+	cpumask_set_cpu(cpu, retmask);
 }
 
 static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
@@ -54,32 +53,54 @@
 	x2apic_icr_write(cfg, apicid);
 }
 
-static void x2apic_send_IPI_mask(cpumask_t mask, int vector)
+static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
 {
 	unsigned long flags;
 	unsigned long query_cpu;
 
 	local_irq_save(flags);
-	for_each_cpu_mask(query_cpu, mask) {
+	for_each_cpu(query_cpu, mask) {
 		__x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu),
 				       vector, APIC_DEST_PHYSICAL);
 	}
 	local_irq_restore(flags);
 }
 
+static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask,
+					    int vector)
+{
+	unsigned long flags;
+	unsigned long query_cpu;
+	unsigned long this_cpu = smp_processor_id();
+
+	local_irq_save(flags);
+	for_each_cpu(query_cpu, mask) {
+		if (query_cpu != this_cpu)
+			__x2apic_send_IPI_dest(
+				per_cpu(x86_cpu_to_apicid, query_cpu),
+				vector, APIC_DEST_PHYSICAL);
+	}
+	local_irq_restore(flags);
+}
+
 static void x2apic_send_IPI_allbutself(int vector)
 {
-	cpumask_t mask = cpu_online_map;
+	unsigned long flags;
+	unsigned long query_cpu;
+	unsigned long this_cpu = smp_processor_id();
 
-	cpu_clear(smp_processor_id(), mask);
-
-	if (!cpus_empty(mask))
-		x2apic_send_IPI_mask(mask, vector);
+	local_irq_save(flags);
+	for_each_online_cpu(query_cpu)
+		if (query_cpu != this_cpu)
+			__x2apic_send_IPI_dest(
+				per_cpu(x86_cpu_to_apicid, query_cpu),
+				vector, APIC_DEST_PHYSICAL);
+	local_irq_restore(flags);
 }
 
 static void x2apic_send_IPI_all(int vector)
 {
-	x2apic_send_IPI_mask(cpu_online_map, vector);
+	x2apic_send_IPI_mask(cpu_online_mask, vector);
 }
 
 static int x2apic_apic_id_registered(void)
@@ -87,7 +108,7 @@
 	return 1;
 }
 
-static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask)
+static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask)
 {
 	int cpu;
 
@@ -95,13 +116,30 @@
 	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
 	 * May as well be the first.
 	 */
-	cpu = first_cpu(cpumask);
-	if ((unsigned)cpu < NR_CPUS)
+	cpu = cpumask_first(cpumask);
+	if ((unsigned)cpu < nr_cpu_ids)
 		return per_cpu(x86_cpu_to_apicid, cpu);
 	else
 		return BAD_APICID;
 }
 
+static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+						  const struct cpumask *andmask)
+{
+	int cpu;
+
+	/*
+	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
+	 * May as well be the first.
+	 */
+	for_each_cpu_and(cpu, cpumask, andmask)
+		if (cpumask_test_cpu(cpu, cpu_online_mask))
+			break;
+	if (cpu < nr_cpu_ids)
+		return per_cpu(x86_cpu_to_apicid, cpu);
+	return BAD_APICID;
+}
+
 static unsigned int get_apic_id(unsigned long x)
 {
 	unsigned int id;
@@ -123,12 +161,12 @@
 	return current_cpu_data.initial_apicid >> index_msb;
 }
 
-void x2apic_send_IPI_self(int vector)
+static void x2apic_send_IPI_self(int vector)
 {
 	apic_write(APIC_SELF_IPI, vector);
 }
 
-void init_x2apic_ldr(void)
+static void init_x2apic_ldr(void)
 {
 	return;
 }
@@ -145,8 +183,10 @@
 	.send_IPI_all = x2apic_send_IPI_all,
 	.send_IPI_allbutself = x2apic_send_IPI_allbutself,
 	.send_IPI_mask = x2apic_send_IPI_mask,
+	.send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself,
 	.send_IPI_self = x2apic_send_IPI_self,
 	.cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
+	.cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and,
 	.phys_pkg_id = phys_pkg_id,
 	.get_apic_id = get_apic_id,
 	.set_apic_id = set_apic_id,
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index dece172..b193e08 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -79,16 +79,15 @@
 
 /* Start with all IRQs pointing to boot CPU.  IRQ balancing will shift them. */
 
-static cpumask_t uv_target_cpus(void)
+static const struct cpumask *uv_target_cpus(void)
 {
-	return cpumask_of_cpu(0);
+	return cpumask_of(0);
 }
 
-static cpumask_t uv_vector_allocation_domain(int cpu)
+static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask)
 {
-	cpumask_t domain = CPU_MASK_NONE;
-	cpu_set(cpu, domain);
-	return domain;
+	cpumask_clear(retmask);
+	cpumask_set_cpu(cpu, retmask);
 }
 
 int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip)
@@ -127,28 +126,37 @@
 	uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
 }
 
-static void uv_send_IPI_mask(cpumask_t mask, int vector)
+static void uv_send_IPI_mask(const struct cpumask *mask, int vector)
 {
 	unsigned int cpu;
 
-	for_each_possible_cpu(cpu)
-		if (cpu_isset(cpu, mask))
+	for_each_cpu(cpu, mask)
+		uv_send_IPI_one(cpu, vector);
+}
+
+static void uv_send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
+{
+	unsigned int cpu;
+	unsigned int this_cpu = smp_processor_id();
+
+	for_each_cpu(cpu, mask)
+		if (cpu != this_cpu)
 			uv_send_IPI_one(cpu, vector);
 }
 
 static void uv_send_IPI_allbutself(int vector)
 {
-	cpumask_t mask = cpu_online_map;
+	unsigned int cpu;
+	unsigned int this_cpu = smp_processor_id();
 
-	cpu_clear(smp_processor_id(), mask);
-
-	if (!cpus_empty(mask))
-		uv_send_IPI_mask(mask, vector);
+	for_each_online_cpu(cpu)
+		if (cpu != this_cpu)
+			uv_send_IPI_one(cpu, vector);
 }
 
 static void uv_send_IPI_all(int vector)
 {
-	uv_send_IPI_mask(cpu_online_map, vector);
+	uv_send_IPI_mask(cpu_online_mask, vector);
 }
 
 static int uv_apic_id_registered(void)
@@ -160,7 +168,7 @@
 {
 }
 
-static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask)
+static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask)
 {
 	int cpu;
 
@@ -168,13 +176,30 @@
 	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
 	 * May as well be the first.
 	 */
-	cpu = first_cpu(cpumask);
+	cpu = cpumask_first(cpumask);
 	if ((unsigned)cpu < nr_cpu_ids)
 		return per_cpu(x86_cpu_to_apicid, cpu);
 	else
 		return BAD_APICID;
 }
 
+static unsigned int uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+					      const struct cpumask *andmask)
+{
+	int cpu;
+
+	/*
+	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
+	 * May as well be the first.
+	 */
+	for_each_cpu_and(cpu, cpumask, andmask)
+		if (cpumask_test_cpu(cpu, cpu_online_mask))
+			break;
+	if (cpu < nr_cpu_ids)
+		return per_cpu(x86_cpu_to_apicid, cpu);
+	return BAD_APICID;
+}
+
 static unsigned int get_apic_id(unsigned long x)
 {
 	unsigned int id;
@@ -222,8 +247,10 @@
 	.send_IPI_all = uv_send_IPI_all,
 	.send_IPI_allbutself = uv_send_IPI_allbutself,
 	.send_IPI_mask = uv_send_IPI_mask,
+	.send_IPI_mask_allbutself = uv_send_IPI_mask_allbutself,
 	.send_IPI_self = uv_send_IPI_self,
 	.cpu_mask_to_apicid = uv_cpu_mask_to_apicid,
+	.cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and,
 	.phys_pkg_id = phys_pkg_id,
 	.get_apic_id = get_apic_id,
 	.set_apic_id = set_apic_id,
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 845ea09..cd759ad 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -248,7 +248,7 @@
 	 * Start hpet with the boot cpu mask and make it
 	 * global after the IO_APIC has been initialized.
 	 */
-	hpet_clockevent.cpumask = cpumask_of_cpu(smp_processor_id());
+	hpet_clockevent.cpumask = cpumask_of(smp_processor_id());
 	clockevents_register_device(&hpet_clockevent);
 	global_clock_event = &hpet_clockevent;
 	printk(KERN_DEBUG "hpet clockevent registered\n");
@@ -303,7 +303,7 @@
 			struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
 			hpet_setup_msi_irq(hdev->irq);
 			disable_irq(hdev->irq);
-			irq_set_affinity(hdev->irq, cpumask_of_cpu(hdev->cpu));
+			irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu));
 			enable_irq(hdev->irq);
 		}
 		break;
@@ -451,7 +451,7 @@
 		return -1;
 
 	disable_irq(dev->irq);
-	irq_set_affinity(dev->irq, cpumask_of_cpu(dev->cpu));
+	irq_set_affinity(dev->irq, cpumask_of(dev->cpu));
 	enable_irq(dev->irq);
 
 	printk(KERN_DEBUG "hpet: %s irq %d for MSI\n",
@@ -502,7 +502,7 @@
 	/* 5 usec minimum reprogramming delta. */
 	evt->min_delta_ns = 5000;
 
-	evt->cpumask = cpumask_of_cpu(hdev->cpu);
+	evt->cpumask = cpumask_of(hdev->cpu);
 	clockevents_register_device(evt);
 }
 
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c
index c1b5e3e..10f92fb 100644
--- a/arch/x86/kernel/i8253.c
+++ b/arch/x86/kernel/i8253.c
@@ -114,7 +114,7 @@
 	 * Start pit with the boot cpu mask and make it global after the
 	 * IO_APIC has been initialized.
 	 */
-	pit_clockevent.cpumask = cpumask_of_cpu(smp_processor_id());
+	pit_clockevent.cpumask = cpumask_of(smp_processor_id());
 	pit_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC,
 				     pit_clockevent.shift);
 	pit_clockevent.max_delta_ns =
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 7491765..6991172 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -136,8 +136,8 @@
 
 struct irq_cfg {
 	struct irq_pin_list *irq_2_pin;
-	cpumask_t domain;
-	cpumask_t old_domain;
+	cpumask_var_t domain;
+	cpumask_var_t old_domain;
 	unsigned move_cleanup_count;
 	u8 vector;
 	u8 move_in_progress : 1;
@@ -152,22 +152,22 @@
 #else
 static struct irq_cfg irq_cfgx[NR_IRQS] = {
 #endif
-	[0]  = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR,  },
-	[1]  = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR,  },
-	[2]  = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR,  },
-	[3]  = { .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR,  },
-	[4]  = { .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR,  },
-	[5]  = { .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR,  },
-	[6]  = { .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR,  },
-	[7]  = { .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR,  },
-	[8]  = { .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR,  },
-	[9]  = { .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR,  },
-	[10] = { .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
-	[11] = { .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
-	[12] = { .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
-	[13] = { .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
-	[14] = { .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
-	[15] = { .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
+	[0]  = { .vector = IRQ0_VECTOR,  },
+	[1]  = { .vector = IRQ1_VECTOR,  },
+	[2]  = { .vector = IRQ2_VECTOR,  },
+	[3]  = { .vector = IRQ3_VECTOR,  },
+	[4]  = { .vector = IRQ4_VECTOR,  },
+	[5]  = { .vector = IRQ5_VECTOR,  },
+	[6]  = { .vector = IRQ6_VECTOR,  },
+	[7]  = { .vector = IRQ7_VECTOR,  },
+	[8]  = { .vector = IRQ8_VECTOR,  },
+	[9]  = { .vector = IRQ9_VECTOR,  },
+	[10] = { .vector = IRQ10_VECTOR, },
+	[11] = { .vector = IRQ11_VECTOR, },
+	[12] = { .vector = IRQ12_VECTOR, },
+	[13] = { .vector = IRQ13_VECTOR, },
+	[14] = { .vector = IRQ14_VECTOR, },
+	[15] = { .vector = IRQ15_VECTOR, },
 };
 
 int __init arch_early_irq_init(void)
@@ -183,6 +183,10 @@
 	for (i = 0; i < count; i++) {
 		desc = irq_to_desc(i);
 		desc->chip_data = &cfg[i];
+		alloc_bootmem_cpumask_var(&cfg[i].domain);
+		alloc_bootmem_cpumask_var(&cfg[i].old_domain);
+		if (i < NR_IRQS_LEGACY)
+			cpumask_setall(cfg[i].domain);
 	}
 
 	return 0;
@@ -209,6 +213,20 @@
 	node = cpu_to_node(cpu);
 
 	cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node);
+	if (cfg) {
+		/* FIXME: needs alloc_cpumask_var_node() */
+		if (!alloc_cpumask_var(&cfg->domain, GFP_ATOMIC)) {
+			kfree(cfg);
+			cfg = NULL;
+		} else if (!alloc_cpumask_var(&cfg->old_domain, GFP_ATOMIC)) {
+			free_cpumask_var(cfg->domain);
+			kfree(cfg);
+			cfg = NULL;
+		} else {
+			cpumask_clear(cfg->domain);
+			cpumask_clear(cfg->old_domain);
+		}
+	}
 	printk(KERN_DEBUG "  alloc irq_cfg on cpu %d node %d\n", cpu, node);
 
 	return cfg;
@@ -333,13 +351,14 @@
 	}
 }
 
-static void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask)
+static void
+set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask)
 {
 	struct irq_cfg *cfg = desc->chip_data;
 
 	if (!cfg->move_in_progress) {
 		/* it means that domain is not changed */
-		if (!cpus_intersects(desc->affinity, mask))
+		if (!cpumask_intersects(&desc->affinity, mask))
 			cfg->move_desc_pending = 1;
 	}
 }
@@ -354,7 +373,8 @@
 #endif
 
 #ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC
-static inline void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask)
+static inline void
+set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask)
 {
 }
 #endif
@@ -485,6 +505,26 @@
 }
 
 #ifdef CONFIG_SMP
+static void send_cleanup_vector(struct irq_cfg *cfg)
+{
+	cpumask_var_t cleanup_mask;
+
+	if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
+		unsigned int i;
+		cfg->move_cleanup_count = 0;
+		for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
+			cfg->move_cleanup_count++;
+		for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
+			send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR);
+	} else {
+		cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
+		cfg->move_cleanup_count = cpumask_weight(cleanup_mask);
+		send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+		free_cpumask_var(cleanup_mask);
+	}
+	cfg->move_in_progress = 0;
+}
+
 static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
 {
 	int apic, pin;
@@ -520,41 +560,55 @@
 	}
 }
 
-static int assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask);
+static int
+assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask);
 
-static void set_ioapic_affinity_irq_desc(struct irq_desc *desc, cpumask_t mask)
+/*
+ * Either sets desc->affinity to a valid value, and returns cpu_mask_to_apicid
+ * of that, or returns BAD_APICID and leaves desc->affinity untouched.
+ */
+static unsigned int
+set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
 {
 	struct irq_cfg *cfg;
-	unsigned long flags;
-	unsigned int dest;
-	cpumask_t tmp;
 	unsigned int irq;
 
-	cpus_and(tmp, mask, cpu_online_map);
-	if (cpus_empty(tmp))
-		return;
+	if (!cpumask_intersects(mask, cpu_online_mask))
+		return BAD_APICID;
 
 	irq = desc->irq;
 	cfg = desc->chip_data;
 	if (assign_irq_vector(irq, cfg, mask))
-		return;
+		return BAD_APICID;
 
+	cpumask_and(&desc->affinity, cfg->domain, mask);
 	set_extra_move_desc(desc, mask);
+	return cpu_mask_to_apicid_and(&desc->affinity, cpu_online_mask);
+}
 
-	cpus_and(tmp, cfg->domain, mask);
-	dest = cpu_mask_to_apicid(tmp);
-	/*
-	 * Only the high 8 bits are valid.
-	 */
-	dest = SET_APIC_LOGICAL_ID(dest);
+static void
+set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
+{
+	struct irq_cfg *cfg;
+	unsigned long flags;
+	unsigned int dest;
+	unsigned int irq;
+
+	irq = desc->irq;
+	cfg = desc->chip_data;
 
 	spin_lock_irqsave(&ioapic_lock, flags);
-	__target_IO_APIC_irq(irq, dest, cfg);
-	desc->affinity = mask;
+	dest = set_desc_affinity(desc, mask);
+	if (dest != BAD_APICID) {
+		/* Only the high 8 bits are valid. */
+		dest = SET_APIC_LOGICAL_ID(dest);
+		__target_IO_APIC_irq(irq, dest, cfg);
+	}
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
-static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+static void
+set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask)
 {
 	struct irq_desc *desc;
 
@@ -652,7 +706,7 @@
 }
 
 #ifdef CONFIG_X86_64
-void io_apic_sync(struct irq_pin_list *entry)
+static void io_apic_sync(struct irq_pin_list *entry)
 {
 	/*
 	 * Synchronize the IO-APIC and the CPU by doing
@@ -1222,7 +1276,8 @@
 	spin_unlock(&vector_lock);
 }
 
-static int __assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask)
+static int
+__assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
 {
 	/*
 	 * NOTE! The local APIC isn't very good at handling
@@ -1237,49 +1292,49 @@
 	 */
 	static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
 	unsigned int old_vector;
-	int cpu;
+	int cpu, err;
+	cpumask_var_t tmp_mask;
 
 	if ((cfg->move_in_progress) || cfg->move_cleanup_count)
 		return -EBUSY;
 
-	/* Only try and allocate irqs on cpus that are present */
-	cpus_and(mask, mask, cpu_online_map);
+	if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
+		return -ENOMEM;
 
 	old_vector = cfg->vector;
 	if (old_vector) {
-		cpumask_t tmp;
-		cpus_and(tmp, cfg->domain, mask);
-		if (!cpus_empty(tmp))
+		cpumask_and(tmp_mask, mask, cpu_online_mask);
+		cpumask_and(tmp_mask, cfg->domain, tmp_mask);
+		if (!cpumask_empty(tmp_mask)) {
+			free_cpumask_var(tmp_mask);
 			return 0;
+		}
 	}
 
-	for_each_cpu_mask_nr(cpu, mask) {
-		cpumask_t domain, new_mask;
+	/* Only try and allocate irqs on cpus that are present */
+	err = -ENOSPC;
+	for_each_cpu_and(cpu, mask, cpu_online_mask) {
 		int new_cpu;
 		int vector, offset;
 
-		domain = vector_allocation_domain(cpu);
-		cpus_and(new_mask, domain, cpu_online_map);
+		vector_allocation_domain(cpu, tmp_mask);
 
 		vector = current_vector;
 		offset = current_offset;
 next:
 		vector += 8;
 		if (vector >= first_system_vector) {
-			/* If we run out of vectors on large boxen, must share them. */
+			/* If out of vectors on large boxen, must share them. */
 			offset = (offset + 1) % 8;
 			vector = FIRST_DEVICE_VECTOR + offset;
 		}
 		if (unlikely(current_vector == vector))
 			continue;
-#ifdef CONFIG_X86_64
-		if (vector == IA32_SYSCALL_VECTOR)
+
+		if (test_bit(vector, used_vectors))
 			goto next;
-#else
-		if (vector == SYSCALL_VECTOR)
-			goto next;
-#endif
-		for_each_cpu_mask_nr(new_cpu, new_mask)
+
+		for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
 			if (per_cpu(vector_irq, new_cpu)[vector] != -1)
 				goto next;
 		/* Found one! */
@@ -1287,18 +1342,21 @@
 		current_offset = offset;
 		if (old_vector) {
 			cfg->move_in_progress = 1;
-			cfg->old_domain = cfg->domain;
+			cpumask_copy(cfg->old_domain, cfg->domain);
 		}
-		for_each_cpu_mask_nr(new_cpu, new_mask)
+		for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
 			per_cpu(vector_irq, new_cpu)[vector] = irq;
 		cfg->vector = vector;
-		cfg->domain = domain;
-		return 0;
+		cpumask_copy(cfg->domain, tmp_mask);
+		err = 0;
+		break;
 	}
-	return -ENOSPC;
+	free_cpumask_var(tmp_mask);
+	return err;
 }
 
-static int assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask)
+static int
+assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
 {
 	int err;
 	unsigned long flags;
@@ -1311,23 +1369,20 @@
 
 static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
 {
-	cpumask_t mask;
 	int cpu, vector;
 
 	BUG_ON(!cfg->vector);
 
 	vector = cfg->vector;
-	cpus_and(mask, cfg->domain, cpu_online_map);
-	for_each_cpu_mask_nr(cpu, mask)
+	for_each_cpu_and(cpu, cfg->domain, cpu_online_mask)
 		per_cpu(vector_irq, cpu)[vector] = -1;
 
 	cfg->vector = 0;
-	cpus_clear(cfg->domain);
+	cpumask_clear(cfg->domain);
 
 	if (likely(!cfg->move_in_progress))
 		return;
-	cpus_and(mask, cfg->old_domain, cpu_online_map);
-	for_each_cpu_mask_nr(cpu, mask) {
+	for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) {
 		for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS;
 								vector++) {
 			if (per_cpu(vector_irq, cpu)[vector] != irq)
@@ -1350,7 +1405,7 @@
 	/* Mark the inuse vectors */
 	for_each_irq_desc(irq, desc) {
 		cfg = desc->chip_data;
-		if (!cpu_isset(cpu, cfg->domain))
+		if (!cpumask_test_cpu(cpu, cfg->domain))
 			continue;
 		vector = cfg->vector;
 		per_cpu(vector_irq, cpu)[vector] = irq;
@@ -1362,7 +1417,7 @@
 			continue;
 
 		cfg = irq_cfg(irq);
-		if (!cpu_isset(cpu, cfg->domain))
+		if (!cpumask_test_cpu(cpu, cfg->domain))
 			per_cpu(vector_irq, cpu)[vector] = -1;
 	}
 }
@@ -1498,18 +1553,17 @@
 {
 	struct irq_cfg *cfg;
 	struct IO_APIC_route_entry entry;
-	cpumask_t mask;
+	unsigned int dest;
 
 	if (!IO_APIC_IRQ(irq))
 		return;
 
 	cfg = desc->chip_data;
 
-	mask = TARGET_CPUS;
-	if (assign_irq_vector(irq, cfg, mask))
+	if (assign_irq_vector(irq, cfg, TARGET_CPUS))
 		return;
 
-	cpus_and(mask, cfg->domain, mask);
+	dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS);
 
 	apic_printk(APIC_VERBOSE,KERN_DEBUG
 		    "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
@@ -1519,8 +1573,7 @@
 
 
 	if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry,
-			       cpu_mask_to_apicid(mask), trigger, polarity,
-			       cfg->vector)) {
+			       dest, trigger, polarity, cfg->vector)) {
 		printk("Failed to setup ioapic entry for ioapic  %d, pin %d\n",
 		       mp_ioapics[apic].mp_apicid, pin);
 		__clear_irq_vector(irq, cfg);
@@ -2240,7 +2293,7 @@
 	unsigned long flags;
 
 	spin_lock_irqsave(&vector_lock, flags);
-	send_IPI_mask(cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector);
+	send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector);
 	spin_unlock_irqrestore(&vector_lock, flags);
 
 	return 1;
@@ -2289,18 +2342,17 @@
  * as simple as edge triggered migration and we can do the irq migration
  * with a simple atomic update to IO-APIC RTE.
  */
-static void migrate_ioapic_irq_desc(struct irq_desc *desc, cpumask_t mask)
+static void
+migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
 {
 	struct irq_cfg *cfg;
-	cpumask_t tmp, cleanup_mask;
 	struct irte irte;
 	int modify_ioapic_rte;
 	unsigned int dest;
 	unsigned long flags;
 	unsigned int irq;
 
-	cpus_and(tmp, mask, cpu_online_map);
-	if (cpus_empty(tmp))
+	if (!cpumask_intersects(mask, cpu_online_mask))
 		return;
 
 	irq = desc->irq;
@@ -2313,8 +2365,7 @@
 
 	set_extra_move_desc(desc, mask);
 
-	cpus_and(tmp, cfg->domain, mask);
-	dest = cpu_mask_to_apicid(tmp);
+	dest = cpu_mask_to_apicid_and(cfg->domain, mask);
 
 	modify_ioapic_rte = desc->status & IRQ_LEVEL;
 	if (modify_ioapic_rte) {
@@ -2331,14 +2382,10 @@
 	 */
 	modify_irte(irq, &irte);
 
-	if (cfg->move_in_progress) {
-		cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
-		cfg->move_cleanup_count = cpus_weight(cleanup_mask);
-		send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
-		cfg->move_in_progress = 0;
-	}
+	if (cfg->move_in_progress)
+		send_cleanup_vector(cfg);
 
-	desc->affinity = mask;
+	cpumask_copy(&desc->affinity, mask);
 }
 
 static int migrate_irq_remapped_level_desc(struct irq_desc *desc)
@@ -2360,11 +2407,11 @@
 	}
 
 	/* everything is clear. we have right of way */
-	migrate_ioapic_irq_desc(desc, desc->pending_mask);
+	migrate_ioapic_irq_desc(desc, &desc->pending_mask);
 
 	ret = 0;
 	desc->status &= ~IRQ_MOVE_PENDING;
-	cpus_clear(desc->pending_mask);
+	cpumask_clear(&desc->pending_mask);
 
 unmask:
 	unmask_IO_APIC_irq_desc(desc);
@@ -2389,7 +2436,7 @@
 				continue;
 			}
 
-			desc->chip->set_affinity(irq, desc->pending_mask);
+			desc->chip->set_affinity(irq, &desc->pending_mask);
 			spin_unlock_irqrestore(&desc->lock, flags);
 		}
 	}
@@ -2398,18 +2445,20 @@
 /*
  * Migrates the IRQ destination in the process context.
  */
-static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, cpumask_t mask)
+static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
+					    const struct cpumask *mask)
 {
 	if (desc->status & IRQ_LEVEL) {
 		desc->status |= IRQ_MOVE_PENDING;
-		desc->pending_mask = mask;
+		cpumask_copy(&desc->pending_mask, mask);
 		migrate_irq_remapped_level_desc(desc);
 		return;
 	}
 
 	migrate_ioapic_irq_desc(desc, mask);
 }
-static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+static void set_ir_ioapic_affinity_irq(unsigned int irq,
+				       const struct cpumask *mask)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 
@@ -2444,7 +2493,7 @@
 		if (!cfg->move_cleanup_count)
 			goto unlock;
 
-		if ((vector == cfg->vector) && cpu_isset(me, cfg->domain))
+		if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
 			goto unlock;
 
 		__get_cpu_var(vector_irq)[vector] = -1;
@@ -2481,20 +2530,14 @@
 
 	vector = ~get_irq_regs()->orig_ax;
 	me = smp_processor_id();
-	if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) {
-		cpumask_t cleanup_mask;
-
 #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
 		*descp = desc = move_irq_desc(desc, me);
 		/* get the new one */
 		cfg = desc->chip_data;
 #endif
 
-		cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
-		cfg->move_cleanup_count = cpus_weight(cleanup_mask);
-		send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
-		cfg->move_in_progress = 0;
-	}
+	if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
+		send_cleanup_vector(cfg);
 }
 #else
 static inline void irq_complete_move(struct irq_desc **descp) {}
@@ -3216,16 +3259,13 @@
 	struct irq_cfg *cfg;
 	int err;
 	unsigned dest;
-	cpumask_t tmp;
 
 	cfg = irq_cfg(irq);
-	tmp = TARGET_CPUS;
-	err = assign_irq_vector(irq, cfg, tmp);
+	err = assign_irq_vector(irq, cfg, TARGET_CPUS);
 	if (err)
 		return err;
 
-	cpus_and(tmp, cfg->domain, tmp);
-	dest = cpu_mask_to_apicid(tmp);
+	dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS);
 
 #ifdef CONFIG_INTR_REMAP
 	if (irq_remapped(irq)) {
@@ -3279,26 +3319,18 @@
 }
 
 #ifdef CONFIG_SMP
-static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
+static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 	struct irq_cfg *cfg;
 	struct msi_msg msg;
 	unsigned int dest;
-	cpumask_t tmp;
 
-	cpus_and(tmp, mask, cpu_online_map);
-	if (cpus_empty(tmp))
+	dest = set_desc_affinity(desc, mask);
+	if (dest == BAD_APICID)
 		return;
 
 	cfg = desc->chip_data;
-	if (assign_irq_vector(irq, cfg, mask))
-		return;
-
-	set_extra_move_desc(desc, mask);
-
-	cpus_and(tmp, cfg->domain, mask);
-	dest = cpu_mask_to_apicid(tmp);
 
 	read_msi_msg_desc(desc, &msg);
 
@@ -3308,37 +3340,27 @@
 	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
 	write_msi_msg_desc(desc, &msg);
-	desc->affinity = mask;
 }
 #ifdef CONFIG_INTR_REMAP
 /*
  * Migrate the MSI irq to another cpumask. This migration is
  * done in the process context using interrupt-remapping hardware.
  */
-static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
+static void
+ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
-	struct irq_cfg *cfg;
+	struct irq_cfg *cfg = desc->chip_data;
 	unsigned int dest;
-	cpumask_t tmp, cleanup_mask;
 	struct irte irte;
 
-	cpus_and(tmp, mask, cpu_online_map);
-	if (cpus_empty(tmp))
-		return;
-
 	if (get_irte(irq, &irte))
 		return;
 
-	cfg = desc->chip_data;
-	if (assign_irq_vector(irq, cfg, mask))
+	dest = set_desc_affinity(desc, mask);
+	if (dest == BAD_APICID)
 		return;
 
-	set_extra_move_desc(desc, mask);
-
-	cpus_and(tmp, cfg->domain, mask);
-	dest = cpu_mask_to_apicid(tmp);
-
 	irte.vector = cfg->vector;
 	irte.dest_id = IRTE_DEST(dest);
 
@@ -3352,14 +3374,8 @@
 	 * at the new destination. So, time to cleanup the previous
 	 * vector allocation.
 	 */
-	if (cfg->move_in_progress) {
-		cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
-		cfg->move_cleanup_count = cpus_weight(cleanup_mask);
-		send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
-		cfg->move_in_progress = 0;
-	}
-
-	desc->affinity = mask;
+	if (cfg->move_in_progress)
+		send_cleanup_vector(cfg);
 }
 
 #endif
@@ -3550,26 +3566,18 @@
 
 #ifdef CONFIG_DMAR
 #ifdef CONFIG_SMP
-static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
+static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 	struct irq_cfg *cfg;
 	struct msi_msg msg;
 	unsigned int dest;
-	cpumask_t tmp;
 
-	cpus_and(tmp, mask, cpu_online_map);
-	if (cpus_empty(tmp))
+	dest = set_desc_affinity(desc, mask);
+	if (dest == BAD_APICID)
 		return;
 
 	cfg = desc->chip_data;
-	if (assign_irq_vector(irq, cfg, mask))
-		return;
-
-	set_extra_move_desc(desc, mask);
-
-	cpus_and(tmp, cfg->domain, mask);
-	dest = cpu_mask_to_apicid(tmp);
 
 	dmar_msi_read(irq, &msg);
 
@@ -3579,7 +3587,6 @@
 	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
 	dmar_msi_write(irq, &msg);
-	desc->affinity = mask;
 }
 
 #endif /* CONFIG_SMP */
@@ -3613,26 +3620,18 @@
 #ifdef CONFIG_HPET_TIMER
 
 #ifdef CONFIG_SMP
-static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask)
+static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 	struct irq_cfg *cfg;
 	struct msi_msg msg;
 	unsigned int dest;
-	cpumask_t tmp;
 
-	cpus_and(tmp, mask, cpu_online_map);
-	if (cpus_empty(tmp))
+	dest = set_desc_affinity(desc, mask);
+	if (dest == BAD_APICID)
 		return;
 
 	cfg = desc->chip_data;
-	if (assign_irq_vector(irq, cfg, mask))
-		return;
-
-	set_extra_move_desc(desc, mask);
-
-	cpus_and(tmp, cfg->domain, mask);
-	dest = cpu_mask_to_apicid(tmp);
 
 	hpet_msi_read(irq, &msg);
 
@@ -3642,7 +3641,6 @@
 	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
 	hpet_msi_write(irq, &msg);
-	desc->affinity = mask;
 }
 
 #endif /* CONFIG_SMP */
@@ -3697,28 +3695,19 @@
 	write_ht_irq_msg(irq, &msg);
 }
 
-static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
+static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 	struct irq_cfg *cfg;
 	unsigned int dest;
-	cpumask_t tmp;
 
-	cpus_and(tmp, mask, cpu_online_map);
-	if (cpus_empty(tmp))
+	dest = set_desc_affinity(desc, mask);
+	if (dest == BAD_APICID)
 		return;
 
 	cfg = desc->chip_data;
-	if (assign_irq_vector(irq, cfg, mask))
-		return;
-
-	set_extra_move_desc(desc, mask);
-
-	cpus_and(tmp, cfg->domain, mask);
-	dest = cpu_mask_to_apicid(tmp);
 
 	target_ht_irq(irq, dest, cfg->vector);
-	desc->affinity = mask;
 }
 
 #endif
@@ -3738,17 +3727,14 @@
 {
 	struct irq_cfg *cfg;
 	int err;
-	cpumask_t tmp;
 
 	cfg = irq_cfg(irq);
-	tmp = TARGET_CPUS;
-	err = assign_irq_vector(irq, cfg, tmp);
+	err = assign_irq_vector(irq, cfg, TARGET_CPUS);
 	if (!err) {
 		struct ht_irq_msg msg;
 		unsigned dest;
 
-		cpus_and(tmp, cfg->domain, tmp);
-		dest = cpu_mask_to_apicid(tmp);
+		dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS);
 
 		msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
 
@@ -3784,7 +3770,7 @@
 int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
 		       unsigned long mmr_offset)
 {
-	const cpumask_t *eligible_cpu = get_cpu_mask(cpu);
+	const struct cpumask *eligible_cpu = cpumask_of(cpu);
 	struct irq_cfg *cfg;
 	int mmr_pnode;
 	unsigned long mmr_value;
@@ -3794,7 +3780,7 @@
 
 	cfg = irq_cfg(irq);
 
-	err = assign_irq_vector(irq, cfg, *eligible_cpu);
+	err = assign_irq_vector(irq, cfg, eligible_cpu);
 	if (err != 0)
 		return err;
 
@@ -3813,7 +3799,7 @@
 	entry->polarity = 0;
 	entry->trigger = 0;
 	entry->mask = 0;
-	entry->dest = cpu_mask_to_apicid(*eligible_cpu);
+	entry->dest = cpu_mask_to_apicid(eligible_cpu);
 
 	mmr_pnode = uv_blade_to_pnode(mmr_blade);
 	uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
@@ -4024,7 +4010,7 @@
 	int pin, ioapic, irq, irq_entry;
 	struct irq_desc *desc;
 	struct irq_cfg *cfg;
-	cpumask_t mask;
+	const struct cpumask *mask;
 
 	if (skip_ioapic_setup == 1)
 		return;
@@ -4055,7 +4041,7 @@
 			 */
 			if (desc->status &
 			    (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
-				mask = desc->affinity;
+				mask = &desc->affinity;
 			else
 				mask = TARGET_CPUS;
 
diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c
index f1c688e..285bbf8 100644
--- a/arch/x86/kernel/ipi.c
+++ b/arch/x86/kernel/ipi.c
@@ -116,18 +116,18 @@
 /*
  * This is only used on smaller machines.
  */
-void send_IPI_mask_bitmask(cpumask_t cpumask, int vector)
+void send_IPI_mask_bitmask(const struct cpumask *cpumask, int vector)
 {
-	unsigned long mask = cpus_addr(cpumask)[0];
+	unsigned long mask = cpumask_bits(cpumask)[0];
 	unsigned long flags;
 
 	local_irq_save(flags);
-	WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]);
+	WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]);
 	__send_IPI_dest_field(mask, vector);
 	local_irq_restore(flags);
 }
 
-void send_IPI_mask_sequence(cpumask_t mask, int vector)
+void send_IPI_mask_sequence(const struct cpumask *mask, int vector)
 {
 	unsigned long flags;
 	unsigned int query_cpu;
@@ -139,12 +139,24 @@
 	 */
 
 	local_irq_save(flags);
-	for_each_possible_cpu(query_cpu) {
-		if (cpu_isset(query_cpu, mask)) {
+	for_each_cpu(query_cpu, mask)
+		__send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), vector);
+	local_irq_restore(flags);
+}
+
+void send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
+{
+	unsigned long flags;
+	unsigned int query_cpu;
+	unsigned int this_cpu = smp_processor_id();
+
+	/* See Hack comment above */
+
+	local_irq_save(flags);
+	for_each_cpu(query_cpu, mask)
+		if (query_cpu != this_cpu)
 			__send_IPI_dest_field(cpu_to_logical_apicid(query_cpu),
 					      vector);
-		}
-	}
 	local_irq_restore(flags);
 }
 
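The new send_IPI_mask_allbutself() avoids copying cpu_online_map onto the stack (a cpumask_t is 512 bytes with NR_CPUS=4096); it simply iterates the mask and skips the sender. The shape of that loop, reduced to its essentials (notify_cpu() stands in for the real APIC write, and the caller is assumed to have interrupts disabled so smp_processor_id() is stable):

	#include <linux/cpumask.h>
	#include <linux/smp.h>

	static void notify_all_but_self(const struct cpumask *mask,
					void (*notify_cpu)(unsigned int cpu))
	{
		unsigned int cpu, this_cpu = smp_processor_id();

		for_each_cpu(cpu, mask)
			if (cpu != this_cpu)
				notify_cpu(cpu);
	}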
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 3f1d9d1..bce53e1 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -9,6 +9,7 @@
 #include <asm/apic.h>
 #include <asm/io_apic.h>
 #include <asm/smp.h>
+#include <asm/irq.h>
 
 atomic_t irq_err_count;
 
@@ -190,3 +191,5 @@
 #endif
 	return sum;
 }
+
+EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq);
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 119fc9c..9dc5588 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -233,27 +233,28 @@
 #ifdef CONFIG_HOTPLUG_CPU
 #include <mach_apic.h>
 
-void fixup_irqs(cpumask_t map)
+/* A cpu has been removed from cpu_online_mask.  Reset irq affinities. */
+void fixup_irqs(void)
 {
 	unsigned int irq;
 	static int warned;
 	struct irq_desc *desc;
 
 	for_each_irq_desc(irq, desc) {
-		cpumask_t mask;
+		const struct cpumask *affinity;
 
 		if (!desc)
 			continue;
 		if (irq == 2)
 			continue;
 
-		cpus_and(mask, desc->affinity, map);
-		if (any_online_cpu(mask) == NR_CPUS) {
+		affinity = &desc->affinity;
+		if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
 			printk("Breaking affinity for irq %i\n", irq);
-			mask = map;
+			affinity = cpu_all_mask;
 		}
 		if (desc->chip->set_affinity)
-			desc->chip->set_affinity(irq, mask);
+			desc->chip->set_affinity(irq, affinity);
 		else if (desc->action && !(warned++))
 			printk("Cannot set affinity for irq %i\n", irq);
 	}
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index a174a21..6383d50 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -80,16 +80,17 @@
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
-void fixup_irqs(cpumask_t map)
+/* A cpu has been removed from cpu_online_mask.  Reset irq affinities. */
+void fixup_irqs(void)
 {
 	unsigned int irq;
 	static int warned;
 	struct irq_desc *desc;
 
 	for_each_irq_desc(irq, desc) {
-		cpumask_t mask;
 		int break_affinity = 0;
 		int set_affinity = 1;
+		const struct cpumask *affinity;
 
 		if (!desc)
 			continue;
@@ -99,23 +100,23 @@
 		/* interrupts are disabled at this point */
 		spin_lock(&desc->lock);
 
+		affinity = &desc->affinity;
 		if (!irq_has_action(irq) ||
-		    cpus_equal(desc->affinity, map)) {
+		    cpumask_equal(affinity, cpu_online_mask)) {
 			spin_unlock(&desc->lock);
 			continue;
 		}
 
-		cpus_and(mask, desc->affinity, map);
-		if (cpus_empty(mask)) {
+		if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
 			break_affinity = 1;
-			mask = map;
+			affinity = cpu_all_mask;
 		}
 
 		if (desc->chip->mask)
 			desc->chip->mask(irq);
 
 		if (desc->chip->set_affinity)
-			desc->chip->set_affinity(irq, mask);
+			desc->chip->set_affinity(irq, affinity);
 		else if (!(warned++))
 			set_affinity = 0;
 
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 203384e..8472329 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -110,6 +110,18 @@
 	[IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1
 };
 
+int vector_used_by_percpu_irq(unsigned int vector)
+{
+	int cpu;
+
+	for_each_online_cpu(cpu) {
+		if (per_cpu(vector_irq, cpu)[vector] != -1)
+			return 1;
+	}
+
+	return 0;
+}
+
 /* Overridden in paravirt.c */
 void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
 
@@ -146,10 +158,12 @@
 	alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
 
 	/* IPI for single call function */
-	set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, call_function_single_interrupt);
+	alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
+				 call_function_single_interrupt);
 
 	/* Low priority IPI to cleanup after moving an irq */
 	set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
+	set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors);
 #endif
 
 #ifdef CONFIG_X86_LOCAL_APIC
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index 6190e6ef..31ebfe3 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -69,6 +69,18 @@
 	[IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1
 };
 
+int vector_used_by_percpu_irq(unsigned int vector)
+{
+	int cpu;
+
+	for_each_online_cpu(cpu) {
+		if (per_cpu(vector_irq, cpu)[vector] != -1)
+			return 1;
+	}
+
+	return 0;
+}
+
 void __init init_ISA_irqs(void)
 {
 	int i;
@@ -121,6 +133,7 @@
 
 	/* Low priority IPI to cleanup after moving an irq */
 	set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
+	set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors);
 #endif
 }
 
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index e169ae9..652fce6d 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -89,17 +89,17 @@
  */
 static unsigned long kvm_get_tsc_khz(void)
 {
-	return preset_lpj;
+	struct pvclock_vcpu_time_info *src;
+	src = &per_cpu(hv_clock, 0);
+	return pvclock_tsc_khz(src);
 }
 
 static void kvm_get_preset_lpj(void)
 {
-	struct pvclock_vcpu_time_info *src;
 	unsigned long khz;
 	u64 lpj;
 
-	src = &per_cpu(hv_clock, 0);
-	khz = pvclock_tsc_khz(src);
+	khz = kvm_get_tsc_khz();
 
 	lpj = ((u64)khz * 1000);
 	do_div(lpj, HZ);
@@ -194,5 +194,7 @@
 #endif
 		kvm_get_preset_lpj();
 		clocksource_register(&kvm_clock);
+		pv_info.paravirt_enabled = 1;
+		pv_info.name = "KVM";
 	}
 }
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index eee32b4..71f1d99 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -12,8 +12,8 @@
 #include <linux/mm.h>
 #include <linux/smp.h>
 #include <linux/vmalloc.h>
+#include <linux/uaccess.h>
 
-#include <asm/uaccess.h>
 #include <asm/system.h>
 #include <asm/ldt.h>
 #include <asm/desc.h>
@@ -93,7 +93,7 @@
 	if (err < 0)
 		return err;
 
-	for(i = 0; i < old->size; i++)
+	for (i = 0; i < old->size; i++)
 		write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE);
 	return 0;
 }
diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c
index 3b59951..c12314c 100644
--- a/arch/x86/kernel/mfgpt_32.c
+++ b/arch/x86/kernel/mfgpt_32.c
@@ -287,7 +287,7 @@
 	.set_mode = mfgpt_set_mode,
 	.set_next_event = mfgpt_next_event,
 	.rating = 250,
-	.cpumask = CPU_MASK_ALL,
+	.cpumask = cpu_all_mask,
 	.shift = 32
 };
 
diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c
index efc2f36..666e43d 100644
--- a/arch/x86/kernel/mmconf-fam10h_64.c
+++ b/arch/x86/kernel/mmconf-fam10h_64.c
@@ -13,8 +13,7 @@
 #include <asm/msr.h>
 #include <asm/acpi.h>
 #include <asm/mmconfig.h>
-
-#include "../pci/pci.h"
+#include <asm/pci_x86.h>
 
 struct pci_hostbridge_probe {
 	u32 bus;
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 45e3b69..c5c5b8d 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -16,14 +16,14 @@
 #include <linux/bitops.h>
 #include <linux/acpi.h>
 #include <linux/module.h>
+#include <linux/smp.h>
+#include <linux/acpi.h>
 
-#include <asm/smp.h>
 #include <asm/mtrr.h>
 #include <asm/mpspec.h>
 #include <asm/pgalloc.h>
 #include <asm/io_apic.h>
 #include <asm/proto.h>
-#include <asm/acpi.h>
 #include <asm/bios_ebda.h>
 #include <asm/e820.h>
 #include <asm/trampoline.h>
@@ -95,8 +95,8 @@
 #endif
 
 	if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA) - 1) == 0) {
-		 set_bit(m->mpc_busid, mp_bus_not_pci);
-#if defined(CONFIG_EISA) || defined (CONFIG_MCA)
+		set_bit(m->mpc_busid, mp_bus_not_pci);
+#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
 		mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
 #endif
 	} else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) {
@@ -104,7 +104,7 @@
 			x86_quirks->mpc_oem_pci_bus(m);
 
 		clear_bit(m->mpc_busid, mp_bus_not_pci);
-#if defined(CONFIG_EISA) || defined (CONFIG_MCA)
+#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
 		mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
 	} else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA) - 1) == 0) {
 		mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA;
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 8bd1bf9..45a09cc 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -26,11 +26,10 @@
 #include <linux/kernel_stat.h>
 #include <linux/kdebug.h>
 #include <linux/smp.h>
+#include <linux/nmi.h>
 
 #include <asm/i8259.h>
 #include <asm/io_apic.h>
-#include <asm/smp.h>
-#include <asm/nmi.h>
 #include <asm/proto.h>
 #include <asm/timer.h>
 
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index a35eaa3..00c2bcd 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -52,7 +52,7 @@
  * to trigger bugs with some popular PCI cards, in particular 3ware (but
  * has been also seen with Qlogic at least).
  */
-int iommu_fullflush = 1;
+static int iommu_fullflush = 1;
 
 /* Allocation bitmap for the remapping area: */
 static DEFINE_SPINLOCK(iommu_bitmap_lock);
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 61f718d..bf088c6 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -12,6 +12,8 @@
 #include <asm/proto.h>
 #include <asm/reboot_fixups.h>
 #include <asm/reboot.h>
+#include <asm/pci_x86.h>
+#include <asm/virtext.h>
 
 #ifdef CONFIG_X86_32
 # include <linux/dmi.h>
@@ -23,7 +25,6 @@
 
 #include <mach_ipi.h>
 
-
 /*
  * Power off function, if any
  */
@@ -39,6 +40,12 @@
 static int reboot_cpu = -1;
 #endif
 
+/* This is set if we need to go through the 'emergency' path.
+ * When machine_emergency_restart() is called, we may be in
+ * an inconsistent state and won't be able to do proper cleanup.
+ */
+static int reboot_emergency;
+
 /* This is set by the PCI code if either type 1 or type 2 PCI is detected */
 bool port_cf9_safe = false;
 
@@ -368,6 +375,48 @@
 	}
 }
 
+static void vmxoff_nmi(int cpu, struct die_args *args)
+{
+	cpu_emergency_vmxoff();
+}
+
+/* Use NMIs as IPIs to tell all CPUs to disable virtualization
+ */
+static void emergency_vmx_disable_all(void)
+{
+	/* Just make sure we won't change CPUs while doing this */
+	local_irq_disable();
+
+	/* We need to disable VMX on all CPUs before rebooting, otherwise
+	 * we risk hanging up the machine, because the CPU ignores INIT
+	 * signals when VMX is enabled.
+	 *
+	 * We can't take any locks and we may be in an inconsistent
+	 * state, so we use NMIs as IPIs to tell the other CPUs to disable
+	 * VMX and halt.
+	 *
+	 * For safety, we will avoid running the nmi_shootdown_cpus()
+	 * stuff unnecessarily, but we don't have a way to check
+	 * if other CPUs have VMX enabled. So we will call it only if the
+	 * CPU we are running on has VMX enabled.
+	 *
+	 * We will miss cases where VMX is not enabled on all CPUs. This
+	 * shouldn't do much harm because KVM always enables VMX on all
+	 * CPUs anyway. But we can miss it in the small window where KVM
+	 * is still enabling VMX.
+	 */
+	if (cpu_has_vmx() && cpu_vmx_enabled()) {
+		/* Disable VMX on this CPU.
+		 */
+		cpu_vmxoff();
+
+		/* Halt and disable VMX on the other CPUs */
+		nmi_shootdown_cpus(vmxoff_nmi);
+
+	}
+}
+
+
 void __attribute__((weak)) mach_reboot_fixups(void)
 {
 }
@@ -376,6 +425,9 @@
 {
 	int i;
 
+	if (reboot_emergency)
+		emergency_vmx_disable_all();
+
 	/* Tell the BIOS if we want cold or warm reboot */
 	*((unsigned short *)__va(0x472)) = reboot_mode;
 
@@ -482,13 +534,19 @@
 #endif
 }
 
+static void __machine_emergency_restart(int emergency)
+{
+	reboot_emergency = emergency;
+	machine_ops.emergency_restart();
+}
+
 static void native_machine_restart(char *__unused)
 {
 	printk("machine restart\n");
 
 	if (!reboot_force)
 		machine_shutdown();
-	machine_emergency_restart();
+	__machine_emergency_restart(0);
 }
 
 static void native_machine_halt(void)
@@ -532,7 +590,7 @@
 
 void machine_emergency_restart(void)
 {
-	machine_ops.emergency_restart();
+	__machine_emergency_restart(1);
 }
 
 void machine_restart(char *cmd)
@@ -592,10 +650,7 @@
 
 static void smp_send_nmi_allbutself(void)
 {
-	cpumask_t mask = cpu_online_map;
-	cpu_clear(safe_smp_processor_id(), mask);
-	if (!cpus_empty(mask))
-		send_IPI_mask(mask, NMI_VECTOR);
+	send_IPI_allbutself(NMI_VECTOR);
 }
 
 static struct notifier_block crash_nmi_nb = {
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index ae0c0d3..0b63b08 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -152,6 +152,11 @@
 	old_size = PERCPU_ENOUGH_ROOM;
 	align = max_t(unsigned long, PAGE_SIZE, align);
 	size = roundup(old_size, align);
+
+	printk(KERN_INFO
+		"NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n",
+		NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids);
+
 	printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n",
 			  size);
 
@@ -168,24 +173,24 @@
 			       "cpu %d has no node %d or node-local memory\n",
 				cpu, node);
 			if (ptr)
-				printk(KERN_DEBUG "per cpu data for cpu%d at %016lx\n",
+				printk(KERN_DEBUG
+					"per cpu data for cpu%d at %016lx\n",
 					 cpu, __pa(ptr));
 		}
 		else {
 			ptr = __alloc_bootmem_node(NODE_DATA(node), size, align,
 							__pa(MAX_DMA_ADDRESS));
 			if (ptr)
-				printk(KERN_DEBUG "per cpu data for cpu%d on node%d at %016lx\n",
-					 cpu, node, __pa(ptr));
+				printk(KERN_DEBUG
+					"per cpu data for cpu%d on node%d "
+					"at %016lx\n",
+					cpu, node, __pa(ptr));
 		}
 #endif
 		per_cpu_offset(cpu) = ptr - __per_cpu_start;
 		memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
 	}
 
-	printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids %d\n",
-		NR_CPUS, nr_cpu_ids, nr_node_ids);
-
 	/* Setup percpu data maps */
 	setup_per_cpu_maps();
 
@@ -282,7 +287,7 @@
 	else
 		cpu_clear(cpu, *mask);
 
-	cpulist_scnprintf(buf, sizeof(buf), *mask);
+	cpulist_scnprintf(buf, sizeof(buf), mask);
 	printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
 		enable? "numa_add_cpu":"numa_remove_cpu", cpu, node, buf);
  }
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 7e558db..beea2649a 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -118,22 +118,22 @@
 		WARN_ON(1);
 		return;
 	}
-	send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
+	send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR);
 }
 
 void native_send_call_func_single_ipi(int cpu)
 {
-	send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_SINGLE_VECTOR);
+	send_IPI_mask(cpumask_of(cpu), CALL_FUNCTION_SINGLE_VECTOR);
 }
 
-void native_send_call_func_ipi(cpumask_t mask)
+void native_send_call_func_ipi(const struct cpumask *mask)
 {
 	cpumask_t allbutself;
 
 	allbutself = cpu_online_map;
 	cpu_clear(smp_processor_id(), allbutself);
 
-	if (cpus_equal(mask, allbutself) &&
+	if (cpus_equal(*mask, allbutself) &&
 	    cpus_equal(cpu_online_map, cpu_callout_map))
 		send_IPI_allbutself(CALL_FUNCTION_VECTOR);
 	else
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index f8500c9..31869bf 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -102,14 +102,8 @@
 /* Last level cache ID of each logical CPU */
 DEFINE_PER_CPU(u16, cpu_llc_id) = BAD_APICID;
 
-/* bitmap of online cpus */
-cpumask_t cpu_online_map __read_mostly;
-EXPORT_SYMBOL(cpu_online_map);
-
 cpumask_t cpu_callin_map;
 cpumask_t cpu_callout_map;
-cpumask_t cpu_possible_map;
-EXPORT_SYMBOL(cpu_possible_map);
 
 /* representing HT siblings of each logical CPU */
 DEFINE_PER_CPU(cpumask_t, cpu_sibling_map);
@@ -1260,6 +1254,15 @@
 	check_nmi_watchdog();
 }
 
+static int __initdata setup_possible_cpus = -1;
+static int __init _setup_possible_cpus(char *str)
+{
+	get_option(&str, &setup_possible_cpus);
+	return 0;
+}
+early_param("possible_cpus", _setup_possible_cpus);
+
+
 /*
  * cpu_possible_map should be static, it cannot change as cpu's
  * are onlined, or offlined. The reason is per-cpu data-structures
@@ -1272,7 +1275,7 @@
  *
  * Three ways to find out the number of additional hotplug CPUs:
  * - If the BIOS specified disabled CPUs in ACPI/mptables use that.
- * - The user can overwrite it with additional_cpus=NUM
+ * - The user can overwrite it with possible_cpus=NUM
  * - Otherwise don't reserve additional CPUs.
  * We do this because additional CPUs waste a lot of memory.
  * -AK
@@ -1285,9 +1288,17 @@
 	if (!num_processors)
 		num_processors = 1;
 
-	possible = num_processors + disabled_cpus;
-	if (possible > NR_CPUS)
-		possible = NR_CPUS;
+	if (setup_possible_cpus == -1)
+		possible = num_processors + disabled_cpus;
+	else
+		possible = setup_possible_cpus;
+
+	if (possible > CONFIG_NR_CPUS) {
+		printk(KERN_WARNING
+			"%d Processors exceeds NR_CPUS limit of %d\n",
+			possible, CONFIG_NR_CPUS);
+		possible = CONFIG_NR_CPUS;
+	}
 
 	printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n",
 		possible, max_t(int, possible - num_processors, 0));
@@ -1352,7 +1363,7 @@
 	lock_vector_lock();
 	remove_cpu_from_maps(cpu);
 	unlock_vector_lock();
-	fixup_irqs(cpu_online_map);
+	fixup_irqs();
 }
 
 int native_cpu_disable(void)
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c
index 8da059f..ce50546 100644
--- a/arch/x86/kernel/tlb_32.c
+++ b/arch/x86/kernel/tlb_32.c
@@ -163,7 +163,7 @@
 	 * We have to send the IPI only to
 	 * CPUs affected.
 	 */
-	send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR);
+	send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR);
 
 	while (!cpus_empty(flush_cpumask))
 		/* nothing. lockup detection does not belong here */
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c
index 29887d7..f8be6f1 100644
--- a/arch/x86/kernel/tlb_64.c
+++ b/arch/x86/kernel/tlb_64.c
@@ -191,7 +191,7 @@
 	 * We have to send the IPI only to
 	 * CPUs affected.
 	 */
-	send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR_START + sender);
+	send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR_START + sender);
 
 	while (!cpus_empty(f->flush_cpumask))
 		cpu_relax();
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 141907a..ce6650e 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -72,9 +72,6 @@
 
 #include "cpu/mcheck/mce.h"
 
-DECLARE_BITMAP(used_vectors, NR_VECTORS);
-EXPORT_SYMBOL_GPL(used_vectors);
-
 asmlinkage int system_call(void);
 
 /* Do we ignore FPU interrupts ? */
@@ -89,6 +86,9 @@
 	__attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, };
 #endif
 
+DECLARE_BITMAP(used_vectors, NR_VECTORS);
+EXPORT_SYMBOL_GPL(used_vectors);
+
 static int ignore_nmis;
 
 static inline void conditional_sti(struct pt_regs *regs)
@@ -292,8 +292,10 @@
 	tsk->thread.error_code = error_code;
 	tsk->thread.trap_no = 8;
 
-	/* This is always a kernel trap and never fixable (and thus must
-	   never return). */
+	/*
+	 * This is always a kernel trap and never fixable (and thus must
+	 * never return).
+	 */
 	for (;;)
 		die(str, regs, error_code);
 }
@@ -520,9 +522,11 @@
 }
 
 #ifdef CONFIG_X86_64
-/* Help handler running on IST stack to switch back to user stack
-   for scheduling or signal handling. The actual stack switch is done in
-   entry.S */
+/*
+ * Help handler running on IST stack to switch back to user stack
+ * for scheduling or signal handling. The actual stack switch is done in
+ * entry.S
+ */
 asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
 {
 	struct pt_regs *regs = eregs;
@@ -532,8 +536,10 @@
 	/* Exception from user space */
 	else if (user_mode(eregs))
 		regs = task_pt_regs(current);
-	/* Exception from kernel and interrupts are enabled. Move to
-	   kernel process stack. */
+	/*
+	 * Exception from kernel and interrupts are enabled. Move to
+	 * kernel process stack.
+	 */
 	else if (eregs->flags & X86_EFLAGS_IF)
 		regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs));
 	if (eregs != regs)
@@ -685,12 +691,7 @@
 	cwd = get_fpu_cwd(task);
 	swd = get_fpu_swd(task);
 
-	err = swd & ~cwd & 0x3f;
-
-#ifdef CONFIG_X86_32
-	if (!err)
-		return;
-#endif
+	err = swd & ~cwd;
 
 	if (err & 0x001) {	/* Invalid op */
 		/*
@@ -708,7 +709,11 @@
 	} else if (err & 0x020) { /* Precision */
 		info.si_code = FPE_FLTRES;
 	} else {
-		info.si_code = __SI_FAULT|SI_KERNEL; /* WTF? */
+		/*
+		 * If we're using IRQ 13, or supposedly even some trap 16
+		 * implementations, it's possible we get a spurious trap...
+		 */
+		return;		/* Spurious trap, no error */
 	}
 	force_sig_info(SIGFPE, &info, task);
 }
@@ -941,9 +946,7 @@
 
 void __init trap_init(void)
 {
-#ifdef CONFIG_X86_32
 	int i;
-#endif
 
 #ifdef CONFIG_EISA
 	void __iomem *p = early_ioremap(0x0FFFD9, 4);
@@ -1000,11 +1003,15 @@
 	}
 
 	set_system_trap_gate(SYSCALL_VECTOR, &system_call);
+#endif
 
 	/* Reserve all the builtin and the syscall vector: */
 	for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++)
 		set_bit(i, used_vectors);
 
+#ifdef CONFIG_X86_64
+	set_bit(IA32_SYSCALL_VECTOR, used_vectors);
+#else
 	set_bit(SYSCALL_VECTOR, used_vectors);
 #endif
 	/*
diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c
index 254ee07..c4c1f9e 100644
--- a/arch/x86/kernel/vmiclock_32.c
+++ b/arch/x86/kernel/vmiclock_32.c
@@ -226,7 +226,7 @@
 	/* Upper bound is clockevent's use of ulong for cycle deltas. */
 	evt->max_delta_ns = clockevent_delta2ns(ULONG_MAX, evt);
 	evt->min_delta_ns = clockevent_delta2ns(1, evt);
-	evt->cpumask = cpumask_of_cpu(cpu);
+	evt->cpumask = cpumask_of(cpu);
 
 	printk(KERN_WARNING "vmi: registering clock event %s. mult=%lu shift=%u\n",
 	       evt->name, evt->mult, evt->shift);
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index 15c3e69..2b54fe0 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -159,7 +159,7 @@
  * Restore the extended state if present. Otherwise, restore the FP/SSE
  * state.
  */
-int restore_user_xstate(void __user *buf)
+static int restore_user_xstate(void __user *buf)
 {
 	struct _fpx_sw_bytes fx_sw_user;
 	u64 mask;
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 59ebd37..e665d1c 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -603,10 +603,29 @@
 
 static void __inject_pit_timer_intr(struct kvm *kvm)
 {
+	struct kvm_vcpu *vcpu;
+	int i;
+
 	mutex_lock(&kvm->lock);
 	kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1);
 	kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0);
 	mutex_unlock(&kvm->lock);
+
+	/*
+	 * Provides NMI watchdog support via Virtual Wire mode.
+	 * The route is: PIT -> PIC -> LVT0 in NMI mode.
+	 *
+	 * Note: Our Virtual Wire implementation is simplified, only
+	 * propagating PIT interrupts to all VCPUs when they have set
+	 * LVT0 to NMI delivery. Other PIC interrupts are just sent to
+	 * VCPU0, and only if its LVT0 is in EXTINT mode.
+	 */
+	if (kvm->arch.vapics_in_nmi_mode > 0)
+		for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+			vcpu = kvm->vcpus[i];
+			if (vcpu)
+				kvm_apic_nmi_wd_deliver(vcpu);
+		}
 }
 
 void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 17e41e1..179dcb0 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -26,10 +26,40 @@
  *   Port from Qemu.
  */
 #include <linux/mm.h>
+#include <linux/bitops.h>
 #include "irq.h"
 
 #include <linux/kvm_host.h>
 
+static void pic_lock(struct kvm_pic *s)
+{
+	spin_lock(&s->lock);
+}
+
+static void pic_unlock(struct kvm_pic *s)
+{
+	struct kvm *kvm = s->kvm;
+	unsigned acks = s->pending_acks;
+	bool wakeup = s->wakeup_needed;
+	struct kvm_vcpu *vcpu;
+
+	s->pending_acks = 0;
+	s->wakeup_needed = false;
+
+	spin_unlock(&s->lock);
+
+	while (acks) {
+		kvm_notify_acked_irq(kvm, __ffs(acks));
+		acks &= acks - 1;
+	}
+
+	if (wakeup) {
+		vcpu = s->kvm->vcpus[0];
+		if (vcpu)
+			kvm_vcpu_kick(vcpu);
+	}
+}
+
 static void pic_clear_isr(struct kvm_kpic_state *s, int irq)
 {
 	s->isr &= ~(1 << irq);
@@ -136,17 +166,21 @@
 
 void kvm_pic_update_irq(struct kvm_pic *s)
 {
+	pic_lock(s);
 	pic_update_irq(s);
+	pic_unlock(s);
 }
 
 void kvm_pic_set_irq(void *opaque, int irq, int level)
 {
 	struct kvm_pic *s = opaque;
 
+	pic_lock(s);
 	if (irq >= 0 && irq < PIC_NUM_PINS) {
 		pic_set_irq1(&s->pics[irq >> 3], irq & 7, level);
 		pic_update_irq(s);
 	}
+	pic_unlock(s);
 }
 
 /*
@@ -172,6 +206,7 @@
 	int irq, irq2, intno;
 	struct kvm_pic *s = pic_irqchip(kvm);
 
+	pic_lock(s);
 	irq = pic_get_irq(&s->pics[0]);
 	if (irq >= 0) {
 		pic_intack(&s->pics[0], irq);
@@ -196,6 +231,7 @@
 		intno = s->pics[0].irq_base + irq;
 	}
 	pic_update_irq(s);
+	pic_unlock(s);
 	kvm_notify_acked_irq(kvm, irq);
 
 	return intno;
@@ -203,7 +239,7 @@
 
 void kvm_pic_reset(struct kvm_kpic_state *s)
 {
-	int irq, irqbase;
+	int irq, irqbase, n;
 	struct kvm *kvm = s->pics_state->irq_request_opaque;
 	struct kvm_vcpu *vcpu0 = kvm->vcpus[0];
 
@@ -214,8 +250,10 @@
 
 	for (irq = 0; irq < PIC_NUM_PINS/2; irq++) {
 		if (vcpu0 && kvm_apic_accept_pic_intr(vcpu0))
-			if (s->irr & (1 << irq) || s->isr & (1 << irq))
-				kvm_notify_acked_irq(kvm, irq+irqbase);
+			if (s->irr & (1 << irq) || s->isr & (1 << irq)) {
+				n = irq + irqbase;
+				s->pics_state->pending_acks |= 1 << n;
+			}
 	}
 	s->last_irr = 0;
 	s->irr = 0;
@@ -406,6 +444,7 @@
 			printk(KERN_ERR "PIC: non byte write\n");
 		return;
 	}
+	pic_lock(s);
 	switch (addr) {
 	case 0x20:
 	case 0x21:
@@ -418,6 +457,7 @@
 		elcr_ioport_write(&s->pics[addr & 1], addr, data);
 		break;
 	}
+	pic_unlock(s);
 }
 
 static void picdev_read(struct kvm_io_device *this,
@@ -431,6 +471,7 @@
 			printk(KERN_ERR "PIC: non byte read\n");
 		return;
 	}
+	pic_lock(s);
 	switch (addr) {
 	case 0x20:
 	case 0x21:
@@ -444,6 +485,7 @@
 		break;
 	}
 	*(unsigned char *)val = data;
+	pic_unlock(s);
 }
 
 /*
@@ -459,7 +501,7 @@
 	s->output = level;
 	if (vcpu && level && (s->pics[0].isr_ack & (1 << irq))) {
 		s->pics[0].isr_ack &= ~(1 << irq);
-		kvm_vcpu_kick(vcpu);
+		s->wakeup_needed = true;
 	}
 }
 
@@ -469,6 +511,8 @@
 	s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL);
 	if (!s)
 		return NULL;
+	spin_lock_init(&s->lock);
+	s->kvm = kvm;
 	s->pics[0].elcr_mask = 0xf8;
 	s->pics[1].elcr_mask = 0xde;
 	s->irq_request = pic_irq_request;
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index f17c8f5..2bf32a0 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -25,6 +25,7 @@
 #include <linux/mm_types.h>
 #include <linux/hrtimer.h>
 #include <linux/kvm_host.h>
+#include <linux/spinlock.h>
 
 #include "iodev.h"
 #include "ioapic.h"
@@ -59,6 +60,10 @@
 };
 
 struct kvm_pic {
+	spinlock_t lock;
+	bool wakeup_needed;
+	unsigned pending_acks;
+	struct kvm *kvm;
 	struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */
 	irq_request_func *irq_request;
 	void *irq_request_opaque;
@@ -87,6 +92,7 @@
 void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
 void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
 void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
+void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu);
 void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu);
 void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu);
 void __kvm_migrate_timers(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/kvm_svm.h b/arch/x86/kvm/kvm_svm.h
index 65ef0fc..8e5ee99 100644
--- a/arch/x86/kvm/kvm_svm.h
+++ b/arch/x86/kvm/kvm_svm.h
@@ -7,7 +7,7 @@
 #include <linux/kvm_host.h>
 #include <asm/msr.h>
 
-#include "svm.h"
+#include <asm/svm.h>
 
 static const u32 host_save_user_msrs[] = {
 #ifdef CONFIG_X86_64
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 0fc3cab..afac68c 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -130,6 +130,11 @@
 	return apic_get_reg(apic, APIC_LVTT) & APIC_LVT_TIMER_PERIODIC;
 }
 
+static inline int apic_lvt_nmi_mode(u32 lvt_val)
+{
+	return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI;
+}
+
 static unsigned int apic_lvt_mask[APIC_LVT_NUM] = {
 	LVT_MASK | APIC_LVT_TIMER_PERIODIC,	/* LVTT */
 	LVT_MASK | APIC_MODE_MASK,	/* LVTTHMR */
@@ -354,6 +359,7 @@
 
 	case APIC_DM_NMI:
 		kvm_inject_nmi(vcpu);
+		kvm_vcpu_kick(vcpu);
 		break;
 
 	case APIC_DM_INIT:
@@ -380,6 +386,14 @@
 		}
 		break;
 
+	case APIC_DM_EXTINT:
+		/*
+		 * Should only be called by kvm_apic_local_deliver() with LVT0,
+		 * before NMI watchdog was enabled. Already handled by
+		 * kvm_apic_accept_pic_intr().
+		 */
+		break;
+
 	default:
 		printk(KERN_ERR "TODO: unsupported delivery mode %x\n",
 		       delivery_mode);
@@ -663,6 +677,20 @@
 					apic->timer.period)));
 }
 
+static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
+{
+	int nmi_wd_enabled = apic_lvt_nmi_mode(apic_get_reg(apic, APIC_LVT0));
+
+	if (apic_lvt_nmi_mode(lvt0_val)) {
+		if (!nmi_wd_enabled) {
+			apic_debug("Receive NMI setting on APIC_LVT0 "
+				   "for cpu %d\n", apic->vcpu->vcpu_id);
+			apic->vcpu->kvm->arch.vapics_in_nmi_mode++;
+		}
+	} else if (nmi_wd_enabled)
+		apic->vcpu->kvm->arch.vapics_in_nmi_mode--;
+}
+
 static void apic_mmio_write(struct kvm_io_device *this,
 			    gpa_t address, int len, const void *data)
 {
@@ -743,10 +771,11 @@
 		apic_set_reg(apic, APIC_ICR2, val & 0xff000000);
 		break;
 
+	case APIC_LVT0:
+		apic_manage_nmi_watchdog(apic, val);
 	case APIC_LVTT:
 	case APIC_LVTTHMR:
 	case APIC_LVTPC:
-	case APIC_LVT0:
 	case APIC_LVT1:
 	case APIC_LVTERR:
 		/* TODO: Check vector */
@@ -961,12 +990,26 @@
 	return 0;
 }
 
-static int __inject_apic_timer_irq(struct kvm_lapic *apic)
+static int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
 {
-	int vector;
+	u32 reg = apic_get_reg(apic, lvt_type);
+	int vector, mode, trig_mode;
 
-	vector = apic_lvt_vector(apic, APIC_LVTT);
-	return __apic_accept_irq(apic, APIC_DM_FIXED, vector, 1, 0);
+	if (apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) {
+		vector = reg & APIC_VECTOR_MASK;
+		mode = reg & APIC_MODE_MASK;
+		trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
+		return __apic_accept_irq(apic, mode, vector, 1, trig_mode);
+	}
+	return 0;
+}
+
+void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu)
+{
+	struct kvm_lapic *apic = vcpu->arch.apic;
+
+	if (apic)
+		kvm_apic_local_deliver(apic, APIC_LVT0);
 }
 
 static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
@@ -1061,9 +1104,8 @@
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
 
-	if (apic && apic_lvt_enabled(apic, APIC_LVTT) &&
-		atomic_read(&apic->timer.pending) > 0) {
-		if (__inject_apic_timer_irq(apic))
+	if (apic && atomic_read(&apic->timer.pending) > 0) {
+		if (kvm_apic_local_deliver(apic, APIC_LVTT))
 			atomic_dec(&apic->timer.pending);
 	}
 }
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 410ddbc..83f11c7 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -17,7 +17,6 @@
  *
  */
 
-#include "vmx.h"
 #include "mmu.h"
 
 #include <linux/kvm_host.h>
@@ -33,6 +32,7 @@
 #include <asm/page.h>
 #include <asm/cmpxchg.h>
 #include <asm/io.h>
+#include <asm/vmx.h>
 
 /*
  * When setting this variable to true it enables Two-Dimensional-Paging
@@ -168,6 +168,7 @@
 static u64 __read_mostly shadow_user_mask;
 static u64 __read_mostly shadow_accessed_mask;
 static u64 __read_mostly shadow_dirty_mask;
+static u64 __read_mostly shadow_mt_mask;
 
 void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte)
 {
@@ -183,13 +184,14 @@
 EXPORT_SYMBOL_GPL(kvm_mmu_set_base_ptes);
 
 void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
-		u64 dirty_mask, u64 nx_mask, u64 x_mask)
+		u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 mt_mask)
 {
 	shadow_user_mask = user_mask;
 	shadow_accessed_mask = accessed_mask;
 	shadow_dirty_mask = dirty_mask;
 	shadow_nx_mask = nx_mask;
 	shadow_x_mask = x_mask;
+	shadow_mt_mask = mt_mask;
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
 
@@ -384,7 +386,9 @@
 {
 	int *write_count;
 
-	write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn));
+	gfn = unalias_gfn(kvm, gfn);
+	write_count = slot_largepage_idx(gfn,
+					 gfn_to_memslot_unaliased(kvm, gfn));
 	*write_count += 1;
 }
 
@@ -392,16 +396,20 @@
 {
 	int *write_count;
 
-	write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn));
+	gfn = unalias_gfn(kvm, gfn);
+	write_count = slot_largepage_idx(gfn,
+					 gfn_to_memslot_unaliased(kvm, gfn));
 	*write_count -= 1;
 	WARN_ON(*write_count < 0);
 }
 
 static int has_wrprotected_page(struct kvm *kvm, gfn_t gfn)
 {
-	struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
+	struct kvm_memory_slot *slot;
 	int *largepage_idx;
 
+	gfn = unalias_gfn(kvm, gfn);
+	slot = gfn_to_memslot_unaliased(kvm, gfn);
 	if (slot) {
 		largepage_idx = slot_largepage_idx(gfn, slot);
 		return *largepage_idx;
@@ -613,7 +621,7 @@
 	return NULL;
 }
 
-static void rmap_write_protect(struct kvm *kvm, u64 gfn)
+static int rmap_write_protect(struct kvm *kvm, u64 gfn)
 {
 	unsigned long *rmapp;
 	u64 *spte;
@@ -659,8 +667,7 @@
 		spte = rmap_next(kvm, rmapp, spte);
 	}
 
-	if (write_protected)
-		kvm_flush_remote_tlbs(kvm);
+	return write_protected;
 }
 
 static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp)
@@ -786,9 +793,11 @@
 	sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE);
 	set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
 	list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
+	INIT_LIST_HEAD(&sp->oos_link);
 	ASSERT(is_empty_shadow_page(sp->spt));
-	sp->slot_bitmap = 0;
+	bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS);
 	sp->multimapped = 0;
+	sp->global = 1;
 	sp->parent_pte = parent_pte;
 	--vcpu->kvm->arch.n_free_mmu_pages;
 	return sp;
@@ -900,8 +909,9 @@
 	struct kvm_mmu_page *sp = page_header(__pa(spte));
 
 	index = spte - sp->spt;
-	__set_bit(index, sp->unsync_child_bitmap);
-	sp->unsync_children = 1;
+	if (!__test_and_set_bit(index, sp->unsync_child_bitmap))
+		sp->unsync_children++;
+	WARN_ON(!sp->unsync_children);
 }
 
 static void kvm_mmu_update_parents_unsync(struct kvm_mmu_page *sp)
@@ -928,7 +938,6 @@
 
 static int unsync_walk_fn(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 {
-	sp->unsync_children = 1;
 	kvm_mmu_update_parents_unsync(sp);
 	return 1;
 }
@@ -959,38 +968,66 @@
 {
 }
 
+#define KVM_PAGE_ARRAY_NR 16
+
+struct kvm_mmu_pages {
+	struct mmu_page_and_offset {
+		struct kvm_mmu_page *sp;
+		unsigned int idx;
+	} page[KVM_PAGE_ARRAY_NR];
+	unsigned int nr;
+};
+
 #define for_each_unsync_children(bitmap, idx)		\
 	for (idx = find_first_bit(bitmap, 512);		\
 	     idx < 512;					\
 	     idx = find_next_bit(bitmap, 512, idx+1))
 
-static int mmu_unsync_walk(struct kvm_mmu_page *sp,
-			   struct kvm_unsync_walk *walker)
+int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp,
+		   int idx)
 {
-	int i, ret;
+	int i;
 
-	if (!sp->unsync_children)
-		return 0;
+	if (sp->unsync)
+		for (i = 0; i < pvec->nr; i++)
+			if (pvec->page[i].sp == sp)
+				return 0;
+
+	pvec->page[pvec->nr].sp = sp;
+	pvec->page[pvec->nr].idx = idx;
+	pvec->nr++;
+	return (pvec->nr == KVM_PAGE_ARRAY_NR);
+}
+
+static int __mmu_unsync_walk(struct kvm_mmu_page *sp,
+			   struct kvm_mmu_pages *pvec)
+{
+	int i, ret, nr_unsync_leaf = 0;
 
 	for_each_unsync_children(sp->unsync_child_bitmap, i) {
 		u64 ent = sp->spt[i];
 
-		if (is_shadow_present_pte(ent)) {
+		if (is_shadow_present_pte(ent) && !is_large_pte(ent)) {
 			struct kvm_mmu_page *child;
 			child = page_header(ent & PT64_BASE_ADDR_MASK);
 
 			if (child->unsync_children) {
-				ret = mmu_unsync_walk(child, walker);
-				if (ret)
+				if (mmu_pages_add(pvec, child, i))
+					return -ENOSPC;
+
+				ret = __mmu_unsync_walk(child, pvec);
+				if (!ret)
+					__clear_bit(i, sp->unsync_child_bitmap);
+				else if (ret > 0)
+					nr_unsync_leaf += ret;
+				else
 					return ret;
-				__clear_bit(i, sp->unsync_child_bitmap);
 			}
 
 			if (child->unsync) {
-				ret = walker->entry(child, walker);
-				__clear_bit(i, sp->unsync_child_bitmap);
-				if (ret)
-					return ret;
+				nr_unsync_leaf++;
+				if (mmu_pages_add(pvec, child, i))
+					return -ENOSPC;
 			}
 		}
 	}
@@ -998,7 +1035,17 @@
 	if (find_first_bit(sp->unsync_child_bitmap, 512) == 512)
 		sp->unsync_children = 0;
 
-	return 0;
+	return nr_unsync_leaf;
+}
+
+static int mmu_unsync_walk(struct kvm_mmu_page *sp,
+			   struct kvm_mmu_pages *pvec)
+{
+	if (!sp->unsync_children)
+		return 0;
+
+	mmu_pages_add(pvec, sp, 0);
+	return __mmu_unsync_walk(sp, pvec);
 }
 
 static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn)
@@ -1021,10 +1068,18 @@
 	return NULL;
 }
 
+static void kvm_unlink_unsync_global(struct kvm *kvm, struct kvm_mmu_page *sp)
+{
+	list_del(&sp->oos_link);
+	--kvm->stat.mmu_unsync_global;
+}
+
 static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
 	WARN_ON(!sp->unsync);
 	sp->unsync = 0;
+	if (sp->global)
+		kvm_unlink_unsync_global(kvm, sp);
 	--kvm->stat.mmu_unsync;
 }
 
@@ -1037,7 +1092,8 @@
 		return 1;
 	}
 
-	rmap_write_protect(vcpu->kvm, sp->gfn);
+	if (rmap_write_protect(vcpu->kvm, sp->gfn))
+		kvm_flush_remote_tlbs(vcpu->kvm);
 	kvm_unlink_unsync_page(vcpu->kvm, sp);
 	if (vcpu->arch.mmu.sync_page(vcpu, sp)) {
 		kvm_mmu_zap_page(vcpu->kvm, sp);
@@ -1048,30 +1104,89 @@
 	return 0;
 }
 
-struct sync_walker {
-	struct kvm_vcpu *vcpu;
-	struct kvm_unsync_walk walker;
+struct mmu_page_path {
+	struct kvm_mmu_page *parent[PT64_ROOT_LEVEL-1];
+	unsigned int idx[PT64_ROOT_LEVEL-1];
 };
 
-static int mmu_sync_fn(struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk)
-{
-	struct sync_walker *sync_walk = container_of(walk, struct sync_walker,
-						     walker);
-	struct kvm_vcpu *vcpu = sync_walk->vcpu;
+#define for_each_sp(pvec, sp, parents, i)			\
+		for (i = mmu_pages_next(&pvec, &parents, -1),	\
+			sp = pvec.page[i].sp;			\
+			i < pvec.nr && ({ sp = pvec.page[i].sp; 1;});	\
+			i = mmu_pages_next(&pvec, &parents, i))
 
-	kvm_sync_page(vcpu, sp);
-	return (need_resched() || spin_needbreak(&vcpu->kvm->mmu_lock));
+int mmu_pages_next(struct kvm_mmu_pages *pvec, struct mmu_page_path *parents,
+		   int i)
+{
+	int n;
+
+	for (n = i+1; n < pvec->nr; n++) {
+		struct kvm_mmu_page *sp = pvec->page[n].sp;
+
+		if (sp->role.level == PT_PAGE_TABLE_LEVEL) {
+			parents->idx[0] = pvec->page[n].idx;
+			return n;
+		}
+
+		parents->parent[sp->role.level-2] = sp;
+		parents->idx[sp->role.level-1] = pvec->page[n].idx;
+	}
+
+	return n;
 }
 
-static void mmu_sync_children(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
+void mmu_pages_clear_parents(struct mmu_page_path *parents)
 {
-	struct sync_walker walker = {
-		.walker = { .entry = mmu_sync_fn, },
-		.vcpu = vcpu,
-	};
+	struct kvm_mmu_page *sp;
+	unsigned int level = 0;
 
-	while (mmu_unsync_walk(sp, &walker.walker))
+	do {
+		unsigned int idx = parents->idx[level];
+
+		sp = parents->parent[level];
+		if (!sp)
+			return;
+
+		--sp->unsync_children;
+		WARN_ON((int)sp->unsync_children < 0);
+		__clear_bit(idx, sp->unsync_child_bitmap);
+		level++;
+	} while (level < PT64_ROOT_LEVEL-1 && !sp->unsync_children);
+}
+
+static void kvm_mmu_pages_init(struct kvm_mmu_page *parent,
+			       struct mmu_page_path *parents,
+			       struct kvm_mmu_pages *pvec)
+{
+	parents->parent[parent->role.level-1] = NULL;
+	pvec->nr = 0;
+}
+
+static void mmu_sync_children(struct kvm_vcpu *vcpu,
+			      struct kvm_mmu_page *parent)
+{
+	int i;
+	struct kvm_mmu_page *sp;
+	struct mmu_page_path parents;
+	struct kvm_mmu_pages pages;
+
+	kvm_mmu_pages_init(parent, &parents, &pages);
+	while (mmu_unsync_walk(parent, &pages)) {
+		int protected = 0;
+
+		for_each_sp(pages, sp, parents, i)
+			protected |= rmap_write_protect(vcpu->kvm, sp->gfn);
+
+		if (protected)
+			kvm_flush_remote_tlbs(vcpu->kvm);
+
+		for_each_sp(pages, sp, parents, i) {
+			kvm_sync_page(vcpu, sp);
+			mmu_pages_clear_parents(&parents);
+		}
 		cond_resched_lock(&vcpu->kvm->mmu_lock);
+		kvm_mmu_pages_init(parent, &parents, &pages);
+	}
 }
 
 static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
@@ -1129,7 +1244,8 @@
 	sp->role = role;
 	hlist_add_head(&sp->hash_link, bucket);
 	if (!metaphysical) {
-		rmap_write_protect(vcpu->kvm, gfn);
+		if (rmap_write_protect(vcpu->kvm, gfn))
+			kvm_flush_remote_tlbs(vcpu->kvm);
 		account_shadowed(vcpu->kvm, gfn);
 	}
 	if (shadow_trap_nonpresent_pte != shadow_notrap_nonpresent_pte)
@@ -1153,6 +1269,8 @@
 	if (level == PT32E_ROOT_LEVEL) {
 		shadow_addr = vcpu->arch.mmu.pae_root[(addr >> 30) & 3];
 		shadow_addr &= PT64_BASE_ADDR_MASK;
+		if (!shadow_addr)
+			return 1;
 		--level;
 	}
 
@@ -1237,33 +1355,29 @@
 	}
 }
 
-struct zap_walker {
-	struct kvm_unsync_walk walker;
-	struct kvm *kvm;
-	int zapped;
-};
-
-static int mmu_zap_fn(struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk)
+static int mmu_zap_unsync_children(struct kvm *kvm,
+				   struct kvm_mmu_page *parent)
 {
-	struct zap_walker *zap_walk = container_of(walk, struct zap_walker,
-						     walker);
-	kvm_mmu_zap_page(zap_walk->kvm, sp);
-	zap_walk->zapped = 1;
-	return 0;
-}
+	int i, zapped = 0;
+	struct mmu_page_path parents;
+	struct kvm_mmu_pages pages;
 
-static int mmu_zap_unsync_children(struct kvm *kvm, struct kvm_mmu_page *sp)
-{
-	struct zap_walker walker = {
-		.walker = { .entry = mmu_zap_fn, },
-		.kvm = kvm,
-		.zapped = 0,
-	};
-
-	if (sp->role.level == PT_PAGE_TABLE_LEVEL)
+	if (parent->role.level == PT_PAGE_TABLE_LEVEL)
 		return 0;
-	mmu_unsync_walk(sp, &walker.walker);
-	return walker.zapped;
+
+	kvm_mmu_pages_init(parent, &parents, &pages);
+	while (mmu_unsync_walk(parent, &pages)) {
+		struct kvm_mmu_page *sp;
+
+		for_each_sp(pages, sp, parents, i) {
+			kvm_mmu_zap_page(kvm, sp);
+			mmu_pages_clear_parents(&parents);
+		}
+		zapped += pages.nr;
+		kvm_mmu_pages_init(parent, &parents, &pages);
+	}
+
+	return zapped;
 }
 
 static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
@@ -1362,7 +1476,7 @@
 	int slot = memslot_id(kvm, gfn_to_memslot(kvm, gfn));
 	struct kvm_mmu_page *sp = page_header(__pa(pte));
 
-	__set_bit(slot, &sp->slot_bitmap);
+	__set_bit(slot, sp->slot_bitmap);
 }
 
 static void mmu_convert_notrap(struct kvm_mmu_page *sp)
@@ -1393,6 +1507,110 @@
 	return page;
 }
 
+/*
+ * The function is based on mtrr_type_lookup() in
+ * arch/x86/kernel/cpu/mtrr/generic.c
+ */
+static int get_mtrr_type(struct mtrr_state_type *mtrr_state,
+			 u64 start, u64 end)
+{
+	int i;
+	u64 base, mask;
+	u8 prev_match, curr_match;
+	int num_var_ranges = KVM_NR_VAR_MTRR;
+
+	if (!mtrr_state->enabled)
+		return 0xFF;
+
+	/* Make end inclusive, instead of exclusive */
+	end--;
+
+	/* Look in fixed ranges. Just return the type as per start */
+	if (mtrr_state->have_fixed && (start < 0x100000)) {
+		int idx;
+
+		if (start < 0x80000) {
+			idx = 0;
+			idx += (start >> 16);
+			return mtrr_state->fixed_ranges[idx];
+		} else if (start < 0xC0000) {
+			idx = 1 * 8;
+			idx += ((start - 0x80000) >> 14);
+			return mtrr_state->fixed_ranges[idx];
+		} else if (start < 0x1000000) {
+			idx = 3 * 8;
+			idx += ((start - 0xC0000) >> 12);
+			return mtrr_state->fixed_ranges[idx];
+		}
+	}
+
+	/*
+	 * Look in variable ranges
+	 * Look for multiple ranges matching this address and pick the type
+	 * as per MTRR precedence
+	 */
+	if (!(mtrr_state->enabled & 2))
+		return mtrr_state->def_type;
+
+	prev_match = 0xFF;
+	for (i = 0; i < num_var_ranges; ++i) {
+		unsigned short start_state, end_state;
+
+		if (!(mtrr_state->var_ranges[i].mask_lo & (1 << 11)))
+			continue;
+
+		base = (((u64)mtrr_state->var_ranges[i].base_hi) << 32) +
+		       (mtrr_state->var_ranges[i].base_lo & PAGE_MASK);
+		mask = (((u64)mtrr_state->var_ranges[i].mask_hi) << 32) +
+		       (mtrr_state->var_ranges[i].mask_lo & PAGE_MASK);
+
+		start_state = ((start & mask) == (base & mask));
+		end_state = ((end & mask) == (base & mask));
+		if (start_state != end_state)
+			return 0xFE;
+
+		if ((start & mask) != (base & mask))
+			continue;
+
+		curr_match = mtrr_state->var_ranges[i].base_lo & 0xff;
+		if (prev_match == 0xFF) {
+			prev_match = curr_match;
+			continue;
+		}
+
+		if (prev_match == MTRR_TYPE_UNCACHABLE ||
+		    curr_match == MTRR_TYPE_UNCACHABLE)
+			return MTRR_TYPE_UNCACHABLE;
+
+		if ((prev_match == MTRR_TYPE_WRBACK &&
+		     curr_match == MTRR_TYPE_WRTHROUGH) ||
+		    (prev_match == MTRR_TYPE_WRTHROUGH &&
+		     curr_match == MTRR_TYPE_WRBACK)) {
+			prev_match = MTRR_TYPE_WRTHROUGH;
+			curr_match = MTRR_TYPE_WRTHROUGH;
+		}
+
+		if (prev_match != curr_match)
+			return MTRR_TYPE_UNCACHABLE;
+	}
+
+	if (prev_match != 0xFF)
+		return prev_match;
+
+	return mtrr_state->def_type;
+}
+
+static u8 get_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+	u8 mtrr;
+
+	mtrr = get_mtrr_type(&vcpu->arch.mtrr_state, gfn << PAGE_SHIFT,
+			     (gfn << PAGE_SHIFT) + PAGE_SIZE);
+	if (mtrr == 0xfe || mtrr == 0xff)
+		mtrr = MTRR_TYPE_WRBACK;
+	return mtrr;
+}
+
 static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 {
 	unsigned index;
@@ -1409,9 +1627,15 @@
 		if (s->role.word != sp->role.word)
 			return 1;
 	}
-	kvm_mmu_mark_parents_unsync(vcpu, sp);
 	++vcpu->kvm->stat.mmu_unsync;
 	sp->unsync = 1;
+
+	if (sp->global) {
+		list_add(&sp->oos_link, &vcpu->kvm->arch.oos_global_pages);
+		++vcpu->kvm->stat.mmu_unsync_global;
+	} else
+		kvm_mmu_mark_parents_unsync(vcpu, sp);
+
 	mmu_convert_notrap(sp);
 	return 0;
 }
@@ -1437,11 +1661,24 @@
 static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 		    unsigned pte_access, int user_fault,
 		    int write_fault, int dirty, int largepage,
-		    gfn_t gfn, pfn_t pfn, bool speculative,
+		    int global, gfn_t gfn, pfn_t pfn, bool speculative,
 		    bool can_unsync)
 {
 	u64 spte;
 	int ret = 0;
+	u64 mt_mask = shadow_mt_mask;
+	struct kvm_mmu_page *sp = page_header(__pa(shadow_pte));
+
+	if (!(vcpu->arch.cr4 & X86_CR4_PGE))
+		global = 0;
+	if (!global && sp->global) {
+		sp->global = 0;
+		if (sp->unsync) {
+			kvm_unlink_unsync_global(vcpu->kvm, sp);
+			kvm_mmu_mark_parents_unsync(vcpu, sp);
+		}
+	}
+
 	/*
 	 * We don't set the accessed bit, since we sometimes want to see
 	 * whether the guest actually used the pte (in order to detect
@@ -1460,6 +1697,11 @@
 		spte |= shadow_user_mask;
 	if (largepage)
 		spte |= PT_PAGE_SIZE_MASK;
+	if (mt_mask) {
+		mt_mask = get_memory_type(vcpu, gfn) <<
+			  kvm_x86_ops->get_mt_mask_shift();
+		spte |= mt_mask;
+	}
 
 	spte |= (u64)pfn << PAGE_SHIFT;
 
@@ -1474,6 +1716,15 @@
 
 		spte |= PT_WRITABLE_MASK;
 
+		/*
+		 * Optimization: for pte sync, if spte was writable the hash
+		 * lookup is unnecessary (and expensive). Write protection
+		 * is the responsibility of mmu_get_page / kvm_sync_page.
+		 * Same reasoning can be applied to dirty page accounting.
+		 */
+		if (!can_unsync && is_writeble_pte(*shadow_pte))
+			goto set_pte;
+
 		if (mmu_need_write_protect(vcpu, gfn, can_unsync)) {
 			pgprintk("%s: found shadow page for %lx, marking ro\n",
 				 __func__, gfn);
@@ -1495,8 +1746,8 @@
 static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 			 unsigned pt_access, unsigned pte_access,
 			 int user_fault, int write_fault, int dirty,
-			 int *ptwrite, int largepage, gfn_t gfn,
-			 pfn_t pfn, bool speculative)
+			 int *ptwrite, int largepage, int global,
+			 gfn_t gfn, pfn_t pfn, bool speculative)
 {
 	int was_rmapped = 0;
 	int was_writeble = is_writeble_pte(*shadow_pte);
@@ -1529,7 +1780,7 @@
 		}
 	}
 	if (set_spte(vcpu, shadow_pte, pte_access, user_fault, write_fault,
-		      dirty, largepage, gfn, pfn, speculative, true)) {
+		      dirty, largepage, global, gfn, pfn, speculative, true)) {
 		if (write_fault)
 			*ptwrite = 1;
 		kvm_x86_ops->tlb_flush(vcpu);
@@ -1586,7 +1837,7 @@
 	    || (walk->largepage && level == PT_DIRECTORY_LEVEL)) {
 		mmu_set_spte(vcpu, sptep, ACC_ALL, ACC_ALL,
 			     0, walk->write, 1, &walk->pt_write,
-			     walk->largepage, gfn, walk->pfn, false);
+			     walk->largepage, 0, gfn, walk->pfn, false);
 		++vcpu->stat.pf_fixed;
 		return 1;
 	}
@@ -1773,6 +2024,15 @@
 	}
 }
 
+static void mmu_sync_global(struct kvm_vcpu *vcpu)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_mmu_page *sp, *n;
+
+	list_for_each_entry_safe(sp, n, &kvm->arch.oos_global_pages, oos_link)
+		kvm_sync_page(vcpu, sp);
+}
+
 void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
 {
 	spin_lock(&vcpu->kvm->mmu_lock);
@@ -1780,6 +2040,13 @@
 	spin_unlock(&vcpu->kvm->mmu_lock);
 }
 
+void kvm_mmu_sync_global(struct kvm_vcpu *vcpu)
+{
+	spin_lock(&vcpu->kvm->mmu_lock);
+	mmu_sync_global(vcpu);
+	spin_unlock(&vcpu->kvm->mmu_lock);
+}
+
 static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr)
 {
 	return vaddr;
@@ -2178,7 +2445,8 @@
 }
 
 void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
-		       const u8 *new, int bytes)
+		       const u8 *new, int bytes,
+		       bool guest_initiated)
 {
 	gfn_t gfn = gpa >> PAGE_SHIFT;
 	struct kvm_mmu_page *sp;
@@ -2204,15 +2472,17 @@
 	kvm_mmu_free_some_pages(vcpu);
 	++vcpu->kvm->stat.mmu_pte_write;
 	kvm_mmu_audit(vcpu, "pre pte write");
-	if (gfn == vcpu->arch.last_pt_write_gfn
-	    && !last_updated_pte_accessed(vcpu)) {
-		++vcpu->arch.last_pt_write_count;
-		if (vcpu->arch.last_pt_write_count >= 3)
-			flooded = 1;
-	} else {
-		vcpu->arch.last_pt_write_gfn = gfn;
-		vcpu->arch.last_pt_write_count = 1;
-		vcpu->arch.last_pte_updated = NULL;
+	if (guest_initiated) {
+		if (gfn == vcpu->arch.last_pt_write_gfn
+		    && !last_updated_pte_accessed(vcpu)) {
+			++vcpu->arch.last_pt_write_count;
+			if (vcpu->arch.last_pt_write_count >= 3)
+				flooded = 1;
+		} else {
+			vcpu->arch.last_pt_write_gfn = gfn;
+			vcpu->arch.last_pt_write_count = 1;
+			vcpu->arch.last_pte_updated = NULL;
+		}
 	}
 	index = kvm_page_table_hashfn(gfn);
 	bucket = &vcpu->kvm->arch.mmu_page_hash[index];
@@ -2352,9 +2622,7 @@
 
 void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
 {
-	spin_lock(&vcpu->kvm->mmu_lock);
 	vcpu->arch.mmu.invlpg(vcpu, gva);
-	spin_unlock(&vcpu->kvm->mmu_lock);
 	kvm_mmu_flush_tlb(vcpu);
 	++vcpu->stat.invlpg;
 }
@@ -2451,7 +2719,7 @@
 		int i;
 		u64 *pt;
 
-		if (!test_bit(slot, &sp->slot_bitmap))
+		if (!test_bit(slot, sp->slot_bitmap))
 			continue;
 
 		pt = sp->spt;
@@ -2860,8 +3128,8 @@
 		if (sp->role.metaphysical)
 			continue;
 
-		slot = gfn_to_memslot(vcpu->kvm, sp->gfn);
 		gfn = unalias_gfn(vcpu->kvm, sp->gfn);
+		slot = gfn_to_memslot_unaliased(vcpu->kvm, sp->gfn);
 		rmapp = &slot->rmap[gfn - slot->base_gfn];
 		if (*rmapp)
 			printk(KERN_ERR "%s: (%s) shadow page has writable"
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 84eee43..9fd78b6 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -82,6 +82,7 @@
 	int *ptwrite;
 	pfn_t pfn;
 	u64 *sptep;
+	gpa_t pte_gpa;
 };
 
 static gfn_t gpte_to_gfn(pt_element_t gpte)
@@ -222,7 +223,7 @@
 		if (ret)
 			goto walk;
 		pte |= PT_DIRTY_MASK;
-		kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte));
+		kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte), 0);
 		walker->ptes[walker->level - 1] = pte;
 	}
 
@@ -274,7 +275,8 @@
 		return;
 	kvm_get_pfn(pfn);
 	mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
-		     gpte & PT_DIRTY_MASK, NULL, largepage, gpte_to_gfn(gpte),
+		     gpte & PT_DIRTY_MASK, NULL, largepage,
+		     gpte & PT_GLOBAL_MASK, gpte_to_gfn(gpte),
 		     pfn, true);
 }
 
@@ -301,8 +303,9 @@
 		mmu_set_spte(vcpu, sptep, access, gw->pte_access & access,
 			     sw->user_fault, sw->write_fault,
 			     gw->ptes[gw->level-1] & PT_DIRTY_MASK,
-			     sw->ptwrite, sw->largepage, gw->gfn, sw->pfn,
-			     false);
+			     sw->ptwrite, sw->largepage,
+			     gw->ptes[gw->level-1] & PT_GLOBAL_MASK,
+			     gw->gfn, sw->pfn, false);
 		sw->sptep = sptep;
 		return 1;
 	}
@@ -466,10 +469,22 @@
 				      struct kvm_vcpu *vcpu, u64 addr,
 				      u64 *sptep, int level)
 {
+	struct shadow_walker *sw =
+		container_of(_sw, struct shadow_walker, walker);
 
-	if (level == PT_PAGE_TABLE_LEVEL) {
-		if (is_shadow_present_pte(*sptep))
+	/* FIXME: properly handle invlpg on large guest pages */
+	if (level == PT_PAGE_TABLE_LEVEL ||
+	    ((level == PT_DIRECTORY_LEVEL) && is_large_pte(*sptep))) {
+		struct kvm_mmu_page *sp = page_header(__pa(sptep));
+
+		sw->pte_gpa = (sp->gfn << PAGE_SHIFT);
+		sw->pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t);
+
+		if (is_shadow_present_pte(*sptep)) {
 			rmap_remove(vcpu->kvm, sptep);
+			if (is_large_pte(*sptep))
+				--vcpu->kvm->stat.lpages;
+		}
 		set_shadow_pte(sptep, shadow_trap_nonpresent_pte);
 		return 1;
 	}
@@ -480,11 +495,26 @@
 
 static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
 {
+	pt_element_t gpte;
 	struct shadow_walker walker = {
 		.walker = { .entry = FNAME(shadow_invlpg_entry), },
+		.pte_gpa = -1,
 	};
 
+	spin_lock(&vcpu->kvm->mmu_lock);
 	walk_shadow(&walker.walker, vcpu, gva);
+	spin_unlock(&vcpu->kvm->mmu_lock);
+	if (walker.pte_gpa == -1)
+		return;
+	if (kvm_read_guest_atomic(vcpu->kvm, walker.pte_gpa, &gpte,
+				  sizeof(pt_element_t)))
+		return;
+	if (is_present_pte(gpte) && (gpte & PT_ACCESSED_MASK)) {
+		if (mmu_topup_memory_caches(vcpu))
+			return;
+		kvm_mmu_pte_write(vcpu, walker.pte_gpa, (const u8 *)&gpte,
+				  sizeof(pt_element_t), 0);
+	}
 }
 
 static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
@@ -580,7 +610,7 @@
 		nr_present++;
 		pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
 		set_spte(vcpu, &sp->spt[i], pte_access, 0, 0,
-			 is_dirty_pte(gpte), 0, gfn,
+			 is_dirty_pte(gpte), 0, gpte & PT_GLOBAL_MASK, gfn,
 			 spte_to_pfn(sp->spt[i]), true, false);
 	}
 
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 9c4ce65..1452851 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -28,6 +28,8 @@
 
 #include <asm/desc.h>
 
+#include <asm/virtext.h>
+
 #define __ex(x) __kvm_handle_fault_on_reboot(x)
 
 MODULE_AUTHOR("Qumranet");
@@ -245,34 +247,19 @@
 
 static int has_svm(void)
 {
-	uint32_t eax, ebx, ecx, edx;
+	const char *msg;
 
-	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
-		printk(KERN_INFO "has_svm: not amd\n");
+	if (!cpu_has_svm(&msg)) {
+		printk(KERN_INFO "has_svm: %s\n", msg);
 		return 0;
 	}
 
-	cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
-	if (eax < SVM_CPUID_FUNC) {
-		printk(KERN_INFO "has_svm: can't execute cpuid_8000000a\n");
-		return 0;
-	}
-
-	cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
-	if (!(ecx & (1 << SVM_CPUID_FEATURE_SHIFT))) {
-		printk(KERN_DEBUG "has_svm: svm not available\n");
-		return 0;
-	}
 	return 1;
 }
 
 static void svm_hardware_disable(void *garbage)
 {
-	uint64_t efer;
-
-	wrmsrl(MSR_VM_HSAVE_PA, 0);
-	rdmsrl(MSR_EFER, efer);
-	wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK);
+	cpu_svm_disable();
 }
 
 static void svm_hardware_enable(void *garbage)
@@ -772,6 +759,22 @@
 	var->l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1;
 	var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1;
 	var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1;
+
+	/*
+	 * SVM always stores 0 for the 'G' bit in the CS selector in
+	 * the VMCB on a VMEXIT. This hurts cross-vendor migration:
+	 * Intel's VMENTRY has a check on the 'G' bit.
+	 */
+	if (seg == VCPU_SREG_CS)
+		var->g = s->limit > 0xfffff;
+
+	/*
+	 * Work around a bug where the busy flag in the tr selector
+	 * isn't exposed
+	 */
+	if (seg == VCPU_SREG_TR)
+		var->type |= 0x2;
+
 	var->unusable = !var->present;
 }
 
@@ -1099,6 +1102,7 @@
 	rep = (io_info & SVM_IOIO_REP_MASK) != 0;
 	down = (svm->vmcb->save.rflags & X86_EFLAGS_DF) != 0;
 
+	skip_emulated_instruction(&svm->vcpu);
 	return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port);
 }
 
@@ -1912,6 +1916,11 @@
 #endif
 }
 
+static int svm_get_mt_mask_shift(void)
+{
+	return 0;
+}
+
 static struct kvm_x86_ops svm_x86_ops = {
 	.cpu_has_kvm_support = has_svm,
 	.disabled_by_bios = is_disabled,
@@ -1967,6 +1976,7 @@
 
 	.set_tss_addr = svm_set_tss_addr,
 	.get_tdp_level = get_npt_level,
+	.get_mt_mask_shift = svm_get_mt_mask_shift,
 };
 
 static int __init svm_init(void)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index a4018b0..6259d74 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -16,7 +16,6 @@
  */
 
 #include "irq.h"
-#include "vmx.h"
 #include "mmu.h"
 
 #include <linux/kvm_host.h>
@@ -31,6 +30,8 @@
 
 #include <asm/io.h>
 #include <asm/desc.h>
+#include <asm/vmx.h>
+#include <asm/virtext.h>
 
 #define __ex(x) __kvm_handle_fault_on_reboot(x)
 
@@ -90,6 +91,11 @@
 	} rmode;
 	int vpid;
 	bool emulation_required;
+
+	/* Support for vnmi-less CPUs */
+	int soft_vnmi_blocked;
+	ktime_t entry_time;
+	s64 vnmi_blocked_time;
 };
 
 static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
@@ -122,7 +128,7 @@
 	u32 vmentry_ctrl;
 } vmcs_config;
 
-struct vmx_capability {
+static struct vmx_capability {
 	u32 ept;
 	u32 vpid;
 } vmx_capability;
@@ -957,6 +963,13 @@
 		pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n", msr_index, data);
 
 		break;
+	case MSR_IA32_CR_PAT:
+		if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
+			vmcs_write64(GUEST_IA32_PAT, data);
+			vcpu->arch.pat = data;
+			break;
+		}
+		/* Otherwise falls through to kvm_set_msr_common */
 	default:
 		vmx_load_host_state(vmx);
 		msr = find_msr_entry(vmx, msr_index);
@@ -1032,8 +1045,7 @@
 
 static __init int cpu_has_kvm_support(void)
 {
-	unsigned long ecx = cpuid_ecx(1);
-	return test_bit(5, &ecx); /* CPUID.1:ECX.VMX[bit 5] -> VT */
+	return cpu_has_vmx();
 }
 
 static __init int vmx_disabled_by_bios(void)
@@ -1079,11 +1091,20 @@
 		__vcpu_clear(vmx);
 }
 
+
+/* Just like cpu_vmxoff(), but with the __kvm_handle_fault_on_reboot()
+ * tricks.
+ */
+static void kvm_cpu_vmxoff(void)
+{
+	asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
+	write_cr4(read_cr4() & ~X86_CR4_VMXE);
+}
+
 static void hardware_disable(void *garbage)
 {
 	vmclear_local_vcpus();
-	asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
-	write_cr4(read_cr4() & ~X86_CR4_VMXE);
+	kvm_cpu_vmxoff();
 }
 
 static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
@@ -1176,12 +1197,13 @@
 #ifdef CONFIG_X86_64
 	min |= VM_EXIT_HOST_ADDR_SPACE_SIZE;
 #endif
-	opt = 0;
+	opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT;
 	if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS,
 				&_vmexit_control) < 0)
 		return -EIO;
 
-	min = opt = 0;
+	min = 0;
+	opt = VM_ENTRY_LOAD_IA32_PAT;
 	if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS,
 				&_vmentry_control) < 0)
 		return -EIO;
@@ -2087,8 +2109,9 @@
  */
 static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 {
-	u32 host_sysenter_cs;
+	u32 host_sysenter_cs, msr_low, msr_high;
 	u32 junk;
+	u64 host_pat;
 	unsigned long a;
 	struct descriptor_table dt;
 	int i;
@@ -2176,6 +2199,20 @@
 	rdmsrl(MSR_IA32_SYSENTER_EIP, a);
 	vmcs_writel(HOST_IA32_SYSENTER_EIP, a);   /* 22.2.3 */
 
+	if (vmcs_config.vmexit_ctrl & VM_EXIT_LOAD_IA32_PAT) {
+		rdmsr(MSR_IA32_CR_PAT, msr_low, msr_high);
+		host_pat = msr_low | ((u64) msr_high << 32);
+		vmcs_write64(HOST_IA32_PAT, host_pat);
+	}
+	if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
+		rdmsr(MSR_IA32_CR_PAT, msr_low, msr_high);
+		host_pat = msr_low | ((u64) msr_high << 32);
+		/* Write the default value following host PAT */
+		vmcs_write64(GUEST_IA32_PAT, host_pat);
+		/* Keep arch.pat in sync with GUEST_IA32_PAT */
+		vmx->vcpu.arch.pat = host_pat;
+	}
+
 	for (i = 0; i < NR_VMX_MSR; ++i) {
 		u32 index = vmx_msr_index[i];
 		u32 data_low, data_high;
@@ -2230,6 +2267,8 @@
 
 	vmx->vcpu.arch.rmode.active = 0;
 
+	vmx->soft_vnmi_blocked = 0;
+
 	vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
 	kvm_set_cr8(&vmx->vcpu, 0);
 	msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
@@ -2335,6 +2374,29 @@
 	return ret;
 }
 
+static void enable_irq_window(struct kvm_vcpu *vcpu)
+{
+	u32 cpu_based_vm_exec_control;
+
+	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+	cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
+	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+}
+
+static void enable_nmi_window(struct kvm_vcpu *vcpu)
+{
+	u32 cpu_based_vm_exec_control;
+
+	if (!cpu_has_virtual_nmis()) {
+		enable_irq_window(vcpu);
+		return;
+	}
+
+	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+	cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING;
+	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+}
+
 static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -2358,10 +2420,54 @@
 
 static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
 {
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+	if (!cpu_has_virtual_nmis()) {
+		/*
+		 * Tracking the NMI-blocked state in software is built upon
+		 * finding the next open IRQ window. This, in turn, depends on
+		 * well-behaving guests: They have to keep IRQs disabled at
+		 * least as long as the NMI handler runs. Otherwise we may
+		 * cause NMI nesting, maybe breaking the guest. But as this is
+		 * highly unlikely, we can live with the residual risk.
+		 */
+		vmx->soft_vnmi_blocked = 1;
+		vmx->vnmi_blocked_time = 0;
+	}
+
+	++vcpu->stat.nmi_injections;
+	if (vcpu->arch.rmode.active) {
+		vmx->rmode.irq.pending = true;
+		vmx->rmode.irq.vector = NMI_VECTOR;
+		vmx->rmode.irq.rip = kvm_rip_read(vcpu);
+		vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
+			     NMI_VECTOR | INTR_TYPE_SOFT_INTR |
+			     INTR_INFO_VALID_MASK);
+		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
+		kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1);
+		return;
+	}
 	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
 			INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);
 }
 
+static void vmx_update_window_states(struct kvm_vcpu *vcpu)
+{
+	u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
+
+	vcpu->arch.nmi_window_open =
+		!(guest_intr & (GUEST_INTR_STATE_STI |
+				GUEST_INTR_STATE_MOV_SS |
+				GUEST_INTR_STATE_NMI));
+	if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked)
+		vcpu->arch.nmi_window_open = 0;
+
+	vcpu->arch.interrupt_window_open =
+		((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
+		 !(guest_intr & (GUEST_INTR_STATE_STI |
+				 GUEST_INTR_STATE_MOV_SS)));
+}
+
 static void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
 {
 	int word_index = __ffs(vcpu->arch.irq_summary);
@@ -2374,40 +2480,49 @@
 	kvm_queue_interrupt(vcpu, irq);
 }
 
-
 static void do_interrupt_requests(struct kvm_vcpu *vcpu,
 				       struct kvm_run *kvm_run)
 {
-	u32 cpu_based_vm_exec_control;
+	vmx_update_window_states(vcpu);
 
-	vcpu->arch.interrupt_window_open =
-		((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
-		 (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0);
+	if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
+		if (vcpu->arch.interrupt.pending) {
+			enable_nmi_window(vcpu);
+		} else if (vcpu->arch.nmi_window_open) {
+			vcpu->arch.nmi_pending = false;
+			vcpu->arch.nmi_injected = true;
+		} else {
+			enable_nmi_window(vcpu);
+			return;
+		}
+	}
+	if (vcpu->arch.nmi_injected) {
+		vmx_inject_nmi(vcpu);
+		if (vcpu->arch.nmi_pending)
+			enable_nmi_window(vcpu);
+		else if (vcpu->arch.irq_summary
+			 || kvm_run->request_interrupt_window)
+			enable_irq_window(vcpu);
+		return;
+	}
 
-	if (vcpu->arch.interrupt_window_open &&
-	    vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending)
-		kvm_do_inject_irq(vcpu);
+	if (vcpu->arch.interrupt_window_open) {
+		if (vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending)
+			kvm_do_inject_irq(vcpu);
 
-	if (vcpu->arch.interrupt_window_open && vcpu->arch.interrupt.pending)
-		vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
-
-	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+		if (vcpu->arch.interrupt.pending)
+			vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
+	}
 	if (!vcpu->arch.interrupt_window_open &&
 	    (vcpu->arch.irq_summary || kvm_run->request_interrupt_window))
-		/*
-		 * Interrupts blocked.  Wait for unblock.
-		 */
-		cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
-	else
-		cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING;
-	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+		enable_irq_window(vcpu);
 }
 
 static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
 {
 	int ret;
 	struct kvm_userspace_memory_region tss_mem = {
-		.slot = 8,
+		.slot = TSS_PRIVATE_MEMSLOT,
 		.guest_phys_addr = addr,
 		.memory_size = PAGE_SIZE * 3,
 		.flags = 0,
@@ -2492,7 +2607,7 @@
 		set_bit(irq / BITS_PER_LONG, &vcpu->arch.irq_summary);
 	}
 
-	if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) /* nmi */
+	if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR)
 		return 1;  /* already handled by vmx_vcpu_run() */
 
 	if (is_no_device(intr_info)) {
@@ -2581,6 +2696,7 @@
 	rep = (exit_qualification & 32) != 0;
 	port = exit_qualification >> 16;
 
+	skip_emulated_instruction(vcpu);
 	return kvm_emulate_pio(vcpu, kvm_run, in, size, port);
 }
 
@@ -2767,6 +2883,7 @@
 	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
 
 	KVMTRACE_0D(PEND_INTR, vcpu, handler);
+	++vcpu->stat.irq_window_exits;
 
 	/*
 	 * If the user space waits to inject interrupts, exit as soon as
@@ -2775,7 +2892,6 @@
 	if (kvm_run->request_interrupt_window &&
 	    !vcpu->arch.irq_summary) {
 		kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
-		++vcpu->stat.irq_window_exits;
 		return 0;
 	}
 	return 1;
@@ -2832,6 +2948,7 @@
 
 static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	unsigned long exit_qualification;
 	u16 tss_selector;
 	int reason;
@@ -2839,6 +2956,15 @@
 	exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
 
 	reason = (u32)exit_qualification >> 30;
+	if (reason == TASK_SWITCH_GATE && vmx->vcpu.arch.nmi_injected &&
+	    (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
+	    (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK)
+	    == INTR_TYPE_NMI_INTR) {
+		vcpu->arch.nmi_injected = false;
+		if (cpu_has_virtual_nmis())
+			vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
+				      GUEST_INTR_STATE_NMI);
+	}
 	tss_selector = exit_qualification;
 
 	return kvm_task_switch(vcpu, tss_selector, reason);
@@ -2927,16 +3053,12 @@
 	while (!guest_state_valid(vcpu)) {
 		err = emulate_instruction(vcpu, kvm_run, 0, 0, 0);
 
-		switch (err) {
-			case EMULATE_DONE:
-				break;
-			case EMULATE_DO_MMIO:
-				kvm_report_emulation_failure(vcpu, "mmio");
-				/* TODO: Handle MMIO */
-				return;
-			default:
-				kvm_report_emulation_failure(vcpu, "emulation failure");
-				return;
+		if (err == EMULATE_DO_MMIO)
+			break;
+
+		if (err != EMULATE_DONE) {
+			kvm_report_emulation_failure(vcpu, "emulation failure");
+			return;
 		}
 
 		if (signal_pending(current))
@@ -2948,8 +3070,10 @@
 	local_irq_disable();
 	preempt_disable();
 
-	/* Guest state should be valid now, no more emulation should be needed */
-	vmx->emulation_required = 0;
+	/* Guest state should be valid now, except if we still need to
+	 * emulate an MMIO access */
+	if (guest_state_valid(vcpu))
+		vmx->emulation_required = 0;
 }
 
 /*
@@ -2996,6 +3120,11 @@
 	KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)kvm_rip_read(vcpu),
 		    (u32)((u64)kvm_rip_read(vcpu) >> 32), entryexit);
 
+	/* If we need to emulate an MMIO from handle_invalid_guest_state,
+	 * we just return 0 */
+	if (vmx->emulation_required && emulate_invalid_guest_state)
+		return 0;
+
 	/* Access CR3 don't cause VMExit in paging mode, so we need
 	 * to sync with guest real CR3. */
 	if (vm_need_ept() && is_paging(vcpu)) {
@@ -3012,9 +3141,32 @@
 
 	if ((vectoring_info & VECTORING_INFO_VALID_MASK) &&
 			(exit_reason != EXIT_REASON_EXCEPTION_NMI &&
-			exit_reason != EXIT_REASON_EPT_VIOLATION))
-		printk(KERN_WARNING "%s: unexpected, valid vectoring info and "
-		       "exit reason is 0x%x\n", __func__, exit_reason);
+			exit_reason != EXIT_REASON_EPT_VIOLATION &&
+			exit_reason != EXIT_REASON_TASK_SWITCH))
+		printk(KERN_WARNING "%s: unexpected, valid vectoring info "
+		       "(0x%x) and exit reason is 0x%x\n",
+		       __func__, vectoring_info, exit_reason);
+
+	if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) {
+		if (vcpu->arch.interrupt_window_open) {
+			vmx->soft_vnmi_blocked = 0;
+			vcpu->arch.nmi_window_open = 1;
+		} else if (vmx->vnmi_blocked_time > 1000000000LL &&
+			   vcpu->arch.nmi_pending) {
+			/*
+			 * This CPU doesn't help us in finding the end of an
+			 * NMI-blocked window if the guest runs with IRQs
+			 * disabled. So we pull the trigger after 1 s of
+			 * futile waiting, but inform the user about this.
+			 */
+			printk(KERN_WARNING "%s: Breaking out of NMI-blocked "
+			       "state on VCPU %d after 1 s timeout\n",
+			       __func__, vcpu->vcpu_id);
+			vmx->soft_vnmi_blocked = 0;
+			vmx->vcpu.arch.nmi_window_open = 1;
+		}
+	}
+
 	if (exit_reason < kvm_vmx_max_exit_handlers
 	    && kvm_vmx_exit_handlers[exit_reason])
 		return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run);
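
The soft-vNMI handling above has to guess when the guest's NMI handler is finished: it accumulates the time spent with NMIs notionally blocked (vmx->entry_time and vnmi_blocked_time, updated in the hunks below) and simply pulls the trigger once more than one second has piled up. A stand-alone sketch of that accumulate-and-timeout pattern, with clock_gettime() standing in for ktime_get() (illustration only, not kernel code):

#include <stdio.h>
#include <time.h>
#include <unistd.h>

static long long now_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (long long)ts.tv_sec * 1000000000LL + ts.tv_nsec;
}

int main(void)
{
	long long blocked_time = 0;	/* plays the role of vmx->vnmi_blocked_time */
	int soft_vnmi_blocked = 1;	/* plays the role of vmx->soft_vnmi_blocked */

	while (soft_vnmi_blocked) {
		long long entry_time = now_ns();	/* vmx->entry_time */

		usleep(1000);	/* stand-in for the guest running with IRQs off */

		blocked_time += now_ns() - entry_time;
		if (blocked_time > 1000000000LL) {	/* same 1 s threshold */
			fprintf(stderr, "breaking out of NMI-blocked state after 1 s\n");
			soft_vnmi_blocked = 0;
		}
	}
	return 0;
}
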
@@ -3042,51 +3194,6 @@
 	vmcs_write32(TPR_THRESHOLD, (max_irr > tpr) ? tpr >> 4 : max_irr >> 4);
 }
 
-static void enable_irq_window(struct kvm_vcpu *vcpu)
-{
-	u32 cpu_based_vm_exec_control;
-
-	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
-	cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
-	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
-}
-
-static void enable_nmi_window(struct kvm_vcpu *vcpu)
-{
-	u32 cpu_based_vm_exec_control;
-
-	if (!cpu_has_virtual_nmis())
-		return;
-
-	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
-	cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING;
-	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
-}
-
-static int vmx_nmi_enabled(struct kvm_vcpu *vcpu)
-{
-	u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
-	return !(guest_intr & (GUEST_INTR_STATE_NMI |
-			       GUEST_INTR_STATE_MOV_SS |
-			       GUEST_INTR_STATE_STI));
-}
-
-static int vmx_irq_enabled(struct kvm_vcpu *vcpu)
-{
-	u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
-	return (!(guest_intr & (GUEST_INTR_STATE_MOV_SS |
-			       GUEST_INTR_STATE_STI)) &&
-		(vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF));
-}
-
-static void enable_intr_window(struct kvm_vcpu *vcpu)
-{
-	if (vcpu->arch.nmi_pending)
-		enable_nmi_window(vcpu);
-	else if (kvm_cpu_has_interrupt(vcpu))
-		enable_irq_window(vcpu);
-}
-
 static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
 {
 	u32 exit_intr_info;
@@ -3109,7 +3216,9 @@
 		if (unblock_nmi && vector != DF_VECTOR)
 			vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
 				      GUEST_INTR_STATE_NMI);
-	}
+	} else if (unlikely(vmx->soft_vnmi_blocked))
+		vmx->vnmi_blocked_time +=
+			ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time));
 
 	idt_vectoring_info = vmx->idt_vectoring_info;
 	idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;
@@ -3147,26 +3256,29 @@
 {
 	update_tpr_threshold(vcpu);
 
-	if (cpu_has_virtual_nmis()) {
-		if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
-			if (vcpu->arch.interrupt.pending) {
-				enable_nmi_window(vcpu);
-			} else if (vmx_nmi_enabled(vcpu)) {
-				vcpu->arch.nmi_pending = false;
-				vcpu->arch.nmi_injected = true;
-			} else {
-				enable_intr_window(vcpu);
-				return;
-			}
-		}
-		if (vcpu->arch.nmi_injected) {
-			vmx_inject_nmi(vcpu);
-			enable_intr_window(vcpu);
+	vmx_update_window_states(vcpu);
+
+	if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
+		if (vcpu->arch.interrupt.pending) {
+			enable_nmi_window(vcpu);
+		} else if (vcpu->arch.nmi_window_open) {
+			vcpu->arch.nmi_pending = false;
+			vcpu->arch.nmi_injected = true;
+		} else {
+			enable_nmi_window(vcpu);
 			return;
 		}
 	}
+	if (vcpu->arch.nmi_injected) {
+		vmx_inject_nmi(vcpu);
+		if (vcpu->arch.nmi_pending)
+			enable_nmi_window(vcpu);
+		else if (kvm_cpu_has_interrupt(vcpu))
+			enable_irq_window(vcpu);
+		return;
+	}
 	if (!vcpu->arch.interrupt.pending && kvm_cpu_has_interrupt(vcpu)) {
-		if (vmx_irq_enabled(vcpu))
+		if (vcpu->arch.interrupt_window_open)
 			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu));
 		else
 			enable_irq_window(vcpu);
@@ -3174,6 +3286,8 @@
 	if (vcpu->arch.interrupt.pending) {
 		vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
 		kvm_timer_intr_post(vcpu, vcpu->arch.interrupt.nr);
+		if (kvm_cpu_has_interrupt(vcpu))
+			enable_irq_window(vcpu);
 	}
 }
 
@@ -3213,6 +3327,10 @@
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	u32 intr_info;
 
+	/* Record the guest's net vcpu time for enforced NMI injections. */
+	if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked))
+		vmx->entry_time = ktime_get();
+
 	/* Handle invalid guest state instead of entering VMX */
 	if (vmx->emulation_required && emulate_invalid_guest_state) {
 		handle_invalid_guest_state(vcpu, kvm_run);
@@ -3327,9 +3445,7 @@
 	if (vmx->rmode.irq.pending)
 		fixup_rmode_irq(vmx);
 
-	vcpu->arch.interrupt_window_open =
-		(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
-		 (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)) == 0;
+	vmx_update_window_states(vcpu);
 
 	asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
 	vmx->launched = 1;
@@ -3337,7 +3453,7 @@
 	intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
 
 	/* We need to handle NMIs before interrupts are enabled */
-	if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200 &&
+	if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR &&
 	    (intr_info & INTR_INFO_VALID_MASK)) {
 		KVMTRACE_0D(NMI, vcpu, handler);
 		asm("int $2");
@@ -3455,6 +3571,11 @@
 	return VMX_EPT_DEFAULT_GAW + 1;
 }
 
+static int vmx_get_mt_mask_shift(void)
+{
+	return VMX_EPT_MT_EPTE_SHIFT;
+}
+
 static struct kvm_x86_ops vmx_x86_ops = {
 	.cpu_has_kvm_support = cpu_has_kvm_support,
 	.disabled_by_bios = vmx_disabled_by_bios,
@@ -3510,6 +3631,7 @@
 
 	.set_tss_addr = vmx_set_tss_addr,
 	.get_tdp_level = get_ept_level,
+	.get_mt_mask_shift = vmx_get_mt_mask_shift,
 };
 
 static int __init vmx_init(void)
@@ -3566,10 +3688,10 @@
 		bypass_guest_pf = 0;
 		kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
 			VMX_EPT_WRITABLE_MASK |
-			VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT |
 			VMX_EPT_IGMT_BIT);
 		kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull,
-				VMX_EPT_EXECUTABLE_MASK);
+				VMX_EPT_EXECUTABLE_MASK,
+				VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT);
 		kvm_enable_tdp();
 	} else
 		kvm_disable_tdp();
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f1f8ff2..0e6aa81 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -39,6 +39,7 @@
 #include <asm/uaccess.h>
 #include <asm/msr.h>
 #include <asm/desc.h>
+#include <asm/mtrr.h>
 
 #define MAX_IO_MSRS 256
 #define CR0_RESERVED_BITS						\
@@ -86,6 +87,7 @@
 	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
 	{ "hypercalls", VCPU_STAT(hypercalls) },
 	{ "request_irq", VCPU_STAT(request_irq_exits) },
+	{ "request_nmi", VCPU_STAT(request_nmi_exits) },
 	{ "irq_exits", VCPU_STAT(irq_exits) },
 	{ "host_state_reload", VCPU_STAT(host_state_reload) },
 	{ "efer_reload", VCPU_STAT(efer_reload) },
@@ -93,6 +95,7 @@
 	{ "insn_emulation", VCPU_STAT(insn_emulation) },
 	{ "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) },
 	{ "irq_injections", VCPU_STAT(irq_injections) },
+	{ "nmi_injections", VCPU_STAT(nmi_injections) },
 	{ "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) },
 	{ "mmu_pte_write", VM_STAT(mmu_pte_write) },
 	{ "mmu_pte_updated", VM_STAT(mmu_pte_updated) },
@@ -101,6 +104,7 @@
 	{ "mmu_recycled", VM_STAT(mmu_recycled) },
 	{ "mmu_cache_miss", VM_STAT(mmu_cache_miss) },
 	{ "mmu_unsync", VM_STAT(mmu_unsync) },
+	{ "mmu_unsync_global", VM_STAT(mmu_unsync_global) },
 	{ "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
 	{ "largepages", VM_STAT(lpages) },
 	{ NULL }
@@ -312,6 +316,7 @@
 	kvm_x86_ops->set_cr0(vcpu, cr0);
 	vcpu->arch.cr0 = cr0;
 
+	kvm_mmu_sync_global(vcpu);
 	kvm_mmu_reset_context(vcpu);
 	return;
 }
@@ -355,6 +360,7 @@
 	}
 	kvm_x86_ops->set_cr4(vcpu, cr4);
 	vcpu->arch.cr4 = cr4;
+	kvm_mmu_sync_global(vcpu);
 	kvm_mmu_reset_context(vcpu);
 }
 EXPORT_SYMBOL_GPL(kvm_set_cr4);
@@ -449,7 +455,7 @@
 	MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
 #endif
 	MSR_IA32_TIME_STAMP_COUNTER, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
-	MSR_IA32_PERF_STATUS,
+	MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT
 };
 
 static unsigned num_msrs_to_save;
@@ -648,10 +654,38 @@
 
 static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 {
+	u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
+
 	if (!msr_mtrr_valid(msr))
 		return 1;
 
-	vcpu->arch.mtrr[msr - 0x200] = data;
+	if (msr == MSR_MTRRdefType) {
+		vcpu->arch.mtrr_state.def_type = data;
+		vcpu->arch.mtrr_state.enabled = (data & 0xc00) >> 10;
+	} else if (msr == MSR_MTRRfix64K_00000)
+		p[0] = data;
+	else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
+		p[1 + msr - MSR_MTRRfix16K_80000] = data;
+	else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000)
+		p[3 + msr - MSR_MTRRfix4K_C0000] = data;
+	else if (msr == MSR_IA32_CR_PAT)
+		vcpu->arch.pat = data;
+	else {	/* Variable MTRRs */
+		int idx, is_mtrr_mask;
+		u64 *pt;
+
+		idx = (msr - 0x200) / 2;
+		is_mtrr_mask = msr - 0x200 - 2 * idx;
+		if (!is_mtrr_mask)
+			pt =
+			  (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo;
+		else
+			pt =
+			  (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo;
+		*pt = data;
+	}
+
+	kvm_mmu_reset_context(vcpu);
 	return 0;
 }
 
@@ -747,10 +781,37 @@
 
 static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 {
+	u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
+
 	if (!msr_mtrr_valid(msr))
 		return 1;
 
-	*pdata = vcpu->arch.mtrr[msr - 0x200];
+	if (msr == MSR_MTRRdefType)
+		*pdata = vcpu->arch.mtrr_state.def_type +
+			 (vcpu->arch.mtrr_state.enabled << 10);
+	else if (msr == MSR_MTRRfix64K_00000)
+		*pdata = p[0];
+	else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
+		*pdata = p[1 + msr - MSR_MTRRfix16K_80000];
+	else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000)
+		*pdata = p[3 + msr - MSR_MTRRfix4K_C0000];
+	else if (msr == MSR_IA32_CR_PAT)
+		*pdata = vcpu->arch.pat;
+	else {	/* Variable MTRRs */
+		int idx, is_mtrr_mask;
+		u64 *pt;
+
+		idx = (msr - 0x200) / 2;
+		is_mtrr_mask = msr - 0x200 - 2 * idx;
+		if (!is_mtrr_mask)
+			pt =
+			  (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo;
+		else
+			pt =
+			  (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo;
+		*pdata = *pt;
+	}
+
 	return 0;
 }
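
The rewritten set_msr_mtrr()/get_msr_mtrr() above map the guest-visible MTRR MSRs onto the new mtrr_state layout. The index arithmetic for the variable-range registers is easiest to see with concrete numbers: MSRs 0x200-0x20f hold base/mask pairs, so dividing the offset by two selects the pair and the remainder says which half is being addressed. A small stand-alone sketch of that arithmetic (plain user-space C, illustration only):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Variable-range MTRRs: MSRs 0x200..0x20f are PHYSBASEn/PHYSMASKn pairs. */
	for (unsigned msr = 0x200; msr <= 0x207; msr++) {
		int idx = (msr - 0x200) / 2;			/* which variable range */
		int is_mtrr_mask = msr - 0x200 - 2 * idx;	/* 0 = base, 1 = mask */

		printf("MSR 0x%03x -> var range %d, %s register\n",
		       msr, idx, is_mtrr_mask ? "mask" : "base");
	}

	/* MTRRdefType: bits 11:10 are the enable bits the code shifts down by 10. */
	uint64_t deftype = 0xc06;	/* both enables set, default type = write-back */
	printf("enabled bits = %u\n", (unsigned)((deftype & 0xc00) >> 10));
	return 0;
}
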
 
@@ -903,7 +964,6 @@
 	case KVM_CAP_IRQCHIP:
 	case KVM_CAP_HLT:
 	case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
-	case KVM_CAP_USER_MEMORY:
 	case KVM_CAP_SET_TSS_ADDR:
 	case KVM_CAP_EXT_CPUID:
 	case KVM_CAP_CLOCKSOURCE:
@@ -1188,6 +1248,7 @@
 		int t, times = entry->eax & 0xff;
 
 		entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
+		entry->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
 		for (t = 1; t < times && *nent < maxnent; ++t) {
 			do_cpuid_1_ent(&entry[t], function, 0);
 			entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
@@ -1218,7 +1279,7 @@
 		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
 		/* read more entries until level_type is zero */
 		for (i = 1; *nent < maxnent; ++i) {
-			level_type = entry[i - 1].ecx & 0xff;
+			level_type = entry[i - 1].ecx & 0xff00;
 			if (!level_type)
 				break;
 			do_cpuid_1_ent(&entry[i], function, i);
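
The one-character mask change above is easy to misread. For CPUID leaf 0x0B, ECX[7:0] merely echoes the sub-leaf number while ECX[15:8] carries the level type (0 means no further levels, 1 is SMT, 2 is core), so terminating the loop on "ecx & 0xff" tested the wrong field. A tiny sketch with made-up ECX values showing the two fields (user-space C, illustration only):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Hypothetical ECX values as CPUID.0B might return them. */
	uint32_t ecx_examples[] = { 0x0100, 0x0201, 0x0002 };

	for (int i = 0; i < 3; i++) {
		uint32_t ecx = ecx_examples[i];

		printf("ecx=%#06x  sub-leaf=%u  level_type=%u  stop=%s\n",
		       ecx, ecx & 0xff, (ecx >> 8) & 0xff,
		       (ecx & 0xff00) ? "no" : "yes");
	}
	return 0;
}
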
@@ -1318,6 +1379,15 @@
 	return 0;
 }
 
+static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu)
+{
+	vcpu_load(vcpu);
+	kvm_inject_nmi(vcpu);
+	vcpu_put(vcpu);
+
+	return 0;
+}
+
 static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu,
 					   struct kvm_tpr_access_ctl *tac)
 {
@@ -1377,6 +1447,13 @@
 		r = 0;
 		break;
 	}
+	case KVM_NMI: {
+		r = kvm_vcpu_ioctl_nmi(vcpu);
+		if (r)
+			goto out;
+		r = 0;
+		break;
+	}
 	case KVM_SET_CPUID: {
 		struct kvm_cpuid __user *cpuid_arg = argp;
 		struct kvm_cpuid cpuid;
@@ -1968,7 +2045,7 @@
 	ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes);
 	if (ret < 0)
 		return 0;
-	kvm_mmu_pte_write(vcpu, gpa, val, bytes);
+	kvm_mmu_pte_write(vcpu, gpa, val, bytes, 1);
 	return 1;
 }
 
@@ -2404,8 +2481,6 @@
 	val = kvm_register_read(vcpu, VCPU_REGS_RAX);
 	memcpy(vcpu->arch.pio_data, &val, 4);
 
-	kvm_x86_ops->skip_emulated_instruction(vcpu);
-
 	pio_dev = vcpu_find_pio_dev(vcpu, port, size, !in);
 	if (pio_dev) {
 		kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data);
@@ -2541,7 +2616,7 @@
 	kvm_mmu_set_nonpresent_ptes(0ull, 0ull);
 	kvm_mmu_set_base_ptes(PT_PRESENT_MASK);
 	kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
-			PT_DIRTY_MASK, PT64_NX_MASK, 0);
+			PT_DIRTY_MASK, PT64_NX_MASK, 0, 0);
 	return 0;
 
 out:
@@ -2729,7 +2804,7 @@
 
 	e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT;
 	/* when no next entry is found, the current entry[i] is reselected */
-	for (j = i + 1; j == i; j = (j + 1) % nent) {
+	for (j = i + 1; ; j = (j + 1) % nent) {
 		struct kvm_cpuid_entry2 *ej = &vcpu->arch.cpuid_entries[j];
 		if (ej->function == e->function) {
 			ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
@@ -2973,7 +3048,7 @@
 		pr_debug("vcpu %d received sipi with vector # %x\n",
 			 vcpu->vcpu_id, vcpu->arch.sipi_vector);
 		kvm_lapic_reset(vcpu);
-		r = kvm_x86_ops->vcpu_reset(vcpu);
+		r = kvm_arch_vcpu_reset(vcpu);
 		if (r)
 			return r;
 		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
@@ -3275,9 +3350,9 @@
 	kvm_desct->padding = 0;
 }
 
-static void get_segment_descritptor_dtable(struct kvm_vcpu *vcpu,
-					   u16 selector,
-					   struct descriptor_table *dtable)
+static void get_segment_descriptor_dtable(struct kvm_vcpu *vcpu,
+					  u16 selector,
+					  struct descriptor_table *dtable)
 {
 	if (selector & 1 << 2) {
 		struct kvm_segment kvm_seg;
@@ -3302,7 +3377,7 @@
 	struct descriptor_table dtable;
 	u16 index = selector >> 3;
 
-	get_segment_descritptor_dtable(vcpu, selector, &dtable);
+	get_segment_descriptor_dtable(vcpu, selector, &dtable);
 
 	if (dtable.limit < index * 8 + 7) {
 		kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc);
@@ -3321,7 +3396,7 @@
 	struct descriptor_table dtable;
 	u16 index = selector >> 3;
 
-	get_segment_descritptor_dtable(vcpu, selector, &dtable);
+	get_segment_descriptor_dtable(vcpu, selector, &dtable);
 
 	if (dtable.limit < index * 8 + 7)
 		return 1;
@@ -3900,6 +3975,7 @@
 	/* We do fxsave: this must be aligned. */
 	BUG_ON((unsigned long)&vcpu->arch.host_fx_image & 0xF);
 
+	vcpu->arch.mtrr_state.have_fixed = 1;
 	vcpu_load(vcpu);
 	r = kvm_arch_vcpu_reset(vcpu);
 	if (r == 0)
@@ -3925,6 +4001,9 @@
 
 int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
 {
+	vcpu->arch.nmi_pending = false;
+	vcpu->arch.nmi_injected = false;
+
 	return kvm_x86_ops->vcpu_reset(vcpu);
 }
 
@@ -4012,6 +4091,7 @@
 		return ERR_PTR(-ENOMEM);
 
 	INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
+	INIT_LIST_HEAD(&kvm->arch.oos_global_pages);
 	INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
 
 	/* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
@@ -4048,8 +4128,8 @@
 
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
-	kvm_iommu_unmap_guest(kvm);
 	kvm_free_all_assigned_devices(kvm);
+	kvm_iommu_unmap_guest(kvm);
 	kvm_free_pit(kvm);
 	kfree(kvm->arch.vpic);
 	kfree(kvm->arch.vioapic);
@@ -4127,7 +4207,8 @@
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 {
 	return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE
-	       || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED;
+	       || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED
+	       || vcpu->arch.nmi_pending;
 }
 
 static void vcpu_kick_intr(void *info)
diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index ea05117..d174db7 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -58,6 +58,7 @@
 #define SrcMem32    (4<<4)	/* Memory operand (32-bit). */
 #define SrcImm      (5<<4)	/* Immediate operand. */
 #define SrcImmByte  (6<<4)	/* 8-bit sign-extended immediate operand. */
+#define SrcOne      (7<<4)	/* Implied '1' */
 #define SrcMask     (7<<4)
 /* Generic ModRM decode. */
 #define ModRM       (1<<7)
@@ -70,17 +71,23 @@
 #define Group       (1<<14)     /* Bits 3:5 of modrm byte extend opcode */
 #define GroupDual   (1<<15)     /* Alternate decoding of mod == 3 */
 #define GroupMask   0xff        /* Group number stored in bits 0:7 */
+/* Source 2 operand type */
+#define Src2None    (0<<29)
+#define Src2CL      (1<<29)
+#define Src2ImmByte (2<<29)
+#define Src2One     (3<<29)
+#define Src2Mask    (7<<29)
 
 enum {
 	Group1_80, Group1_81, Group1_82, Group1_83,
 	Group1A, Group3_Byte, Group3, Group4, Group5, Group7,
 };
 
-static u16 opcode_table[256] = {
+static u32 opcode_table[256] = {
 	/* 0x00 - 0x07 */
 	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
 	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
-	0, 0, 0, 0,
+	ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 0, 0,
 	/* 0x08 - 0x0F */
 	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
 	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
@@ -195,7 +202,7 @@
 	ImplicitOps, ImplicitOps, Group | Group4, Group | Group5,
 };
 
-static u16 twobyte_table[256] = {
+static u32 twobyte_table[256] = {
 	/* 0x00 - 0x0F */
 	0, Group | GroupDual | Group7, 0, 0, 0, 0, ImplicitOps, 0,
 	ImplicitOps, ImplicitOps, 0, 0, 0, ImplicitOps | ModRM, 0, 0,
@@ -230,9 +237,14 @@
 	/* 0x90 - 0x9F */
 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 	/* 0xA0 - 0xA7 */
-	0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, 0, 0,
+	0, 0, 0, DstMem | SrcReg | ModRM | BitOp,
+	DstMem | SrcReg | Src2ImmByte | ModRM,
+	DstMem | SrcReg | Src2CL | ModRM, 0, 0,
 	/* 0xA8 - 0xAF */
-	0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, ModRM, 0,
+	0, 0, 0, DstMem | SrcReg | ModRM | BitOp,
+	DstMem | SrcReg | Src2ImmByte | ModRM,
+	DstMem | SrcReg | Src2CL | ModRM,
+	ModRM, 0,
 	/* 0xB0 - 0xB7 */
 	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 0,
 	    DstMem | SrcReg | ModRM | BitOp,
@@ -253,7 +265,7 @@
 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 };
 
-static u16 group_table[] = {
+static u32 group_table[] = {
 	[Group1_80*8] =
 	ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
 	ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
@@ -297,9 +309,9 @@
 	SrcMem16 | ModRM | Mov, SrcMem | ModRM | ByteOp,
 };
 
-static u16 group2_table[] = {
+static u32 group2_table[] = {
 	[Group7*8] =
-	SrcNone | ModRM, 0, 0, 0,
+	SrcNone | ModRM, 0, 0, SrcNone | ModRM,
 	SrcNone | ModRM | DstMem | Mov, 0,
 	SrcMem16 | ModRM | Mov, 0,
 };
@@ -359,49 +371,48 @@
 	"andl %"_msk",%"_LO32 _tmp"; "		\
 	"orl  %"_LO32 _tmp",%"_sav"; "
 
+#ifdef CONFIG_X86_64
+#define ON64(x) x
+#else
+#define ON64(x)
+#endif
+
+#define ____emulate_2op(_op, _src, _dst, _eflags, _x, _y, _suffix)	\
+	do {								\
+		__asm__ __volatile__ (					\
+			_PRE_EFLAGS("0", "4", "2")			\
+			_op _suffix " %"_x"3,%1; "			\
+			_POST_EFLAGS("0", "4", "2")			\
+			: "=m" (_eflags), "=m" ((_dst).val),		\
+			  "=&r" (_tmp)					\
+			: _y ((_src).val), "i" (EFLAGS_MASK));		\
+	} while (0)
+
+
 /* Raw emulation: instruction has two explicit operands. */
 #define __emulate_2op_nobyte(_op,_src,_dst,_eflags,_wx,_wy,_lx,_ly,_qx,_qy) \
-	do { 								    \
-		unsigned long _tmp;					    \
-									    \
-		switch ((_dst).bytes) {					    \
-		case 2:							    \
-			__asm__ __volatile__ (				    \
-				_PRE_EFLAGS("0", "4", "2")		    \
-				_op"w %"_wx"3,%1; "			    \
-				_POST_EFLAGS("0", "4", "2")		    \
-				: "=m" (_eflags), "=m" ((_dst).val),        \
-				  "=&r" (_tmp)				    \
-				: _wy ((_src).val), "i" (EFLAGS_MASK));     \
-			break;						    \
-		case 4:							    \
-			__asm__ __volatile__ (				    \
-				_PRE_EFLAGS("0", "4", "2")		    \
-				_op"l %"_lx"3,%1; "			    \
-				_POST_EFLAGS("0", "4", "2")		    \
-				: "=m" (_eflags), "=m" ((_dst).val),	    \
-				  "=&r" (_tmp)				    \
-				: _ly ((_src).val), "i" (EFLAGS_MASK));     \
-			break;						    \
-		case 8:							    \
-			__emulate_2op_8byte(_op, _src, _dst,		    \
-					    _eflags, _qx, _qy);		    \
-			break;						    \
-		}							    \
+	do {								\
+		unsigned long _tmp;					\
+									\
+		switch ((_dst).bytes) {					\
+		case 2:							\
+			____emulate_2op(_op,_src,_dst,_eflags,_wx,_wy,"w"); \
+			break;						\
+		case 4:							\
+			____emulate_2op(_op,_src,_dst,_eflags,_lx,_ly,"l"); \
+			break;						\
+		case 8:							\
+			ON64(____emulate_2op(_op,_src,_dst,_eflags,_qx,_qy,"q")); \
+			break;						\
+		}							\
 	} while (0)
 
 #define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy) \
 	do {								     \
-		unsigned long __tmp;					     \
+		unsigned long _tmp;					     \
 		switch ((_dst).bytes) {				             \
 		case 1:							     \
-			__asm__ __volatile__ (				     \
-				_PRE_EFLAGS("0", "4", "2")		     \
-				_op"b %"_bx"3,%1; "			     \
-				_POST_EFLAGS("0", "4", "2")		     \
-				: "=m" (_eflags), "=m" ((_dst).val),	     \
-				  "=&r" (__tmp)				     \
-				: _by ((_src).val), "i" (EFLAGS_MASK));      \
+			____emulate_2op(_op,_src,_dst,_eflags,_bx,_by,"b");  \
 			break;						     \
 		default:						     \
 			__emulate_2op_nobyte(_op, _src, _dst, _eflags,	     \
@@ -425,72 +436,69 @@
 	__emulate_2op_nobyte(_op, _src, _dst, _eflags,			\
 			     "w", "r", _LO32, "r", "", "r")
 
-/* Instruction has only one explicit operand (no source operand). */
-#define emulate_1op(_op, _dst, _eflags)                                    \
+/* Instruction has three operands and one operand is stored in ECX register */
+#define __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, _suffix, _type) 	\
+	do {									\
+		unsigned long _tmp;						\
+		_type _clv  = (_cl).val;  					\
+		_type _srcv = (_src).val;    					\
+		_type _dstv = (_dst).val;					\
+										\
+		__asm__ __volatile__ (						\
+			_PRE_EFLAGS("0", "5", "2")				\
+			_op _suffix " %4,%1 \n"					\
+			_POST_EFLAGS("0", "5", "2")				\
+			: "=m" (_eflags), "+r" (_dstv), "=&r" (_tmp)		\
+			: "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK)		\
+			); 							\
+										\
+		(_cl).val  = (unsigned long) _clv;				\
+		(_src).val = (unsigned long) _srcv;				\
+		(_dst).val = (unsigned long) _dstv;				\
+	} while (0)
+
+#define emulate_2op_cl(_op, _cl, _src, _dst, _eflags)				\
+	do {									\
+		switch ((_dst).bytes) {						\
+		case 2:								\
+			__emulate_2op_cl(_op, _cl, _src, _dst, _eflags,  	\
+						"w", unsigned short);         	\
+			break;							\
+		case 4: 							\
+			__emulate_2op_cl(_op, _cl, _src, _dst, _eflags,  	\
+						"l", unsigned int);           	\
+			break;							\
+		case 8:								\
+			ON64(__emulate_2op_cl(_op, _cl, _src, _dst, _eflags,	\
+						"q", unsigned long));  		\
+			break;							\
+		}								\
+	} while (0)
+
+#define __emulate_1op(_op, _dst, _eflags, _suffix)			\
 	do {								\
 		unsigned long _tmp;					\
 									\
+		__asm__ __volatile__ (					\
+			_PRE_EFLAGS("0", "3", "2")			\
+			_op _suffix " %1; "				\
+			_POST_EFLAGS("0", "3", "2")			\
+			: "=m" (_eflags), "+m" ((_dst).val),		\
+			  "=&r" (_tmp)					\
+			: "i" (EFLAGS_MASK));				\
+	} while (0)
+
+/* Instruction has only one explicit operand (no source operand). */
+#define emulate_1op(_op, _dst, _eflags)                                    \
+	do {								\
 		switch ((_dst).bytes) {				        \
-		case 1:							\
-			__asm__ __volatile__ (				\
-				_PRE_EFLAGS("0", "3", "2")		\
-				_op"b %1; "				\
-				_POST_EFLAGS("0", "3", "2")		\
-				: "=m" (_eflags), "=m" ((_dst).val),	\
-				  "=&r" (_tmp)				\
-				: "i" (EFLAGS_MASK));			\
-			break;						\
-		case 2:							\
-			__asm__ __volatile__ (				\
-				_PRE_EFLAGS("0", "3", "2")		\
-				_op"w %1; "				\
-				_POST_EFLAGS("0", "3", "2")		\
-				: "=m" (_eflags), "=m" ((_dst).val),	\
-				  "=&r" (_tmp)				\
-				: "i" (EFLAGS_MASK));			\
-			break;						\
-		case 4:							\
-			__asm__ __volatile__ (				\
-				_PRE_EFLAGS("0", "3", "2")		\
-				_op"l %1; "				\
-				_POST_EFLAGS("0", "3", "2")		\
-				: "=m" (_eflags), "=m" ((_dst).val),	\
-				  "=&r" (_tmp)				\
-				: "i" (EFLAGS_MASK));			\
-			break;						\
-		case 8:							\
-			__emulate_1op_8byte(_op, _dst, _eflags);	\
-			break;						\
+		case 1:	__emulate_1op(_op, _dst, _eflags, "b"); break;	\
+		case 2:	__emulate_1op(_op, _dst, _eflags, "w"); break;	\
+		case 4:	__emulate_1op(_op, _dst, _eflags, "l"); break;	\
+		case 8:	ON64(__emulate_1op(_op, _dst, _eflags, "q")); break; \
 		}							\
 	} while (0)
 
-/* Emulate an instruction with quadword operands (x86/64 only). */
-#if defined(CONFIG_X86_64)
-#define __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy)           \
-	do {								  \
-		__asm__ __volatile__ (					  \
-			_PRE_EFLAGS("0", "4", "2")			  \
-			_op"q %"_qx"3,%1; "				  \
-			_POST_EFLAGS("0", "4", "2")			  \
-			: "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \
-			: _qy ((_src).val), "i" (EFLAGS_MASK));		\
-	} while (0)
-
-#define __emulate_1op_8byte(_op, _dst, _eflags)                           \
-	do {								  \
-		__asm__ __volatile__ (					  \
-			_PRE_EFLAGS("0", "3", "2")			  \
-			_op"q %1; "					  \
-			_POST_EFLAGS("0", "3", "2")			  \
-			: "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \
-			: "i" (EFLAGS_MASK));				  \
-	} while (0)
-
-#elif defined(__i386__)
-#define __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy)
-#define __emulate_1op_8byte(_op, _dst, _eflags)
-#endif				/* __i386__ */
-
 /* Fetch next part of the instruction being emulated. */
 #define insn_fetch(_type, _size, _eip)                                  \
 ({	unsigned long _x;						\
@@ -1041,6 +1049,33 @@
 		c->src.bytes = 1;
 		c->src.val = insn_fetch(s8, 1, c->eip);
 		break;
+	case SrcOne:
+		c->src.bytes = 1;
+		c->src.val = 1;
+		break;
+	}
+
+	/*
+	 * Decode and fetch the second source operand: register, memory
+	 * or immediate.
+	 */
+	switch (c->d & Src2Mask) {
+	case Src2None:
+		break;
+	case Src2CL:
+		c->src2.bytes = 1;
+		c->src2.val = c->regs[VCPU_REGS_RCX] & 0x8;
+		break;
+	case Src2ImmByte:
+		c->src2.type = OP_IMM;
+		c->src2.ptr = (unsigned long *)c->eip;
+		c->src2.bytes = 1;
+		c->src2.val = insn_fetch(u8, 1, c->eip);
+		break;
+	case Src2One:
+		c->src2.bytes = 1;
+		c->src2.val = 1;
+		break;
 	}
 
 	/* Decode and fetch the destination operand: register or memory. */
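
This second-source decode is also why the opcode tables earlier in the patch grew from u16 to u32: the new Src2* flags live in bits 31:29 of each table entry and no longer fit in 16 bits. A minimal sketch of packing and testing such an entry, reusing only flag values that appear in this patch (the remaining Dst*/Src* values are defined elsewhere in the file):

#include <stdio.h>
#include <stdint.h>

#define ModRM    (1u << 7)	/* existing decode flag */
#define Src2CL   (1u << 29)	/* new second-source flags */
#define Src2Mask (7u << 29)

int main(void)
{
	uint32_t d = ModRM | Src2CL;	/* roughly a "shld cl, r, r/m" style entry */

	printf("needs ModRM : %s\n", (d & ModRM) ? "yes" : "no");
	printf("src2 is CL  : %s\n", ((d & Src2Mask) == Src2CL) ? "yes" : "no");
	printf("fits in u16 : %s\n", (d <= 0xffff) ? "yes" : "no");
	return 0;
}
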
@@ -1100,20 +1135,33 @@
 					       c->regs[VCPU_REGS_RSP]);
 }
 
+static int emulate_pop(struct x86_emulate_ctxt *ctxt,
+		       struct x86_emulate_ops *ops)
+{
+	struct decode_cache *c = &ctxt->decode;
+	int rc;
+
+	rc = ops->read_emulated(register_address(c, ss_base(ctxt),
+						 c->regs[VCPU_REGS_RSP]),
+				&c->src.val, c->src.bytes, ctxt->vcpu);
+	if (rc != 0)
+		return rc;
+
+	register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->src.bytes);
+	return rc;
+}
+
 static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt,
 				struct x86_emulate_ops *ops)
 {
 	struct decode_cache *c = &ctxt->decode;
 	int rc;
 
-	rc = ops->read_std(register_address(c, ss_base(ctxt),
-					    c->regs[VCPU_REGS_RSP]),
-			   &c->dst.val, c->dst.bytes, ctxt->vcpu);
+	c->src.bytes = c->dst.bytes;
+	rc = emulate_pop(ctxt, ops);
 	if (rc != 0)
 		return rc;
-
-	register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->dst.bytes);
-
+	c->dst.val = c->src.val;
 	return 0;
 }
 
@@ -1415,24 +1463,15 @@
 		emulate_1op("dec", c->dst, ctxt->eflags);
 		break;
 	case 0x50 ... 0x57:  /* push reg */
-		c->dst.type  = OP_MEM;
-		c->dst.bytes = c->op_bytes;
-		c->dst.val = c->src.val;
-		register_address_increment(c, &c->regs[VCPU_REGS_RSP],
-					   -c->op_bytes);
-		c->dst.ptr = (void *) register_address(
-			c, ss_base(ctxt), c->regs[VCPU_REGS_RSP]);
+		emulate_push(ctxt);
 		break;
 	case 0x58 ... 0x5f: /* pop reg */
 	pop_instruction:
-		if ((rc = ops->read_std(register_address(c, ss_base(ctxt),
-			c->regs[VCPU_REGS_RSP]), c->dst.ptr,
-			c->op_bytes, ctxt->vcpu)) != 0)
+		c->src.bytes = c->op_bytes;
+		rc = emulate_pop(ctxt, ops);
+		if (rc != 0)
 			goto done;
-
-		register_address_increment(c, &c->regs[VCPU_REGS_RSP],
-					   c->op_bytes);
-		c->dst.type = OP_NONE;	/* Disable writeback. */
+		c->dst.val = c->src.val;
 		break;
 	case 0x63:		/* movsxd */
 		if (ctxt->mode != X86EMUL_MODE_PROT64)
@@ -1591,7 +1630,9 @@
 		emulate_push(ctxt);
 		break;
 	case 0x9d: /* popf */
+		c->dst.type = OP_REG;
 		c->dst.ptr = (unsigned long *) &ctxt->eflags;
+		c->dst.bytes = c->op_bytes;
 		goto pop_instruction;
 	case 0xa0 ... 0xa1:	/* mov */
 		c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
@@ -1689,7 +1730,9 @@
 		emulate_grp2(ctxt);
 		break;
 	case 0xc3: /* ret */
+		c->dst.type = OP_REG;
 		c->dst.ptr = &c->eip;
+		c->dst.bytes = c->op_bytes;
 		goto pop_instruction;
 	case 0xc6 ... 0xc7:	/* mov (sole member of Grp11) */
 	mov:
@@ -1778,7 +1821,7 @@
 			c->eip = saved_eip;
 			goto cannot_emulate;
 		}
-		return 0;
+		break;
 	case 0xf4:              /* hlt */
 		ctxt->vcpu->arch.halt_request = 1;
 		break;
@@ -1999,12 +2042,20 @@
 		c->src.val &= (c->dst.bytes << 3) - 1;
 		emulate_2op_SrcV_nobyte("bt", c->src, c->dst, ctxt->eflags);
 		break;
+	case 0xa4: /* shld imm8, r, r/m */
+	case 0xa5: /* shld cl, r, r/m */
+		emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags);
+		break;
 	case 0xab:
 	      bts:		/* bts */
 		/* only subword offset */
 		c->src.val &= (c->dst.bytes << 3) - 1;
 		emulate_2op_SrcV_nobyte("bts", c->src, c->dst, ctxt->eflags);
 		break;
+	case 0xac: /* shrd imm8, r, r/m */
+	case 0xad: /* shrd cl, r, r/m */
+		emulate_2op_cl("shrd", c->src2, c->src, c->dst, ctxt->eflags);
+		break;
 	case 0xae:              /* clflush */
 		break;
 	case 0xb0 ... 0xb1:	/* cmpxchg */
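
For reference, the double-shift instructions wired up above (0f a4/a5 shld, 0f ac/ad shrd) shift the destination while feeding bits in from the second register operand; the count comes from an immediate byte or from CL via the new Src2 decoding. A plain-C model of the 32-bit semantics (illustration only; the emulator itself runs the real instruction through the emulate_2op_cl() helper):

#include <stdio.h>
#include <stdint.h>

static uint32_t shld32(uint32_t dst, uint32_t src, unsigned count)
{
	count &= 31;
	if (count == 0)
		return dst;
	return (dst << count) | (src >> (32 - count));
}

static uint32_t shrd32(uint32_t dst, uint32_t src, unsigned count)
{
	count &= 31;
	if (count == 0)
		return dst;
	return (dst >> count) | (src << (32 - count));
}

int main(void)
{
	printf("shld: %#010x\n", shld32(0x12345678, 0x9abcdef0, 8)); /* 0x3456789a */
	printf("shrd: %#010x\n", shrd32(0x12345678, 0x9abcdef0, 8)); /* 0xf0123456 */
	return 0;
}
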
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 50a7792..a7ed208 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -738,7 +738,7 @@
 
 	/* We can't set cpumask in the initializer: damn C limitations!  Set it
 	 * here and register our timer device. */
-	lguest_clockevent.cpumask = cpumask_of_cpu(0);
+	lguest_clockevent.cpumask = cpumask_of(0);
 	clockevents_register_device(&lguest_clockevent);
 
 	/* Finally, we unblock the timer interrupt. */
diff --git a/arch/x86/mach-default/setup.c b/arch/x86/mach-default/setup.c
index 37b9ae4..df167f2 100644
--- a/arch/x86/mach-default/setup.c
+++ b/arch/x86/mach-default/setup.c
@@ -133,29 +133,28 @@
  **/
 void mca_nmi_hook(void)
 {
-	/* If I recall correctly, there's a whole bunch of other things that
+	/*
+	 * If I recall correctly, there's a whole bunch of other things that
 	 * we can do to check for NMI problems, but that's all I know about
 	 * at the moment.
 	 */
-
-	printk("NMI generated from unknown source!\n");
+	pr_warning("NMI generated from unknown source!\n");
 }
 #endif
 
 static __init int no_ipi_broadcast(char *str)
 {
 	get_option(&str, &no_broadcast);
-	printk ("Using %s mode\n", no_broadcast ? "No IPI Broadcast" :
-											"IPI Broadcast");
+	pr_info("Using %s mode\n",
+		no_broadcast ? "No IPI Broadcast" : "IPI Broadcast");
 	return 1;
 }
-
 __setup("no_ipi_broadcast=", no_ipi_broadcast);
 
 static int __init print_ipi_mode(void)
 {
-	printk ("Using IPI %s mode\n", no_broadcast ? "No-Shortcut" :
-											"Shortcut");
+	pr_info("Using IPI %s mode\n",
+		no_broadcast ? "No-Shortcut" : "Shortcut");
 	return 0;
 }
 
diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c
index 3624a36..bc4c784 100644
--- a/arch/x86/mach-generic/bigsmp.c
+++ b/arch/x86/mach-generic/bigsmp.c
@@ -42,9 +42,10 @@
 	 { }
 };
 
-static cpumask_t vector_allocation_domain(int cpu)
+static void vector_allocation_domain(int cpu, cpumask_t *retmask)
 {
-        return cpumask_of_cpu(cpu);
+	cpus_clear(*retmask);
+	cpu_set(cpu, *retmask);
 }
 
 static int probe_bigsmp(void)
diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c
index 7b4e6d0..4ba5cca 100644
--- a/arch/x86/mach-generic/es7000.c
+++ b/arch/x86/mach-generic/es7000.c
@@ -87,7 +87,7 @@
 }
 #endif
 
-static cpumask_t vector_allocation_domain(int cpu)
+static void vector_allocation_domain(int cpu, cpumask_t *retmask)
 {
 	/* Careful. Some cpus do not strictly honor the set of cpus
 	 * specified in the interrupt destination when using lowest
@@ -97,8 +97,7 @@
 	 * deliver interrupts to the wrong hyperthread when only one
 	 * hyperthread was specified in the interrupt destination.
 	 */
-	cpumask_t domain = { { [0] = APIC_ALL_CPUS, } };
-	return domain;
+	*retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } };
 }
 
 struct genapic __initdata_refok apic_es7000 = APIC_INIT("es7000", probe_es7000);
diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c
index 71a309b..511d794 100644
--- a/arch/x86/mach-generic/numaq.c
+++ b/arch/x86/mach-generic/numaq.c
@@ -38,7 +38,7 @@
 	return 0;
 }
 
-static cpumask_t vector_allocation_domain(int cpu)
+static void vector_allocation_domain(int cpu, cpumask_t *retmask)
 {
 	/* Careful. Some cpus do not strictly honor the set of cpus
 	 * specified in the interrupt destination when using lowest
@@ -48,8 +48,7 @@
 	 * deliver interrupts to the wrong hyperthread when only one
 	 * hyperthread was specified in the interrupt destination.
 	 */
-	cpumask_t domain = { { [0] = APIC_ALL_CPUS, } };
-	return domain;
+	*retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } };
 }
 
 struct genapic apic_numaq = APIC_INIT("NUMAQ", probe_numaq);
diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c
index 2c6d234..2821ffc 100644
--- a/arch/x86/mach-generic/summit.c
+++ b/arch/x86/mach-generic/summit.c
@@ -24,7 +24,7 @@
 	return 0;
 }
 
-static cpumask_t vector_allocation_domain(int cpu)
+static void vector_allocation_domain(int cpu, cpumask_t *retmask)
 {
 	/* Careful. Some cpus do not strictly honor the set of cpus
 	 * specified in the interrupt destination when using lowest
@@ -34,8 +34,7 @@
 	 * deliver interrupts to the wrong hyperthread when only one
 	 * hyperthread was specified in the interrupt destination.
 	 */
-	cpumask_t domain = { { [0] = APIC_ALL_CPUS, } };
-	return domain;
+	*retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } };
 }
 
 struct genapic apic_summit = APIC_INIT("summit", probe_summit);
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index 5214500..a5bc054 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -63,11 +63,6 @@
 /* Used for the invalidate map that's also checked in the spinlock */
 static volatile unsigned long smp_invalidate_needed;
 
-/* Bitmask of currently online CPUs - used by setup.c for
-   /proc/cpuinfo, visible externally but still physical */
-cpumask_t cpu_online_map = CPU_MASK_NONE;
-EXPORT_SYMBOL(cpu_online_map);
-
 /* Bitmask of CPUs present in the system - exported by i386_syms.c, used
  * by scheduler but indexed physically */
 cpumask_t phys_cpu_present_map = CPU_MASK_NONE;
@@ -218,8 +213,6 @@
 /* This is for the new dynamic CPU boot code */
 cpumask_t cpu_callin_map = CPU_MASK_NONE;
 cpumask_t cpu_callout_map = CPU_MASK_NONE;
-cpumask_t cpu_possible_map = CPU_MASK_NONE;
-EXPORT_SYMBOL(cpu_possible_map);
 
 /* The per processor IRQ masks (these are usually kept in sync) */
 static __u16 vic_irq_mask[NR_CPUS] __cacheline_aligned;
@@ -679,7 +672,7 @@
 
 	/* loop over all the extended VIC CPUs and boot them.  The
 	 * Quad CPUs must be bootstrapped by their extended VIC cpu */
-	for (i = 0; i < NR_CPUS; i++) {
+	for (i = 0; i < nr_cpu_ids; i++) {
 		if (i == boot_cpu_id || !cpu_isset(i, phys_cpu_present_map))
 			continue;
 		do_boot_cpu(i);
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 8655b5b..f99a6c6 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -435,8 +435,12 @@
 #endif /* !CONFIG_NUMA */
 
 #else
-# define permanent_kmaps_init(pgd_base)		do { } while (0)
-# define set_highmem_pages_init()	do { } while (0)
+static inline void permanent_kmaps_init(pgd_t *pgd_base)
+{
+}
+static inline void set_highmem_pages_init(void)
+{
+}
 #endif /* CONFIG_HIGHMEM */
 
 void __init native_pagetable_setup_start(pgd_t *base)
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index cebcbf1..71a14f8 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -278,7 +278,7 @@
 	int rr, i;
 
 	rr = first_node(node_online_map);
-	for (i = 0; i < NR_CPUS; i++) {
+	for (i = 0; i < nr_cpu_ids; i++) {
 		if (early_cpu_to_node(i) != NUMA_NO_NODE)
 			continue;
 		numa_set_node(i, rr);
@@ -549,7 +549,7 @@
 	memnodemap[0] = 0;
 	node_set_online(0);
 	node_set(0, node_possible_map);
-	for (i = 0; i < NR_CPUS; i++)
+	for (i = 0; i < nr_cpu_ids; i++)
 		numa_set_node(i, 0);
 	e820_register_active_regions(0, start_pfn, last_pfn);
 	setup_node_bootmem(0, start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT);
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 51c0a2f..09737c8 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -382,7 +382,7 @@
 		if (!node_online(i))
 			setup_node_bootmem(i, nodes[i].start, nodes[i].end);
 
-	for (i = 0; i < NR_CPUS; i++) {
+	for (i = 0; i < nr_cpu_ids; i++) {
 		int node = early_cpu_to_node(i);
 
 		if (node == NUMA_NO_NODE)
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 1d88d2b..9e5752f 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -4,7 +4,7 @@
 #include <linux/irq.h>
 #include <linux/dmi.h>
 #include <asm/numa.h>
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 struct pci_root_info {
 	char *name;
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c
index 22e0576..9bb0982 100644
--- a/arch/x86/pci/amd_bus.c
+++ b/arch/x86/pci/amd_bus.c
@@ -2,7 +2,7 @@
 #include <linux/pci.h>
 #include <linux/topology.h>
 #include <linux/cpu.h>
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 #ifdef CONFIG_X86_64
 #include <asm/pci-direct.h>
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index bb1a01f..62ddb73 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -14,8 +14,7 @@
 #include <asm/segment.h>
 #include <asm/io.h>
 #include <asm/smp.h>
-
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 |
 				PCI_PROBE_MMCONF;
diff --git a/arch/x86/pci/direct.c b/arch/x86/pci/direct.c
index 9a5af6c..bd13c3e 100644
--- a/arch/x86/pci/direct.c
+++ b/arch/x86/pci/direct.c
@@ -5,7 +5,7 @@
 #include <linux/pci.h>
 #include <linux/init.h>
 #include <linux/dmi.h>
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 /*
  * Functions for accessing PCI base (first 256 bytes) and extended
diff --git a/arch/x86/pci/early.c b/arch/x86/pci/early.c
index 86631cc..f6adf2c 100644
--- a/arch/x86/pci/early.c
+++ b/arch/x86/pci/early.c
@@ -2,7 +2,7 @@
 #include <linux/pci.h>
 #include <asm/pci-direct.h>
 #include <asm/io.h>
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 /* Direct PCI access. This is used for PCI accesses in early boot before
    the PCI subsystem works. */
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index 2051dc9..7d388d5 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -6,8 +6,7 @@
 #include <linux/dmi.h>
 #include <linux/pci.h>
 #include <linux/init.h>
-#include "pci.h"
-
+#include <asm/pci_x86.h>
 
 static void __devinit pci_fixup_i450nx(struct pci_dev *d)
 {
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 844df0c..e51bf2c 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -34,8 +34,8 @@
 
 #include <asm/pat.h>
 #include <asm/e820.h>
+#include <asm/pci_x86.h>
 
-#include "pci.h"
 
 static int
 skip_isa_ioresource_align(struct pci_dev *dev) {
diff --git a/arch/x86/pci/init.c b/arch/x86/pci/init.c
index d6c950f..bec3b04 100644
--- a/arch/x86/pci/init.c
+++ b/arch/x86/pci/init.c
@@ -1,6 +1,6 @@
 #include <linux/pci.h>
 #include <linux/init.h>
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 /* arch_initcall has too random ordering, so call the initializers
    in the right sequence from here. */
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index bf69dbe..373b9af 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -16,8 +16,7 @@
 #include <asm/io_apic.h>
 #include <linux/irq.h>
 #include <linux/acpi.h>
-
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 #define PIRQ_SIGNATURE	(('$' << 0) + ('P' << 8) + ('I' << 16) + ('R' << 24))
 #define PIRQ_VERSION 0x0100
diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c
index b722dd4..f1065b1 100644
--- a/arch/x86/pci/legacy.c
+++ b/arch/x86/pci/legacy.c
@@ -3,7 +3,7 @@
  */
 #include <linux/init.h>
 #include <linux/pci.h>
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 /*
  * Discover remaining PCI buses in case there are peer host bridges.
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index 654a223..89bf924 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -15,8 +15,7 @@
 #include <linux/acpi.h>
 #include <linux/bitmap.h>
 #include <asm/e820.h>
-
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 /* aperture is up to 256MB but BIOS may reserve less */
 #define MMCONFIG_APER_MIN	(2 * 1024*1024)
diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c
index f3c761d..8b2d561 100644
--- a/arch/x86/pci/mmconfig_32.c
+++ b/arch/x86/pci/mmconfig_32.c
@@ -13,7 +13,7 @@
 #include <linux/init.h>
 #include <linux/acpi.h>
 #include <asm/e820.h>
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 /* Assume systems with more busses have correct MCFG */
 #define mmcfg_virt_addr ((void __iomem *) fix_to_virt(FIX_PCIE_MCFG))
diff --git a/arch/x86/pci/mmconfig_64.c b/arch/x86/pci/mmconfig_64.c
index a199416..30007ff 100644
--- a/arch/x86/pci/mmconfig_64.c
+++ b/arch/x86/pci/mmconfig_64.c
@@ -10,8 +10,7 @@
 #include <linux/acpi.h>
 #include <linux/bitmap.h>
 #include <asm/e820.h>
-
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 /* Static virtual mapping of the MMCONFIG aperture */
 struct mmcfg_virt {
diff --git a/arch/x86/pci/numaq_32.c b/arch/x86/pci/numaq_32.c
index 1177845..2089354 100644
--- a/arch/x86/pci/numaq_32.c
+++ b/arch/x86/pci/numaq_32.c
@@ -7,7 +7,7 @@
 #include <linux/nodemask.h>
 #include <mach_apic.h>
 #include <asm/mpspec.h>
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 #define XQUAD_PORTIO_BASE 0xfe400000
 #define XQUAD_PORTIO_QUAD 0x40000  /* 256k per quad. */
diff --git a/arch/x86/pci/olpc.c b/arch/x86/pci/olpc.c
index e11e9e8..b889d82 100644
--- a/arch/x86/pci/olpc.c
+++ b/arch/x86/pci/olpc.c
@@ -29,7 +29,7 @@
 #include <linux/init.h>
 #include <asm/olpc.h>
 #include <asm/geode.h>
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 /*
  * In the tables below, the first two line (8 longwords) are the
diff --git a/arch/x86/pci/pcbios.c b/arch/x86/pci/pcbios.c
index 37472fc..b82cae9 100644
--- a/arch/x86/pci/pcbios.c
+++ b/arch/x86/pci/pcbios.c
@@ -6,9 +6,8 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/uaccess.h>
-#include "pci.h"
-#include "pci-functions.h"
-
+#include <asm/pci_x86.h>
+#include <asm/mach-default/pci-functions.h>
 
 /* BIOS32 signature: "_32_" */
 #define BIOS32_SIGNATURE	(('_' << 0) + ('3' << 8) + ('2' << 16) + ('_' << 24))
diff --git a/arch/x86/pci/visws.c b/arch/x86/pci/visws.c
index 42f4cb1..16d0c0e 100644
--- a/arch/x86/pci/visws.c
+++ b/arch/x86/pci/visws.c
@@ -9,11 +9,10 @@
 #include <linux/init.h>
 
 #include <asm/setup.h>
+#include <asm/pci_x86.h>
 #include <asm/visws/cobalt.h>
 #include <asm/visws/lithium.h>
 
-#include "pci.h"
-
 static int pci_visws_enable_irq(struct pci_dev *dev) { return 0; }
 static void pci_visws_disable_irq(struct pci_dev *dev) { }
 
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 773d68d..503c240 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1082,7 +1082,7 @@
 
 static void xen_drop_mm_ref(struct mm_struct *mm)
 {
-	cpumask_t mask;
+	cpumask_var_t mask;
 	unsigned cpu;
 
 	if (current->active_mm == mm) {
@@ -1094,7 +1094,16 @@
 	}
 
 	/* Get the "official" set of cpus referring to our pagetable. */
-	mask = mm->cpu_vm_mask;
+	if (!alloc_cpumask_var(&mask, GFP_ATOMIC)) {
+		for_each_online_cpu(cpu) {
+			if (!cpumask_test_cpu(cpu, &mm->cpu_vm_mask)
+			    && per_cpu(xen_current_cr3, cpu) != __pa(mm->pgd))
+				continue;
+			smp_call_function_single(cpu, drop_other_mm_ref, mm, 1);
+		}
+		return;
+	}
+	cpumask_copy(mask, &mm->cpu_vm_mask);
 
 	/* It's possible that a vcpu may have a stale reference to our
 	   cr3, because it's in lazy mode, and it hasn't yet flushed
@@ -1103,11 +1112,12 @@
 	   if needed. */
 	for_each_online_cpu(cpu) {
 		if (per_cpu(xen_current_cr3, cpu) == __pa(mm->pgd))
-			cpu_set(cpu, mask);
+			cpumask_set_cpu(cpu, mask);
 	}
 
-	if (!cpus_empty(mask))
-		smp_call_function_mask(mask, drop_other_mm_ref, mm, 1);
+	if (!cpumask_empty(mask))
+		smp_call_function_many(mask, drop_other_mm_ref, mm, 1);
+	free_cpumask_var(mask);
 }
 #else
 static void xen_drop_mm_ref(struct mm_struct *mm)
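
The xen_drop_mm_ref() change above is part of the cpumask_var_t conversion: rather than copying a potentially large cpumask_t onto the stack, it allocates a scratch mask with GFP_ATOMIC and, when that allocation fails, falls back to calling the per-CPU helper directly for every online CPU that might still reference the page table. A rough user-space analogy of that allocate-or-fall-back pattern (assumed illustration only, not kernel code; the helpers below are made up):

#include <stdio.h>
#include <stdlib.h>

#define NCPUS 8

static int needs_drop(int cpu) { return cpu % 3 == 0; }	/* stand-in predicate */
static void drop_ref(int cpu)  { printf("drop ref on cpu %d\n", cpu); }

int main(void)
{
	unsigned char *mask = calloc(NCPUS, 1);	/* like alloc_cpumask_var(GFP_ATOMIC) */

	if (!mask) {
		/* Allocation failed: slow path, act on each CPU immediately. */
		for (int cpu = 0; cpu < NCPUS; cpu++)
			if (needs_drop(cpu))
				drop_ref(cpu);
		return 0;
	}

	/* Fast path: build the set first, then act on it in one pass. */
	for (int cpu = 0; cpu < NCPUS; cpu++)
		if (needs_drop(cpu))
			mask[cpu] = 1;
	for (int cpu = 0; cpu < NCPUS; cpu++)
		if (mask[cpu])
			drop_ref(cpu);

	free(mask);
	return 0;
}
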
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index acd9b67..c44e206 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -33,7 +33,7 @@
 #include "xen-ops.h"
 #include "mmu.h"
 
-cpumask_t xen_cpu_initialized_map;
+cpumask_var_t xen_cpu_initialized_map;
 
 static DEFINE_PER_CPU(int, resched_irq);
 static DEFINE_PER_CPU(int, callfunc_irq);
@@ -158,7 +158,7 @@
 {
 	int i, rc;
 
-	for (i = 0; i < NR_CPUS; i++) {
+	for (i = 0; i < nr_cpu_ids; i++) {
 		rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
 		if (rc >= 0) {
 			num_processors++;
@@ -192,11 +192,14 @@
 	if (xen_smp_intr_init(0))
 		BUG();
 
-	xen_cpu_initialized_map = cpumask_of_cpu(0);
+	if (!alloc_cpumask_var(&xen_cpu_initialized_map, GFP_KERNEL))
+		panic("could not allocate xen_cpu_initialized_map\n");
+
+	cpumask_copy(xen_cpu_initialized_map, cpumask_of(0));
 
 	/* Restrict the possible_map according to max_cpus. */
 	while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) {
-		for (cpu = NR_CPUS - 1; !cpu_possible(cpu); cpu--)
+		for (cpu = nr_cpu_ids - 1; !cpu_possible(cpu); cpu--)
 			continue;
 		cpu_clear(cpu, cpu_possible_map);
 	}
@@ -221,7 +224,7 @@
 	struct vcpu_guest_context *ctxt;
 	struct desc_struct *gdt;
 
-	if (cpu_test_and_set(cpu, xen_cpu_initialized_map))
+	if (cpumask_test_and_set_cpu(cpu, xen_cpu_initialized_map))
 		return 0;
 
 	ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
@@ -408,24 +411,23 @@
 	xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR);
 }
 
-static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector)
+static void xen_send_IPI_mask(const struct cpumask *mask,
+			      enum ipi_vector vector)
 {
 	unsigned cpu;
 
-	cpus_and(mask, mask, cpu_online_map);
-
-	for_each_cpu_mask_nr(cpu, mask)
+	for_each_cpu_and(cpu, mask, cpu_online_mask)
 		xen_send_IPI_one(cpu, vector);
 }
 
-static void xen_smp_send_call_function_ipi(cpumask_t mask)
+static void xen_smp_send_call_function_ipi(const struct cpumask *mask)
 {
 	int cpu;
 
 	xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);
 
 	/* Make sure other vcpus get a chance to run if they need to. */
-	for_each_cpu_mask_nr(cpu, mask) {
+	for_each_cpu(cpu, mask) {
 		if (xen_vcpu_stolen(cpu)) {
 			HYPERVISOR_sched_op(SCHEDOP_yield, 0);
 			break;
@@ -435,7 +437,8 @@
 
 static void xen_smp_send_call_function_single_ipi(int cpu)
 {
-	xen_send_IPI_mask(cpumask_of_cpu(cpu), XEN_CALL_FUNCTION_SINGLE_VECTOR);
+	xen_send_IPI_mask(cpumask_of(cpu),
+			  XEN_CALL_FUNCTION_SINGLE_VECTOR);
 }
 
 static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id)
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
index 2a234db..212ffe0 100644
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -35,7 +35,8 @@
 			pfn_to_mfn(xen_start_info->console.domU.mfn);
 	} else {
 #ifdef CONFIG_SMP
-		xen_cpu_initialized_map = cpu_online_map;
+		BUG_ON(xen_cpu_initialized_map == NULL);
+		cpumask_copy(xen_cpu_initialized_map, cpu_online_mask);
 #endif
 		xen_vcpu_restore();
 	}
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index c9f7cda..65d75a6 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -437,7 +437,7 @@
 	evt = &per_cpu(xen_clock_events, cpu);
 	memcpy(evt, xen_clockevent, sizeof(*evt));
 
-	evt->cpumask = cpumask_of_cpu(cpu);
+	evt->cpumask = cpumask_of(cpu);
 	evt->irq = irq;
 
 	setup_runstate_info(cpu);
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 9e1afae..c1f8faf 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -58,7 +58,7 @@
 __cpuinit void xen_init_lock_cpu(int cpu);
 void xen_uninit_lock_cpu(int cpu);
 
-extern cpumask_t xen_cpu_initialized_map;
+extern cpumask_var_t xen_cpu_initialized_map;
 #else
 static inline void xen_smp_init(void) {}
 #endif
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index 64f5d54..4259072 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -109,7 +109,7 @@
  */
 static ssize_t print_cpus_map(char *buf, cpumask_t *map)
 {
-	int n = cpulist_scnprintf(buf, PAGE_SIZE-2, *map);
+	int n = cpulist_scnprintf(buf, PAGE_SIZE-2, map);
 
 	buf[n++] = '\n';
 	buf[n] = '\0';
diff --git a/drivers/base/node.c b/drivers/base/node.c
index f520709..91636cd 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -30,8 +30,8 @@
 	BUILD_BUG_ON((NR_CPUS/32 * 9) > (PAGE_SIZE-1));
 
 	len = type?
-		cpulist_scnprintf(buf, PAGE_SIZE-2, *mask):
-		cpumask_scnprintf(buf, PAGE_SIZE-2, *mask);
+		cpulist_scnprintf(buf, PAGE_SIZE-2, mask) :
+		cpumask_scnprintf(buf, PAGE_SIZE-2, mask);
  	buf[len++] = '\n';
  	buf[len] = '\0';
 	return len;
diff --git a/drivers/base/topology.c b/drivers/base/topology.c
index 199cd97..a8bc1cb 100644
--- a/drivers/base/topology.c
+++ b/drivers/base/topology.c
@@ -49,8 +49,8 @@
 
 	if (len > 1) {
 		n = type?
-			cpulist_scnprintf(buf, len-2, *mask):
-			cpumask_scnprintf(buf, len-2, *mask);
+			cpulist_scnprintf(buf, len-2, mask) :
+			cpumask_scnprintf(buf, len-2, mask);
 		buf[n++] = '\n';
 		buf[n] = '\0';
 	}
diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index c602b54..16970431 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -190,7 +190,7 @@
 
 config ESPSERIAL
 	tristate "Hayes ESP serial port support"
-	depends on SERIAL_NONSTANDARD && ISA && ISA_DMA_API
+	depends on SERIAL_NONSTANDARD && ISA && ISA_DMA_API && BROKEN
 	help
 	  This is a driver which supports Hayes ESP serial ports.  Both single
 	  port cards and multiport cards are supported.  Make sure to read
@@ -443,6 +443,17 @@
 	  All modern Linux systems use the Unix98 ptys.  Say Y unless
 	  you're on an embedded system and want to conserve memory.
 
+config DEVPTS_MULTIPLE_INSTANCES
+	bool "Support multiple instances of devpts"
+	depends on UNIX98_PTYS
+	default n
+	---help---
+	  Enable support for multiple instances of devpts filesystem.
+	  If you want to have isolated PTY namespaces (eg: in containers),
+	  say Y here.  Otherwise, say N. If enabled, each mount of devpts
+	  filesystem with the '-o newinstance' option will create an
+	  independent PTY namespace.
+
 config LEGACY_PTYS
 	bool "Legacy (BSD) PTY support"
 	default y
diff --git a/drivers/char/amiserial.c b/drivers/char/amiserial.c
index b97aebd..4e0cfde 100644
--- a/drivers/char/amiserial.c
+++ b/drivers/char/amiserial.c
@@ -170,7 +170,7 @@
  */
 static void rs_stop(struct tty_struct *tty)
 {
-	struct async_struct *info = (struct async_struct *)tty->driver_data;
+	struct async_struct *info = tty->driver_data;
 	unsigned long flags;
 
 	if (serial_paranoia_check(info, tty->name, "rs_stop"))
@@ -190,7 +190,7 @@
 
 static void rs_start(struct tty_struct *tty)
 {
-	struct async_struct *info = (struct async_struct *)tty->driver_data;
+	struct async_struct *info = tty->driver_data;
 	unsigned long flags;
 
 	if (serial_paranoia_check(info, tty->name, "rs_start"))
@@ -861,7 +861,7 @@
 
 static void rs_flush_chars(struct tty_struct *tty)
 {
-	struct async_struct *info = (struct async_struct *)tty->driver_data;
+	struct async_struct *info = tty->driver_data;
 	unsigned long flags;
 
 	if (serial_paranoia_check(info, tty->name, "rs_flush_chars"))
@@ -934,7 +934,7 @@
 
 static int rs_write_room(struct tty_struct *tty)
 {
-	struct async_struct *info = (struct async_struct *)tty->driver_data;
+	struct async_struct *info = tty->driver_data;
 
 	if (serial_paranoia_check(info, tty->name, "rs_write_room"))
 		return 0;
@@ -943,7 +943,7 @@
 
 static int rs_chars_in_buffer(struct tty_struct *tty)
 {
-	struct async_struct *info = (struct async_struct *)tty->driver_data;
+	struct async_struct *info = tty->driver_data;
 
 	if (serial_paranoia_check(info, tty->name, "rs_chars_in_buffer"))
 		return 0;
@@ -952,7 +952,7 @@
 
 static void rs_flush_buffer(struct tty_struct *tty)
 {
-	struct async_struct *info = (struct async_struct *)tty->driver_data;
+	struct async_struct *info = tty->driver_data;
 	unsigned long flags;
 
 	if (serial_paranoia_check(info, tty->name, "rs_flush_buffer"))
@@ -969,7 +969,7 @@
  */
 static void rs_send_xchar(struct tty_struct *tty, char ch)
 {
-	struct async_struct *info = (struct async_struct *)tty->driver_data;
+	struct async_struct *info = tty->driver_data;
         unsigned long flags;
 
 	if (serial_paranoia_check(info, tty->name, "rs_send_char"))
@@ -1004,7 +1004,7 @@
  */
 static void rs_throttle(struct tty_struct * tty)
 {
-	struct async_struct *info = (struct async_struct *)tty->driver_data;
+	struct async_struct *info = tty->driver_data;
 	unsigned long flags;
 #ifdef SERIAL_DEBUG_THROTTLE
 	char	buf[64];
@@ -1029,7 +1029,7 @@
 
 static void rs_unthrottle(struct tty_struct * tty)
 {
-	struct async_struct *info = (struct async_struct *)tty->driver_data;
+	struct async_struct *info = tty->driver_data;
 	unsigned long flags;
 #ifdef SERIAL_DEBUG_THROTTLE
 	char	buf[64];
@@ -1194,7 +1194,7 @@
 
 static int rs_tiocmget(struct tty_struct *tty, struct file *file)
 {
-	struct async_struct * info = (struct async_struct *)tty->driver_data;
+	struct async_struct * info = tty->driver_data;
 	unsigned char control, status;
 	unsigned long flags;
 
@@ -1217,7 +1217,7 @@
 static int rs_tiocmset(struct tty_struct *tty, struct file *file,
 		       unsigned int set, unsigned int clear)
 {
-	struct async_struct * info = (struct async_struct *)tty->driver_data;
+	struct async_struct * info = tty->driver_data;
 	unsigned long flags;
 
 	if (serial_paranoia_check(info, tty->name, "rs_ioctl"))
@@ -1244,7 +1244,7 @@
  */
 static int rs_break(struct tty_struct *tty, int break_state)
 {
-	struct async_struct * info = (struct async_struct *)tty->driver_data;
+	struct async_struct * info = tty->driver_data;
 	unsigned long flags;
 
 	if (serial_paranoia_check(info, tty->name, "rs_break"))
@@ -1264,7 +1264,7 @@
 static int rs_ioctl(struct tty_struct *tty, struct file * file,
 		    unsigned int cmd, unsigned long arg)
 {
-	struct async_struct * info = (struct async_struct *)tty->driver_data;
+	struct async_struct * info = tty->driver_data;
 	struct async_icount cprev, cnow;	/* kernel counter temps */
 	struct serial_icounter_struct icount;
 	void __user *argp = (void __user *)arg;
@@ -1368,7 +1368,7 @@
 
 static void rs_set_termios(struct tty_struct *tty, struct ktermios *old_termios)
 {
-	struct async_struct *info = (struct async_struct *)tty->driver_data;
+	struct async_struct *info = tty->driver_data;
 	unsigned long flags;
 	unsigned int cflag = tty->termios->c_cflag;
 
@@ -1428,7 +1428,7 @@
  */
 static void rs_close(struct tty_struct *tty, struct file * filp)
 {
-	struct async_struct * info = (struct async_struct *)tty->driver_data;
+	struct async_struct * info = tty->driver_data;
 	struct serial_state *state;
 	unsigned long flags;
 
@@ -1523,7 +1523,7 @@
  */
 static void rs_wait_until_sent(struct tty_struct *tty, int timeout)
 {
-	struct async_struct * info = (struct async_struct *)tty->driver_data;
+	struct async_struct * info = tty->driver_data;
 	unsigned long orig_jiffies, char_time;
 	int lsr;
 
@@ -1587,7 +1587,7 @@
  */
 static void rs_hangup(struct tty_struct *tty)
 {
-	struct async_struct * info = (struct async_struct *)tty->driver_data;
+	struct async_struct * info = tty->driver_data;
 	struct serial_state *state = info->state;
 
 	if (serial_paranoia_check(info, tty->name, "rs_hangup"))
diff --git a/drivers/char/cyclades.c b/drivers/char/cyclades.c
index 5e5b1dc..6a59f72 100644
--- a/drivers/char/cyclades.c
+++ b/drivers/char/cyclades.c
@@ -5010,7 +5010,7 @@
 		if (nchan == 0) {
 			dev_err(&pdev->dev, "Cyclom-Y PCI host card with no "
 					"Serial-Modules\n");
-			return -EIO;
+			goto err_unmap;
 		}
 	} else if (device_id == PCI_DEVICE_ID_CYCLOM_Z_Hi) {
 		struct RUNTIME_9060 __iomem *ctl_addr;
diff --git a/drivers/char/epca.c b/drivers/char/epca.c
index cf2461d..39ad820 100644
--- a/drivers/char/epca.c
+++ b/drivers/char/epca.c
@@ -69,7 +69,9 @@
 
 /*
  * The ISA boards do window flipping into the same spaces so its only sane with
- * a single lock. It's still pretty efficient.
+ * a single lock. It's still pretty efficient. This lock guards the hardware
+ * and the tty_port lock guards the kernel side stuff like use counts. Take
+ * this lock inside the port lock if you must take both.
  */
 static DEFINE_SPINLOCK(epca_lock);
 
@@ -156,14 +158,12 @@
 static void pc_sched_event(struct channel *, int);
 static void epca_error(int, char *);
 static void pc_close(struct tty_struct *, struct file *);
-static void shutdown(struct channel *);
+static void shutdown(struct channel *, struct tty_struct *tty);
 static void pc_hangup(struct tty_struct *);
 static int pc_write_room(struct tty_struct *);
 static int pc_chars_in_buffer(struct tty_struct *);
 static void pc_flush_buffer(struct tty_struct *);
 static void pc_flush_chars(struct tty_struct *);
-static int block_til_ready(struct tty_struct *, struct file *,
-			struct channel *);
 static int pc_open(struct tty_struct *, struct file *);
 static void post_fep_init(unsigned int crd);
 static void epcapoll(unsigned long);
@@ -173,7 +173,7 @@
 static unsigned termios2digi_i(struct channel *ch, unsigned);
 static unsigned termios2digi_c(struct channel *ch, unsigned);
 static void epcaparam(struct tty_struct *, struct channel *);
-static void receive_data(struct channel *);
+static void receive_data(struct channel *, struct tty_struct *tty);
 static int pc_ioctl(struct tty_struct *, struct file *,
 			unsigned int, unsigned long);
 static int info_ioctl(struct tty_struct *, struct file *,
@@ -392,7 +392,7 @@
 	 * through tty->driver_data this should catch it.
 	 */
 	if (tty) {
-		struct channel *ch = (struct channel *)tty->driver_data;
+		struct channel *ch = tty->driver_data;
 		if (ch >= &digi_channels[0] && ch < &digi_channels[nbdevs]) {
 			if (ch->magic == EPCA_MAGIC)
 				return ch;
@@ -419,76 +419,34 @@
 static void pc_close(struct tty_struct *tty, struct file *filp)
 {
 	struct channel *ch;
-	unsigned long flags;
+	struct tty_port *port;
 	/*
 	 * verifyChannel returns the channel from the tty struct if it is
 	 * valid. This serves as a sanity check.
 	 */
 	ch = verifyChannel(tty);
-	if (ch != NULL) {
-		spin_lock_irqsave(&epca_lock, flags);
-		if (tty_hung_up_p(filp)) {
-			spin_unlock_irqrestore(&epca_lock, flags);
-			return;
-		}
-		if (ch->port.count-- > 1)  {
-			/* Begin channel is open more than once */
-			/*
-			 * Return without doing anything. Someone might still
-			 * be using the channel.
-			 */
-			spin_unlock_irqrestore(&epca_lock, flags);
-			return;
-		}
-		/* Port open only once go ahead with shutdown & reset */
-		BUG_ON(ch->port.count < 0);
+	if (ch == NULL)
+		return;
+	port = &ch->port;
 
-		/*
-		 * Let the rest of the driver know the channel is being closed.
-		 * This becomes important if an open is attempted before close
-		 * is finished.
-		 */
-		ch->port.flags |= ASYNC_CLOSING;
-		tty->closing = 1;
+	if (tty_port_close_start(port, tty, filp) == 0)
+		return;
 
-		spin_unlock_irqrestore(&epca_lock, flags);
+	pc_flush_buffer(tty);
+	shutdown(ch, tty);
 
-		if (ch->port.flags & ASYNC_INITIALIZED)  {
-			/* Setup an event to indicate when the
-			   transmit buffer empties */
-			setup_empty_event(tty, ch);
-			/* 30 seconds timeout */
-			tty_wait_until_sent(tty, 3000);
-		}
-		pc_flush_buffer(tty);
-
-		tty_ldisc_flush(tty);
-		shutdown(ch);
-
-		spin_lock_irqsave(&epca_lock, flags);
-		tty->closing = 0;
-		ch->event = 0;
-		ch->port.tty = NULL;
-		spin_unlock_irqrestore(&epca_lock, flags);
-
-		if (ch->port.blocked_open) {
-			if (ch->close_delay)
-				msleep_interruptible(jiffies_to_msecs(ch->close_delay));
-			wake_up_interruptible(&ch->port.open_wait);
-		}
-		ch->port.flags &= ~(ASYNC_NORMAL_ACTIVE | ASYNC_INITIALIZED |
-					ASYNC_CLOSING);
-		wake_up_interruptible(&ch->port.close_wait);
-	}
+	tty_port_close_end(port, tty);
+	ch->event = 0;	/* FIXME: review ch->event locking */
+	tty_port_tty_set(port, NULL);
 }
 
-static void shutdown(struct channel *ch)
+static void shutdown(struct channel *ch, struct tty_struct *tty)
 {
 	unsigned long flags;
-	struct tty_struct *tty;
 	struct board_chan __iomem *bc;
+	struct tty_port *port = &ch->port;
 
-	if (!(ch->port.flags & ASYNC_INITIALIZED))
+	if (!(port->flags & ASYNC_INITIALIZED))
 		return;
 
 	spin_lock_irqsave(&epca_lock, flags);
@@ -503,7 +461,6 @@
 	 */
 	if (bc)
 		writeb(0, &bc->idata);
-	tty = ch->port.tty;
 
 	/* If we're a modem control device and HUPCL is on, drop RTS & DTR. */
 	if (tty->termios->c_cflag & HUPCL)  {
@@ -517,32 +474,26 @@
 	 * will have to reinitialized. Set a flag to indicate this.
 	 */
 	/* Prevent future Digi programmed interrupts from coming active */
-	ch->port.flags &= ~ASYNC_INITIALIZED;
+	port->flags &= ~ASYNC_INITIALIZED;
 	spin_unlock_irqrestore(&epca_lock, flags);
 }
 
 static void pc_hangup(struct tty_struct *tty)
 {
 	struct channel *ch;
+
 	/*
 	 * verifyChannel returns the channel from the tty struct if it is
 	 * valid. This serves as a sanity check.
 	 */
 	ch = verifyChannel(tty);
 	if (ch != NULL) {
-		unsigned long flags;
-
 		pc_flush_buffer(tty);
 		tty_ldisc_flush(tty);
-		shutdown(ch);
+		shutdown(ch, tty);
 
-		spin_lock_irqsave(&epca_lock, flags);
-		ch->port.tty   = NULL;
-		ch->event = 0;
-		ch->port.count = 0;
-		ch->port.flags &= ~(ASYNC_NORMAL_ACTIVE | ASYNC_INITIALIZED);
-		spin_unlock_irqrestore(&epca_lock, flags);
-		wake_up_interruptible(&ch->port.open_wait);
+		ch->event = 0;	/* FIXME: review locking of ch->event */
+		tty_port_hangup(&ch->port);
 	}
 }
 
@@ -786,100 +737,22 @@
 	}
 }
 
-static int block_til_ready(struct tty_struct *tty,
-				struct file *filp, struct channel *ch)
+static int epca_carrier_raised(struct tty_port *port)
 {
-	DECLARE_WAITQUEUE(wait, current);
-	int retval, do_clocal = 0;
-	unsigned long flags;
-
-	if (tty_hung_up_p(filp)) {
-		if (ch->port.flags & ASYNC_HUP_NOTIFY)
-			retval = -EAGAIN;
-		else
-			retval = -ERESTARTSYS;
-		return retval;
-	}
-
-	/*
-	 * If the device is in the middle of being closed, then block until
-	 * it's done, and then try again.
-	 */
-	if (ch->port.flags & ASYNC_CLOSING) {
-		interruptible_sleep_on(&ch->port.close_wait);
-
-		if (ch->port.flags & ASYNC_HUP_NOTIFY)
-			return -EAGAIN;
-		else
-			return -ERESTARTSYS;
-	}
-
-	if (filp->f_flags & O_NONBLOCK)  {
-		/*
-		 * If non-blocking mode is set, then make the check up front
-		 * and then exit.
-		 */
-		ch->port.flags |= ASYNC_NORMAL_ACTIVE;
-		return 0;
-	}
-	if (tty->termios->c_cflag & CLOCAL)
-		do_clocal = 1;
-	/* Block waiting for the carrier detect and the line to become free */
-
-	retval = 0;
-	add_wait_queue(&ch->port.open_wait, &wait);
-
-	spin_lock_irqsave(&epca_lock, flags);
-	/* We dec count so that pc_close will know when to free things */
-	if (!tty_hung_up_p(filp))
-		ch->port.count--;
-	ch->port.blocked_open++;
-	while (1) {
-		set_current_state(TASK_INTERRUPTIBLE);
-		if (tty_hung_up_p(filp) ||
-				!(ch->port.flags & ASYNC_INITIALIZED)) {
-			if (ch->port.flags & ASYNC_HUP_NOTIFY)
-				retval = -EAGAIN;
-			else
-				retval = -ERESTARTSYS;
-			break;
-		}
-		if (!(ch->port.flags & ASYNC_CLOSING) &&
-			  (do_clocal || (ch->imodem & ch->dcd)))
-			break;
-		if (signal_pending(current)) {
-			retval = -ERESTARTSYS;
-			break;
-		}
-		spin_unlock_irqrestore(&epca_lock, flags);
-		/*
-		 * Allow someone else to be scheduled. We will occasionally go
-		 * through this loop until one of the above conditions change.
-		 * The below schedule call will allow other processes to enter
-		 * and prevent this loop from hogging the cpu.
-		 */
-		schedule();
-		spin_lock_irqsave(&epca_lock, flags);
-	}
-
-	__set_current_state(TASK_RUNNING);
-	remove_wait_queue(&ch->port.open_wait, &wait);
-	if (!tty_hung_up_p(filp))
-		ch->port.count++;
-	ch->port.blocked_open--;
-
-	spin_unlock_irqrestore(&epca_lock, flags);
-
-	if (retval)
-		return retval;
-
-	ch->port.flags |= ASYNC_NORMAL_ACTIVE;
+	struct channel *ch = container_of(port, struct channel, port);
+	if (ch->imodem & ch->dcd)
+		return 1;
 	return 0;
 }
 
+static void epca_raise_dtr_rts(struct tty_port *port)
+{
+}
+
 static int pc_open(struct tty_struct *tty, struct file *filp)
 {
 	struct channel *ch;
+	struct tty_port *port;
 	unsigned long flags;
 	int line, retval, boardnum;
 	struct board_chan __iomem *bc;
@@ -890,6 +763,7 @@
 		return -ENODEV;
 
 	ch = &digi_channels[line];
+	port = &ch->port;
 	boardnum = ch->boardnum;
 
 	/* Check status of board configured in system.  */
@@ -926,22 +800,24 @@
 		return -ENODEV;
 	}
 
-	spin_lock_irqsave(&epca_lock, flags);
+	spin_lock_irqsave(&port->lock, flags);
 	/*
 	 * Every time a channel is opened, increment a counter. This is
 	 * necessary because we do not wish to flush and shutdown the channel
 	 * until the last app holding the channel open, closes it.
 	 */
-	ch->port.count++;
+	port->count++;
 	/*
 	 * Set a kernel structures pointer to our local channel structure. This
 	 * way we can get to it when passed only a tty struct.
 	 */
 	tty->driver_data = ch;
+	port->tty = tty;
 	/*
 	 * If this is the first time the channel has been opened, initialize
 	 * the tty->termios struct otherwise let pc_close handle it.
 	 */
+	spin_lock(&epca_lock);
 	globalwinon(ch);
 	ch->statusflags = 0;
 
@@ -956,31 +832,33 @@
 	writew(head, &bc->rout);
 
 	/* Set the channels associated tty structure */
-	ch->port.tty = tty;
 
 	/*
 	 * The below routine generally sets up parity, baud, flow control
 	 * issues, etc.... It effect both control flags and input flags.
 	 */
 	epcaparam(tty, ch);
-	ch->port.flags |= ASYNC_INITIALIZED;
 	memoff(ch);
-	spin_unlock_irqrestore(&epca_lock, flags);
+	spin_unlock(&epca_lock);
+	port->flags |= ASYNC_INITIALIZED;
+	spin_unlock_irqrestore(&port->lock, flags);
 
-	retval = block_til_ready(tty, filp, ch);
+	retval = tty_port_block_til_ready(port, tty, filp);
 	if (retval)
 		return retval;
 	/*
 	 * Set this again in case a hangup set it to zero while this open() was
 	 * waiting for the line...
 	 */
-	spin_lock_irqsave(&epca_lock, flags);
-	ch->port.tty = tty;
+	spin_lock_irqsave(&port->lock, flags);
+	port->tty = tty;
+	spin_lock(&epca_lock);
 	globalwinon(ch);
 	/* Enable Digi Data events */
 	writeb(1, &bc->idata);
 	memoff(ch);
-	spin_unlock_irqrestore(&epca_lock, flags);
+	spin_unlock(&epca_lock);
+	spin_unlock_irqrestore(&port->lock, flags);
 	return 0;
 }
 
@@ -1016,8 +894,11 @@
 		}
 		ch = card_ptr[crd];
 		for (count = 0; count < bd->numports; count++, ch++) {
-			if (ch && ch->port.tty)
-				tty_hangup(ch->port.tty);
+			struct tty_struct *tty = tty_port_tty_get(&ch->port);
+			if (tty) {
+				tty_hangup(tty);
+				tty_kref_put(tty);
+			}
 		}
 	}
 	pci_unregister_driver(&epca_driver);
@@ -1042,6 +923,11 @@
 	.break_ctl = pc_send_break
 };
 
+static const struct tty_port_operations epca_port_ops = {
+	.carrier_raised = epca_carrier_raised,
+	.raise_dtr_rts = epca_raise_dtr_rts,
+};
+
 static int info_open(struct tty_struct *tty, struct file *filp)
 {
 	return 0;
@@ -1377,6 +1263,7 @@
 		u16 tseg, rseg;
 
 		tty_port_init(&ch->port);
+		ch->port.ops = &epca_port_ops;
 		ch->brdchan = bc;
 		ch->mailbox = gd;
 		INIT_WORK(&ch->tqueue, do_softint);
@@ -1428,7 +1315,7 @@
 		ch->boardnum   = crd;
 		ch->channelnum = i;
 		ch->magic      = EPCA_MAGIC;
-		ch->port.tty        = NULL;
+		tty_port_tty_set(&ch->port, NULL);
 
 		if (shrinkmem) {
 			fepcmd(ch, SETBUFFER, 32, 0, 0, 0);
@@ -1510,7 +1397,7 @@
 		ch->fepstartca = 0;
 		ch->fepstopca = 0;
 
-		ch->close_delay = 50;
+		ch->port.close_delay = 50;
 
 		spin_unlock_irqrestore(&epca_lock, flags);
 	}
@@ -1622,15 +1509,16 @@
 		if (bc == NULL)
 			goto next;
 
+		tty = tty_port_tty_get(&ch->port);
 		if (event & DATA_IND)  { /* Begin DATA_IND */
-			receive_data(ch);
+			receive_data(ch, tty);
 			assertgwinon(ch);
 		} /* End DATA_IND */
 		/* else *//* Fix for DCD transition missed bug */
 		if (event & MODEMCHG_IND) {
 			/* A modem signal change has been indicated */
 			ch->imodem = mstat;
-			if (ch->port.flags & ASYNC_CHECK_CD) {
+			if (test_bit(ASYNC_CHECK_CD, &ch->port.flags)) {
 				/* We are now receiving dcd */
 				if (mstat & ch->dcd)
 					wake_up_interruptible(&ch->port.open_wait);
@@ -1638,7 +1526,6 @@
 					pc_sched_event(ch, EPCA_EVENT_HANGUP);
 			}
 		}
-		tty = ch->port.tty;
 		if (tty) {
 			if (event & BREAK_IND) {
 				/* A break has been indicated */
@@ -1658,6 +1545,7 @@
 					tty_wakeup(tty);
 				}
 			}
+			tty_kref_put(tty);
 		}
 next:
 		globalwinon(ch);
@@ -1877,9 +1765,9 @@
 		 * that the driver will wait on carrier detect.
 		 */
 		if (ts->c_cflag & CLOCAL)
-			ch->port.flags &= ~ASYNC_CHECK_CD;
+			clear_bit(ASYNC_CHECK_CD, &ch->port.flags);
 		else
-			ch->port.flags |= ASYNC_CHECK_CD;
+			set_bit(ASYNC_CHECK_CD, &ch->port.flags);
 		mval = ch->m_dtr | ch->m_rts;
 	} /* End CBAUD not detected */
 	iflag = termios2digi_i(ch, ts->c_iflag);
@@ -1952,11 +1840,10 @@
 }
 
 /* Caller holds lock */
-static void receive_data(struct channel *ch)
+static void receive_data(struct channel *ch, struct tty_struct *tty)
 {
 	unchar *rptr;
 	struct ktermios *ts = NULL;
-	struct tty_struct *tty;
 	struct board_chan __iomem *bc;
 	int dataToRead, wrapgap, bytesAvailable;
 	unsigned int tail, head;
@@ -1969,7 +1856,6 @@
 	globalwinon(ch);
 	if (ch->statusflags & RXSTOPPED)
 		return;
-	tty = ch->port.tty;
 	if (tty)
 		ts = tty->termios;
 	bc = ch->brdchan;
@@ -2029,7 +1915,7 @@
 	globalwinon(ch);
 	writew(tail, &bc->rout);
 	/* Must be called with global data */
-	tty_schedule_flip(ch->port.tty);
+	tty_schedule_flip(tty);
 }
 
 static int info_ioctl(struct tty_struct *tty, struct file *file,
@@ -2097,7 +1983,7 @@
 
 static int pc_tiocmget(struct tty_struct *tty, struct file *file)
 {
-	struct channel *ch = (struct channel *) tty->driver_data;
+	struct channel *ch = tty->driver_data;
 	struct board_chan __iomem *bc;
 	unsigned int mstat, mflag = 0;
 	unsigned long flags;
@@ -2131,7 +2017,7 @@
 static int pc_tiocmset(struct tty_struct *tty, struct file *file,
 		       unsigned int set, unsigned int clear)
 {
-	struct channel *ch = (struct channel *) tty->driver_data;
+	struct channel *ch = tty->driver_data;
 	unsigned long flags;
 
 	if (!ch)
@@ -2178,7 +2064,7 @@
 	unsigned int mflag, mstat;
 	unsigned char startc, stopc;
 	struct board_chan __iomem *bc;
-	struct channel *ch = (struct channel *) tty->driver_data;
+	struct channel *ch = tty->driver_data;
 	void __user *argp = (void __user *)arg;
 
 	if (ch)
@@ -2352,15 +2238,16 @@
 	struct channel *ch = container_of(work, struct channel, tqueue);
 	/* Called in response to a modem change event */
 	if (ch && ch->magic == EPCA_MAGIC) {
-		struct tty_struct *tty = ch->port.tty;
+		struct tty_struct *tty = tty_port_tty_get(&ch->port);
 
 		if (tty && tty->driver_data) {
 			if (test_and_clear_bit(EPCA_EVENT_HANGUP, &ch->event)) {
 				tty_hangup(tty);
 				wake_up_interruptible(&ch->port.open_wait);
-				ch->port.flags &= ~ASYNC_NORMAL_ACTIVE;
+				clear_bit(ASYNC_NORMAL_ACTIVE, &ch->port.flags);
 			}
 		}
+		tty_kref_put(tty);
 	}
 }
 
@@ -2473,7 +2360,7 @@
 
 static int pc_send_break(struct tty_struct *tty, int msec)
 {
-	struct channel *ch = (struct channel *) tty->driver_data;
+	struct channel *ch = tty->driver_data;
 	unsigned long flags;
 
 	if (msec == -1)
diff --git a/drivers/char/esp.c b/drivers/char/esp.c
index 7f077c0..45ec263 100644
--- a/drivers/char/esp.c
+++ b/drivers/char/esp.c
@@ -2054,6 +2054,15 @@
 	wake_up_interruptible(&info->port.open_wait);
 }
 
+static int esp_carrier_raised(struct tty_port *port)
+{
+	struct esp_struct *info = container_of(port, struct esp_struct, port);
+	serial_out(info, UART_ESI_CMD1, ESI_GET_UART_STAT);
+	if (serial_in(info, UART_ESI_STAT2) & UART_MSR_DCD)
+		return 1;
+	return 0;
+}
+
 /*
  * ------------------------------------------------------------
  * esp_open() and friends
@@ -2066,17 +2075,19 @@
 	int		retval;
 	int		do_clocal = 0;
 	unsigned long	flags;
+	int		cd;
+	struct tty_port *port = &info->port;
 
 	/*
 	 * If the device is in the middle of being closed, then block
 	 * until it's done, and then try again.
 	 */
 	if (tty_hung_up_p(filp) ||
-	    (info->port.flags & ASYNC_CLOSING)) {
-		if (info->port.flags & ASYNC_CLOSING)
-			interruptible_sleep_on(&info->port.close_wait);
+	    (port->flags & ASYNC_CLOSING)) {
+		if (port->flags & ASYNC_CLOSING)
+			interruptible_sleep_on(&port->close_wait);
 #ifdef SERIAL_DO_RESTART
-		if (info->port.flags & ASYNC_HUP_NOTIFY)
+		if (port->flags & ASYNC_HUP_NOTIFY)
 			return -EAGAIN;
 		else
 			return -ERESTARTSYS;
@@ -2091,7 +2102,7 @@
 	 */
 	if ((filp->f_flags & O_NONBLOCK) ||
 	    (tty->flags & (1 << TTY_IO_ERROR))) {
-		info->port.flags |= ASYNC_NORMAL_ACTIVE;
+		port->flags |= ASYNC_NORMAL_ACTIVE;
 		return 0;
 	}
 
@@ -2101,20 +2112,20 @@
 	/*
 	 * Block waiting for the carrier detect and the line to become
 	 * free (i.e., not in use by the callout).  While we are in
-	 * this loop, info->port.count is dropped by one, so that
+	 * this loop, port->count is dropped by one, so that
 	 * rs_close() knows when to free things.  We restore it upon
 	 * exit, either normal or abnormal.
 	 */
 	retval = 0;
-	add_wait_queue(&info->port.open_wait, &wait);
+	add_wait_queue(&port->open_wait, &wait);
 #ifdef SERIAL_DEBUG_OPEN
 	printk(KERN_DEBUG "block_til_ready before block: ttys%d, count = %d\n",
-	       info->line, info->port.count);
+	       info->line, port->count);
 #endif
 	spin_lock_irqsave(&info->lock, flags);
 	if (!tty_hung_up_p(filp))
-		info->port.count--;
-	info->port.blocked_open++;
+		port->count--;
+	port->blocked_open++;
 	while (1) {
 		if ((tty->termios->c_cflag & CBAUD)) {
 			unsigned int scratch;
@@ -2129,9 +2140,9 @@
 		}
 		set_current_state(TASK_INTERRUPTIBLE);
 		if (tty_hung_up_p(filp) ||
-		    !(info->port.flags & ASYNC_INITIALIZED)) {
+		    !(port->flags & ASYNC_INITIALIZED)) {
 #ifdef SERIAL_DO_RESTART
-			if (info->port.flags & ASYNC_HUP_NOTIFY)
+			if (port->flags & ASYNC_HUP_NOTIFY)
 				retval = -EAGAIN;
 			else
 				retval = -ERESTARTSYS;
@@ -2141,11 +2152,9 @@
 			break;
 		}
 
-		serial_out(info, UART_ESI_CMD1, ESI_GET_UART_STAT);
-		if (serial_in(info, UART_ESI_STAT2) & UART_MSR_DCD)
-			do_clocal = 1;
+		cd = tty_port_carrier_raised(port);
 
-		if (!(info->port.flags & ASYNC_CLOSING) &&
-		    (do_clocal))
+		if (!(port->flags & ASYNC_CLOSING) &&
+		    (do_clocal || cd))
 			break;
 		if (signal_pending(current)) {
@@ -2154,25 +2163,25 @@
 		}
 #ifdef SERIAL_DEBUG_OPEN
 		printk(KERN_DEBUG "block_til_ready blocking: ttys%d, count = %d\n",
-		       info->line, info->port.count);
+		       info->line, port->count);
 #endif
 		spin_unlock_irqrestore(&info->lock, flags);
 		schedule();
 		spin_lock_irqsave(&info->lock, flags);
 	}
 	set_current_state(TASK_RUNNING);
-	remove_wait_queue(&info->port.open_wait, &wait);
+	remove_wait_queue(&port->open_wait, &wait);
 	if (!tty_hung_up_p(filp))
-		info->port.count++;
-	info->port.blocked_open--;
+		port->count++;
+	port->blocked_open--;
 	spin_unlock_irqrestore(&info->lock, flags);
 #ifdef SERIAL_DEBUG_OPEN
 	printk(KERN_DEBUG "block_til_ready after blocking: ttys%d, count = %d\n",
-	       info->line, info->port.count);
+	       info->line, port->count);
 #endif
 	if (retval)
 		return retval;
-	info->port.flags |= ASYNC_NORMAL_ACTIVE;
+	port->flags |= ASYNC_NORMAL_ACTIVE;
 	return 0;
 }
 
@@ -2329,6 +2338,10 @@
 	.tiocmset = esp_tiocmset,
 };
 
+static const struct tty_port_operations esp_port_ops = {
+	.carrier_raised = esp_carrier_raised,
+};
+
 /*
  * The serial driver boot-time initialization code!
  */
@@ -2415,6 +2428,8 @@
 	offset = 0;
 
 	do {
+		tty_port_init(&info->port);
+		info->port.ops = &esp_port_ops;
 		info->io_port = esp[i] + offset;
 		info->irq = irq[i];
 		info->line = (i * 8) + (offset / 8);
@@ -2437,8 +2452,6 @@
 		info->config.flow_off = flow_off;
 		info->config.pio_threshold = pio_threshold;
 		info->next_port = ports;
-		init_waitqueue_head(&info->port.open_wait);
-		init_waitqueue_head(&info->port.close_wait);
 		init_waitqueue_head(&info->delta_msr_wait);
 		init_waitqueue_head(&info->break_wait);
 		ports = info;
diff --git a/drivers/char/generic_serial.c b/drivers/char/generic_serial.c
index c6090f8..9e4e569 100644
--- a/drivers/char/generic_serial.c
+++ b/drivers/char/generic_serial.c
@@ -376,7 +376,8 @@
 
 void gs_hangup(struct tty_struct *tty)
 {
-	struct gs_port   *port;
+	struct gs_port *port;
+	unsigned long flags;
 
 	func_enter ();
 
@@ -386,9 +387,11 @@
 		return;
 
 	gs_shutdown_port (port);
+	spin_lock_irqsave(&port->port.lock, flags);
 	port->port.flags &= ~(ASYNC_NORMAL_ACTIVE|GS_ACTIVE);
 	port->port.tty = NULL;
 	port->port.count = 0;
+	spin_unlock_irqrestore(&port->port.lock, flags);
 
 	wake_up_interruptible(&port->port.open_wait);
 	func_exit ();
@@ -397,7 +400,8 @@
 
 int gs_block_til_ready(void *port_, struct file * filp)
 {
-	struct gs_port *port = port_;
+	struct gs_port *gp = port_;
+	struct tty_port *port = &gp->port;
 	DECLARE_WAITQUEUE(wait, current);
 	int    retval;
 	int    do_clocal = 0;
@@ -409,16 +413,16 @@
 
 	if (!port) return 0;
 
-	tty = port->port.tty;
+	tty = port->tty;
 
 	gs_dprintk (GS_DEBUG_BTR, "Entering gs_block_till_ready.\n"); 
 	/*
 	 * If the device is in the middle of being closed, then block
 	 * until it's done, and then try again.
 	 */
-	if (tty_hung_up_p(filp) || port->port.flags & ASYNC_CLOSING) {
-		interruptible_sleep_on(&port->port.close_wait);
-		if (port->port.flags & ASYNC_HUP_NOTIFY)
+	if (tty_hung_up_p(filp) || port->flags & ASYNC_CLOSING) {
+		interruptible_sleep_on(&port->close_wait);
+		if (port->flags & ASYNC_HUP_NOTIFY)
 			return -EAGAIN;
 		else
 			return -ERESTARTSYS;
@@ -432,7 +436,7 @@
 	 */
 	if ((filp->f_flags & O_NONBLOCK) ||
 	    (tty->flags & (1 << TTY_IO_ERROR))) {
-		port->port.flags |= ASYNC_NORMAL_ACTIVE;
+		port->flags |= ASYNC_NORMAL_ACTIVE;
 		return 0;
 	}
 
@@ -444,34 +448,34 @@
 	/*
 	 * Block waiting for the carrier detect and the line to become
 	 * free (i.e., not in use by the callout).  While we are in
-	 * this loop, port->port.count is dropped by one, so that
+	 * this loop, port->count is dropped by one, so that
 	 * rs_close() knows when to free things.  We restore it upon
 	 * exit, either normal or abnormal.
 	 */
 	retval = 0;
 
-	add_wait_queue(&port->port.open_wait, &wait);
+	add_wait_queue(&port->open_wait, &wait);
 
 	gs_dprintk (GS_DEBUG_BTR, "after add waitq.\n"); 
-	spin_lock_irqsave(&port->driver_lock, flags);
+	spin_lock_irqsave(&port->lock, flags);
 	if (!tty_hung_up_p(filp)) {
-		port->port.count--;
+		port->count--;
 	}
-	spin_unlock_irqrestore(&port->driver_lock, flags);
-	port->port.blocked_open++;
+	port->blocked_open++;
+	spin_unlock_irqrestore(&port->lock, flags);
 	while (1) {
-		CD = port->rd->get_CD (port);
+		CD = tty_port_carrier_raised(port);
 		gs_dprintk (GS_DEBUG_BTR, "CD is now %d.\n", CD);
 		set_current_state (TASK_INTERRUPTIBLE);
 		if (tty_hung_up_p(filp) ||
-		    !(port->port.flags & ASYNC_INITIALIZED)) {
-			if (port->port.flags & ASYNC_HUP_NOTIFY)
+		    !(port->flags & ASYNC_INITIALIZED)) {
+			if (port->flags & ASYNC_HUP_NOTIFY)
 				retval = -EAGAIN;
 			else
 				retval = -ERESTARTSYS;
 			break;
 		}
-		if (!(port->port.flags & ASYNC_CLOSING) &&
+		if (!(port->flags & ASYNC_CLOSING) &&
 		    (do_clocal || CD))
 			break;
 		gs_dprintk (GS_DEBUG_BTR, "signal_pending is now: %d (%lx)\n", 
@@ -483,19 +487,20 @@
 		schedule();
 	}
 	gs_dprintk (GS_DEBUG_BTR, "Got out of the loop. (%d)\n",
-		    port->port.blocked_open);
+		    port->blocked_open);
 	set_current_state (TASK_RUNNING);
-	remove_wait_queue(&port->port.open_wait, &wait);
+	remove_wait_queue(&port->open_wait, &wait);
+
+	spin_lock_irqsave(&port->lock, flags);
 	if (!tty_hung_up_p(filp)) {
-		port->port.count++;
+		port->count++;
 	}
-	port->port.blocked_open--;
-	if (retval)
-		return retval;
-
-	port->port.flags |= ASYNC_NORMAL_ACTIVE;
+	port->blocked_open--;
+	if (retval == 0)
+		port->flags |= ASYNC_NORMAL_ACTIVE;
+	spin_unlock_irqrestore(&port->lock, flags);
 	func_exit ();
-	return 0;
+	return retval;
 }			 
 
 
@@ -506,7 +511,7 @@
 	
 	func_enter ();
 
-	port = (struct gs_port *) tty->driver_data;
+	port = tty->driver_data;
 
 	if (!port) return;
 
@@ -516,10 +521,10 @@
 		port->port.tty = tty;
 	}
 
-	spin_lock_irqsave(&port->driver_lock, flags);
+	spin_lock_irqsave(&port->port.lock, flags);
 
 	if (tty_hung_up_p(filp)) {
-		spin_unlock_irqrestore(&port->driver_lock, flags);
+		spin_unlock_irqrestore(&port->port.lock, flags);
 		if (port->rd->hungup)
 			port->rd->hungup (port);
 		func_exit ();
@@ -538,7 +543,7 @@
 
 	if (port->port.count) {
 		gs_dprintk(GS_DEBUG_CLOSE, "gs_close port %p: count: %d\n", port, port->port.count);
-		spin_unlock_irqrestore(&port->driver_lock, flags);
+		spin_unlock_irqrestore(&port->port.lock, flags);
 		func_exit ();
 		return;
 	}
@@ -559,8 +564,10 @@
 	 * line status register.
 	 */
 
+	spin_lock_irqsave(&port->driver_lock, flags);
 	port->rd->disable_rx_interrupts (port);
 	spin_unlock_irqrestore(&port->driver_lock, flags);
+	spin_unlock_irqrestore(&port->port.lock, flags);
 
 	/* close has no way of returning "EINTR", so discard return value */
 	if (port->closing_wait != ASYNC_CLOSING_WAIT_NONE)
@@ -573,20 +580,25 @@
 	tty_ldisc_flush(tty);
 	tty->closing = 0;
 
+	spin_lock_irqsave(&port->driver_lock, flags);
 	port->event = 0;
 	port->rd->close (port);
 	port->rd->shutdown_port (port);
+	spin_unlock_irqrestore(&port->driver_lock, flags);
+
+	spin_lock_irqsave(&port->port.lock, flags);
 	port->port.tty = NULL;
 
 	if (port->port.blocked_open) {
 		if (port->close_delay) {
-			spin_unlock_irqrestore(&port->driver_lock, flags);
+			spin_unlock_irqrestore(&port->port.lock, flags);
 			msleep_interruptible(jiffies_to_msecs(port->close_delay));
-			spin_lock_irqsave(&port->driver_lock, flags);
+			spin_lock_irqsave(&port->port.lock, flags);
 		}
 		wake_up_interruptible(&port->port.open_wait);
 	}
 	port->port.flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING | ASYNC_INITIALIZED);
+	spin_unlock_irqrestore(&port->port.lock, flags);
 	wake_up_interruptible(&port->port.close_wait);
 
 	func_exit ();
diff --git a/drivers/char/hvc_console.c b/drivers/char/hvc_console.c
index 0587b66..5a8a4c2 100644
--- a/drivers/char/hvc_console.c
+++ b/drivers/char/hvc_console.c
@@ -529,7 +529,7 @@
 	tty = tty_kref_get(hp->tty);
 	spin_unlock_irqrestore(&hp->lock, hvc_flags);
 
-	tty_do_resize(tty, tty, &ws);
+	tty_do_resize(tty, &ws);
 	tty_kref_put(tty);
 }
 
diff --git a/drivers/char/hvsi.c b/drivers/char/hvsi.c
index af05528..406f874 100644
--- a/drivers/char/hvsi.c
+++ b/drivers/char/hvsi.c
@@ -997,14 +997,14 @@
 
 static int hvsi_write_room(struct tty_struct *tty)
 {
-	struct hvsi_struct *hp = (struct hvsi_struct *)tty->driver_data;
+	struct hvsi_struct *hp = tty->driver_data;
 
 	return N_OUTBUF - hp->n_outbuf;
 }
 
 static int hvsi_chars_in_buffer(struct tty_struct *tty)
 {
-	struct hvsi_struct *hp = (struct hvsi_struct *)tty->driver_data;
+	struct hvsi_struct *hp = tty->driver_data;
 
 	return hp->n_outbuf;
 }
@@ -1070,7 +1070,7 @@
  */
 static void hvsi_throttle(struct tty_struct *tty)
 {
-	struct hvsi_struct *hp = (struct hvsi_struct *)tty->driver_data;
+	struct hvsi_struct *hp = tty->driver_data;
 
 	pr_debug("%s\n", __func__);
 
@@ -1079,7 +1079,7 @@
 
 static void hvsi_unthrottle(struct tty_struct *tty)
 {
-	struct hvsi_struct *hp = (struct hvsi_struct *)tty->driver_data;
+	struct hvsi_struct *hp = tty->driver_data;
 	unsigned long flags;
 	int shouldflip = 0;
 
@@ -1100,7 +1100,7 @@
 
 static int hvsi_tiocmget(struct tty_struct *tty, struct file *file)
 {
-	struct hvsi_struct *hp = (struct hvsi_struct *)tty->driver_data;
+	struct hvsi_struct *hp = tty->driver_data;
 
 	hvsi_get_mctrl(hp);
 	return hp->mctrl;
@@ -1109,7 +1109,7 @@
 static int hvsi_tiocmset(struct tty_struct *tty, struct file *file,
 		unsigned int set, unsigned int clear)
 {
-	struct hvsi_struct *hp = (struct hvsi_struct *)tty->driver_data;
+	struct hvsi_struct *hp = tty->driver_data;
 	unsigned long flags;
 	uint16_t new_mctrl;
 
diff --git a/drivers/char/i8k.c b/drivers/char/i8k.c
index b60d425..fc8cf7a 100644
--- a/drivers/char/i8k.c
+++ b/drivers/char/i8k.c
@@ -485,7 +485,21 @@
 			DMI_MATCH(DMI_PRODUCT_NAME, "MP061"),
 		},
 	},
-	{ }
+	{
+		.ident = "Dell Precision",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Precision"),
+		},
+	},
+	{
+		.ident = "Dell Vostro",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Vostro"),
+		},
+	},
+	{ }
 };
 
 /*
diff --git a/drivers/char/isicom.c b/drivers/char/isicom.c
index 04e4549..24aa6e8 100644
--- a/drivers/char/isicom.c
+++ b/drivers/char/isicom.c
@@ -328,11 +328,13 @@
 }
 
 /* card->lock MUST NOT be held */
-static inline void raise_dtr_rts(struct isi_port *port)
+
+static void isicom_raise_dtr_rts(struct tty_port *port)
 {
-	struct isi_board *card = port->card;
+	struct isi_port *ip = container_of(port, struct isi_port, port);
+	struct isi_board *card = ip->card;
 	unsigned long base = card->base;
-	u16 channel = port->channel;
+	u16 channel = ip->channel;
 
 	if (!lock_card(card))
 		return;
@@ -340,7 +342,7 @@
 	outw(0x8000 | (channel << card->shift_count) | 0x02, base);
 	outw(0x0f04, base);
 	InterruptTheCard(base);
-	port->status |= (ISI_DTR | ISI_RTS);
+	ip->status |= (ISI_DTR | ISI_RTS);
 	unlock_card(card);
 }
 
@@ -830,80 +832,10 @@
 	return 0;
 }
 
-static int block_til_ready(struct tty_struct *tty, struct file *filp,
-	struct isi_port *port)
+static int isicom_carrier_raised(struct tty_port *port)
 {
-	struct isi_board *card = port->card;
-	int do_clocal = 0, retval;
-	unsigned long flags;
-	DECLARE_WAITQUEUE(wait, current);
-
-	/* block if port is in the process of being closed */
-
-	if (tty_hung_up_p(filp) || port->port.flags & ASYNC_CLOSING) {
-		pr_dbg("block_til_ready: close in progress.\n");
-		interruptible_sleep_on(&port->port.close_wait);
-		if (port->port.flags & ASYNC_HUP_NOTIFY)
-			return -EAGAIN;
-		else
-			return -ERESTARTSYS;
-	}
-
-	/* if non-blocking mode is set ... */
-
-	if ((filp->f_flags & O_NONBLOCK) ||
-			(tty->flags & (1 << TTY_IO_ERROR))) {
-		pr_dbg("block_til_ready: non-block mode.\n");
-		port->port.flags |= ASYNC_NORMAL_ACTIVE;
-		return 0;
-	}
-
-	if (C_CLOCAL(tty))
-		do_clocal = 1;
-
-	/* block waiting for DCD to be asserted, and while
-						callout dev is busy */
-	retval = 0;
-	add_wait_queue(&port->port.open_wait, &wait);
-
-	spin_lock_irqsave(&card->card_lock, flags);
-	if (!tty_hung_up_p(filp))
-		port->port.count--;
-	port->port.blocked_open++;
-	spin_unlock_irqrestore(&card->card_lock, flags);
-
-	while (1) {
-		raise_dtr_rts(port);
-
-		set_current_state(TASK_INTERRUPTIBLE);
-		if (tty_hung_up_p(filp) || !(port->port.flags & ASYNC_INITIALIZED)) {
-			if (port->port.flags & ASYNC_HUP_NOTIFY)
-				retval = -EAGAIN;
-			else
-				retval = -ERESTARTSYS;
-			break;
-		}
-		if (!(port->port.flags & ASYNC_CLOSING) &&
-				(do_clocal || (port->status & ISI_DCD))) {
-			break;
-		}
-		if (signal_pending(current)) {
-			retval = -ERESTARTSYS;
-			break;
-		}
-		schedule();
-	}
-	set_current_state(TASK_RUNNING);
-	remove_wait_queue(&port->port.open_wait, &wait);
-	spin_lock_irqsave(&card->card_lock, flags);
-	if (!tty_hung_up_p(filp))
-		port->port.count++;
-	port->port.blocked_open--;
-	spin_unlock_irqrestore(&card->card_lock, flags);
-	if (retval)
-		return retval;
-	port->port.flags |= ASYNC_NORMAL_ACTIVE;
-	return 0;
+	struct isi_port *ip = container_of(port, struct isi_port, port);
+	return (ip->status & ISI_DCD) ? 1 : 0;
 }
 
 static int isicom_open(struct tty_struct *tty, struct file *filp)
@@ -932,12 +864,13 @@
 
 	isicom_setup_board(card);
 
+	/* FIXME: locking on port.count etc */
 	port->port.count++;
 	tty->driver_data = port;
 	tty_port_tty_set(&port->port, tty);
 	error = isicom_setup_port(tty);
 	if (error == 0)
-		error = block_til_ready(tty, filp, port);
+		error = tty_port_block_til_ready(&port->port, tty, filp);
 	return error;
 }
 
@@ -1012,76 +945,30 @@
 
 static void isicom_close(struct tty_struct *tty, struct file *filp)
 {
-	struct isi_port *port = tty->driver_data;
+	struct isi_port *ip = tty->driver_data;
+	struct tty_port *port = &ip->port;
 	struct isi_board *card;
 	unsigned long flags;
 
-	if (!port)
-		return;
-	card = port->card;
-	if (isicom_paranoia_check(port, tty->name, "isicom_close"))
+	BUG_ON(!ip);
+
+	card = ip->card;
+	if (isicom_paranoia_check(ip, tty->name, "isicom_close"))
 		return;
 
-	pr_dbg("Close start!!!.\n");
-
-	spin_lock_irqsave(&card->card_lock, flags);
-	if (tty_hung_up_p(filp)) {
-		spin_unlock_irqrestore(&card->card_lock, flags);
-		return;
-	}
-
-	if (tty->count == 1 && port->port.count != 1) {
-		printk(KERN_WARNING "ISICOM:(0x%lx) isicom_close: bad port "
-			"count tty->count = 1 port count = %d.\n",
-			card->base, port->port.count);
-		port->port.count = 1;
-	}
-	if (--port->port.count < 0) {
-		printk(KERN_WARNING "ISICOM:(0x%lx) isicom_close: bad port "
-			"count for channel%d = %d", card->base, port->channel,
-			port->port.count);
-		port->port.count = 0;
-	}
-
-	if (port->port.count) {
-		spin_unlock_irqrestore(&card->card_lock, flags);
-		return;
-	}
-	port->port.flags |= ASYNC_CLOSING;
-	tty->closing = 1;
-	spin_unlock_irqrestore(&card->card_lock, flags);
-
-	if (port->port.closing_wait != ASYNC_CLOSING_WAIT_NONE)
-		tty_wait_until_sent(tty, port->port.closing_wait);
 	/* indicate to the card that no more data can be received
 	   on this port */
 	spin_lock_irqsave(&card->card_lock, flags);
-	if (port->port.flags & ASYNC_INITIALIZED) {
-		card->port_status &= ~(1 << port->channel);
+	if (port->flags & ASYNC_INITIALIZED) {
+		card->port_status &= ~(1 << ip->channel);
 		outw(card->port_status, card->base + 0x02);
 	}
-	isicom_shutdown_port(port);
+	isicom_shutdown_port(ip);
 	spin_unlock_irqrestore(&card->card_lock, flags);
 
 	isicom_flush_buffer(tty);
-	tty_ldisc_flush(tty);
-
-	spin_lock_irqsave(&card->card_lock, flags);
-	tty->closing = 0;
-
-	if (port->port.blocked_open) {
-		spin_unlock_irqrestore(&card->card_lock, flags);
-		if (port->port.close_delay) {
-			pr_dbg("scheduling until time out.\n");
-			msleep_interruptible(
-				jiffies_to_msecs(port->port.close_delay));
-		}
-		spin_lock_irqsave(&card->card_lock, flags);
-		wake_up_interruptible(&port->port.open_wait);
-	}
-	port->port.flags &= ~(ASYNC_NORMAL_ACTIVE | ASYNC_CLOSING);
-	wake_up_interruptible(&port->port.close_wait);
-	spin_unlock_irqrestore(&card->card_lock, flags);
+
+	tty_port_close_end(port, tty);
 }
 
 /* write et all */
@@ -1420,10 +1307,7 @@
 	isicom_shutdown_port(port);
 	spin_unlock_irqrestore(&port->card->card_lock, flags);
 
-	port->port.count = 0;
-	port->port.flags &= ~ASYNC_NORMAL_ACTIVE;
-	tty_port_tty_set(&port->port, NULL);
-	wake_up_interruptible(&port->port.open_wait);
+	tty_port_hangup(&port->port);
 }
 
 
@@ -1452,6 +1336,11 @@
 	.break_ctl		= isicom_send_break,
 };
 
+static const struct tty_port_operations isicom_port_ops = {
+	.carrier_raised		= isicom_carrier_raised,
+	.raise_dtr_rts		= isicom_raise_dtr_rts,
+};
+
 static int __devinit reset_card(struct pci_dev *pdev,
 	const unsigned int card, unsigned int *signature)
 {
@@ -1794,6 +1683,7 @@
 		spin_lock_init(&isi_card[idx].card_lock);
 		for (channel = 0; channel < 16; channel++, port++) {
 			tty_port_init(&port->port);
+			port->port.ops = &isicom_port_ops;
 			port->magic = ISICOM_MAGIC;
 			port->card = &isi_card[idx];
 			port->channel = channel;
diff --git a/drivers/char/istallion.c b/drivers/char/istallion.c
index 4b10770..5c3dc6b 100644
--- a/drivers/char/istallion.c
+++ b/drivers/char/istallion.c
@@ -151,7 +151,7 @@
 static char	*stli_serialname = "ttyE";
 
 static struct tty_driver	*stli_serial;
-
+static const struct tty_port_operations stli_port_ops;
 
 #define	STLI_TXBUFSIZE		4096
 
@@ -626,8 +626,6 @@
 static int	stli_initopen(struct tty_struct *tty, struct stlibrd *brdp, struct stliport *portp);
 static int	stli_rawopen(struct stlibrd *brdp, struct stliport *portp, unsigned long arg, int wait);
 static int	stli_rawclose(struct stlibrd *brdp, struct stliport *portp, unsigned long arg, int wait);
-static int	stli_waitcarrier(struct tty_struct *tty, struct stlibrd *brdp,
-				struct stliport *portp, struct file *filp);
 static int	stli_setport(struct tty_struct *tty);
 static int	stli_cmdwait(struct stlibrd *brdp, struct stliport *portp, unsigned long cmd, void *arg, int size, int copyback);
 static void	stli_sendcmd(struct stlibrd *brdp, struct stliport *portp, unsigned long cmd, void *arg, int size, int copyback);
@@ -769,7 +767,7 @@
 			break;
 	}
 	if (i == ARRAY_SIZE(stli_brdstr)) {
-		printk("STALLION: unknown board name, %s?\n", argp[0]);
+		printk(KERN_WARNING "istallion: unknown board name, %s?\n", argp[0]);
 		return 0;
 	}
 
@@ -787,6 +785,7 @@
 {
 	struct stlibrd *brdp;
 	struct stliport *portp;
+	struct tty_port *port;
 	unsigned int minordev, brdnr, portnr;
 	int rc;
 
@@ -808,30 +807,19 @@
 		return -ENODEV;
 	if (portp->devnr < 1)
 		return -ENODEV;
-
-
-/*
- *	Check if this port is in the middle of closing. If so then wait
- *	until it is closed then return error status based on flag settings.
- *	The sleep here does not need interrupt protection since the wakeup
- *	for it is done with the same context.
- */
-	if (portp->port.flags & ASYNC_CLOSING) {
-		interruptible_sleep_on(&portp->port.close_wait);
-		if (portp->port.flags & ASYNC_HUP_NOTIFY)
-			return -EAGAIN;
-		return -ERESTARTSYS;
-	}
+	port = &portp->port;
 
 /*
  *	On the first open of the device setup the port hardware, and
  *	initialize the per port data structure. Since initializing the port
  *	requires several commands to the board we will need to wait for any
  *	other open that is already initializing the port.
+ *
+ *	Review - locking
  */
-	tty_port_tty_set(&portp->port, tty);
+	tty_port_tty_set(port, tty);
 	tty->driver_data = portp;
-	portp->port.count++;
+	port->count++;
 
 	wait_event_interruptible(portp->raw_wait,
 			!test_bit(ST_INITIALIZING, &portp->state));
@@ -841,7 +829,8 @@
 	if ((portp->port.flags & ASYNC_INITIALIZED) == 0) {
 		set_bit(ST_INITIALIZING, &portp->state);
 		if ((rc = stli_initopen(tty, brdp, portp)) >= 0) {
-			portp->port.flags |= ASYNC_INITIALIZED;
+			/* Locking */
+			port->flags |= ASYNC_INITIALIZED;
 			clear_bit(TTY_IO_ERROR, &tty->flags);
 		}
 		clear_bit(ST_INITIALIZING, &portp->state);
@@ -849,31 +838,7 @@
 		if (rc < 0)
 			return rc;
 	}
-
-/*
- *	Check if this port is in the middle of closing. If so then wait
- *	until it is closed then return error status, based on flag settings.
- *	The sleep here does not need interrupt protection since the wakeup
- *	for it is done with the same context.
- */
-	if (portp->port.flags & ASYNC_CLOSING) {
-		interruptible_sleep_on(&portp->port.close_wait);
-		if (portp->port.flags & ASYNC_HUP_NOTIFY)
-			return -EAGAIN;
-		return -ERESTARTSYS;
-	}
-
-/*
- *	Based on type of open being done check if it can overlap with any
- *	previous opens still in effect. If we are a normal serial device
- *	then also we might have to wait for carrier.
- */
-	if (!(filp->f_flags & O_NONBLOCK)) {
-		if ((rc = stli_waitcarrier(tty, brdp, portp, filp)) != 0)
-			return rc;
-	}
-	portp->port.flags |= ASYNC_NORMAL_ACTIVE;
-	return 0;
+	return tty_port_block_til_ready(&portp->port, tty, filp);
 }
 
 /*****************************************************************************/
@@ -882,25 +847,16 @@
 {
 	struct stlibrd *brdp;
 	struct stliport *portp;
+	struct tty_port *port;
 	unsigned long flags;
 
 	portp = tty->driver_data;
 	if (portp == NULL)
 		return;
+	port = &portp->port;
 
-	spin_lock_irqsave(&stli_lock, flags);
-	if (tty_hung_up_p(filp)) {
-		spin_unlock_irqrestore(&stli_lock, flags);
+	if (tty_port_close_start(port, tty, filp) == 0)
 		return;
-	}
-	if ((tty->count == 1) && (portp->port.count != 1))
-		portp->port.count = 1;
-	if (portp->port.count-- > 1) {
-		spin_unlock_irqrestore(&stli_lock, flags);
-		return;
-	}
-
-	portp->port.flags |= ASYNC_CLOSING;
 
 /*
  *	May want to wait for data to drain before closing. The BUSY flag
@@ -908,15 +864,19 @@
  *	updated by messages from the slave - indicating when all chars
  *	really have drained.
  */
+	spin_lock_irqsave(&stli_lock, flags);
 	if (tty == stli_txcooktty)
 		stli_flushchars(tty);
-	tty->closing = 1;
 	spin_unlock_irqrestore(&stli_lock, flags);
 
+	/* We end up doing this twice for the moment. This needs looking at
+	   eventually. Note we still use portp->closing_wait as a result */
 	if (portp->closing_wait != ASYNC_CLOSING_WAIT_NONE)
 		tty_wait_until_sent(tty, portp->closing_wait);
 
-	portp->port.flags &= ~ASYNC_INITIALIZED;
+	/* FIXME: port locking here needs attending to */
+	port->flags &= ~ASYNC_INITIALIZED;
+
 	brdp = stli_brds[portp->brdnr];
 	stli_rawclose(brdp, portp, 0, 0);
 	if (tty->termios->c_cflag & HUPCL) {
@@ -934,17 +894,8 @@
 	set_bit(ST_DOFLUSHRX, &portp->state);
 	stli_flushbuffer(tty);
 
-	tty->closing = 0;
-	tty_port_tty_set(&portp->port, NULL);
-
-	if (portp->openwaitcnt) {
-		if (portp->close_delay)
-			msleep_interruptible(jiffies_to_msecs(portp->close_delay));
-		wake_up_interruptible(&portp->port.open_wait);
-	}
-
-	portp->port.flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING);
-	wake_up_interruptible(&portp->port.close_wait);
+	tty_port_close_end(port, tty);
+	tty_port_tty_set(port, NULL);
 }
 
 /*****************************************************************************/
@@ -1183,62 +1134,23 @@
 
 /*****************************************************************************/
 
-/*
- *	Possibly need to wait for carrier (DCD signal) to come high. Say
- *	maybe because if we are clocal then we don't need to wait...
- */
-
-static int stli_waitcarrier(struct tty_struct *tty, struct stlibrd *brdp,
-				struct stliport *portp, struct file *filp)
+static int stli_carrier_raised(struct tty_port *port)
 {
-	unsigned long flags;
-	int rc, doclocal;
-
-	rc = 0;
-	doclocal = 0;
-
-	if (tty->termios->c_cflag & CLOCAL)
-		doclocal++;
-
-	spin_lock_irqsave(&stli_lock, flags);
-	portp->openwaitcnt++;
-	if (! tty_hung_up_p(filp))
-		portp->port.count--;
-	spin_unlock_irqrestore(&stli_lock, flags);
-
-	for (;;) {
-		stli_mkasysigs(&portp->asig, 1, 1);
-		if ((rc = stli_cmdwait(brdp, portp, A_SETSIGNALS,
-		    &portp->asig, sizeof(asysigs_t), 0)) < 0)
-			break;
-		if (tty_hung_up_p(filp) ||
-		    ((portp->port.flags & ASYNC_INITIALIZED) == 0)) {
-			if (portp->port.flags & ASYNC_HUP_NOTIFY)
-				rc = -EBUSY;
-			else
-				rc = -ERESTARTSYS;
-			break;
-		}
-		if (((portp->port.flags & ASYNC_CLOSING) == 0) &&
-		    (doclocal || (portp->sigs & TIOCM_CD))) {
-			break;
-		}
-		if (signal_pending(current)) {
-			rc = -ERESTARTSYS;
-			break;
-		}
-		interruptible_sleep_on(&portp->port.open_wait);
-	}
-
-	spin_lock_irqsave(&stli_lock, flags);
-	if (! tty_hung_up_p(filp))
-		portp->port.count++;
-	portp->openwaitcnt--;
-	spin_unlock_irqrestore(&stli_lock, flags);
-
-	return rc;
+	struct stliport *portp = container_of(port, struct stliport, port);
+	return (portp->sigs & TIOCM_CD) ? 1 : 0;
 }
 
+static void stli_raise_dtr_rts(struct tty_port *port)
+{
+	struct stliport *portp = container_of(port, struct stliport, port);
+	struct stlibrd *brdp = stli_brds[portp->brdnr];
+	stli_mkasysigs(&portp->asig, 1, 1);
+	if (stli_cmdwait(brdp, portp, A_SETSIGNALS, &portp->asig,
+		sizeof(asysigs_t), 0) < 0)
+			printk(KERN_WARNING "istallion: dtr raise failed.\n");
+}
+
+
 /*****************************************************************************/
 
 /*
@@ -1550,7 +1462,7 @@
 	sio.irq = 0;
 	sio.flags = portp->port.flags;
 	sio.baud_base = portp->baud_base;
-	sio.close_delay = portp->close_delay;
+	sio.close_delay = portp->port.close_delay;
 	sio.closing_wait = portp->closing_wait;
 	sio.custom_divisor = portp->custom_divisor;
 	sio.xmit_fifo_size = 0;
@@ -1582,7 +1494,7 @@
 		return -EFAULT;
 	if (!capable(CAP_SYS_ADMIN)) {
 		if ((sio.baud_base != portp->baud_base) ||
-		    (sio.close_delay != portp->close_delay) ||
+		    (sio.close_delay != portp->port.close_delay) ||
 		    ((sio.flags & ~ASYNC_USR_MASK) !=
 		    (portp->port.flags & ~ASYNC_USR_MASK)))
 			return -EPERM;
@@ -1591,7 +1503,7 @@
 	portp->port.flags = (portp->port.flags & ~ASYNC_USR_MASK) |
 		(sio.flags & ASYNC_USR_MASK);
 	portp->baud_base = sio.baud_base;
-	portp->close_delay = sio.close_delay;
+	portp->port.close_delay = sio.close_delay;
 	portp->closing_wait = sio.closing_wait;
 	portp->custom_divisor = sio.custom_divisor;
 
@@ -1821,6 +1733,7 @@
 {
 	struct stliport *portp;
 	struct stlibrd *brdp;
+	struct tty_port *port;
 	unsigned long flags;
 
 	portp = tty->driver_data;
@@ -1831,8 +1744,11 @@
 	brdp = stli_brds[portp->brdnr];
 	if (brdp == NULL)
 		return;
+	port = &portp->port;
 
-	portp->port.flags &= ~ASYNC_INITIALIZED;
+	spin_lock_irqsave(&port->lock, flags);
+	port->flags &= ~ASYNC_INITIALIZED;
+	spin_unlock_irqrestore(&port->lock, flags);
 
 	if (!test_bit(ST_CLOSING, &portp->state))
 		stli_rawclose(brdp, portp, 0, 0);
@@ -1853,12 +1769,9 @@
 	clear_bit(ST_TXBUSY, &portp->state);
 	clear_bit(ST_RXSTOP, &portp->state);
 	set_bit(TTY_IO_ERROR, &tty->flags);
-	tty_port_tty_set(&portp->port, NULL);
-	portp->port.flags &= ~ASYNC_NORMAL_ACTIVE;
-	portp->port.count = 0;
 	spin_unlock_irqrestore(&stli_lock, flags);
 
-	wake_up_interruptible(&portp->port.open_wait);
+	tty_port_hangup(port);
 }
 
 /*****************************************************************************/
@@ -2132,7 +2045,7 @@
 	unsigned char __iomem *bits;
 
 	if (test_bit(ST_CMDING, &portp->state)) {
-		printk(KERN_ERR "STALLION: command already busy, cmd=%x!\n",
+		printk(KERN_ERR "istallion: command already busy, cmd=%x!\n",
 				(int) cmd);
 		return;
 	}
@@ -2692,16 +2605,17 @@
 	for (i = 0, panelnr = 0, panelport = 0; (i < brdp->nrports); i++) {
 		portp = kzalloc(sizeof(struct stliport), GFP_KERNEL);
 		if (!portp) {
-			printk("STALLION: failed to allocate port structure\n");
+			printk(KERN_WARNING "istallion: failed to allocate port structure\n");
 			continue;
 		}
 		tty_port_init(&portp->port);
+		portp->port.ops = &stli_port_ops;
 		portp->magic = STLI_PORTMAGIC;
 		portp->portnr = i;
 		portp->brdnr = brdp->brdnr;
 		portp->panelnr = panelnr;
 		portp->baud_base = STL_BAUDBASE;
-		portp->close_delay = STL_CLOSEDELAY;
+		portp->port.close_delay = STL_CLOSEDELAY;
 		portp->closing_wait = 30 * HZ;
 		init_waitqueue_head(&portp->port.open_wait);
 		init_waitqueue_head(&portp->port.close_wait);
@@ -2758,7 +2672,7 @@
 	unsigned char val;
 
 	if (offset > brdp->memsize) {
-		printk(KERN_ERR "STALLION: shared memory pointer=%x out of "
+		printk(KERN_ERR "istallion: shared memory pointer=%x out of "
 				"range at line=%d(%d), brd=%d\n",
 			(int) offset, line, __LINE__, brdp->brdnr);
 		ptr = NULL;
@@ -2832,7 +2746,7 @@
 	unsigned char	val;
 
 	if (offset > brdp->memsize) {
-		printk(KERN_ERR "STALLION: shared memory pointer=%x out of "
+		printk(KERN_ERR "istallion: shared memory pointer=%x out of "
 				"range at line=%d(%d), brd=%d\n",
 			(int) offset, line, __LINE__, brdp->brdnr);
 		ptr = NULL;
@@ -2884,7 +2798,7 @@
 	unsigned char val;
 
 	if (offset > brdp->memsize) {
-		printk(KERN_ERR "STALLION: shared memory pointer=%x out of "
+		printk(KERN_ERR "istallion: shared memory pointer=%x out of "
 				"range at line=%d(%d), brd=%d\n",
 			(int) offset, line, __LINE__, brdp->brdnr);
 		ptr = NULL;
@@ -2929,7 +2843,7 @@
 	unsigned char	val;
 
 	if (offset > brdp->memsize) {
-		printk(KERN_ERR "STALLION: shared memory pointer=%x out of "
+		printk(KERN_ERR "istallion: shared memory pointer=%x out of "
 				"range at line=%d(%d), board=%d\n",
 				(int) offset, line, __LINE__, brdp->brdnr);
 		ptr = NULL;
@@ -2994,7 +2908,7 @@
 	void __iomem *ptr;
 
 	if (offset > brdp->memsize) {
-		printk(KERN_ERR "STALLION: shared memory pointer=%x out of "
+		printk(KERN_ERR "istallion: shared memory pointer=%x out of "
 				"range at line=%d(%d), brd=%d\n",
 				(int) offset, line, __LINE__, brdp->brdnr);
 		ptr = NULL;
@@ -3060,7 +2974,7 @@
 	unsigned char val;
 
 	if (offset > brdp->memsize) {
-		printk(KERN_ERR "STALLION: shared memory pointer=%x out of "
+		printk(KERN_ERR "istallion: shared memory pointer=%x out of "
 				"range at line=%d(%d), brd=%d\n",
 			(int) offset, line, __LINE__, brdp->brdnr);
 		ptr = NULL;
@@ -3499,7 +3413,7 @@
 #endif
 
 	if (nrdevs < (brdp->nrports + 1)) {
-		printk(KERN_ERR "STALLION: slave failed to allocate memory for "
+		printk(KERN_ERR "istallion: slave failed to allocate memory for "
 				"all devices, devices=%d\n", nrdevs);
 		brdp->nrports = nrdevs - 1;
 	}
@@ -3509,13 +3423,13 @@
 	brdp->bitsize = (nrdevs + 7) / 8;
 	memoff = readl(&hdrp->memp);
 	if (memoff > brdp->memsize) {
-		printk(KERN_ERR "STALLION: corrupted shared memory region?\n");
+		printk(KERN_ERR "istallion: corrupted shared memory region?\n");
 		rc = -EIO;
 		goto stli_donestartup;
 	}
 	memp = (cdkmem_t __iomem *) EBRDGETMEMPTR(brdp, memoff);
 	if (readw(&memp->dtype) != TYP_ASYNCTRL) {
-		printk(KERN_ERR "STALLION: no slave control device found\n");
+		printk(KERN_ERR "istallion: no slave control device found\n");
 		goto stli_donestartup;
 	}
 	memp++;
@@ -3600,7 +3514,7 @@
 		retval = stli_initonb(brdp);
 		break;
 	default:
-		printk(KERN_ERR "STALLION: board=%d is unknown board "
+		printk(KERN_ERR "istallion: board=%d is unknown board "
 				"type=%d\n", brdp->brdnr, brdp->brdtype);
 		retval = -ENODEV;
 	}
@@ -3609,7 +3523,7 @@
 		return retval;
 
 	stli_initports(brdp);
-	printk(KERN_INFO "STALLION: %s found, board=%d io=%x mem=%x "
+	printk(KERN_INFO "istallion: %s found, board=%d io=%x mem=%x "
 		"nrpanels=%d nrports=%d\n", stli_brdnames[brdp->brdtype],
 		brdp->brdnr, brdp->iobase, (int) brdp->memaddr,
 		brdp->nrpanels, brdp->nrports);
@@ -3703,7 +3617,7 @@
 	if (! foundit) {
 		brdp->memaddr = 0;
 		brdp->membase = NULL;
-		printk(KERN_ERR "STALLION: failed to probe shared memory "
+		printk(KERN_ERR "istallion: failed to probe shared memory "
 				"region for %s in EISA slot=%d\n",
 			stli_brdnames[brdp->brdtype], (brdp->iobase >> 12));
 		return -ENODEV;
@@ -3848,7 +3762,7 @@
 	mutex_lock(&stli_brdslock);
 	brdnr = stli_getbrdnr();
 	if (brdnr < 0) {
-		printk(KERN_INFO "STALLION: too many boards found, "
+		printk(KERN_INFO "istallion: too many boards found, "
 			"maximum supported %d\n", STL_MAXBRDS);
 		mutex_unlock(&stli_brdslock);
 		retval = -EIO;
@@ -3920,7 +3834,7 @@
 
 	brdp = kzalloc(sizeof(struct stlibrd), GFP_KERNEL);
 	if (!brdp) {
-		printk(KERN_ERR "STALLION: failed to allocate memory "
+		printk(KERN_ERR "istallion: failed to allocate memory "
 				"(size=%Zd)\n", sizeof(struct stlibrd));
 		return NULL;
 	}
@@ -4518,6 +4432,11 @@
 	.tiocmset = stli_tiocmset,
 };
 
+static const struct tty_port_operations stli_port_ops = {
+	.carrier_raised = stli_carrier_raised,
+	.raise_dtr_rts = stli_raise_dtr_rts,
+};
+
 /*****************************************************************************/
 /*
  *	Loadable module initialization stuff.
@@ -4554,7 +4473,7 @@
 
 	stli_txcookbuf = kmalloc(STLI_TXBUFSIZE, GFP_KERNEL);
 	if (!stli_txcookbuf) {
-		printk(KERN_ERR "STALLION: failed to allocate memory "
+		printk(KERN_ERR "istallion: failed to allocate memory "
 				"(size=%d)\n", STLI_TXBUFSIZE);
 		retval = -ENOMEM;
 		goto err;
@@ -4579,7 +4498,7 @@
 
 	retval = tty_register_driver(stli_serial);
 	if (retval) {
-		printk(KERN_ERR "STALLION: failed to register serial driver\n");
+		printk(KERN_ERR "istallion: failed to register serial driver\n");
 		goto err_ttyput;
 	}
 
@@ -4593,7 +4512,7 @@
  */
 	retval = register_chrdev(STL_SIOMEMMAJOR, "staliomem", &stli_fsiomem);
 	if (retval) {
-		printk(KERN_ERR "STALLION: failed to register serial memory "
+		printk(KERN_ERR "istallion: failed to register serial memory "
 				"device\n");
 		goto err_deinit;
 	}
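
The istallion hunks above follow the conversion pattern used throughout this series: the driver supplies a const tty_port_operations table and points each port's ops field at it during initialisation. A minimal sketch of that wiring, using hypothetical example_* names rather than any particular driver's, is:

#include <linux/tty.h>

/* Hypothetical driver-private port, embedding the generic tty_port. */
struct example_port {
	struct tty_port port;
	/* ...hardware state... */
};

static int example_carrier_raised(struct tty_port *port)
{
	/* Read DCD from the hardware behind 'port'; return 0 or 1. */
	return 1;
}

static void example_raise_dtr_rts(struct tty_port *port)
{
	/* Assert DTR and RTS on the hardware behind 'port'. */
}

static const struct tty_port_operations example_port_ops = {
	.carrier_raised	= example_carrier_raised,
	.raise_dtr_rts	= example_raise_dtr_rts,
};

static void example_init_one_port(struct example_port *p)
{
	tty_port_init(&p->port);
	p->port.ops = &example_port_ops;	/* as the hunks above do */
}
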
diff --git a/drivers/char/moxa.c b/drivers/char/moxa.c
index 12d327a..8b0da97 100644
--- a/drivers/char/moxa.c
+++ b/drivers/char/moxa.c
@@ -206,6 +206,7 @@
 static void moxa_set_tty_param(struct tty_struct *, struct ktermios *);
 static void moxa_setup_empty_event(struct tty_struct *);
 static void moxa_shut_down(struct tty_struct *);
+static int moxa_carrier_raised(struct tty_port *);
 /*
  * moxa board interface functions:
  */
@@ -405,6 +406,10 @@
 	.tiocmset = moxa_tiocmset,
 };
 
+static const struct tty_port_operations moxa_port_ops = {
+	.carrier_raised = moxa_carrier_raised,
+};
+
 static struct tty_driver *moxaDriver;
 static DEFINE_TIMER(moxaTimer, moxa_poll, 0, 0);
 static DEFINE_SPINLOCK(moxa_lock);
@@ -826,6 +831,7 @@
 
 	for (i = 0, p = brd->ports; i < MAX_PORTS_PER_BOARD; i++, p++) {
 		tty_port_init(&p->port);
+		p->port.ops = &moxa_port_ops;
 		p->type = PORT_16550A;
 		p->cflag = B9600 | CS8 | CREAD | CLOCAL | HUPCL;
 	}
@@ -1115,15 +1121,27 @@
 	tty_port_tty_set(&ch->port, NULL);
 }
 
+static int moxa_carrier_raised(struct tty_port *port)
+{
+	struct moxa_port *ch = container_of(port, struct moxa_port, port);
+	int dcd;
+
+	spin_lock_bh(&moxa_lock);
+	dcd = ch->DCDState;
+	spin_unlock_bh(&moxa_lock);
+	return dcd;
+}
+
 static int moxa_block_till_ready(struct tty_struct *tty, struct file *filp,
 			    struct moxa_port *ch)
 {
+	struct tty_port *port = &ch->port;
 	DEFINE_WAIT(wait);
 	int retval = 0;
 	u8 dcd;
 
 	while (1) {
-		prepare_to_wait(&ch->port.open_wait, &wait, TASK_INTERRUPTIBLE);
+		prepare_to_wait(&port->open_wait, &wait, TASK_INTERRUPTIBLE);
 		if (tty_hung_up_p(filp)) {
 #ifdef SERIAL_DO_RESTART
 			retval = -ERESTARTSYS;
@@ -1132,9 +1150,7 @@
 #endif
 			break;
 		}
-		spin_lock_bh(&moxa_lock);
-		dcd = ch->DCDState;
-		spin_unlock_bh(&moxa_lock);
+		dcd = tty_port_carrier_raised(port);
 		if (dcd)
 			break;
 
@@ -1144,7 +1160,7 @@
 		}
 		schedule();
 	}
-	finish_wait(&ch->port.open_wait, &wait);
+	finish_wait(&port->open_wait, &wait);
 
 	return retval;
 }
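
moxa_carrier_raised() recovers the driver-private moxa_port from the embedded tty_port via container_of(). The embed-and-recover idiom, stripped of kernel types, can be shown with a small self-contained userspace program; all names here are hypothetical stand-ins, and container_of is the usual offsetof-based form rather than the kernel macro:

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct generic_port {			/* stands in for struct tty_port */
	int count;
};

struct hw_port {			/* stands in for struct moxa_port */
	int dcd_state;
	struct generic_port port;	/* embedded generic part */
};

/* The callback only sees the generic part and recovers the full struct. */
static int hw_carrier_raised(struct generic_port *port)
{
	struct hw_port *hp = container_of(port, struct hw_port, port);
	return hp->dcd_state;
}

int main(void)
{
	struct hw_port hp = { .dcd_state = 1 };

	printf("carrier: %d\n", hw_carrier_raised(&hp.port));
	return 0;
}
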
diff --git a/drivers/char/mxser.c b/drivers/char/mxser.c
index 0477669..402c9f2 100644
--- a/drivers/char/mxser.c
+++ b/drivers/char/mxser.c
@@ -541,74 +541,21 @@
 	return status;
 }
 
-static int mxser_block_til_ready(struct tty_struct *tty, struct file *filp,
-		struct mxser_port *port)
+static int mxser_carrier_raised(struct tty_port *port)
 {
-	DECLARE_WAITQUEUE(wait, current);
-	int retval;
-	int do_clocal = 0;
+	struct mxser_port *mp = container_of(port, struct mxser_port, port);
+	return (inb(mp->ioaddr + UART_MSR) & UART_MSR_DCD) ? 1 : 0;
+}
+
+static void mxser_raise_dtr_rts(struct tty_port *port)
+{
+	struct mxser_port *mp = container_of(port, struct mxser_port, port);
 	unsigned long flags;
 
-	/*
-	 * If non-blocking mode is set, or the port is not enabled,
-	 * then make the check up front and then exit.
-	 */
-	if ((filp->f_flags & O_NONBLOCK) ||
-			test_bit(TTY_IO_ERROR, &tty->flags)) {
-		port->port.flags |= ASYNC_NORMAL_ACTIVE;
-		return 0;
-	}
-
-	if (tty->termios->c_cflag & CLOCAL)
-		do_clocal = 1;
-
-	/*
-	 * Block waiting for the carrier detect and the line to become
-	 * free (i.e., not in use by the callout).  While we are in
-	 * this loop, port->port.count is dropped by one, so that
-	 * mxser_close() knows when to free things.  We restore it upon
-	 * exit, either normal or abnormal.
-	 */
-	retval = 0;
-	add_wait_queue(&port->port.open_wait, &wait);
-
-	spin_lock_irqsave(&port->slock, flags);
-	if (!tty_hung_up_p(filp))
-		port->port.count--;
-	spin_unlock_irqrestore(&port->slock, flags);
-	port->port.blocked_open++;
-	while (1) {
-		spin_lock_irqsave(&port->slock, flags);
-		outb(inb(port->ioaddr + UART_MCR) |
-			UART_MCR_DTR | UART_MCR_RTS, port->ioaddr + UART_MCR);
-		spin_unlock_irqrestore(&port->slock, flags);
-		set_current_state(TASK_INTERRUPTIBLE);
-		if (tty_hung_up_p(filp) || !(port->port.flags & ASYNC_INITIALIZED)) {
-			if (port->port.flags & ASYNC_HUP_NOTIFY)
-				retval = -EAGAIN;
-			else
-				retval = -ERESTARTSYS;
-			break;
-		}
-		if (!(port->port.flags & ASYNC_CLOSING) &&
-				(do_clocal ||
-				(inb(port->ioaddr + UART_MSR) & UART_MSR_DCD)))
-			break;
-		if (signal_pending(current)) {
-			retval = -ERESTARTSYS;
-			break;
-		}
-		schedule();
-	}
-	set_current_state(TASK_RUNNING);
-	remove_wait_queue(&port->port.open_wait, &wait);
-	if (!tty_hung_up_p(filp))
-		port->port.count++;
-	port->port.blocked_open--;
-	if (retval)
-		return retval;
-	port->port.flags |= ASYNC_NORMAL_ACTIVE;
-	return 0;
+	spin_lock_irqsave(&mp->slock, flags);
+	outb(inb(mp->ioaddr + UART_MCR) |
+		UART_MCR_DTR | UART_MCR_RTS, mp->ioaddr + UART_MCR);
+	spin_unlock_irqrestore(&mp->slock, flags);
 }
 
 static int mxser_set_baud(struct tty_struct *tty, long newspd)
@@ -1087,14 +1034,14 @@
 	/*
 	 * Start up serial port
 	 */
-	spin_lock_irqsave(&info->slock, flags);
+	spin_lock_irqsave(&info->port.lock, flags);
 	info->port.count++;
-	spin_unlock_irqrestore(&info->slock, flags);
+	spin_unlock_irqrestore(&info->port.lock, flags);
 	retval = mxser_startup(tty);
 	if (retval)
 		return retval;
 
-	retval = mxser_block_til_ready(tty, filp, info);
+	retval = tty_port_block_til_ready(&info->port, tty, filp);
 	if (retval)
 		return retval;
 
@@ -1133,58 +1080,27 @@
 static void mxser_close(struct tty_struct *tty, struct file *filp)
 {
 	struct mxser_port *info = tty->driver_data;
+	struct tty_port *port = &info->port;
 
 	unsigned long timeout;
-	unsigned long flags;
 
 	if (tty->index == MXSER_PORTS)
 		return;
 	if (!info)
 		return;
 
-	spin_lock_irqsave(&info->slock, flags);
+	if (tty_port_close_start(port, tty, filp) == 0)
+		return;
 
-	if (tty_hung_up_p(filp)) {
-		spin_unlock_irqrestore(&info->slock, flags);
-		return;
-	}
-	if ((tty->count == 1) && (info->port.count != 1)) {
-		/*
-		 * Uh, oh.  tty->count is 1, which means that the tty
-		 * structure will be freed.  Info->port.count should always
-		 * be one in these conditions.  If it's greater than
-		 * one, we've got real problems, since it means the
-		 * serial port won't be shutdown.
-		 */
-		printk(KERN_ERR "mxser_close: bad serial port count; "
-			"tty->count is 1, info->port.count is %d\n", info->port.count);
-		info->port.count = 1;
-	}
-	if (--info->port.count < 0) {
-		printk(KERN_ERR "mxser_close: bad serial port count for "
-			"ttys%d: %d\n", tty->index, info->port.count);
-		info->port.count = 0;
-	}
-	if (info->port.count) {
-		spin_unlock_irqrestore(&info->slock, flags);
-		return;
-	}
-	info->port.flags |= ASYNC_CLOSING;
-	spin_unlock_irqrestore(&info->slock, flags);
 	/*
 	 * Save the termios structure, since this port may have
 	 * separate termios for callout and dialin.
+	 *
+	 * FIXME: Can this go ?
 	 */
 	if (info->port.flags & ASYNC_NORMAL_ACTIVE)
 		info->normal_termios = *tty->termios;
 	/*
-	 * Now we wait for the transmit buffer to clear; and we notify
-	 * the line discipline to only process XON/XOFF characters.
-	 */
-	tty->closing = 1;
-	if (info->port.closing_wait != ASYNC_CLOSING_WAIT_NONE)
-		tty_wait_until_sent(tty, info->port.closing_wait);
-	/*
 	 * At this point we stop accepting input.  To do this, we
 	 * disable the receive line status interrupts, and tell the
 	 * interrupt driver to stop checking the data ready bit in the
@@ -1209,19 +1125,12 @@
 		}
 	}
 	mxser_shutdown(tty);
-
 	mxser_flush_buffer(tty);
-	tty_ldisc_flush(tty);
 
-	tty->closing = 0;
-	tty_port_tty_set(&info->port, NULL);
-	if (info->port.blocked_open) {
-		if (info->port.close_delay)
-			schedule_timeout_interruptible(info->port.close_delay);
-		wake_up_interruptible(&info->port.open_wait);
-	}
-
-	info->port.flags &= ~(ASYNC_NORMAL_ACTIVE | ASYNC_CLOSING);
+	/* Right now the tty_port set is done outside of the close_end helper
+	   as we don't yet have everyone using refcounts */
+	tty_port_close_end(port, tty);
+	tty_port_tty_set(port, NULL);
 }
 
 static int mxser_write(struct tty_struct *tty, const unsigned char *buf, int count)
@@ -2146,10 +2055,7 @@
 
 	mxser_flush_buffer(tty);
 	mxser_shutdown(tty);
-	info->port.count = 0;
-	info->port.flags &= ~ASYNC_NORMAL_ACTIVE;
-	tty_port_tty_set(&info->port, NULL);
-	wake_up_interruptible(&info->port.open_wait);
+	tty_port_hangup(&info->port);
 }
 
 /*
@@ -2449,6 +2355,11 @@
 	.tiocmset = mxser_tiocmset,
 };
 
+static const struct tty_port_operations mxser_port_ops = {
+	.carrier_raised = mxser_carrier_raised,
+	.raise_dtr_rts = mxser_raise_dtr_rts,
+};
+
 /*
  * The MOXA Smartio/Industio serial driver boot-time initialization code!
  */
@@ -2482,6 +2393,7 @@
 	for (i = 0; i < brd->info->nports; i++) {
 		info = &brd->ports[i];
 		tty_port_init(&info->port);
+		info->port.ops = &mxser_port_ops;
 		info->board = brd;
 		info->stop_rx = 0;
 		info->ldisc_stop_rx = 0;
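
With the hunks above, mxser's open and close paths delegate the reference counting and carrier wait to the tty_port helpers. A rough sketch of the resulting control flow, assuming the helpers behave as this patch uses them (flags and retval declared by the caller):

	/* open path: count the opener, start the hardware, then let the
	 * tty_port helper block until carrier (or CLOCAL/O_NONBLOCK). */
	spin_lock_irqsave(&info->port.lock, flags);
	info->port.count++;
	spin_unlock_irqrestore(&info->port.lock, flags);
	retval = mxser_startup(tty);
	if (retval == 0)
		retval = tty_port_block_til_ready(&info->port, tty, filp);

	/* close path: the helper returns 0 when this is not the final close
	 * (or the tty was hung up), so only the last closer shuts the
	 * hardware down and lets the helper wake any blocked openers. */
	if (tty_port_close_start(&info->port, tty, filp) == 0)
		return;
	mxser_shutdown(tty);
	mxser_flush_buffer(tty);
	tty_port_close_end(&info->port, tty);
	tty_port_tty_set(&info->port, NULL);
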
diff --git a/drivers/char/n_r3964.c b/drivers/char/n_r3964.c
index 4a8215a..d2e93e3 100644
--- a/drivers/char/n_r3964.c
+++ b/drivers/char/n_r3964.c
@@ -1003,7 +1003,7 @@
 
 static void r3964_close(struct tty_struct *tty)
 {
-	struct r3964_info *pInfo = (struct r3964_info *)tty->disc_data;
+	struct r3964_info *pInfo = tty->disc_data;
 	struct r3964_client_info *pClient, *pNext;
 	struct r3964_message *pMsg;
 	struct r3964_block_header *pHeader, *pNextHeader;
@@ -1058,7 +1058,7 @@
 static ssize_t r3964_read(struct tty_struct *tty, struct file *file,
 			  unsigned char __user * buf, size_t nr)
 {
-	struct r3964_info *pInfo = (struct r3964_info *)tty->disc_data;
+	struct r3964_info *pInfo = tty->disc_data;
 	struct r3964_client_info *pClient;
 	struct r3964_message *pMsg;
 	struct r3964_client_message theMsg;
@@ -1113,7 +1113,7 @@
 static ssize_t r3964_write(struct tty_struct *tty, struct file *file,
 			   const unsigned char *data, size_t count)
 {
-	struct r3964_info *pInfo = (struct r3964_info *)tty->disc_data;
+	struct r3964_info *pInfo = tty->disc_data;
 	struct r3964_block_header *pHeader;
 	struct r3964_client_info *pClient;
 	unsigned char *new_data;
@@ -1182,7 +1182,7 @@
 static int r3964_ioctl(struct tty_struct *tty, struct file *file,
 		unsigned int cmd, unsigned long arg)
 {
-	struct r3964_info *pInfo = (struct r3964_info *)tty->disc_data;
+	struct r3964_info *pInfo = tty->disc_data;
 	if (pInfo == NULL)
 		return -EINVAL;
 	switch (cmd) {
@@ -1216,7 +1216,7 @@
 static unsigned int r3964_poll(struct tty_struct *tty, struct file *file,
 			struct poll_table_struct *wait)
 {
-	struct r3964_info *pInfo = (struct r3964_info *)tty->disc_data;
+	struct r3964_info *pInfo = tty->disc_data;
 	struct r3964_client_info *pClient;
 	struct r3964_message *pMsg = NULL;
 	unsigned long flags;
@@ -1241,7 +1241,7 @@
 static void r3964_receive_buf(struct tty_struct *tty, const unsigned char *cp,
 			char *fp, int count)
 {
-	struct r3964_info *pInfo = (struct r3964_info *)tty->disc_data;
+	struct r3964_info *pInfo = tty->disc_data;
 	const unsigned char *p;
 	char *f, flags = 0;
 	int i;
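
The n_r3964 hunks only drop redundant casts: tty->disc_data is a void *, which converts implicitly to any object pointer type in C, so the explicit casts added no safety. A tiny standalone illustration (hypothetical struct name):

#include <stdio.h>

struct r3964_info_like { int msg_count; };

int main(void)
{
	struct r3964_info_like info = { .msg_count = 3 };
	void *disc_data = &info;			/* like tty->disc_data */
	struct r3964_info_like *pInfo = disc_data;	/* no cast needed in C */

	printf("%d\n", pInfo->msg_count);
	return 0;
}
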
diff --git a/drivers/char/n_tty.c b/drivers/char/n_tty.c
index efbfe961..f6f0e4e 100644
--- a/drivers/char/n_tty.c
+++ b/drivers/char/n_tty.c
@@ -47,8 +47,8 @@
 #include <linux/bitops.h>
 #include <linux/audit.h>
 #include <linux/file.h>
+#include <linux/uaccess.h>
 
-#include <asm/uaccess.h>
 #include <asm/system.h>
 
 /* number of characters left in xmit buffer before select has we have room */
@@ -62,6 +62,17 @@
 #define TTY_THRESHOLD_THROTTLE		128 /* now based on remaining room */
 #define TTY_THRESHOLD_UNTHROTTLE 	128
 
+/*
+ * Special byte codes used in the echo buffer to represent operations
+ * or special handling of characters.  Bytes in the echo buffer that
+ * are not part of such special blocks are treated as normal character
+ * codes.
+ */
+#define ECHO_OP_START 0xff
+#define ECHO_OP_MOVE_BACK_COL 0x80
+#define ECHO_OP_SET_CANON_COL 0x81
+#define ECHO_OP_ERASE_TAB 0x82
+
 static inline unsigned char *alloc_buf(void)
 {
 	gfp_t prio = in_interrupt() ? GFP_ATOMIC : GFP_KERNEL;
@@ -169,6 +180,7 @@
  *
  *	Locking: tty_read_lock for read fields.
  */
+
 static void reset_buffer_flags(struct tty_struct *tty)
 {
 	unsigned long flags;
@@ -176,6 +188,11 @@
 	spin_lock_irqsave(&tty->read_lock, flags);
 	tty->read_head = tty->read_tail = tty->read_cnt = 0;
 	spin_unlock_irqrestore(&tty->read_lock, flags);
+
+	mutex_lock(&tty->echo_lock);
+	tty->echo_pos = tty->echo_cnt = tty->echo_overrun = 0;
+	mutex_unlock(&tty->echo_lock);
+
 	tty->canon_head = tty->canon_data = tty->erasing = 0;
 	memset(&tty->read_flags, 0, sizeof tty->read_flags);
 	n_tty_set_room(tty);
@@ -266,89 +283,118 @@
 }
 
 /**
- *	opost			-	output post processor
+ *	do_output_char			-	output one character
+ *	@c: character (or partial unicode symbol)
+ *	@tty: terminal device
+ *	@space: space available in tty driver write buffer
+ *
+ *	This is a helper function that handles one output character
+ *	(including special characters like TAB, CR, LF, etc.),
+ *	putting the results in the tty driver's write buffer.
+ *
+ *	Note that Linux currently ignores TABDLY, CRDLY, VTDLY, FFDLY
+ *	and NLDLY.  They simply aren't relevant in the world today.
+ *	If you ever need them, add them here.
+ *
+ *	Returns the number of bytes of buffer space used or -1 if
+ *	no space left.
+ *
+ *	Locking: should be called under the output_lock to protect
+ *		 the column state and space left in the buffer
+ */
+
+static int do_output_char(unsigned char c, struct tty_struct *tty, int space)
+{
+	int	spaces;
+
+	if (!space)
+		return -1;
+
+	switch (c) {
+	case '\n':
+		if (O_ONLRET(tty))
+			tty->column = 0;
+		if (O_ONLCR(tty)) {
+			if (space < 2)
+				return -1;
+			tty->canon_column = tty->column = 0;
+			tty_put_char(tty, '\r');
+			tty_put_char(tty, c);
+			return 2;
+		}
+		tty->canon_column = tty->column;
+		break;
+	case '\r':
+		if (O_ONOCR(tty) && tty->column == 0)
+			return 0;
+		if (O_OCRNL(tty)) {
+			c = '\n';
+			if (O_ONLRET(tty))
+				tty->canon_column = tty->column = 0;
+			break;
+		}
+		tty->canon_column = tty->column = 0;
+		break;
+	case '\t':
+		spaces = 8 - (tty->column & 7);
+		if (O_TABDLY(tty) == XTABS) {
+			if (space < spaces)
+				return -1;
+			tty->column += spaces;
+			tty->ops->write(tty, "        ", spaces);
+			return spaces;
+		}
+		tty->column += spaces;
+		break;
+	case '\b':
+		if (tty->column > 0)
+			tty->column--;
+		break;
+	default:
+		if (!iscntrl(c)) {
+			if (O_OLCUC(tty))
+				c = toupper(c);
+			if (!is_continuation(c, tty))
+				tty->column++;
+		}
+		break;
+	}
+
+	tty_put_char(tty, c);
+	return 1;
+}
+
+/**
+ *	process_output			-	output post processor
  *	@c: character (or partial unicode symbol)
  *	@tty: terminal device
  *
  *	Perform OPOST processing.  Returns -1 when the output device is
- *	full and the character must be retried. Note that Linux currently
- *	ignores TABDLY, CRDLY, VTDLY, FFDLY and NLDLY. They simply aren't
- *	relevant in the world today. If you ever need them, add them here.
+ *	full and the character must be retried.
  *
- *	Called from both the receive and transmit sides and can be called
- *	re-entrantly. Relies on lock_kernel() for tty->column state.
+ *	Locking: output_lock to protect column state and space left
+ *		 (also, this is called from n_tty_write under the
+ *		  tty layer write lock)
  */
 
-static int opost(unsigned char c, struct tty_struct *tty)
+static int process_output(unsigned char c, struct tty_struct *tty)
 {
-	int	space, spaces;
+	int	space, retval;
+
+	mutex_lock(&tty->output_lock);
 
 	space = tty_write_room(tty);
-	if (!space)
-		return -1;
+	retval = do_output_char(c, tty, space);
 
-	lock_kernel();
-	if (O_OPOST(tty)) {
-		switch (c) {
-		case '\n':
-			if (O_ONLRET(tty))
-				tty->column = 0;
-			if (O_ONLCR(tty)) {
-				if (space < 2) {
-					unlock_kernel();
-					return -1;
-				}
-				tty_put_char(tty, '\r');
-				tty->column = 0;
-			}
-			tty->canon_column = tty->column;
-			break;
-		case '\r':
-			if (O_ONOCR(tty) && tty->column == 0) {
-				unlock_kernel();
-				return 0;
-			}
-			if (O_OCRNL(tty)) {
-				c = '\n';
-				if (O_ONLRET(tty))
-					tty->canon_column = tty->column = 0;
-				break;
-			}
-			tty->canon_column = tty->column = 0;
-			break;
-		case '\t':
-			spaces = 8 - (tty->column & 7);
-			if (O_TABDLY(tty) == XTABS) {
-				if (space < spaces) {
-					unlock_kernel();
-					return -1;
-				}
-				tty->column += spaces;
-				tty->ops->write(tty, "        ", spaces);
-				unlock_kernel();
-				return 0;
-			}
-			tty->column += spaces;
-			break;
-		case '\b':
-			if (tty->column > 0)
-				tty->column--;
-			break;
-		default:
-			if (O_OLCUC(tty))
-				c = toupper(c);
-			if (!iscntrl(c) && !is_continuation(c, tty))
-				tty->column++;
-			break;
-		}
-	}
-	tty_put_char(tty, c);
-	unlock_kernel();
-	return 0;
+	mutex_unlock(&tty->output_lock);
+	if (retval < 0)
+		return -1;
+	else
+		return 0;
 }
 
 /**
- *	opost_block		-	block postprocess
+ *	process_output_block		-	block post processor
  *	@tty: terminal device
  *	@inbuf: user buffer
  *	@nr: number of bytes
@@ -358,26 +404,32 @@
  *	the simple cases normally found and helps to generate blocks of
  *	symbols for the console driver and thus improve performance.
  *
- *	Called from n_tty_write under the tty layer write lock. Relies
- *	on lock_kernel for the tty->column state.
+ *	Locking: output_lock to protect column state and space left
+ *		 (also, this is called from n_tty_write under the
+ *		  tty layer write lock)
  */
 
-static ssize_t opost_block(struct tty_struct *tty,
-		       const unsigned char *buf, unsigned int nr)
+static ssize_t process_output_block(struct tty_struct *tty,
+				    const unsigned char *buf, unsigned int nr)
 {
 	int	space;
 	int 	i;
 	const unsigned char *cp;
 
+	mutex_lock(&tty->output_lock);
+
 	space = tty_write_room(tty);
-	if (!space)
+	if (!space) {
+		mutex_unlock(&tty->output_lock);
 		return 0;
+	}
 	if (nr > space)
 		nr = space;
 
-	lock_kernel();
 	for (i = 0, cp = buf; i < nr; i++, cp++) {
-		switch (*cp) {
+		unsigned char c = *cp;
+
+		switch (c) {
 		case '\n':
 			if (O_ONLRET(tty))
 				tty->column = 0;
@@ -399,54 +451,403 @@
 				tty->column--;
 			break;
 		default:
-			if (O_OLCUC(tty))
-				goto break_out;
-			if (!iscntrl(*cp))
-				tty->column++;
+			if (!iscntrl(c)) {
+				if (O_OLCUC(tty))
+					goto break_out;
+				if (!is_continuation(c, tty))
+					tty->column++;
+			}
 			break;
 		}
 	}
 break_out:
-	if (tty->ops->flush_chars)
-		tty->ops->flush_chars(tty);
 	i = tty->ops->write(tty, buf, i);
-	unlock_kernel();
+
+	mutex_unlock(&tty->output_lock);
 	return i;
 }
 
+/**
+ *	process_echoes	-	write pending echo characters
+ *	@tty: terminal device
+ *
+ *	Write previously buffered echo (and other ldisc-generated)
+ *	characters to the tty.
+ *
+ *	Characters generated by the ldisc (including echoes) need to
+ *	be buffered because the driver's write buffer can fill during
+ *	heavy program output.  Echoing straight to the driver will
+ *	often fail under these conditions, causing lost characters and
+ *	resulting mismatches of ldisc state information.
+ *
+ *	Since the ldisc state must represent the characters actually sent
+ *	to the driver at the time of the write, operations like certain
+ *	changes in column state are also saved in the buffer and executed
+ *	here.
+ *
+ *	A circular fifo buffer is used so that the most recent characters
+ *	are prioritized.  Also, when control characters are echoed with a
+ *	prefixed "^", the pair is treated atomically and thus not separated.
+ *
+ *	Locking: output_lock to protect column state and space left,
+ *		 echo_lock to protect the echo buffer
+ */
+
+static void process_echoes(struct tty_struct *tty)
+{
+	int	space, nr;
+	unsigned char c;
+	unsigned char *cp, *buf_end;
+
+	if (!tty->echo_cnt)
+		return;
+
+	mutex_lock(&tty->output_lock);
+	mutex_lock(&tty->echo_lock);
+
+	space = tty_write_room(tty);
+
+	buf_end = tty->echo_buf + N_TTY_BUF_SIZE;
+	cp = tty->echo_buf + tty->echo_pos;
+	nr = tty->echo_cnt;
+	while (nr > 0) {
+		c = *cp;
+		if (c == ECHO_OP_START) {
+			unsigned char op;
+			unsigned char *opp;
+			int no_space_left = 0;
+
+			/*
+			 * If the buffer byte is the start of a multi-byte
+			 * operation, get the next byte, which is either the
+			 * op code or a control character value.
+			 */
+			opp = cp + 1;
+			if (opp == buf_end)
+				opp -= N_TTY_BUF_SIZE;
+			op = *opp;
+
+			switch (op) {
+				unsigned int num_chars, num_bs;
+
+			case ECHO_OP_ERASE_TAB:
+				if (++opp == buf_end)
+					opp -= N_TTY_BUF_SIZE;
+				num_chars = *opp;
+
+				/*
+				 * Determine how many columns to go back
+				 * in order to erase the tab.
+				 * This depends on the number of columns
+				 * used by other characters within the tab
+				 * area.  If this (modulo 8) count is from
+				 * the start of input rather than from a
+				 * previous tab, we offset by canon column.
+				 * Otherwise, tab spacing is normal.
+				 */
+				if (!(num_chars & 0x80))
+					num_chars += tty->canon_column;
+				num_bs = 8 - (num_chars & 7);
+
+				if (num_bs > space) {
+					no_space_left = 1;
+					break;
+				}
+				space -= num_bs;
+				while (num_bs--) {
+					tty_put_char(tty, '\b');
+					if (tty->column > 0)
+						tty->column--;
+				}
+				cp += 3;
+				nr -= 3;
+				break;
+
+			case ECHO_OP_SET_CANON_COL:
+				tty->canon_column = tty->column;
+				cp += 2;
+				nr -= 2;
+				break;
+
+			case ECHO_OP_MOVE_BACK_COL:
+				if (tty->column > 0)
+					tty->column--;
+				cp += 2;
+				nr -= 2;
+				break;
+
+			case ECHO_OP_START:
+				/* This is an escaped echo op start code */
+				if (!space) {
+					no_space_left = 1;
+					break;
+				}
+				tty_put_char(tty, ECHO_OP_START);
+				tty->column++;
+				space--;
+				cp += 2;
+				nr -= 2;
+				break;
+
+			default:
+				if (iscntrl(op)) {
+					if (L_ECHOCTL(tty)) {
+						/*
+						 * Ensure there is enough space
+						 * for the whole ctrl pair.
+						 */
+						if (space < 2) {
+							no_space_left = 1;
+							break;
+						}
+						tty_put_char(tty, '^');
+						tty_put_char(tty, op ^ 0100);
+						tty->column += 2;
+						space -= 2;
+					} else {
+						if (!space) {
+							no_space_left = 1;
+							break;
+						}
+						tty_put_char(tty, op);
+						space--;
+					}
+				}
+				/*
+				 * If above falls through, this was an
+				 * undefined op.
+				 */
+				cp += 2;
+				nr -= 2;
+			}
+
+			if (no_space_left)
+				break;
+		} else {
+			int retval;
+
+			retval = do_output_char(c, tty, space);
+			if (retval < 0)
+				break;
+			space -= retval;
+			cp += 1;
+			nr -= 1;
+		}
+
+		/* When end of circular buffer reached, wrap around */
+		if (cp >= buf_end)
+			cp -= N_TTY_BUF_SIZE;
+	}
+
+	if (nr == 0) {
+		tty->echo_pos = 0;
+		tty->echo_cnt = 0;
+		tty->echo_overrun = 0;
+	} else {
+		int num_processed = tty->echo_cnt - nr;
+		tty->echo_pos += num_processed;
+		tty->echo_pos &= N_TTY_BUF_SIZE - 1;
+		tty->echo_cnt = nr;
+		if (num_processed > 0)
+			tty->echo_overrun = 0;
+	}
+
+	mutex_unlock(&tty->echo_lock);
+	mutex_unlock(&tty->output_lock);
+
+	if (tty->ops->flush_chars)
+		tty->ops->flush_chars(tty);
+}
 
 /**
- *	echo_char	-	echo characters
+ *	add_echo_byte	-	add a byte to the echo buffer
+ *	@c: unicode byte to echo
+ *	@tty: terminal device
+ *
+ *	Add a character or operation byte to the echo buffer.
+ *
+ *	Should be called under the echo lock to protect the echo buffer.
+ */
+
+static void add_echo_byte(unsigned char c, struct tty_struct *tty)
+{
+	int	new_byte_pos;
+
+	if (tty->echo_cnt == N_TTY_BUF_SIZE) {
+		/* Circular buffer is already at capacity */
+		new_byte_pos = tty->echo_pos;
+
+		/*
+		 * Since the buffer start position needs to be advanced,
+		 * be sure to step by a whole operation byte group.
+		 */
+		if (tty->echo_buf[tty->echo_pos] == ECHO_OP_START) {
+			if (tty->echo_buf[(tty->echo_pos + 1) &
+					  (N_TTY_BUF_SIZE - 1)] ==
+						ECHO_OP_ERASE_TAB) {
+				tty->echo_pos += 3;
+				tty->echo_cnt -= 2;
+			} else {
+				tty->echo_pos += 2;
+				tty->echo_cnt -= 1;
+			}
+		} else {
+			tty->echo_pos++;
+		}
+		tty->echo_pos &= N_TTY_BUF_SIZE - 1;
+
+		tty->echo_overrun = 1;
+	} else {
+		new_byte_pos = tty->echo_pos + tty->echo_cnt;
+		new_byte_pos &= N_TTY_BUF_SIZE - 1;
+		tty->echo_cnt++;
+	}
+
+	tty->echo_buf[new_byte_pos] = c;
+}
+
+/**
+ *	echo_move_back_col	-	add operation to move back a column
+ *	@tty: terminal device
+ *
+ *	Add an operation to the echo buffer to move back one column.
+ *
+ *	Locking: echo_lock to protect the echo buffer
+ */
+
+static void echo_move_back_col(struct tty_struct *tty)
+{
+	mutex_lock(&tty->echo_lock);
+
+	add_echo_byte(ECHO_OP_START, tty);
+	add_echo_byte(ECHO_OP_MOVE_BACK_COL, tty);
+
+	mutex_unlock(&tty->echo_lock);
+}
+
+/**
+ *	echo_set_canon_col	-	add operation to set the canon column
+ *	@tty: terminal device
+ *
+ *	Add an operation to the echo buffer to set the canon column
+ *	to the current column.
+ *
+ *	Locking: echo_lock to protect the echo buffer
+ */
+
+static void echo_set_canon_col(struct tty_struct *tty)
+{
+	mutex_lock(&tty->echo_lock);
+
+	add_echo_byte(ECHO_OP_START, tty);
+	add_echo_byte(ECHO_OP_SET_CANON_COL, tty);
+
+	mutex_unlock(&tty->echo_lock);
+}
+
+/**
+ *	echo_erase_tab	-	add operation to erase a tab
+ *	@num_chars: number of character columns already used
+ *	@after_tab: true if num_chars starts after a previous tab
+ *	@tty: terminal device
+ *
+ *	Add an operation to the echo buffer to erase a tab.
+ *
+ *	Called by the eraser function, which knows how many character
+ *	columns have been used since either a previous tab or the start
+ *	of input.  This information will be used later, along with
+ *	canon column (if applicable), to go back the correct number
+ *	of columns.
+ *
+ *	Locking: echo_lock to protect the echo buffer
+ */
+
+static void echo_erase_tab(unsigned int num_chars, int after_tab,
+			   struct tty_struct *tty)
+{
+	mutex_lock(&tty->echo_lock);
+
+	add_echo_byte(ECHO_OP_START, tty);
+	add_echo_byte(ECHO_OP_ERASE_TAB, tty);
+
+	/* We only need to know this modulo 8 (tab spacing) */
+	num_chars &= 7;
+
+	/* Set the high bit as a flag if num_chars is after a previous tab */
+	if (after_tab)
+		num_chars |= 0x80;
+
+	add_echo_byte(num_chars, tty);
+
+	mutex_unlock(&tty->echo_lock);
+}
+
+/**
+ *	echo_char_raw	-	echo a character raw
  *	@c: unicode byte to echo
  *	@tty: terminal device
  *
  *	Echo user input back onto the screen. This must be called only when
  *	L_ECHO(tty) is true. Called from the driver receive_buf path.
  *
- *	Relies on BKL for tty column locking
+ *	This variant does not treat control characters specially.
+ *
+ *	Locking: echo_lock to protect the echo buffer
+ */
+
+static void echo_char_raw(unsigned char c, struct tty_struct *tty)
+{
+	mutex_lock(&tty->echo_lock);
+
+	if (c == ECHO_OP_START) {
+		add_echo_byte(ECHO_OP_START, tty);
+		add_echo_byte(ECHO_OP_START, tty);
+	} else {
+		add_echo_byte(c, tty);
+	}
+
+	mutex_unlock(&tty->echo_lock);
+}
+
+/**
+ *	echo_char	-	echo a character
+ *	@c: unicode byte to echo
+ *	@tty: terminal device
+ *
+ *	Echo user input back onto the screen. This must be called only when
+ *	L_ECHO(tty) is true. Called from the driver receive_buf path.
+ *
+ *	This variant tags control characters to be possibly echoed as
+ *	"^X" (where X is the letter representing the control char).
+ *
+ *	Locking: echo_lock to protect the echo buffer
  */
 
 static void echo_char(unsigned char c, struct tty_struct *tty)
 {
-	if (L_ECHOCTL(tty) && iscntrl(c) && c != '\t') {
-		tty_put_char(tty, '^');
-		tty_put_char(tty, c ^ 0100);
-		tty->column += 2;
-	} else
-		opost(c, tty);
+	mutex_lock(&tty->echo_lock);
+
+	if (c == ECHO_OP_START) {
+		add_echo_byte(ECHO_OP_START, tty);
+		add_echo_byte(ECHO_OP_START, tty);
+	} else {
+		if (iscntrl(c) && c != '\t')
+			add_echo_byte(ECHO_OP_START, tty);
+		add_echo_byte(c, tty);
+	}
+
+	mutex_unlock(&tty->echo_lock);
 }
 
 /**
- *	finsh_erasing		-	complete erase
+ *	finish_erasing		-	complete erase
  *	@tty: tty doing the erase
- *
- *	Relies on BKL for tty column locking
  */
+
 static inline void finish_erasing(struct tty_struct *tty)
 {
 	if (tty->erasing) {
-		tty_put_char(tty, '/');
-		tty->column++;
+		echo_char_raw('/', tty);
 		tty->erasing = 0;
 	}
 }
@@ -460,7 +861,7 @@
  *	present in the stream from the driver layer. Handles the complexities
  *	of UTF-8 multibyte symbols.
  *
- *	Locking: read_lock for tty buffers, BKL for column/erasing state
+ *	Locking: read_lock for tty buffers
  */
 
 static void eraser(unsigned char c, struct tty_struct *tty)
@@ -471,7 +872,7 @@
 
 	/* FIXME: locking needed ? */
 	if (tty->read_head == tty->canon_head) {
-		/* opost('\a', tty); */		/* what do you think? */
+		/* process_output('\a', tty); */ /* what do you think? */
 		return;
 	}
 	if (c == ERASE_CHAR(tty))
@@ -497,7 +898,7 @@
 			echo_char(KILL_CHAR(tty), tty);
 			/* Add a newline if ECHOK is on and ECHOKE is off. */
 			if (L_ECHOK(tty))
-				opost('\n', tty);
+				echo_char_raw('\n', tty);
 			return;
 		}
 		kill_type = KILL;
@@ -533,67 +934,61 @@
 		if (L_ECHO(tty)) {
 			if (L_ECHOPRT(tty)) {
 				if (!tty->erasing) {
-					tty_put_char(tty, '\\');
-					tty->column++;
+					echo_char_raw('\\', tty);
 					tty->erasing = 1;
 				}
 				/* if cnt > 1, output a multi-byte character */
 				echo_char(c, tty);
 				while (--cnt > 0) {
 					head = (head+1) & (N_TTY_BUF_SIZE-1);
-					tty_put_char(tty, tty->read_buf[head]);
+					echo_char_raw(tty->read_buf[head], tty);
+					echo_move_back_col(tty);
 				}
 			} else if (kill_type == ERASE && !L_ECHOE(tty)) {
 				echo_char(ERASE_CHAR(tty), tty);
 			} else if (c == '\t') {
-				unsigned int col = tty->canon_column;
-				unsigned long tail = tty->canon_head;
+				unsigned int num_chars = 0;
+				int after_tab = 0;
+				unsigned long tail = tty->read_head;
 
-				/* Find the column of the last char. */
-				while (tail != tty->read_head) {
+				/*
+				 * Count the columns used for characters
+				 * since the start of input or after a
+				 * previous tab.
+				 * This info is used to go back the correct
+				 * number of columns.
+				 */
+				while (tail != tty->canon_head) {
+					tail = (tail-1) & (N_TTY_BUF_SIZE-1);
 					c = tty->read_buf[tail];
-					if (c == '\t')
-						col = (col | 7) + 1;
-					else if (iscntrl(c)) {
+					if (c == '\t') {
+						after_tab = 1;
+						break;
+					} else if (iscntrl(c)) {
 						if (L_ECHOCTL(tty))
-							col += 2;
-					} else if (!is_continuation(c, tty))
-						col++;
-					tail = (tail+1) & (N_TTY_BUF_SIZE-1);
+							num_chars += 2;
+					} else if (!is_continuation(c, tty)) {
+						num_chars++;
+					}
 				}
-
-				/* should never happen */
-				if (tty->column > 0x80000000)
-					tty->column = 0;
-
-				/* Now backup to that column. */
-				while (tty->column > col) {
-					/* Can't use opost here. */
-					tty_put_char(tty, '\b');
-					if (tty->column > 0)
-						tty->column--;
-				}
+				echo_erase_tab(num_chars, after_tab, tty);
 			} else {
 				if (iscntrl(c) && L_ECHOCTL(tty)) {
-					tty_put_char(tty, '\b');
-					tty_put_char(tty, ' ');
-					tty_put_char(tty, '\b');
-					if (tty->column > 0)
-						tty->column--;
+					echo_char_raw('\b', tty);
+					echo_char_raw(' ', tty);
+					echo_char_raw('\b', tty);
 				}
 				if (!iscntrl(c) || L_ECHOCTL(tty)) {
-					tty_put_char(tty, '\b');
-					tty_put_char(tty, ' ');
-					tty_put_char(tty, '\b');
-					if (tty->column > 0)
-						tty->column--;
+					echo_char_raw('\b', tty);
+					echo_char_raw(' ', tty);
+					echo_char_raw('\b', tty);
 				}
 			}
 		}
 		if (kill_type == ERASE)
 			break;
 	}
-	if (tty->read_head == tty->canon_head)
+	if (tty->read_head == tty->canon_head && L_ECHO(tty))
 		finish_erasing(tty);
 }
 
@@ -712,6 +1107,7 @@
 static inline void n_tty_receive_char(struct tty_struct *tty, unsigned char c)
 {
 	unsigned long flags;
+	int parmrk;
 
 	if (tty->raw) {
 		put_tty_queue(c, tty);
@@ -721,18 +1117,21 @@
 	if (I_ISTRIP(tty))
 		c &= 0x7f;
 	if (I_IUCLC(tty) && L_IEXTEN(tty))
-		c=tolower(c);
+		c = tolower(c);
 
 	if (tty->stopped && !tty->flow_stopped && I_IXON(tty) &&
-	    ((I_IXANY(tty) && c != START_CHAR(tty) && c != STOP_CHAR(tty)) ||
-	     c == INTR_CHAR(tty) || c == QUIT_CHAR(tty) || c == SUSP_CHAR(tty)))
+	    I_IXANY(tty) && c != START_CHAR(tty) && c != STOP_CHAR(tty) &&
+	    c != INTR_CHAR(tty) && c != QUIT_CHAR(tty) && c != SUSP_CHAR(tty)) {
 		start_tty(tty);
+		process_echoes(tty);
+	}
 
 	if (tty->closing) {
 		if (I_IXON(tty)) {
-			if (c == START_CHAR(tty))
+			if (c == START_CHAR(tty)) {
 				start_tty(tty);
-			else if (c == STOP_CHAR(tty))
+				process_echoes(tty);
+			} else if (c == STOP_CHAR(tty))
 				stop_tty(tty);
 		}
 		return;
@@ -745,19 +1144,23 @@
 	 * up.
 	 */
 	if (!test_bit(c, tty->process_char_map) || tty->lnext) {
-		finish_erasing(tty);
 		tty->lnext = 0;
+		parmrk = (c == (unsigned char) '\377' && I_PARMRK(tty)) ? 1 : 0;
+		if (tty->read_cnt >= (N_TTY_BUF_SIZE - parmrk - 1)) {
+			/* beep if no space */
+			if (L_ECHO(tty))
+				process_output('\a', tty);
+			return;
+		}
 		if (L_ECHO(tty)) {
-			if (tty->read_cnt >= N_TTY_BUF_SIZE-1) {
-				tty_put_char(tty, '\a'); /* beep if no space */
-				return;
-			}
+			finish_erasing(tty);
 			/* Record the column of first canon char. */
 			if (tty->canon_head == tty->read_head)
-				tty->canon_column = tty->column;
+				echo_set_canon_col(tty);
 			echo_char(c, tty);
+			process_echoes(tty);
 		}
-		if (I_PARMRK(tty) && c == (unsigned char) '\377')
+		if (parmrk)
 			put_tty_queue(c, tty);
 		put_tty_queue(c, tty);
 		return;
@@ -766,6 +1169,7 @@
 	if (I_IXON(tty)) {
 		if (c == START_CHAR(tty)) {
 			start_tty(tty);
+			process_echoes(tty);
 			return;
 		}
 		if (c == STOP_CHAR(tty)) {
@@ -786,7 +1190,6 @@
 		if (c == SUSP_CHAR(tty)) {
 send_signal:
 			/*
-			 * Echo character, and then send the signal.
 			 * Note that we do not use isig() here because we want
 			 * the order to be:
 			 * 1) flush, 2) echo, 3) signal
@@ -795,8 +1198,12 @@
 				n_tty_flush_buffer(tty);
 				tty_driver_flush_buffer(tty);
 			}
-			if (L_ECHO(tty))
+			if (I_IXON(tty))
+				start_tty(tty);
+			if (L_ECHO(tty)) {
 				echo_char(c, tty);
+				process_echoes(tty);
+			}
 			if (tty->pgrp)
 				kill_pgrp(tty->pgrp, signal, 1);
 			return;
@@ -815,6 +1222,7 @@
 		if (c == ERASE_CHAR(tty) || c == KILL_CHAR(tty) ||
 		    (c == WERASE_CHAR(tty) && L_IEXTEN(tty))) {
 			eraser(c, tty);
+			process_echoes(tty);
 			return;
 		}
 		if (c == LNEXT_CHAR(tty) && L_IEXTEN(tty)) {
@@ -822,8 +1230,9 @@
 			if (L_ECHO(tty)) {
 				finish_erasing(tty);
 				if (L_ECHOCTL(tty)) {
-					tty_put_char(tty, '^');
-					tty_put_char(tty, '\b');
+					echo_char_raw('^', tty);
+					echo_char_raw('\b', tty);
+					process_echoes(tty);
 				}
 			}
 			return;
@@ -834,22 +1243,29 @@
 
 			finish_erasing(tty);
 			echo_char(c, tty);
-			opost('\n', tty);
+			echo_char_raw('\n', tty);
 			while (tail != tty->read_head) {
 				echo_char(tty->read_buf[tail], tty);
 				tail = (tail+1) & (N_TTY_BUF_SIZE-1);
 			}
+			process_echoes(tty);
 			return;
 		}
 		if (c == '\n') {
+			if (tty->read_cnt >= N_TTY_BUF_SIZE) {
+				if (L_ECHO(tty))
+					process_output('\a', tty);
+				return;
+			}
 			if (L_ECHO(tty) || L_ECHONL(tty)) {
-				if (tty->read_cnt >= N_TTY_BUF_SIZE-1)
-					tty_put_char(tty, '\a');
-				opost('\n', tty);
+				echo_char_raw('\n', tty);
+				process_echoes(tty);
 			}
 			goto handle_newline;
 		}
 		if (c == EOF_CHAR(tty)) {
+			if (tty->read_cnt >= N_TTY_BUF_SIZE)
+				return;
 			if (tty->canon_head != tty->read_head)
 				set_bit(TTY_PUSH, &tty->flags);
 			c = __DISABLED_CHAR;
@@ -857,22 +1273,28 @@
 		}
 		if ((c == EOL_CHAR(tty)) ||
 		    (c == EOL2_CHAR(tty) && L_IEXTEN(tty))) {
+			parmrk = (c == (unsigned char) '\377' && I_PARMRK(tty))
+				 ? 1 : 0;
+			if (tty->read_cnt >= (N_TTY_BUF_SIZE - parmrk)) {
+				if (L_ECHO(tty))
+					process_output('\a', tty);
+				return;
+			}
 			/*
 			 * XXX are EOL_CHAR and EOL2_CHAR echoed?!?
 			 */
 			if (L_ECHO(tty)) {
-				if (tty->read_cnt >= N_TTY_BUF_SIZE-1)
-					tty_put_char(tty, '\a');
 				/* Record the column of first canon char. */
 				if (tty->canon_head == tty->read_head)
-					tty->canon_column = tty->column;
+					echo_set_canon_col(tty);
 				echo_char(c, tty);
+				process_echoes(tty);
 			}
 			/*
 			 * XXX does PARMRK doubling happen for
 			 * EOL_CHAR and EOL2_CHAR?
 			 */
-			if (I_PARMRK(tty) && c == (unsigned char) '\377')
+			if (parmrk)
 				put_tty_queue(c, tty);
 
 handle_newline:
@@ -889,23 +1311,27 @@
 		}
 	}
 
-	finish_erasing(tty);
+	parmrk = (c == (unsigned char) '\377' && I_PARMRK(tty)) ? 1 : 0;
+	if (tty->read_cnt >= (N_TTY_BUF_SIZE - parmrk - 1)) {
+		/* beep if no space */
+		if (L_ECHO(tty))
+			process_output('\a', tty);
+		return;
+	}
 	if (L_ECHO(tty)) {
-		if (tty->read_cnt >= N_TTY_BUF_SIZE-1) {
-			tty_put_char(tty, '\a'); /* beep if no space */
-			return;
-		}
+		finish_erasing(tty);
 		if (c == '\n')
-			opost('\n', tty);
+			echo_char_raw('\n', tty);
 		else {
 			/* Record the column of first canon char. */
 			if (tty->canon_head == tty->read_head)
-				tty->canon_column = tty->column;
+				echo_set_canon_col(tty);
 			echo_char(c, tty);
 		}
+		process_echoes(tty);
 	}
 
-	if (I_PARMRK(tty) && c == (unsigned char) '\377')
+	if (parmrk)
 		put_tty_queue(c, tty);
 
 	put_tty_queue(c, tty);
@@ -923,10 +1349,11 @@
 
 static void n_tty_write_wakeup(struct tty_struct *tty)
 {
-	if (tty->fasync) {
-		set_bit(TTY_DO_WRITE_WAKEUP, &tty->flags);
+	/* Write out any echoed characters that are still pending */
+	process_echoes(tty);
+
+	if (tty->fasync && test_and_clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags))
 		kill_fasync(&tty->fasync, SIGIO, POLL_OUT);
-	}
 }
 
 /**
@@ -1134,6 +1561,10 @@
 		free_buf(tty->read_buf);
 		tty->read_buf = NULL;
 	}
+	if (tty->echo_buf) {
+		free_buf(tty->echo_buf);
+		tty->echo_buf = NULL;
+	}
 }
 
 /**
@@ -1151,13 +1582,19 @@
 	if (!tty)
 		return -EINVAL;
 
-	/* This one is ugly. Currently a malloc failure here can panic */
+	/* These are ugly. Currently a malloc failure here can panic */
 	if (!tty->read_buf) {
 		tty->read_buf = alloc_buf();
 		if (!tty->read_buf)
 			return -ENOMEM;
 	}
+	if (!tty->echo_buf) {
+		tty->echo_buf = alloc_buf();
+		if (!tty->echo_buf)
+			return -ENOMEM;
+	}
 	memset(tty->read_buf, 0, N_TTY_BUF_SIZE);
+	memset(tty->echo_buf, 0, N_TTY_BUF_SIZE);
 	reset_buffer_flags(tty);
 	tty->column = 0;
 	n_tty_set_termios(tty, NULL);
@@ -1487,16 +1924,23 @@
  *	@buf: userspace buffer pointer
  *	@nr: size of I/O
  *
- *	Write function of the terminal device. This is serialized with
+ *	Write function of the terminal device.  This is serialized with
  *	respect to other write callers but not to termios changes, reads
- *	and other such events. We must be careful with N_TTY as the receive
- *	code will echo characters, thus calling driver write methods.
+ *	and other such events.  Since the receive code will echo characters,
+ *	thus calling driver write methods, the output_lock is used in
+ *	the output processing functions called here as well as in the
+ *	echo processing function to protect the column state and space
+ *	left in the buffer.
  *
  *	This code must be sure never to sleep through a hangup.
+ *
+ *	Locking: output_lock to protect column state and space left
+ *		 (note that the process_output*() functions take this
+ *		  lock themselves)
  */
 
 static ssize_t n_tty_write(struct tty_struct *tty, struct file *file,
-			  const unsigned char *buf, size_t nr)
+			   const unsigned char *buf, size_t nr)
 {
 	const unsigned char *b = buf;
 	DECLARE_WAITQUEUE(wait, current);
@@ -1510,6 +1954,9 @@
 			return retval;
 	}
 
+	/* Write out any echoed characters that are still pending */
+	process_echoes(tty);
+
 	add_wait_queue(&tty->write_wait, &wait);
 	while (1) {
 		set_current_state(TASK_INTERRUPTIBLE);
@@ -1523,7 +1970,7 @@
 		}
 		if (O_OPOST(tty) && !(test_bit(TTY_HW_COOK_OUT, &tty->flags))) {
 			while (nr > 0) {
-				ssize_t num = opost_block(tty, b, nr);
+				ssize_t num = process_output_block(tty, b, nr);
 				if (num < 0) {
 					if (num == -EAGAIN)
 						break;
@@ -1535,7 +1982,7 @@
 				if (nr == 0)
 					break;
 				c = *b;
-				if (opost(c, tty) < 0)
+				if (process_output(c, tty) < 0)
 					break;
 				b++; nr--;
 			}
@@ -1565,6 +2012,8 @@
 break_out:
 	__set_current_state(TASK_RUNNING);
 	remove_wait_queue(&tty->write_wait, &wait);
+	if (b - buf != nr && tty->fasync)
+		set_bit(TTY_DO_WRITE_WAKEUP, &tty->flags);
 	return (b - buf) ? b - buf : retval;
 }
 
@@ -1663,4 +2112,3 @@
 	.receive_buf     = n_tty_receive_buf,
 	.write_wakeup    = n_tty_write_wakeup
 };
-
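
The echo buffer introduced above stores ordinary characters literally and reserves ECHO_OP_START (0xff) to introduce multi-byte operations; a literal 0xff is escaped by doubling it, and ECHO_OP_ERASE_TAB carries a third byte holding the column count modulo 8, with bit 7 set when that count begins after a previous tab. A small self-contained sketch of the encoding, using the same constants but a hypothetical encode_erase_tab() helper mirroring echo_erase_tab():

#include <stdio.h>

#define ECHO_OP_START         0xff
#define ECHO_OP_MOVE_BACK_COL 0x80
#define ECHO_OP_SET_CANON_COL 0x81
#define ECHO_OP_ERASE_TAB     0x82

/* Encode one "erase tab" operation the way echo_erase_tab() does. */
static int encode_erase_tab(unsigned char *buf, unsigned int num_chars,
			    int after_tab)
{
	buf[0] = ECHO_OP_START;
	buf[1] = ECHO_OP_ERASE_TAB;
	num_chars &= 7;			/* only needed modulo tab spacing */
	if (after_tab)
		num_chars |= 0x80;	/* flag: count starts after a tab */
	buf[2] = num_chars;
	return 3;
}

int main(void)
{
	unsigned char buf[3];
	int n = encode_erase_tab(buf, 13, 0);

	printf("%d bytes: %02x %02x %02x\n", n, buf[0], buf[1], buf[2]);
	return 0;
}
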
diff --git a/drivers/char/nozomi.c b/drivers/char/nozomi.c
index 9a34a19..d6102b6 100644
--- a/drivers/char/nozomi.c
+++ b/drivers/char/nozomi.c
@@ -353,6 +353,7 @@
 
 /* This holds all information that is needed regarding a port */
 struct port {
+	struct tty_port port;
 	u8 update_flow_control;
 	struct ctrl_ul ctrl_ul;
 	struct ctrl_dl ctrl_dl;
@@ -365,8 +366,6 @@
 	u8 toggle_ul;
 	u16 token_dl;
 
-	struct tty_struct *tty;
-	int tty_open_count;
 	/* mutex to ensure one access patch to this port */
 	struct mutex tty_sem;
 	wait_queue_head_t tty_wait;
@@ -788,14 +787,14 @@
  * Return 1 - send buffer to card and ack.
  * Return 0 - don't ack, don't send buffer to card.
  */
-static int send_data(enum port_type index, const struct nozomi *dc)
+static int send_data(enum port_type index, struct nozomi *dc)
 {
 	u32 size = 0;
-	const struct port *port = &dc->port[index];
+	struct port *port = &dc->port[index];
 	const u8 toggle = port->toggle_ul;
 	void __iomem *addr = port->ul_addr[toggle];
 	const u32 ul_size = port->ul_size[toggle];
-	struct tty_struct *tty = port->tty;
+	struct tty_struct *tty = tty_port_tty_get(&port->port);
 
 	/* Get data from tty and place in buf for now */
 	size = __kfifo_get(port->fifo_ul, dc->send_buf,
@@ -803,6 +802,7 @@
 
 	if (size == 0) {
 		DBG4("No more data to send, disable link:");
+		tty_kref_put(tty);
 		return 0;
 	}
 
@@ -815,6 +815,7 @@
 	if (tty)
 		tty_wakeup(tty);
 
+	tty_kref_put(tty);
 	return 1;
 }
 
@@ -826,7 +827,7 @@
 	u32 offset = 4;
 	struct port *port = &dc->port[index];
 	void __iomem *addr = port->dl_addr[port->toggle_dl];
-	struct tty_struct *tty = port->tty;
+	struct tty_struct *tty = tty_port_tty_get(&port->port);
 	int i;
 
 	if (unlikely(!tty)) {
@@ -870,7 +871,7 @@
 	}
 
 	set_bit(index, &dc->flip);
-
+	tty_kref_put(tty);
 	return 1;
 }
 
@@ -1276,9 +1277,15 @@
 
 exit_handler:
 	spin_unlock(&dc->spin_mutex);
-	for (a = 0; a < NOZOMI_MAX_PORTS; a++)
-		if (test_and_clear_bit(a, &dc->flip))
-			tty_flip_buffer_push(dc->port[a].tty);
+	for (a = 0; a < NOZOMI_MAX_PORTS; a++) {
+		struct tty_struct *tty;
+		if (test_and_clear_bit(a, &dc->flip)) {
+			tty = tty_port_tty_get(&dc->port[a].port);
+			if (tty)
+				tty_flip_buffer_push(tty);
+			tty_kref_put(tty);
+		}
+	}
 	return IRQ_HANDLED;
 none:
 	spin_unlock(&dc->spin_mutex);
@@ -1453,12 +1460,10 @@
 
 	for (i = 0; i < MAX_PORT; i++) {
 		mutex_init(&dc->port[i].tty_sem);
-		dc->port[i].tty_open_count = 0;
-		dc->port[i].tty = NULL;
+		tty_port_init(&dc->port[i].port);
 		tty_register_device(ntty_driver, dc->index_start + i,
 							&pdev->dev);
 	}
-
 	return 0;
 
 err_free_sbuf:
@@ -1482,14 +1487,16 @@
 
 	flush_scheduled_work();
 
-	for (i = 0; i < MAX_PORT; ++i)
-		if (dc->port[i].tty && \
-				list_empty(&dc->port[i].tty->hangup_work.entry))
-			tty_hangup(dc->port[i].tty);
-
+	for (i = 0; i < MAX_PORT; ++i) {
+		struct tty_struct *tty = tty_port_tty_get(&dc->port[i].port);
+		if (tty && list_empty(&tty->hangup_work.entry))
+			tty_hangup(tty);
+		tty_kref_put(tty);
+	}
+	/* Racy below - surely should wait for scheduled work to be done or
+	   complete off a hangup method ? */
 	while (dc->open_ttys)
 		msleep(1);
-
 	for (i = dc->index_start; i < dc->index_start + MAX_PORT; ++i)
 		tty_unregister_device(ntty_driver, i);
 }
@@ -1579,23 +1586,22 @@
 	if (mutex_lock_interruptible(&port->tty_sem))
 		return -ERESTARTSYS;
 
-	port->tty_open_count++;
+	port->port.count++;
 	dc->open_ttys++;
 
 	/* Enable interrupt downlink for channel */
-	if (port->tty_open_count == 1) {
+	if (port->port.count == 1) {
+		/* FIXME: is this needed now ? */
 		tty->low_latency = 1;
 		tty->driver_data = port;
-		port->tty = tty;
+		tty_port_tty_set(&port->port, tty);
 		DBG1("open: %d", port->token_dl);
 		spin_lock_irqsave(&dc->spin_mutex, flags);
 		dc->last_ier = dc->last_ier | port->token_dl;
 		writew(dc->last_ier, dc->reg_ier);
 		spin_unlock_irqrestore(&dc->spin_mutex, flags);
 	}
-
 	mutex_unlock(&port->tty_sem);
-
 	return 0;
 }
 
@@ -1606,31 +1612,30 @@
 static void ntty_close(struct tty_struct *tty, struct file *file)
 {
 	struct nozomi *dc = get_dc_by_tty(tty);
-	struct port *port = tty->driver_data;
+	struct port *nport = tty->driver_data;
+	struct tty_port *port = &nport->port;
 	unsigned long flags;
 
-	if (!dc || !port)
+	if (!dc || !nport)
 		return;
 
-	if (mutex_lock_interruptible(&port->tty_sem))
-		return;
+	/* Users cannot interrupt a close */
+	mutex_lock(&nport->tty_sem);
 
-	if (!port->tty_open_count)
-		goto exit;
+	WARN_ON(!port->count);
 
 	dc->open_ttys--;
-	port->tty_open_count--;
+	port->count--;
+	tty_port_tty_set(port, NULL);
 
-	if (port->tty_open_count == 0) {
-		DBG1("close: %d", port->token_dl);
+	if (port->count == 0) {
+		DBG1("close: %d", nport->token_dl);
 		spin_lock_irqsave(&dc->spin_mutex, flags);
-		dc->last_ier &= ~(port->token_dl);
+		dc->last_ier &= ~(nport->token_dl);
 		writew(dc->last_ier, dc->reg_ier);
 		spin_unlock_irqrestore(&dc->spin_mutex, flags);
 	}
-
-exit:
-	mutex_unlock(&port->tty_sem);
+	mutex_unlock(&nport->tty_sem);
 }
 
 /*
@@ -1660,7 +1665,7 @@
 		return -EAGAIN;
 	}
 
-	if (unlikely(!port->tty_open_count)) {
+	if (unlikely(!port->port.count)) {
 		DBG1(" ");
 		goto exit;
 	}
@@ -1710,7 +1715,7 @@
 	if (!mutex_trylock(&port->tty_sem))
 		return 0;
 
-	if (!port->tty_open_count)
+	if (!port->port.count)
 		goto exit;
 
 	room = port->fifo_ul->size - __kfifo_len(port->fifo_ul);
@@ -1866,7 +1871,7 @@
 		goto exit_in_buffer;
 	}
 
-	if (unlikely(!port->tty_open_count)) {
+	if (unlikely(!port->port.count)) {
 		dev_err(&dc->pdev->dev, "No tty open?\n");
 		rval = -ENODEV;
 		goto exit_in_buffer;
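
The nozomi hunks replace the raw port->tty pointer with refcounted access. The pattern repeated above is: take a reference with tty_port_tty_get(), use the tty only if it is non-NULL, then drop the reference with tty_kref_put(), which tolerates a NULL argument. In sketch form (dc and i as in the interrupt handler above):

	struct tty_struct *tty = tty_port_tty_get(&dc->port[i].port);

	if (tty)
		tty_flip_buffer_push(tty);	/* safe while we hold the ref */
	tty_kref_put(tty);			/* NULL-safe put */
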
diff --git a/drivers/char/pcmcia/synclink_cs.c b/drivers/char/pcmcia/synclink_cs.c
index 4d64a02..dc073e1 100644
--- a/drivers/char/pcmcia/synclink_cs.c
+++ b/drivers/char/pcmcia/synclink_cs.c
@@ -138,20 +138,15 @@
  */
 
 typedef struct _mgslpc_info {
+	struct tty_port		port;
 	void *if_ptr;	/* General purpose pointer (used by SPPP) */
 	int			magic;
-	int			flags;
-	int			count;		/* count of opens */
 	int			line;
-	unsigned short		close_delay;
-	unsigned short		closing_wait;	/* time to wait before closing */
 
 	struct mgsl_icount	icount;
 
-	struct tty_struct 	*tty;
 	int			timeout;
 	int			x_char;		/* xon/xoff character */
-	int			blocked_open;	/* # of blocked opens */
 	unsigned char		read_status_mask;
 	unsigned char		ignore_status_mask;
 
@@ -170,9 +165,6 @@
 	int            rx_buf_count;   /* total number of rx buffers */
 	int            rx_frame_count; /* number of full rx buffers */
 
-	wait_queue_head_t	open_wait;
-	wait_queue_head_t	close_wait;
-
 	wait_queue_head_t	status_event_wait_q;
 	wait_queue_head_t	event_wait_q;
 	struct timer_list	tx_timer;	/* HDLC transmit timeout timer */
@@ -375,7 +367,7 @@
 static void rx_start(MGSLPC_INFO *info);
 static void rx_stop(MGSLPC_INFO *info);
 
-static void tx_start(MGSLPC_INFO *info);
+static void tx_start(MGSLPC_INFO *info, struct tty_struct *tty);
 static void tx_stop(MGSLPC_INFO *info);
 static void tx_set_idle(MGSLPC_INFO *info);
 
@@ -389,7 +381,8 @@
 
 static void tx_timeout(unsigned long context);
 
-static int ioctl_common(MGSLPC_INFO *info, unsigned int cmd, unsigned long arg);
+static int carrier_raised(struct tty_port *port);
+static void raise_dtr_rts(struct tty_port *port);
 
 #if SYNCLINK_GENERIC_HDLC
 #define dev_to_port(D) (dev_to_hdlc(D)->priv)
@@ -410,7 +403,7 @@
 static void mgslpc_add_device(MGSLPC_INFO *info);
 static void mgslpc_remove_device(MGSLPC_INFO *info);
 
-static bool rx_get_frame(MGSLPC_INFO *info);
+static bool rx_get_frame(MGSLPC_INFO *info, struct tty_struct *tty);
 static void rx_reset_buffers(MGSLPC_INFO *info);
 static int  rx_alloc_buffers(MGSLPC_INFO *info);
 static void rx_free_buffers(MGSLPC_INFO *info);
@@ -421,7 +414,7 @@
  * Bottom half interrupt handlers
  */
 static void bh_handler(struct work_struct *work);
-static void bh_transmit(MGSLPC_INFO *info);
+static void bh_transmit(MGSLPC_INFO *info, struct tty_struct *tty);
 static void bh_status(MGSLPC_INFO *info);
 
 /*
@@ -432,10 +425,10 @@
 		    unsigned int set, unsigned int clear);
 static int get_stats(MGSLPC_INFO *info, struct mgsl_icount __user *user_icount);
 static int get_params(MGSLPC_INFO *info, MGSL_PARAMS __user *user_params);
-static int set_params(MGSLPC_INFO *info, MGSL_PARAMS __user *new_params);
+static int set_params(MGSLPC_INFO *info, MGSL_PARAMS __user *new_params, struct tty_struct *tty);
 static int get_txidle(MGSLPC_INFO *info, int __user *idle_mode);
 static int set_txidle(MGSLPC_INFO *info, int idle_mode);
-static int set_txenable(MGSLPC_INFO *info, int enable);
+static int set_txenable(MGSLPC_INFO *info, int enable, struct tty_struct *tty);
 static int tx_abort(MGSLPC_INFO *info);
 static int set_rxenable(MGSLPC_INFO *info, int enable);
 static int wait_events(MGSLPC_INFO *info, int __user *mask);
@@ -474,7 +467,7 @@
 /* number of characters left in xmit buffer before we ask for more */
 #define WAKEUP_CHARS 256
 
-static void mgslpc_change_params(MGSLPC_INFO *info);
+static void mgslpc_change_params(MGSLPC_INFO *info, struct tty_struct *tty);
 static void mgslpc_wait_until_sent(struct tty_struct *tty, int timeout);
 
 /* PCMCIA prototypes */
@@ -517,6 +510,11 @@
 	}
 }
 
+static const struct tty_port_operations mgslpc_port_ops = {
+	.carrier_raised = carrier_raised,
+	.raise_dtr_rts = raise_dtr_rts
+};
+
 static int mgslpc_probe(struct pcmcia_device *link)
 {
     MGSLPC_INFO *info;
@@ -532,12 +530,12 @@
     }
 
     info->magic = MGSLPC_MAGIC;
+    tty_port_init(&info->port);
+    info->port.ops = &mgslpc_port_ops;
     INIT_WORK(&info->task, bh_handler);
     info->max_frame_size = 4096;
-    info->close_delay = 5*HZ/10;
-    info->closing_wait = 30*HZ;
-    init_waitqueue_head(&info->open_wait);
-    init_waitqueue_head(&info->close_wait);
+    info->port.close_delay = 5*HZ/10;
+    info->port.closing_wait = 30*HZ;
     init_waitqueue_head(&info->status_event_wait_q);
     init_waitqueue_head(&info->event_wait_q);
     spin_lock_init(&info->lock);
@@ -784,7 +782,7 @@
 
 	spin_lock_irqsave(&info->lock,flags);
 	if (!info->tx_enabled)
-	 	tx_start(info);
+	 	tx_start(info, tty);
 	spin_unlock_irqrestore(&info->lock,flags);
 }
 
@@ -823,6 +821,7 @@
 static void bh_handler(struct work_struct *work)
 {
 	MGSLPC_INFO *info = container_of(work, MGSLPC_INFO, task);
+	struct tty_struct *tty;
 	int action;
 
 	if (!info)
@@ -833,6 +832,7 @@
 			__FILE__,__LINE__,info->device_name);
 
 	info->bh_running = true;
+	tty = tty_port_tty_get(&info->port);
 
 	while((action = bh_action(info)) != 0) {
 
@@ -844,10 +844,10 @@
 		switch (action) {
 
 		case BH_RECEIVE:
-			while(rx_get_frame(info));
+			while(rx_get_frame(info, tty));
 			break;
 		case BH_TRANSMIT:
-			bh_transmit(info);
+			bh_transmit(info, tty);
 			break;
 		case BH_STATUS:
 			bh_status(info);
@@ -859,14 +859,14 @@
 		}
 	}
 
+	tty_kref_put(tty);
 	if (debug_level >= DEBUG_LEVEL_BH)
 		printk( "%s(%d):bh_handler(%s) exit\n",
 			__FILE__,__LINE__,info->device_name);
 }
 
-static void bh_transmit(MGSLPC_INFO *info)
+static void bh_transmit(MGSLPC_INFO *info, struct tty_struct *tty)
 {
-	struct tty_struct *tty = info->tty;
 	if (debug_level >= DEBUG_LEVEL_BH)
 		printk("bh_transmit() entry on %s\n", info->device_name);
 
@@ -945,12 +945,11 @@
 	issue_command(info, CHA, CMD_RXFIFO);
 }
 
-static void rx_ready_async(MGSLPC_INFO *info, int tcd)
+static void rx_ready_async(MGSLPC_INFO *info, int tcd, struct tty_struct *tty)
 {
 	unsigned char data, status, flag;
 	int fifo_count;
 	int work = 0;
- 	struct tty_struct *tty = info->tty;
  	struct mgsl_icount *icount = &info->icount;
 
 	if (tcd) {
@@ -1013,7 +1012,7 @@
 }
 
 
-static void tx_done(MGSLPC_INFO *info)
+static void tx_done(MGSLPC_INFO *info, struct tty_struct *tty)
 {
 	if (!info->tx_active)
 		return;
@@ -1042,7 +1041,7 @@
 	else
 #endif
 	{
-		if (info->tty->stopped || info->tty->hw_stopped) {
+		if (tty->stopped || tty->hw_stopped) {
 			tx_stop(info);
 			return;
 		}
@@ -1050,7 +1049,7 @@
 	}
 }
 
-static void tx_ready(MGSLPC_INFO *info)
+static void tx_ready(MGSLPC_INFO *info, struct tty_struct *tty)
 {
 	unsigned char fifo_count = 32;
 	int c;
@@ -1062,7 +1061,7 @@
 		if (!info->tx_active)
 			return;
 	} else {
-		if (info->tty->stopped || info->tty->hw_stopped) {
+		if (tty->stopped || tty->hw_stopped) {
 			tx_stop(info);
 			return;
 		}
@@ -1099,7 +1098,7 @@
 	}
 }
 
-static void cts_change(MGSLPC_INFO *info)
+static void cts_change(MGSLPC_INFO *info, struct tty_struct *tty)
 {
 	get_signals(info);
 	if ((info->cts_chkcount)++ >= IO_PIN_SHUTDOWN_LIMIT)
@@ -1112,14 +1111,14 @@
 	wake_up_interruptible(&info->status_event_wait_q);
 	wake_up_interruptible(&info->event_wait_q);
 
-	if (info->flags & ASYNC_CTS_FLOW) {
-		if (info->tty->hw_stopped) {
+	if (info->port.flags & ASYNC_CTS_FLOW) {
+		if (tty->hw_stopped) {
 			if (info->serial_signals & SerialSignal_CTS) {
 				if (debug_level >= DEBUG_LEVEL_ISR)
 					printk("CTS tx start...");
-				if (info->tty)
-					info->tty->hw_stopped = 0;
-				tx_start(info);
+				if (tty)
+					tty->hw_stopped = 0;
+				tx_start(info, tty);
 				info->pending_bh |= BH_TRANSMIT;
 				return;
 			}
@@ -1127,8 +1126,8 @@
 			if (!(info->serial_signals & SerialSignal_CTS)) {
 				if (debug_level >= DEBUG_LEVEL_ISR)
 					printk("CTS tx stop...");
-				if (info->tty)
-					info->tty->hw_stopped = 1;
+				if (tty)
+					tty->hw_stopped = 1;
 				tx_stop(info);
 			}
 		}
@@ -1136,7 +1135,7 @@
 	info->pending_bh |= BH_STATUS;
 }
 
-static void dcd_change(MGSLPC_INFO *info)
+static void dcd_change(MGSLPC_INFO *info, struct tty_struct *tty)
 {
 	get_signals(info);
 	if ((info->dcd_chkcount)++ >= IO_PIN_SHUTDOWN_LIMIT)
@@ -1158,17 +1157,17 @@
 	wake_up_interruptible(&info->status_event_wait_q);
 	wake_up_interruptible(&info->event_wait_q);
 
-	if (info->flags & ASYNC_CHECK_CD) {
+	if (info->port.flags & ASYNC_CHECK_CD) {
 		if (debug_level >= DEBUG_LEVEL_ISR)
 			printk("%s CD now %s...", info->device_name,
 			       (info->serial_signals & SerialSignal_DCD) ? "on" : "off");
 		if (info->serial_signals & SerialSignal_DCD)
-			wake_up_interruptible(&info->open_wait);
+			wake_up_interruptible(&info->port.open_wait);
 		else {
 			if (debug_level >= DEBUG_LEVEL_ISR)
 				printk("doing serial hangup...");
-			if (info->tty)
-				tty_hangup(info->tty);
+			if (tty)
+				tty_hangup(tty);
 		}
 	}
 	info->pending_bh |= BH_STATUS;
@@ -1214,6 +1213,7 @@
 static irqreturn_t mgslpc_isr(int dummy, void *dev_id)
 {
 	MGSLPC_INFO *info = dev_id;
+	struct tty_struct *tty;
 	unsigned short isr;
 	unsigned char gis, pis;
 	int count=0;
@@ -1224,6 +1224,8 @@
 	if (!(info->p_dev->_locked))
 		return IRQ_HANDLED;
 
+	tty = tty_port_tty_get(&info->port);
+
 	spin_lock(&info->lock);
 
 	while ((gis = read_reg(info, CHA + GIS))) {
@@ -1239,9 +1241,9 @@
 		if (gis & (BIT1 + BIT0)) {
 			isr = read_reg16(info, CHB + ISR);
 			if (isr & IRQ_DCD)
-				dcd_change(info);
+				dcd_change(info, tty);
 			if (isr & IRQ_CTS)
-				cts_change(info);
+				cts_change(info, tty);
 		}
 		if (gis & (BIT3 + BIT2))
 		{
@@ -1258,8 +1260,8 @@
 			}
 			if (isr & IRQ_BREAK_ON) {
 				info->icount.brk++;
-				if (info->flags & ASYNC_SAK)
-					do_SAK(info->tty);
+				if (info->port.flags & ASYNC_SAK)
+					do_SAK(tty);
 			}
 			if (isr & IRQ_RXTIME) {
 				issue_command(info, CHA, CMD_RXFIFO_READ);
@@ -1268,7 +1270,7 @@
 				if (info->params.mode == MGSL_MODE_HDLC)
 					rx_ready_hdlc(info, isr & IRQ_RXEOM);
 				else
-					rx_ready_async(info, isr & IRQ_RXEOM);
+					rx_ready_async(info, isr & IRQ_RXEOM, tty);
 			}
 
 			/* transmit IRQs */
@@ -1277,14 +1279,14 @@
 					info->icount.txabort++;
 				else
 					info->icount.txunder++;
-				tx_done(info);
+				tx_done(info, tty);
 			}
 			else if (isr & IRQ_ALLSENT) {
 				info->icount.txok++;
-				tx_done(info);
+				tx_done(info, tty);
 			}
 			else if (isr & IRQ_TXFIFO)
-				tx_ready(info);
+				tx_ready(info, tty);
 		}
 		if (gis & BIT7) {
 			pis = read_reg(info, CHA + PIS);
@@ -1308,6 +1310,7 @@
 	}
 
 	spin_unlock(&info->lock);
+	tty_kref_put(tty);
 
 	if (debug_level >= DEBUG_LEVEL_ISR)
 		printk("%s(%d):mgslpc_isr(%d)exit.\n",
@@ -1318,14 +1321,14 @@
 
 /* Initialize and start device.
  */
-static int startup(MGSLPC_INFO * info)
+static int startup(MGSLPC_INFO * info, struct tty_struct *tty)
 {
 	int retval = 0;
 
 	if (debug_level >= DEBUG_LEVEL_INFO)
 		printk("%s(%d):startup(%s)\n",__FILE__,__LINE__,info->device_name);
 
-	if (info->flags & ASYNC_INITIALIZED)
+	if (info->port.flags & ASYNC_INITIALIZED)
 		return 0;
 
 	if (!info->tx_buf) {
@@ -1352,30 +1355,30 @@
 		retval = adapter_test(info);
 
 	if ( retval ) {
-  		if (capable(CAP_SYS_ADMIN) && info->tty)
-			set_bit(TTY_IO_ERROR, &info->tty->flags);
+  		if (capable(CAP_SYS_ADMIN) && tty)
+			set_bit(TTY_IO_ERROR, &tty->flags);
 		release_resources(info);
   		return retval;
   	}
 
 	/* program hardware for current parameters */
-	mgslpc_change_params(info);
+	mgslpc_change_params(info, tty);
 
-	if (info->tty)
-		clear_bit(TTY_IO_ERROR, &info->tty->flags);
+	if (tty)
+		clear_bit(TTY_IO_ERROR, &tty->flags);
 
-	info->flags |= ASYNC_INITIALIZED;
+	info->port.flags |= ASYNC_INITIALIZED;
 
 	return 0;
 }
 
 /* Called by mgslpc_close() and mgslpc_hangup() to shutdown hardware
  */
-static void shutdown(MGSLPC_INFO * info)
+static void shutdown(MGSLPC_INFO * info, struct tty_struct *tty)
 {
 	unsigned long flags;
 
-	if (!(info->flags & ASYNC_INITIALIZED))
+	if (!(info->port.flags & ASYNC_INITIALIZED))
 		return;
 
 	if (debug_level >= DEBUG_LEVEL_INFO)
@@ -1402,7 +1405,7 @@
 	/* TODO:disable interrupts instead of reset to preserve signal states */
 	reset_device(info);
 
- 	if (!info->tty || info->tty->termios->c_cflag & HUPCL) {
+ 	if (!tty || tty->termios->c_cflag & HUPCL) {
  		info->serial_signals &= ~(SerialSignal_DTR + SerialSignal_RTS);
 		set_signals(info);
 	}
@@ -1411,13 +1414,13 @@
 
 	release_resources(info);
 
-	if (info->tty)
-		set_bit(TTY_IO_ERROR, &info->tty->flags);
+	if (tty)
+		set_bit(TTY_IO_ERROR, &tty->flags);
 
-	info->flags &= ~ASYNC_INITIALIZED;
+	info->port.flags &= ~ASYNC_INITIALIZED;
 }
 
-static void mgslpc_program_hw(MGSLPC_INFO *info)
+static void mgslpc_program_hw(MGSLPC_INFO *info, struct tty_struct *tty)
 {
 	unsigned long flags;
 
@@ -1443,7 +1446,7 @@
 	port_irq_enable(info, (unsigned char) PVR_DSR | PVR_RI);
 	get_signals(info);
 
-	if (info->netcount || info->tty->termios->c_cflag & CREAD)
+	if (info->netcount || (tty && (tty->termios->c_cflag & CREAD)))
 		rx_start(info);
 
 	spin_unlock_irqrestore(&info->lock,flags);
@@ -1451,19 +1454,19 @@
 
 /* Reconfigure adapter based on new parameters
  */
-static void mgslpc_change_params(MGSLPC_INFO *info)
+static void mgslpc_change_params(MGSLPC_INFO *info, struct tty_struct *tty)
 {
 	unsigned cflag;
 	int bits_per_char;
 
-	if (!info->tty || !info->tty->termios)
+	if (!tty || !tty->termios)
 		return;
 
 	if (debug_level >= DEBUG_LEVEL_INFO)
 		printk("%s(%d):mgslpc_change_params(%s)\n",
 			 __FILE__,__LINE__, info->device_name );
 
-	cflag = info->tty->termios->c_cflag;
+	cflag = tty->termios->c_cflag;
 
 	/* if B0 rate (hangup) specified then negate DTR and RTS */
 	/* otherwise assert DTR and RTS */
@@ -1510,7 +1513,7 @@
 	 * current data rate.
 	 */
 	if (info->params.data_rate <= 460800) {
-		info->params.data_rate = tty_get_baud_rate(info->tty);
+		info->params.data_rate = tty_get_baud_rate(tty);
 	}
 
 	if ( info->params.data_rate ) {
@@ -1520,24 +1523,24 @@
 	info->timeout += HZ/50;		/* Add .02 seconds of slop */
 
 	if (cflag & CRTSCTS)
-		info->flags |= ASYNC_CTS_FLOW;
+		info->port.flags |= ASYNC_CTS_FLOW;
 	else
-		info->flags &= ~ASYNC_CTS_FLOW;
+		info->port.flags &= ~ASYNC_CTS_FLOW;
 
 	if (cflag & CLOCAL)
-		info->flags &= ~ASYNC_CHECK_CD;
+		info->port.flags &= ~ASYNC_CHECK_CD;
 	else
-		info->flags |= ASYNC_CHECK_CD;
+		info->port.flags |= ASYNC_CHECK_CD;
 
 	/* process tty input control flags */
 
 	info->read_status_mask = 0;
-	if (I_INPCK(info->tty))
+	if (I_INPCK(tty))
 		info->read_status_mask |= BIT7 | BIT6;
-	if (I_IGNPAR(info->tty))
+	if (I_IGNPAR(tty))
 		info->ignore_status_mask |= BIT7 | BIT6;
 
-	mgslpc_program_hw(info);
+	mgslpc_program_hw(info, tty);
 }
 
 /* Add a character to the transmit buffer
@@ -1597,7 +1600,7 @@
 
 	spin_lock_irqsave(&info->lock,flags);
 	if (!info->tx_active)
-	 	tx_start(info);
+	 	tx_start(info, tty);
 	spin_unlock_irqrestore(&info->lock,flags);
 }
 
@@ -1659,7 +1662,7 @@
  	if (info->tx_count && !tty->stopped && !tty->hw_stopped) {
 		spin_lock_irqsave(&info->lock,flags);
 		if (!info->tx_active)
-		 	tx_start(info);
+		 	tx_start(info, tty);
 		spin_unlock_irqrestore(&info->lock,flags);
  	}
 cleanup:
@@ -1764,7 +1767,7 @@
 	if (ch) {
 		spin_lock_irqsave(&info->lock,flags);
 		if (!info->tx_enabled)
-		 	tx_start(info);
+		 	tx_start(info, tty);
 		spin_unlock_irqrestore(&info->lock,flags);
 	}
 }
@@ -1862,7 +1865,7 @@
  *
  * Returns:	0 if success, otherwise error code
  */
-static int set_params(MGSLPC_INFO * info, MGSL_PARAMS __user *new_params)
+static int set_params(MGSLPC_INFO * info, MGSL_PARAMS __user *new_params, struct tty_struct *tty)
 {
  	unsigned long flags;
 	MGSL_PARAMS tmp_params;
@@ -1883,7 +1886,7 @@
 	memcpy(&info->params,&tmp_params,sizeof(MGSL_PARAMS));
 	spin_unlock_irqrestore(&info->lock,flags);
 
- 	mgslpc_change_params(info);
+ 	mgslpc_change_params(info, tty);
 
 	return 0;
 }
@@ -1944,7 +1947,7 @@
 	return 0;
 }
 
-static int set_txenable(MGSLPC_INFO * info, int enable)
+static int set_txenable(MGSLPC_INFO * info, int enable, struct tty_struct *tty)
 {
  	unsigned long flags;
 
@@ -1954,7 +1957,7 @@
 	spin_lock_irqsave(&info->lock,flags);
 	if (enable) {
 		if (!info->tx_enabled)
-			tx_start(info);
+			tx_start(info, tty);
 	} else {
 		if (info->tx_enabled)
 			tx_stop(info);
@@ -2263,6 +2266,11 @@
 			unsigned int cmd, unsigned long arg)
 {
 	MGSLPC_INFO * info = (MGSLPC_INFO *)tty->driver_data;
+	int error;
+	struct mgsl_icount cnow;	/* kernel counter temps */
+	struct serial_icounter_struct __user *p_cuser;	/* user space */
+	void __user *argp = (void __user *)arg;
+	unsigned long flags;
 
 	if (debug_level >= DEBUG_LEVEL_INFO)
 		printk("%s(%d):mgslpc_ioctl %s cmd=%08X\n", __FILE__,__LINE__,
@@ -2277,22 +2285,11 @@
 		    return -EIO;
 	}
 
-	return ioctl_common(info, cmd, arg);
-}
-
-static int ioctl_common(MGSLPC_INFO *info, unsigned int cmd, unsigned long arg)
-{
-	int error;
-	struct mgsl_icount cnow;	/* kernel counter temps */
-	struct serial_icounter_struct __user *p_cuser;	/* user space */
-	void __user *argp = (void __user *)arg;
-	unsigned long flags;
-
 	switch (cmd) {
 	case MGSL_IOCGPARAMS:
 		return get_params(info, argp);
 	case MGSL_IOCSPARAMS:
-		return set_params(info, argp);
+		return set_params(info, argp, tty);
 	case MGSL_IOCGTXIDLE:
 		return get_txidle(info, argp);
 	case MGSL_IOCSTXIDLE:
@@ -2302,7 +2299,7 @@
 	case MGSL_IOCSIF:
 		return set_interface(info,(int)arg);
 	case MGSL_IOCTXENABLE:
-		return set_txenable(info,(int)arg);
+		return set_txenable(info,(int)arg, tty);
 	case MGSL_IOCRXENABLE:
 		return set_rxenable(info,(int)arg);
 	case MGSL_IOCTXABORT:
@@ -2369,7 +2366,7 @@
 		== RELEVANT_IFLAG(old_termios->c_iflag)))
 	  return;
 
-	mgslpc_change_params(info);
+	mgslpc_change_params(info, tty);
 
 	/* Handle transition to B0 status */
 	if (old_termios->c_cflag & CBAUD &&
@@ -2404,81 +2401,34 @@
 static void mgslpc_close(struct tty_struct *tty, struct file * filp)
 {
 	MGSLPC_INFO * info = (MGSLPC_INFO *)tty->driver_data;
+	struct tty_port *port = &info->port;
 
 	if (mgslpc_paranoia_check(info, tty->name, "mgslpc_close"))
 		return;
 
 	if (debug_level >= DEBUG_LEVEL_INFO)
 		printk("%s(%d):mgslpc_close(%s) entry, count=%d\n",
-			 __FILE__,__LINE__, info->device_name, info->count);
+			 __FILE__,__LINE__, info->device_name, port->count);
 
-	if (!info->count)
-		return;
+	WARN_ON(!port->count);
 
-	if (tty_hung_up_p(filp))
+	if (tty_port_close_start(port, tty, filp) == 0)
 		goto cleanup;
 
-	if ((tty->count == 1) && (info->count != 1)) {
-		/*
-		 * tty->count is 1 and the tty structure will be freed.
-		 * info->count should be one in this case.
-		 * if it's not, correct it so that the port is shutdown.
-		 */
-		printk("mgslpc_close: bad refcount; tty->count is 1, "
-		       "info->count is %d\n", info->count);
-		info->count = 1;
-	}
-
-	info->count--;
-
-	/* if at least one open remaining, leave hardware active */
-	if (info->count)
-		goto cleanup;
-
-	info->flags |= ASYNC_CLOSING;
-
-	/* set tty->closing to notify line discipline to
-	 * only process XON/XOFF characters. Only the N_TTY
-	 * discipline appears to use this (ppp does not).
-	 */
-	tty->closing = 1;
-
-	/* wait for transmit data to clear all layers */
-
-	if (info->closing_wait != ASYNC_CLOSING_WAIT_NONE) {
-		if (debug_level >= DEBUG_LEVEL_INFO)
-			printk("%s(%d):mgslpc_close(%s) calling tty_wait_until_sent\n",
-				 __FILE__,__LINE__, info->device_name );
-		tty_wait_until_sent(tty, info->closing_wait);
-	}
-
- 	if (info->flags & ASYNC_INITIALIZED)
+ 	if (port->flags & ASYNC_INITIALIZED)
  		mgslpc_wait_until_sent(tty, info->timeout);
 
 	mgslpc_flush_buffer(tty);
 
 	tty_ldisc_flush(tty);
-
-	shutdown(info);
-
-	tty->closing = 0;
-	info->tty = NULL;
-
-	if (info->blocked_open) {
-		if (info->close_delay) {
-			msleep_interruptible(jiffies_to_msecs(info->close_delay));
-		}
-		wake_up_interruptible(&info->open_wait);
-	}
-
-	info->flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING);
-
-	wake_up_interruptible(&info->close_wait);
-
+	shutdown(info, tty);
+
+	tty_port_close_end(port, tty);
+	tty_port_tty_set(port, NULL);
 cleanup:
 	if (debug_level >= DEBUG_LEVEL_INFO)
 		printk("%s(%d):mgslpc_close(%s) exit, count=%d\n", __FILE__,__LINE__,
-			tty->driver->name, info->count);
+			tty->driver->name, port->count);
 }
 
 /* Wait until the transmitter is empty.
@@ -2498,7 +2448,7 @@
 	if (mgslpc_paranoia_check(info, tty->name, "mgslpc_wait_until_sent"))
 		return;
 
-	if (!(info->flags & ASYNC_INITIALIZED))
+	if (!(info->port.flags & ASYNC_INITIALIZED))
 		goto exit;
 
 	orig_jiffies = jiffies;
@@ -2559,120 +2509,40 @@
 		return;
 
 	mgslpc_flush_buffer(tty);
-	shutdown(info);
-
-	info->count = 0;
-	info->flags &= ~ASYNC_NORMAL_ACTIVE;
-	info->tty = NULL;
-
-	wake_up_interruptible(&info->open_wait);
+	shutdown(info, tty);
+	tty_port_hangup(&info->port);
 }
 
-/* Block the current process until the specified port
- * is ready to be opened.
- */
-static int block_til_ready(struct tty_struct *tty, struct file *filp,
-			   MGSLPC_INFO *info)
+static int carrier_raised(struct tty_port *port)
 {
-	DECLARE_WAITQUEUE(wait, current);
-	int		retval;
-	bool		do_clocal = false;
-	bool		extra_count = false;
-	unsigned long	flags;
+	MGSLPC_INFO *info = container_of(port, MGSLPC_INFO, port);
+	unsigned long flags;
 
-	if (debug_level >= DEBUG_LEVEL_INFO)
-		printk("%s(%d):block_til_ready on %s\n",
-			 __FILE__,__LINE__, tty->driver->name );
+	spin_lock_irqsave(&info->lock,flags);
+ 	get_signals(info);
+	spin_unlock_irqrestore(&info->lock,flags);
 
-	if (filp->f_flags & O_NONBLOCK || tty->flags & (1 << TTY_IO_ERROR)){
-		/* nonblock mode is set or port is not enabled */
-		/* just verify that callout device is not active */
-		info->flags |= ASYNC_NORMAL_ACTIVE;
-		return 0;
-	}
-
-	if (tty->termios->c_cflag & CLOCAL)
-		do_clocal = true;
-
-	/* Wait for carrier detect and the line to become
-	 * free (i.e., not in use by the callout).  While we are in
-	 * this loop, info->count is dropped by one, so that
-	 * mgslpc_close() knows when to free things.  We restore it upon
-	 * exit, either normal or abnormal.
-	 */
-
-	retval = 0;
-	add_wait_queue(&info->open_wait, &wait);
-
-	if (debug_level >= DEBUG_LEVEL_INFO)
-		printk("%s(%d):block_til_ready before block on %s count=%d\n",
-			 __FILE__,__LINE__, tty->driver->name, info->count );
-
-	spin_lock_irqsave(&info->lock, flags);
-	if (!tty_hung_up_p(filp)) {
-		extra_count = true;
-		info->count--;
-	}
-	spin_unlock_irqrestore(&info->lock, flags);
-	info->blocked_open++;
-
-	while (1) {
-		if ((tty->termios->c_cflag & CBAUD)) {
-			spin_lock_irqsave(&info->lock,flags);
-			info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR;
-		 	set_signals(info);
-			spin_unlock_irqrestore(&info->lock,flags);
-		}
-
-		set_current_state(TASK_INTERRUPTIBLE);
-
-		if (tty_hung_up_p(filp) || !(info->flags & ASYNC_INITIALIZED)){
-			retval = (info->flags & ASYNC_HUP_NOTIFY) ?
-					-EAGAIN : -ERESTARTSYS;
-			break;
-		}
-
-		spin_lock_irqsave(&info->lock,flags);
-	 	get_signals(info);
-		spin_unlock_irqrestore(&info->lock,flags);
-
- 		if (!(info->flags & ASYNC_CLOSING) &&
- 		    (do_clocal || (info->serial_signals & SerialSignal_DCD)) ) {
- 			break;
-		}
-
-		if (signal_pending(current)) {
-			retval = -ERESTARTSYS;
-			break;
-		}
-
-		if (debug_level >= DEBUG_LEVEL_INFO)
-			printk("%s(%d):block_til_ready blocking on %s count=%d\n",
-				 __FILE__,__LINE__, tty->driver->name, info->count );
-
-		schedule();
-	}
-
-	set_current_state(TASK_RUNNING);
-	remove_wait_queue(&info->open_wait, &wait);
-
-	if (extra_count)
-		info->count++;
-	info->blocked_open--;
-
-	if (debug_level >= DEBUG_LEVEL_INFO)
-		printk("%s(%d):block_til_ready after blocking on %s count=%d\n",
-			 __FILE__,__LINE__, tty->driver->name, info->count );
-
-	if (!retval)
-		info->flags |= ASYNC_NORMAL_ACTIVE;
-
-	return retval;
+	if (info->serial_signals & SerialSignal_DCD)
+		return 1;
+	return 0;
 }
 
+static void raise_dtr_rts(struct tty_port *port)
+{
+	MGSLPC_INFO *info = container_of(port, MGSLPC_INFO, port);
+	unsigned long flags;
+
+	spin_lock_irqsave(&info->lock,flags);
+	info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR;
+	set_signals(info);
+	spin_unlock_irqrestore(&info->lock,flags);
+}
+
+
 static int mgslpc_open(struct tty_struct *tty, struct file * filp)
 {
 	MGSLPC_INFO	*info;
+	struct tty_port *port;
 	int 			retval, line;
 	unsigned long flags;
 
@@ -2691,23 +2561,24 @@
 	if (mgslpc_paranoia_check(info, tty->name, "mgslpc_open"))
 		return -ENODEV;
 
+	port = &info->port;
 	tty->driver_data = info;
-	info->tty = tty;
+	tty_port_tty_set(port, tty);
 
 	if (debug_level >= DEBUG_LEVEL_INFO)
 		printk("%s(%d):mgslpc_open(%s), old ref count = %d\n",
-			 __FILE__,__LINE__,tty->driver->name, info->count);
+			 __FILE__,__LINE__,tty->driver->name, port->count);
 
 	/* If port is closing, signal caller to try again */
-	if (tty_hung_up_p(filp) || info->flags & ASYNC_CLOSING){
-		if (info->flags & ASYNC_CLOSING)
-			interruptible_sleep_on(&info->close_wait);
-		retval = ((info->flags & ASYNC_HUP_NOTIFY) ?
+	if (tty_hung_up_p(filp) || port->flags & ASYNC_CLOSING){
+		if (port->flags & ASYNC_CLOSING)
+			interruptible_sleep_on(&port->close_wait);
+		retval = ((port->flags & ASYNC_HUP_NOTIFY) ?
 			-EAGAIN : -ERESTARTSYS);
 		goto cleanup;
 	}
 
-	info->tty->low_latency = (info->flags & ASYNC_LOW_LATENCY) ? 1 : 0;
+	tty->low_latency = (port->flags & ASYNC_LOW_LATENCY) ? 1 : 0;
 
 	spin_lock_irqsave(&info->netlock, flags);
 	if (info->netcount) {
@@ -2715,17 +2586,19 @@
 		spin_unlock_irqrestore(&info->netlock, flags);
 		goto cleanup;
 	}
-	info->count++;
+	spin_lock(&port->lock);
+	port->count++;
+	spin_unlock(&port->lock);
 	spin_unlock_irqrestore(&info->netlock, flags);
 
-	if (info->count == 1) {
+	if (port->count == 1) {
 		/* 1st open on this device, init hardware */
-		retval = startup(info);
+		retval = startup(info, tty);
 		if (retval < 0)
 			goto cleanup;
 	}
 
-	retval = block_til_ready(tty, filp, info);
+	retval = tty_port_block_til_ready(&info->port, tty, filp);
 	if (retval) {
 		if (debug_level >= DEBUG_LEVEL_INFO)
 			printk("%s(%d):block_til_ready(%s) returned %d\n",
@@ -2739,13 +2612,6 @@
 	retval = 0;
 
 cleanup:
-	if (retval) {
-		if (tty->count == 1)
-			info->tty = NULL; /* tty layer will release tty struct */
-		if(info->count)
-			info->count--;
-	}
-
 	return retval;
 }
 
@@ -3500,7 +3366,7 @@
 	info->rx_enabled = true;
 }
 
-static void tx_start(MGSLPC_INFO *info)
+static void tx_start(MGSLPC_INFO *info, struct tty_struct *tty)
 {
 	if (debug_level >= DEBUG_LEVEL_ISR)
 		printk("%s(%d):tx_start(%s)\n",
@@ -3524,11 +3390,11 @@
 		if (info->params.mode == MGSL_MODE_ASYNC) {
 			if (!info->tx_active) {
 				info->tx_active = true;
-				tx_ready(info);
+				tx_ready(info, tty);
 			}
 		} else {
 			info->tx_active = true;
-			tx_ready(info);
+			tx_ready(info, tty);
 			mod_timer(&info->tx_timer, jiffies +
 					msecs_to_jiffies(5000));
 		}
@@ -3849,13 +3715,12 @@
  *
  * Returns true if frame returned, otherwise false
  */
-static bool rx_get_frame(MGSLPC_INFO *info)
+static bool rx_get_frame(MGSLPC_INFO *info, struct tty_struct *tty)
 {
 	unsigned short status;
 	RXBUF *buf;
 	unsigned int framesize = 0;
 	unsigned long flags;
-	struct tty_struct *tty = info->tty;
 	bool return_frame = false;
 
 	if (info->rx_frame_count == 0)
@@ -4075,7 +3940,11 @@
 		hdlcdev_tx_done(info);
 	else
 #endif
-		bh_transmit(info);
+	{
+		struct tty_struct *tty = tty_port_tty_get(&info->port);
+		bh_transmit(info, tty);
+		tty_kref_put(tty);
+	}
 }
 
 #if SYNCLINK_GENERIC_HDLC
@@ -4094,11 +3963,12 @@
 			  unsigned short parity)
 {
 	MGSLPC_INFO *info = dev_to_port(dev);
+	struct tty_struct *tty;
 	unsigned char  new_encoding;
 	unsigned short new_crctype;
 
 	/* return error if TTY interface open */
-	if (info->count)
+	if (info->port.count)
 		return -EBUSY;
 
 	switch (encoding)
@@ -4123,8 +3993,11 @@
 	info->params.crc_type = new_crctype;
 
 	/* if network interface up, reprogram hardware */
-	if (info->netcount)
-		mgslpc_program_hw(info);
+	if (info->netcount) {
+		tty = tty_port_tty_get(&info->port);
+		mgslpc_program_hw(info, tty);
+		tty_kref_put(tty);
+	}
 
 	return 0;
 }
@@ -4165,8 +4038,11 @@
 
 	/* start hardware transmitter if necessary */
 	spin_lock_irqsave(&info->lock,flags);
-	if (!info->tx_active)
-	 	tx_start(info);
+	if (!info->tx_active) {
+		struct tty_struct *tty = tty_port_tty_get(&info->port);
+	 	tx_start(info, tty);
+	 	tty_kref_put(tty);
+	}
 	spin_unlock_irqrestore(&info->lock,flags);
 
 	return 0;
@@ -4183,6 +4059,7 @@
 static int hdlcdev_open(struct net_device *dev)
 {
 	MGSLPC_INFO *info = dev_to_port(dev);
+	struct tty_struct *tty;
 	int rc;
 	unsigned long flags;
 
@@ -4195,7 +4072,7 @@
 
 	/* arbitrate between network and tty opens */
 	spin_lock_irqsave(&info->netlock, flags);
-	if (info->count != 0 || info->netcount != 0) {
+	if (info->port.count != 0 || info->netcount != 0) {
 		printk(KERN_WARNING "%s: hdlc_open returning busy\n", dev->name);
 		spin_unlock_irqrestore(&info->netlock, flags);
 		return -EBUSY;
@@ -4203,17 +4080,19 @@
 	info->netcount=1;
 	spin_unlock_irqrestore(&info->netlock, flags);
 
+	tty = tty_port_tty_get(&info->port);
 	/* claim resources and init adapter */
-	if ((rc = startup(info)) != 0) {
+	if ((rc = startup(info, tty)) != 0) {
+		tty_kref_put(tty);
 		spin_lock_irqsave(&info->netlock, flags);
 		info->netcount=0;
 		spin_unlock_irqrestore(&info->netlock, flags);
 		return rc;
 	}
-
 	/* assert DTR and RTS, apply hardware settings */
 	info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR;
-	mgslpc_program_hw(info);
+	mgslpc_program_hw(info, tty);
+	tty_kref_put(tty);
 
 	/* enable network layer transmit */
 	dev->trans_start = jiffies;
@@ -4241,6 +4120,7 @@
 static int hdlcdev_close(struct net_device *dev)
 {
 	MGSLPC_INFO *info = dev_to_port(dev);
+	struct tty_struct *tty = tty_port_tty_get(&info->port);
 	unsigned long flags;
 
 	if (debug_level >= DEBUG_LEVEL_INFO)
@@ -4249,8 +4129,8 @@
 	netif_stop_queue(dev);
 
 	/* shutdown adapter and release resources */
-	shutdown(info);
-
+	shutdown(info, tty);
+	tty_kref_put(tty);
 	hdlc_close(dev);
 
 	spin_lock_irqsave(&info->netlock, flags);
@@ -4281,7 +4161,7 @@
 		printk("%s:hdlcdev_ioctl(%s)\n",__FILE__,dev->name);
 
 	/* return error if TTY interface open */
-	if (info->count)
+	if (info->port.count)
 		return -EBUSY;
 
 	if (cmd != SIOCWANDEV)
@@ -4354,8 +4234,11 @@
 			info->params.clock_speed = 0;
 
 		/* if network interface up, reprogram hardware */
-		if (info->netcount)
-			mgslpc_program_hw(info);
+		if (info->netcount) {
+			struct tty_struct *tty = tty_port_tty_get(&info->port);
+			mgslpc_program_hw(info, tty);
+			tty_kref_put(tty);
+		}
 		return 0;
 
 	default:
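
Across the drivers touched here the conversion follows the same shape: the private block_til_ready() and close bookkeeping goes away in favour of the generic tty_port helpers, with the hardware-specific pieces exposed through struct tty_port_operations. A minimal sketch of a converted driver, assuming the helper names used elsewhere in this patch; all mydrv_* identifiers are hypothetical:

#include <linux/tty.h>

struct mydrv_port {
	struct tty_port port;		/* generic port state, set up with tty_port_init() */
	/* hardware specific fields ... */
};

static int mydrv_carrier_raised(struct tty_port *port)
{
	struct mydrv_port *p = container_of(port, struct mydrv_port, port);

	/* read DCD from the hardware; non-zero means carrier present */
	return 1;			/* placeholder for a real status read */
}

static void mydrv_raise_dtr_rts(struct tty_port *port)
{
	struct mydrv_port *p = container_of(port, struct mydrv_port, port);

	/* assert DTR and RTS on the hardware */
}

static const struct tty_port_operations mydrv_port_ops = {
	.carrier_raised = mydrv_carrier_raised,
	.raise_dtr_rts  = mydrv_raise_dtr_rts,
};

static int mydrv_open(struct tty_struct *tty, struct file *filp)
{
	struct mydrv_port *p = tty->driver_data;

	tty_port_tty_set(&p->port, tty);
	/* replaces the per-driver block_til_ready() copies removed above */
	return tty_port_block_til_ready(&p->port, tty, filp);
}

static void mydrv_close(struct tty_struct *tty, struct file *filp)
{
	struct mydrv_port *p = tty->driver_data;

	if (tty_port_close_start(&p->port, tty, filp) == 0)
		return;
	/* drain and shut down the hardware here ... */
	tty_port_close_end(&p->port, tty);
	tty_port_tty_set(&p->port, NULL);
}

With this shape the driver no longer touches port->count, blocked_open or the open/close wait queues directly; tty_port_hangup() covers the hangup case in the same way.
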
diff --git a/drivers/char/pty.c b/drivers/char/pty.c
index 6d45827..112a6ba 100644
--- a/drivers/char/pty.c
+++ b/drivers/char/pty.c
@@ -5,8 +5,6 @@
  *
  *  Added support for a Unix98-style ptmx device.
  *    -- C. Scott Ananian <cananian@alumni.princeton.edu>, 14-Jan-1998
- *  Added TTY_DO_WRITE_WAKEUP to enable n_tty to send POLL_OUT to
- *      waiting writers -- Sapan Bhatia <sapan@corewars.org>
  *
  *  When reading this code see also fs/devpts. In particular note that the
  *  driver_data field is used by the devpts side as a binding to the devpts
@@ -217,7 +215,6 @@
 
 	clear_bit(TTY_OTHER_CLOSED, &tty->link->flags);
 	set_bit(TTY_THROTTLED, &tty->flags);
-	set_bit(TTY_DO_WRITE_WAKEUP, &tty->flags);
 	retval = 0;
 out:
 	return retval;
@@ -230,6 +227,55 @@
 	tty->termios->c_cflag |= (CS8 | CREAD);
 }
 
+/**
+ *	pty_resize		-	resize event
+ *	@tty: tty being resized
+ *	@ws: new window size (rows and columns) to apply
+ *
+ *	Update the termios variables and send the necessary signals to
+ *	perform a terminal resize correctly.
+ */
+
+int pty_resize(struct tty_struct *tty,  struct winsize *ws)
+{
+	struct pid *pgrp, *rpgrp;
+	unsigned long flags;
+	struct tty_struct *pty = tty->link;
+
+	/* For a PTY we need to lock the tty side */
+	mutex_lock(&tty->termios_mutex);
+	if (!memcmp(ws, &tty->winsize, sizeof(*ws)))
+		goto done;
+
+	/* Get the PID values and reference them so we can
+	   avoid holding the tty ctrl lock while sending signals.
+	   We need to lock these individually however. */
+
+	spin_lock_irqsave(&tty->ctrl_lock, flags);
+	pgrp = get_pid(tty->pgrp);
+	spin_unlock_irqrestore(&tty->ctrl_lock, flags);
+
+	spin_lock_irqsave(&pty->ctrl_lock, flags);
+	rpgrp = get_pid(pty->pgrp);
+	spin_unlock_irqrestore(&pty->ctrl_lock, flags);
+
+	if (pgrp)
+		kill_pgrp(pgrp, SIGWINCH, 1);
+	if (rpgrp != pgrp && rpgrp)
+		kill_pgrp(rpgrp, SIGWINCH, 1);
+
+	put_pid(pgrp);
+	put_pid(rpgrp);
+
+	tty->winsize = *ws;
+	pty->winsize = *ws;	/* Never used so will go away soon */
+done:
+	mutex_unlock(&tty->termios_mutex);
+	return 0;
+}
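+
+/* For context, pty_resize() above is what ends up servicing a TIOCSWINSZ
+ * on a pty: the new size is stored under termios_mutex and SIGWINCH is
+ * delivered to the foreground process group(s). A small userspace
+ * illustration (not part of this patch) of triggering it:
+ *
+ *	#include <stdio.h>
+ *	#include <sys/ioctl.h>
+ *	#include <unistd.h>
+ *
+ *	int main(void)
+ *	{
+ *		struct winsize ws = { .ws_row = 40, .ws_col = 132 };
+ *
+ *		// fd 0 is assumed to be the pty slave of an interactive shell
+ *		if (ioctl(STDIN_FILENO, TIOCSWINSZ, &ws) < 0) {
+ *			perror("TIOCSWINSZ");
+ *			return 1;
+ *		}
+ *		return 0;
+ *	}
+ */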
+
 static int pty_install(struct tty_driver *driver, struct tty_struct *tty)
 {
 	struct tty_struct *o_tty;
@@ -290,6 +336,7 @@
 	.chars_in_buffer = pty_chars_in_buffer,
 	.unthrottle = pty_unthrottle,
 	.set_termios = pty_set_termios,
+	.resize = pty_resize
 };
 
 /* Traditional BSD devices */
@@ -319,6 +366,7 @@
 	.unthrottle = pty_unthrottle,
 	.set_termios = pty_set_termios,
 	.ioctl = pty_bsd_ioctl,
+	.resize = pty_resize
 };
 
 static void __init legacy_pty_init(void)
@@ -561,7 +609,8 @@
 	.unthrottle = pty_unthrottle,
 	.set_termios = pty_set_termios,
 	.ioctl = pty_unix98_ioctl,
-	.shutdown = pty_unix98_shutdown
+	.shutdown = pty_unix98_shutdown,
+	.resize = pty_resize
 };
 
 static const struct tty_operations pty_unix98_ops = {
diff --git a/drivers/char/rio/rio_linux.c b/drivers/char/rio/rio_linux.c
index a8f68a3..2e8a6ee 100644
--- a/drivers/char/rio/rio_linux.c
+++ b/drivers/char/rio/rio_linux.c
@@ -173,7 +173,7 @@
 static void rio_enable_tx_interrupts(void *ptr);
 static void rio_disable_rx_interrupts(void *ptr);
 static void rio_enable_rx_interrupts(void *ptr);
-static int rio_get_CD(void *ptr);
+static int rio_carrier_raised(struct tty_port *port);
 static void rio_shutdown_port(void *ptr);
 static int rio_set_real_termios(void *ptr);
 static void rio_hungup(void *ptr);
@@ -224,7 +224,6 @@
 	rio_enable_tx_interrupts,
 	rio_disable_rx_interrupts,
 	rio_enable_rx_interrupts,
-	rio_get_CD,
 	rio_shutdown_port,
 	rio_set_real_termios,
 	rio_chars_in_buffer,
@@ -476,9 +475,9 @@
 
 
 /* Jeez. Isn't this simple?  */
-static int rio_get_CD(void *ptr)
+static int rio_carrier_raised(struct tty_port *port)
 {
-	struct Port *PortP = ptr;
+	struct Port *PortP = container_of(port, struct Port, gs.port);
 	int rv;
 
 	func_enter();
@@ -797,16 +796,9 @@
 	return 1;
 }
 
-
-static void *ckmalloc(int size)
-{
-	void *p;
-
-	p = kzalloc(size, GFP_KERNEL);
-	return p;
-}
-
-
+static const struct tty_port_operations rio_port_ops = {
+	.carrier_raised = rio_carrier_raised,
+};
 
 static int rio_init_datastructures(void)
 {
@@ -826,33 +818,30 @@
 #define TMIO_SZ sizeof(struct termios *)
 	rio_dprintk(RIO_DEBUG_INIT, "getting : %Zd %Zd %Zd %Zd %Zd bytes\n", RI_SZ, RIO_HOSTS * HOST_SZ, RIO_PORTS * PORT_SZ, RIO_PORTS * TMIO_SZ, RIO_PORTS * TMIO_SZ);
 
-	if (!(p = ckmalloc(RI_SZ)))
+	if (!(p = kzalloc(RI_SZ, GFP_KERNEL)))
 		goto free0;
-	if (!(p->RIOHosts = ckmalloc(RIO_HOSTS * HOST_SZ)))
+	if (!(p->RIOHosts = kzalloc(RIO_HOSTS * HOST_SZ, GFP_KERNEL)))
 		goto free1;
-	if (!(p->RIOPortp = ckmalloc(RIO_PORTS * PORT_SZ)))
+	if (!(p->RIOPortp = kzalloc(RIO_PORTS * PORT_SZ, GFP_KERNEL)))
 		goto free2;
 	p->RIOConf = RIOConf;
 	rio_dprintk(RIO_DEBUG_INIT, "Got : %p %p %p\n", p, p->RIOHosts, p->RIOPortp);
 
 #if 1
 	for (i = 0; i < RIO_PORTS; i++) {
-		port = p->RIOPortp[i] = ckmalloc(sizeof(struct Port));
+		port = p->RIOPortp[i] = kzalloc(sizeof(struct Port), GFP_KERNEL);
 		if (!port) {
 			goto free6;
 		}
 		rio_dprintk(RIO_DEBUG_INIT, "initing port %d (%d)\n", i, port->Mapped);
+		tty_port_init(&port->gs.port);
+		port->gs.port.ops = &rio_port_ops;
 		port->PortNum = i;
 		port->gs.magic = RIO_MAGIC;
 		port->gs.close_delay = HZ / 2;
 		port->gs.closing_wait = 30 * HZ;
 		port->gs.rd = &rio_real_driver;
 		spin_lock_init(&port->portSem);
-		/*
-		 * Initializing wait queue
-		 */
-		init_waitqueue_head(&port->gs.port.open_wait);
-		init_waitqueue_head(&port->gs.port.close_wait);
 	}
 #else
 	/* We could postpone initializing them to when they are configured. */
diff --git a/drivers/char/riscom8.c b/drivers/char/riscom8.c
index 2c6c8f3..9af8d74 100644
--- a/drivers/char/riscom8.c
+++ b/drivers/char/riscom8.c
@@ -857,98 +857,21 @@
 		rc_shutdown_board(bp);
 }
 
-static int block_til_ready(struct tty_struct *tty, struct file *filp,
-			   struct riscom_port *port)
+static int carrier_raised(struct tty_port *port)
 {
-	DECLARE_WAITQUEUE(wait, current);
-	struct riscom_board *bp = port_Board(port);
-	int    retval;
-	int    do_clocal = 0;
-	int    CD;
+	struct riscom_port *p = container_of(port, struct riscom_port, port);
+	struct riscom_board *bp = port_Board(p);
 	unsigned long flags;
-
-	/*
-	 * If the device is in the middle of being closed, then block
-	 * until it's done, and then try again.
-	 */
-	if (tty_hung_up_p(filp) || port->port.flags & ASYNC_CLOSING) {
-		interruptible_sleep_on(&port->port.close_wait);
-		if (port->port.flags & ASYNC_HUP_NOTIFY)
-			return -EAGAIN;
-		else
-			return -ERESTARTSYS;
-	}
-
-	/*
-	 * If non-blocking mode is set, or the port is not enabled,
-	 * then make the check up front and then exit.
-	 */
-	if ((filp->f_flags & O_NONBLOCK) ||
-	    (tty->flags & (1 << TTY_IO_ERROR))) {
-		port->port.flags |= ASYNC_NORMAL_ACTIVE;
-		return 0;
-	}
-
-	if (C_CLOCAL(tty))
-		do_clocal = 1;
-
-	/*
-	 * Block waiting for the carrier detect and the line to become
-	 * free (i.e., not in use by the callout).  While we are in
-	 * this loop, info->count is dropped by one, so that
-	 * rs_close() knows when to free things.  We restore it upon
-	 * exit, either normal or abnormal.
-	 */
-	retval = 0;
-	add_wait_queue(&port->port.open_wait, &wait);
-
+	int CD;
+
 	spin_lock_irqsave(&riscom_lock, flags);
-
-	if (!tty_hung_up_p(filp))
-		port->port.count--;
-
+	rc_out(bp, CD180_CAR, port_No(p));
+	CD = rc_in(bp, CD180_MSVR) & MSVR_CD;
+	rc_out(bp, CD180_MSVR, MSVR_RTS);
+	bp->DTR &= ~(1u << port_No(p));
+	rc_out(bp, RC_DTR, bp->DTR);
 	spin_unlock_irqrestore(&riscom_lock, flags);
-
-	port->port.blocked_open++;
-	while (1) {
-		spin_lock_irqsave(&riscom_lock, flags);
-
-		rc_out(bp, CD180_CAR, port_No(port));
-		CD = rc_in(bp, CD180_MSVR) & MSVR_CD;
-		rc_out(bp, CD180_MSVR, MSVR_RTS);
-		bp->DTR &= ~(1u << port_No(port));
-		rc_out(bp, RC_DTR, bp->DTR);
-
-		spin_unlock_irqrestore(&riscom_lock, flags);
-
-		set_current_state(TASK_INTERRUPTIBLE);
-		if (tty_hung_up_p(filp) ||
-		    !(port->port.flags & ASYNC_INITIALIZED)) {
-			if (port->port.flags & ASYNC_HUP_NOTIFY)
-				retval = -EAGAIN;
-			else
-				retval = -ERESTARTSYS;
-			break;
-		}
-		if (!(port->port.flags & ASYNC_CLOSING) &&
-		    (do_clocal || CD))
-			break;
-		if (signal_pending(current)) {
-			retval = -ERESTARTSYS;
-			break;
-		}
-		schedule();
-	}
-	__set_current_state(TASK_RUNNING);
-	remove_wait_queue(&port->port.open_wait, &wait);
-	if (!tty_hung_up_p(filp))
-		port->port.count++;
-	port->port.blocked_open--;
-	if (retval)
-		return retval;
-
-	port->port.flags |= ASYNC_NORMAL_ACTIVE;
-	return 0;
+	return CD;
 }
 
 static int rc_open(struct tty_struct *tty, struct file *filp)
@@ -977,13 +900,13 @@
 
 	error = rc_setup_port(bp, port);
 	if (error == 0)
-		error = block_til_ready(tty, filp, port);
+		error = tty_port_block_til_ready(&port->port, tty, filp);
 	return error;
 }
 
 static void rc_flush_buffer(struct tty_struct *tty)
 {
-	struct riscom_port *port = (struct riscom_port *)tty->driver_data;
+	struct riscom_port *port = tty->driver_data;
 	unsigned long flags;
 
 	if (rc_paranoia_check(port, tty->name, "rc_flush_buffer"))
@@ -998,7 +921,7 @@
 
 static void rc_close(struct tty_struct *tty, struct file *filp)
 {
-	struct riscom_port *port = (struct riscom_port *) tty->driver_data;
+	struct riscom_port *port = tty->driver_data;
 	struct riscom_board *bp;
 	unsigned long flags;
 	unsigned long timeout;
@@ -1006,40 +929,19 @@
 	if (!port || rc_paranoia_check(port, tty->name, "close"))
 		return;
 
-	spin_lock_irqsave(&riscom_lock, flags);
-
-	if (tty_hung_up_p(filp))
-		goto out;
-
 	bp = port_Board(port);
-	if ((tty->count == 1) && (port->port.count != 1))  {
-		printk(KERN_INFO "rc%d: rc_close: bad port count;"
-		       " tty->count is 1, port count is %d\n",
-		       board_No(bp), port->port.count);
-		port->port.count = 1;
-	}
-	if (--port->port.count < 0)  {
-		printk(KERN_INFO "rc%d: rc_close: bad port count "
-				 "for tty%d: %d\n",
-		       board_No(bp), port_No(port), port->port.count);
-		port->port.count = 0;
-	}
-	if (port->port.count)
-		goto out;
-	port->port.flags |= ASYNC_CLOSING;
-	/*
-	 * Now we wait for the transmit buffer to clear; and we notify
-	 * the line discipline to only process XON/XOFF characters.
-	 */
-	tty->closing = 1;
-	if (port->port.closing_wait != ASYNC_CLOSING_WAIT_NONE)
-		tty_wait_until_sent(tty, port->port.closing_wait);
+
+	if (tty_port_close_start(&port->port, tty, filp) == 0)
+		return;
+
 	/*
 	 * At this point we stop accepting input.  To do this, we
 	 * disable the receive line status interrupts, and tell the
 	 * interrupt driver to stop checking the data ready bit in the
 	 * line status register.
 	 */
+
+	spin_lock_irqsave(&riscom_lock, flags);
 	port->IER &= ~IER_RXD;
 	if (port->port.flags & ASYNC_INITIALIZED) {
 		port->IER &= ~IER_TXRDY;
@@ -1053,33 +955,24 @@
 		 */
 		timeout = jiffies + HZ;
 		while (port->IER & IER_TXEMPTY) {
+			spin_unlock_irqrestore(&riscom_lock, flags);
 			msleep_interruptible(jiffies_to_msecs(port->timeout));
+			spin_lock_irqsave(&riscom_lock, flags);
 			if (time_after(jiffies, timeout))
 				break;
 		}
 	}
 	rc_shutdown_port(tty, bp, port);
 	rc_flush_buffer(tty);
-	tty_ldisc_flush(tty);
-
-	tty->closing = 0;
-	port->port.tty = NULL;
-	if (port->port.blocked_open) {
-		if (port->port.close_delay)
-			msleep_interruptible(jiffies_to_msecs(port->port.close_delay));
-		wake_up_interruptible(&port->port.open_wait);
-	}
-	port->port.flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING);
-	wake_up_interruptible(&port->port.close_wait);
-
-out:
 	spin_unlock_irqrestore(&riscom_lock, flags);
+
+	tty_port_close_end(&port->port, tty);
 }
 
 static int rc_write(struct tty_struct *tty,
 		    const unsigned char *buf, int count)
 {
-	struct riscom_port *port = (struct riscom_port *)tty->driver_data;
+	struct riscom_port *port = tty->driver_data;
 	struct riscom_board *bp;
 	int c, total = 0;
 	unsigned long flags;
@@ -1122,7 +1015,7 @@
 
 static int rc_put_char(struct tty_struct *tty, unsigned char ch)
 {
-	struct riscom_port *port = (struct riscom_port *)tty->driver_data;
+	struct riscom_port *port = tty->driver_data;
 	unsigned long flags;
 	int ret = 0;
 
@@ -1146,7 +1039,7 @@
 
 static void rc_flush_chars(struct tty_struct *tty)
 {
-	struct riscom_port *port = (struct riscom_port *)tty->driver_data;
+	struct riscom_port *port = tty->driver_data;
 	unsigned long flags;
 
 	if (rc_paranoia_check(port, tty->name, "rc_flush_chars"))
@@ -1166,7 +1059,7 @@
 
 static int rc_write_room(struct tty_struct *tty)
 {
-	struct riscom_port *port = (struct riscom_port *)tty->driver_data;
+	struct riscom_port *port = tty->driver_data;
 	int	ret;
 
 	if (rc_paranoia_check(port, tty->name, "rc_write_room"))
@@ -1180,7 +1073,7 @@
 
 static int rc_chars_in_buffer(struct tty_struct *tty)
 {
-	struct riscom_port *port = (struct riscom_port *)tty->driver_data;
+	struct riscom_port *port = tty->driver_data;
 
 	if (rc_paranoia_check(port, tty->name, "rc_chars_in_buffer"))
 		return 0;
@@ -1190,7 +1083,7 @@
 
 static int rc_tiocmget(struct tty_struct *tty, struct file *file)
 {
-	struct riscom_port *port = (struct riscom_port *)tty->driver_data;
+	struct riscom_port *port = tty->driver_data;
 	struct riscom_board *bp;
 	unsigned char status;
 	unsigned int result;
@@ -1220,7 +1113,7 @@
 static int rc_tiocmset(struct tty_struct *tty, struct file *file,
 		       unsigned int set, unsigned int clear)
 {
-	struct riscom_port *port = (struct riscom_port *)tty->driver_data;
+	struct riscom_port *port = tty->driver_data;
 	unsigned long flags;
 	struct riscom_board *bp;
 
@@ -1252,7 +1145,7 @@
 
 static int rc_send_break(struct tty_struct *tty, int length)
 {
-	struct riscom_port *port = (struct riscom_port *)tty->driver_data;
+	struct riscom_port *port = tty->driver_data;
 	struct riscom_board *bp = port_Board(port);
 	unsigned long flags;
 
@@ -1345,7 +1238,7 @@
 static int rc_ioctl(struct tty_struct *tty, struct file *filp,
 		    unsigned int cmd, unsigned long arg)
 {
-	struct riscom_port *port = (struct riscom_port *)tty->driver_data;
+	struct riscom_port *port = tty->driver_data;
 	void __user *argp = (void __user *)arg;
 	int retval;
 
@@ -1371,7 +1264,7 @@
 
 static void rc_throttle(struct tty_struct *tty)
 {
-	struct riscom_port *port = (struct riscom_port *)tty->driver_data;
+	struct riscom_port *port = tty->driver_data;
 	struct riscom_board *bp;
 	unsigned long flags;
 
@@ -1393,7 +1286,7 @@
 
 static void rc_unthrottle(struct tty_struct *tty)
 {
-	struct riscom_port *port = (struct riscom_port *)tty->driver_data;
+	struct riscom_port *port = tty->driver_data;
 	struct riscom_board *bp;
 	unsigned long flags;
 
@@ -1415,7 +1308,7 @@
 
 static void rc_stop(struct tty_struct *tty)
 {
-	struct riscom_port *port = (struct riscom_port *)tty->driver_data;
+	struct riscom_port *port = tty->driver_data;
 	struct riscom_board *bp;
 	unsigned long flags;
 
@@ -1433,7 +1326,7 @@
 
 static void rc_start(struct tty_struct *tty)
 {
-	struct riscom_port *port = (struct riscom_port *)tty->driver_data;
+	struct riscom_port *port = tty->driver_data;
 	struct riscom_board *bp;
 	unsigned long flags;
 
@@ -1454,8 +1347,9 @@
 
 static void rc_hangup(struct tty_struct *tty)
 {
-	struct riscom_port *port = (struct riscom_port *)tty->driver_data;
+	struct riscom_port *port = tty->driver_data;
 	struct riscom_board *bp;
+	unsigned long flags;
 
 	if (rc_paranoia_check(port, tty->name, "rc_hangup"))
 		return;
@@ -1463,16 +1357,18 @@
 	bp = port_Board(port);
 
 	rc_shutdown_port(tty, bp, port);
+	spin_lock_irqsave(&port->port.lock, flags);
 	port->port.count = 0;
 	port->port.flags &= ~ASYNC_NORMAL_ACTIVE;
 	port->port.tty = NULL;
 	wake_up_interruptible(&port->port.open_wait);
+	spin_unlock_irqrestore(&port->port.lock, flags);
 }
 
 static void rc_set_termios(struct tty_struct *tty,
 					struct ktermios *old_termios)
 {
-	struct riscom_port *port = (struct riscom_port *)tty->driver_data;
+	struct riscom_port *port = tty->driver_data;
 	unsigned long flags;
 
 	if (rc_paranoia_check(port, tty->name, "rc_set_termios"))
@@ -1510,6 +1406,11 @@
 	.break_ctl = rc_send_break,
 };
 
+static const struct tty_port_operations riscom_port_ops = {
+	.carrier_raised = carrier_raised,
+};
+
+
 static int __init rc_init_drivers(void)
 {
 	int error;
@@ -1541,6 +1442,7 @@
 	memset(rc_port, 0, sizeof(rc_port));
 	for (i = 0; i < RC_NPORT * RC_NBOARD; i++)  {
 		tty_port_init(&rc_port[i].port);
+		rc_port[i].port.ops = &riscom_port_ops;
 		rc_port[i].magic = RISCOM8_MAGIC;
 	}
 	return 0;
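
The rc_close() change above also fixes the drain loop so the sleep happens with riscom_lock released: msleep_interruptible() may sleep, which is never legal while holding a spinlock with interrupts disabled. A sketch of the resulting lock/sleep/relock shape, assuming a hypothetical per-port structure with a spinlock named lock (mydrv_* names are illustrative):

static void mydrv_wait_tx_empty(struct mydrv_port *p)
{
	unsigned long flags, timeout = jiffies + HZ;

	spin_lock_irqsave(&p->lock, flags);
	while (mydrv_tx_pending(p)) {		/* hypothetical hardware check */
		spin_unlock_irqrestore(&p->lock, flags);
		msleep_interruptible(10);	/* sleeping: the lock must not be held */
		spin_lock_irqsave(&p->lock, flags);
		if (time_after(jiffies, timeout))
			break;
	}
	spin_unlock_irqrestore(&p->lock, flags);
}
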
diff --git a/drivers/char/rocket.c b/drivers/char/rocket.c
index 584d791..f59fc5c 100644
--- a/drivers/char/rocket.c
+++ b/drivers/char/rocket.c
@@ -135,6 +135,7 @@
 static int is_PCI[NUM_BOARDS];
 static rocketModel_t rocketModel[NUM_BOARDS];
 static int max_board;
+static const struct tty_port_operations rocket_port_ops;
 
 /*
  * The following arrays define the interrupt bits corresponding to each AIOP.
@@ -435,15 +436,15 @@
 #endif
 	if (!info)
 		return;
-	if (!info->port.tty) {
-		printk(KERN_WARNING "rp: WARNING %s called with "
-				"info->port.tty==NULL\n", __func__);
+	tty = tty_port_tty_get(&info->port);
+
+	if (tty == NULL) {
+		printk(KERN_WARNING "rp: WARNING %s called with tty==NULL\n", __func__);
 		clear_bit((info->aiop * 8) + info->chan, (void *) &xmit_flags[info->board]);
 		return;
 	}
 
 	spin_lock_irqsave(&info->slock, flags);
-	tty = info->port.tty;
 	info->xmit_fifo_room = TXFIFO_SIZE - sGetTxCnt(cp);
 
 	/*  Loop sending data to FIFO until done or FIFO full */
@@ -477,6 +478,7 @@
 	}
 
 	spin_unlock_irqrestore(&info->slock, flags);
+	tty_kref_put(tty);
 
 #ifdef ROCKET_DEBUG_INTR
 	printk(KERN_DEBUG "(%d,%d,%d,%d)...\n", info->xmit_cnt, info->xmit_head,
@@ -498,18 +500,18 @@
 	if (!info)
 		return;
 
-	if ((info->flags & ROCKET_INITIALIZED) == 0) {
+	if ((info->port.flags & ASYNC_INITIALIZED) == 0) {
 		printk(KERN_WARNING "rp: WARNING: rp_handle_port called with "
 				"info->flags & NOT_INIT\n");
 		return;
 	}
-	if (!info->port.tty) {
+	tty = tty_port_tty_get(&info->port);
+	if (!tty) {
 		printk(KERN_WARNING "rp: WARNING: rp_handle_port called with "
-				"info->port.tty==NULL\n");
+				"tty==NULL\n");
 		return;
 	}
 	cp = &info->channel;
-	tty = info->port.tty;
 
 	IntMask = sGetChanIntID(cp) & info->intmask;
 #ifdef ROCKET_DEBUG_INTR
@@ -541,6 +543,7 @@
 		printk(KERN_INFO "DSR change...\n");
 	}
 #endif
+	tty_kref_put(tty);
 }
 
 /*
@@ -649,9 +652,8 @@
 	info->board = board;
 	info->aiop = aiop;
 	info->chan = chan;
-	info->port.closing_wait = 3000;
-	info->port.close_delay = 50;
-	init_waitqueue_head(&info->port.open_wait);
+	tty_port_init(&info->port);
+	info->port.ops = &rocket_port_ops;
 	init_completion(&info->close_wait);
 	info->flags &= ~ROCKET_MODE_MASK;
 	switch (pc104[board][line]) {
@@ -710,7 +712,7 @@
  *  Configures a rocketport port according to its termio settings.  Called from 
  *  user mode into the driver (exception handler).  *info CD manipulation is spinlock protected.
  */
-static void configure_r_port(struct r_port *info,
+static void configure_r_port(struct tty_struct *tty, struct r_port *info,
 			     struct ktermios *old_termios)
 {
 	unsigned cflag;
@@ -718,7 +720,7 @@
 	unsigned rocketMode;
 	int bits, baud, divisor;
 	CHANNEL_t *cp;
-	struct ktermios *t = info->port.tty->termios;
+	struct ktermios *t = tty->termios;
 
 	cp = &info->channel;
 	cflag = t->c_cflag;
@@ -751,7 +753,7 @@
 	}
 
 	/* baud rate */
-	baud = tty_get_baud_rate(info->port.tty);
+	baud = tty_get_baud_rate(tty);
 	if (!baud)
 		baud = 9600;
 	divisor = ((rp_baud_base[info->board] + (baud >> 1)) / baud) - 1;
@@ -769,7 +771,7 @@
 	sSetBaud(cp, divisor);
 
 	/* FIXME: Should really back compute a baud rate from the divisor */
-	tty_encode_baud_rate(info->port.tty, baud, baud);
+	tty_encode_baud_rate(tty, baud, baud);
 
 	if (cflag & CRTSCTS) {
 		info->intmask |= DELTA_CTS;
@@ -794,15 +796,15 @@
 	 * Handle software flow control in the board
 	 */
 #ifdef ROCKET_SOFT_FLOW
-	if (I_IXON(info->port.tty)) {
+	if (I_IXON(tty)) {
 		sEnTxSoftFlowCtl(cp);
-		if (I_IXANY(info->port.tty)) {
+		if (I_IXANY(tty)) {
 			sEnIXANY(cp);
 		} else {
 			sDisIXANY(cp);
 		}
-		sSetTxXONChar(cp, START_CHAR(info->port.tty));
-		sSetTxXOFFChar(cp, STOP_CHAR(info->port.tty));
+		sSetTxXONChar(cp, START_CHAR(tty));
+		sSetTxXOFFChar(cp, STOP_CHAR(tty));
 	} else {
 		sDisTxSoftFlowCtl(cp);
 		sDisIXANY(cp);
@@ -814,24 +816,24 @@
 	 * Set up ignore/read mask words
 	 */
 	info->read_status_mask = STMRCVROVRH | 0xFF;
-	if (I_INPCK(info->port.tty))
+	if (I_INPCK(tty))
 		info->read_status_mask |= STMFRAMEH | STMPARITYH;
-	if (I_BRKINT(info->port.tty) || I_PARMRK(info->port.tty))
+	if (I_BRKINT(tty) || I_PARMRK(tty))
 		info->read_status_mask |= STMBREAKH;
 
 	/*
 	 * Characters to ignore
 	 */
 	info->ignore_status_mask = 0;
-	if (I_IGNPAR(info->port.tty))
+	if (I_IGNPAR(tty))
 		info->ignore_status_mask |= STMFRAMEH | STMPARITYH;
-	if (I_IGNBRK(info->port.tty)) {
+	if (I_IGNBRK(tty)) {
 		info->ignore_status_mask |= STMBREAKH;
 		/*
 		 * If we're ignoring parity and break indicators,
 		 * ignore overruns too.  (For real raw support).
 		 */
-		if (I_IGNPAR(info->port.tty))
+		if (I_IGNPAR(tty))
 			info->ignore_status_mask |= STMRCVROVRH;
 	}
 
@@ -864,106 +866,17 @@
 	}
 }
 
-/*  info->port.count is considered critical, protected by spinlocks.  */
-static int block_til_ready(struct tty_struct *tty, struct file *filp,
-			   struct r_port *info)
+static int carrier_raised(struct tty_port *port)
 {
-	DECLARE_WAITQUEUE(wait, current);
-	int retval;
-	int do_clocal = 0, extra_count = 0;
-	unsigned long flags;
+	struct r_port *info = container_of(port, struct r_port, port);
+	return (sGetChanStatusLo(&info->channel) & CD_ACT) ? 1 : 0;
+}
 
-	/*
-	 * If the device is in the middle of being closed, then block
-	 * until it's done, and then try again.
-	 */
-	if (tty_hung_up_p(filp))
-		return ((info->flags & ROCKET_HUP_NOTIFY) ? -EAGAIN : -ERESTARTSYS);
-	if (info->flags & ROCKET_CLOSING) {
-		if (wait_for_completion_interruptible(&info->close_wait))
-			return -ERESTARTSYS;
-		return ((info->flags & ROCKET_HUP_NOTIFY) ? -EAGAIN : -ERESTARTSYS);
-	}
-
-	/*
-	 * If non-blocking mode is set, or the port is not enabled,
-	 * then make the check up front and then exit.
-	 */
-	if ((filp->f_flags & O_NONBLOCK) || (tty->flags & (1 << TTY_IO_ERROR))) {
-		info->flags |= ROCKET_NORMAL_ACTIVE;
-		return 0;
-	}
-	if (tty->termios->c_cflag & CLOCAL)
-		do_clocal = 1;
-
-	/*
-	 * Block waiting for the carrier detect and the line to become free.  While we are in
-	 * this loop, info->port.count is dropped by one, so that rp_close() knows when to free things.
-         * We restore it upon exit, either normal or abnormal.
-	 */
-	retval = 0;
-	add_wait_queue(&info->port.open_wait, &wait);
-#ifdef ROCKET_DEBUG_OPEN
-	printk(KERN_INFO "block_til_ready before block: ttyR%d, count = %d\n", info->line, info->port.count);
-#endif
-	spin_lock_irqsave(&info->slock, flags);
-
-#ifdef ROCKET_DISABLE_SIMUSAGE
-	info->flags |= ROCKET_NORMAL_ACTIVE;
-#else
-	if (!tty_hung_up_p(filp)) {
-		extra_count = 1;
-		info->port.count--;
-	}
-#endif
-	info->port.blocked_open++;
-
-	spin_unlock_irqrestore(&info->slock, flags);
-
-	while (1) {
-		if (tty->termios->c_cflag & CBAUD) {
-			sSetDTR(&info->channel);
-			sSetRTS(&info->channel);
-		}
-		set_current_state(TASK_INTERRUPTIBLE);
-		if (tty_hung_up_p(filp) || !(info->flags & ROCKET_INITIALIZED)) {
-			if (info->flags & ROCKET_HUP_NOTIFY)
-				retval = -EAGAIN;
-			else
-				retval = -ERESTARTSYS;
-			break;
-		}
-		if (!(info->flags & ROCKET_CLOSING) && (do_clocal || (sGetChanStatusLo(&info->channel) & CD_ACT)))
-			break;
-		if (signal_pending(current)) {
-			retval = -ERESTARTSYS;
-			break;
-		}
-#ifdef ROCKET_DEBUG_OPEN
-		printk(KERN_INFO "block_til_ready blocking: ttyR%d, count = %d, flags=0x%0x\n",
-		     info->line, info->port.count, info->flags);
-#endif
-		schedule();	/*  Don't hold spinlock here, will hang PC */
-	}
-	__set_current_state(TASK_RUNNING);
-	remove_wait_queue(&info->port.open_wait, &wait);
-
-	spin_lock_irqsave(&info->slock, flags);
-
-	if (extra_count)
-		info->port.count++;
-	info->port.blocked_open--;
-
-	spin_unlock_irqrestore(&info->slock, flags);
-
-#ifdef ROCKET_DEBUG_OPEN
-	printk(KERN_INFO "block_til_ready after blocking: ttyR%d, count = %d\n",
-	       info->line, info->port.count);
-#endif
-	if (retval)
-		return retval;
-	info->flags |= ROCKET_NORMAL_ACTIVE;
-	return 0;
+static void raise_dtr_rts(struct tty_port *port)
+{
+	struct r_port *info = container_of(port, struct r_port, port);
+	sSetDTR(&info->channel);
+	sSetRTS(&info->channel);
 }
 
 /*
@@ -973,24 +886,26 @@
 static int rp_open(struct tty_struct *tty, struct file *filp)
 {
 	struct r_port *info;
+	struct tty_port *port;
 	int line = 0, retval;
 	CHANNEL_t *cp;
 	unsigned long page;
 
 	line = tty->index;
-	if ((line < 0) || (line >= MAX_RP_PORTS) || ((info = rp_table[line]) == NULL))
+	if (line < 0 || line >= MAX_RP_PORTS || ((info = rp_table[line]) == NULL))
 		return -ENXIO;
-
+	port = &info->port;
+
 	page = __get_free_page(GFP_KERNEL);
 	if (!page)
 		return -ENOMEM;
 
-	if (info->flags & ROCKET_CLOSING) {
+	if (port->flags & ASYNC_CLOSING) {
 		retval = wait_for_completion_interruptible(&info->close_wait);
 		free_page(page);
 		if (retval)
 			return retval;
-		return ((info->flags & ROCKET_HUP_NOTIFY) ? -EAGAIN : -ERESTARTSYS);
+		return ((port->flags & ASYNC_HUP_NOTIFY) ? -EAGAIN : -ERESTARTSYS);
 	}
 
 	/*
@@ -1002,9 +917,9 @@
 		info->xmit_buf = (unsigned char *) page;
 
 	tty->driver_data = info;
-	info->port.tty = tty;
+	tty_port_tty_set(port, tty);
 
-	if (info->port.count++ == 0) {
+	if (port->count++ == 0) {
 		atomic_inc(&rp_num_ports_open);
 
 #ifdef ROCKET_DEBUG_OPEN
@@ -1019,7 +934,7 @@
 	/*
 	 * Info->count is now 1; so it's safe to sleep now.
 	 */
-	if ((info->flags & ROCKET_INITIALIZED) == 0) {
+	if (!test_bit(ASYNC_INITIALIZED, &port->flags)) {
 		cp = &info->channel;
 		sSetRxTrigger(cp, TRIG_1);
 		if (sGetChanStatus(cp) & CD_ACT)
@@ -1043,21 +958,21 @@
 		sEnRxFIFO(cp);
 		sEnTransmit(cp);
 
-		info->flags |= ROCKET_INITIALIZED;
+		set_bit(ASYNC_INITIALIZED, &info->port.flags);
 
 		/*
 		 * Set up the tty->alt_speed kludge
 		 */
 		if ((info->flags & ROCKET_SPD_MASK) == ROCKET_SPD_HI)
-			info->port.tty->alt_speed = 57600;
+			tty->alt_speed = 57600;
 		if ((info->flags & ROCKET_SPD_MASK) == ROCKET_SPD_VHI)
-			info->port.tty->alt_speed = 115200;
+			tty->alt_speed = 115200;
 		if ((info->flags & ROCKET_SPD_MASK) == ROCKET_SPD_SHI)
-			info->port.tty->alt_speed = 230400;
+			tty->alt_speed = 230400;
 		if ((info->flags & ROCKET_SPD_MASK) == ROCKET_SPD_WARP)
-			info->port.tty->alt_speed = 460800;
+			tty->alt_speed = 460800;
 
-		configure_r_port(info, NULL);
+		configure_r_port(tty, info, NULL);
 		if (tty->termios->c_cflag & CBAUD) {
 			sSetDTR(cp);
 			sSetRTS(cp);
@@ -1066,7 +981,7 @@
 	/*  Starts (or resets) the maint polling loop */
 	mod_timer(&rocket_timer, jiffies + POLL_PERIOD);
 
-	retval = block_til_ready(tty, filp, info);
+	retval = tty_port_block_til_ready(port, tty, filp);
 	if (retval) {
 #ifdef ROCKET_DEBUG_OPEN
 		printk(KERN_INFO "rp_open returning after block_til_ready with %d\n", retval);
@@ -1081,8 +996,8 @@
  */
 static void rp_close(struct tty_struct *tty, struct file *filp)
 {
-	struct r_port *info = (struct r_port *) tty->driver_data;
-	unsigned long flags;
+	struct r_port *info = tty->driver_data;
+	struct tty_port *port = &info->port;
 	int timeout;
 	CHANNEL_t *cp;
 	
@@ -1093,53 +1008,10 @@
 	printk(KERN_INFO "rp_close ttyR%d, count = %d\n", info->line, info->port.count);
 #endif
 
-	if (tty_hung_up_p(filp))
+	if (tty_port_close_start(port, tty, filp) == 0)
 		return;
-	spin_lock_irqsave(&info->slock, flags);
-
-	if ((tty->count == 1) && (info->port.count != 1)) {
-		/*
-		 * Uh, oh.  tty->count is 1, which means that the tty
-		 * structure will be freed.  Info->count should always
-		 * be one in these conditions.  If it's greater than
-		 * one, we've got real problems, since it means the
-		 * serial port won't be shutdown.
-		 */
-		printk(KERN_WARNING "rp_close: bad serial port count; "
-			"tty->count is 1, info->port.count is %d\n", info->port.count);
-		info->port.count = 1;
-	}
-	if (--info->port.count < 0) {
-		printk(KERN_WARNING "rp_close: bad serial port count for "
-				"ttyR%d: %d\n", info->line, info->port.count);
-		info->port.count = 0;
-	}
-	if (info->port.count) {
-		spin_unlock_irqrestore(&info->slock, flags);
-		return;
-	}
-	info->flags |= ROCKET_CLOSING;
-	spin_unlock_irqrestore(&info->slock, flags);
 
 	cp = &info->channel;
-
-	/*
-	 * Notify the line discpline to only process XON/XOFF characters
-	 */
-	tty->closing = 1;
-
-	/*
-	 * If transmission was throttled by the application request,
-	 * just flush the xmit buffer.
-	 */
-	if (tty->flow_stopped)
-		rp_flush_buffer(tty);
-
-	/*
-	 * Wait for the transmit buffer to clear
-	 */
-	if (info->port.closing_wait != ROCKET_CLOSING_WAIT_NONE)
-		tty_wait_until_sent(tty, info->port.closing_wait);
 	/*
 	 * Before we drop DTR, make sure the UART transmitter
 	 * has completely drained; this is especially
@@ -1168,19 +1040,24 @@
 
 	clear_bit((info->aiop * 8) + info->chan, (void *) &xmit_flags[info->board]);
 
-	if (info->port.blocked_open) {
-		if (info->port.close_delay) {
-			msleep_interruptible(jiffies_to_msecs(info->port.close_delay));
+	/* We can't yet use tty_port_close_end as the buffer handling in this
+	   driver is a bit different to the usual */
+
+	if (port->blocked_open) {
+		if (port->close_delay) {
+			msleep_interruptible(jiffies_to_msecs(port->close_delay));
 		}
-		wake_up_interruptible(&info->port.open_wait);
+		wake_up_interruptible(&port->open_wait);
 	} else {
 		if (info->xmit_buf) {
 			free_page((unsigned long) info->xmit_buf);
 			info->xmit_buf = NULL;
 		}
 	}
-	info->flags &= ~(ROCKET_INITIALIZED | ROCKET_CLOSING | ROCKET_NORMAL_ACTIVE);
+	info->port.flags &= ~(ASYNC_INITIALIZED | ASYNC_CLOSING | ASYNC_NORMAL_ACTIVE);
 	tty->closing = 0;
+	tty_port_tty_set(port, NULL);
+	wake_up_interruptible(&port->close_wait);
 	complete_all(&info->close_wait);
 	atomic_dec(&rp_num_ports_open);
 
@@ -1195,7 +1072,7 @@
 static void rp_set_termios(struct tty_struct *tty,
 			   struct ktermios *old_termios)
 {
-	struct r_port *info = (struct r_port *) tty->driver_data;
+	struct r_port *info = tty->driver_data;
 	CHANNEL_t *cp;
 	unsigned cflag;
 
@@ -1213,7 +1090,7 @@
 	/* Or CMSPAR */
 	tty->termios->c_cflag &= ~CMSPAR;
 
-	configure_r_port(info, old_termios);
+	configure_r_port(tty, info, old_termios);
 
 	cp = &info->channel;
 
@@ -1238,7 +1115,7 @@
 
 static int rp_break(struct tty_struct *tty, int break_state)
 {
-	struct r_port *info = (struct r_port *) tty->driver_data;
+	struct r_port *info = tty->driver_data;
 	unsigned long flags;
 
 	if (rocket_paranoia_check(info, "rp_break"))
@@ -1284,7 +1161,7 @@
  */
 static int rp_tiocmget(struct tty_struct *tty, struct file *file)
 {
-	struct r_port *info = (struct r_port *)tty->driver_data;
+	struct r_port *info = tty->driver_data;
 	unsigned int control, result, ChanStatus;
 
 	ChanStatus = sGetChanStatusLo(&info->channel);
@@ -1305,7 +1182,7 @@
 static int rp_tiocmset(struct tty_struct *tty, struct file *file,
 		    unsigned int set, unsigned int clear)
 {
-	struct r_port *info = (struct r_port *)tty->driver_data;
+	struct r_port *info = tty->driver_data;
 
 	if (set & TIOCM_RTS)
 		info->channel.TxControl[3] |= SET_RTS;
@@ -1338,7 +1215,8 @@
 	return 0;
 }
 
-static int set_config(struct r_port *info, struct rocket_config __user *new_info)
+static int set_config(struct tty_struct *tty, struct r_port *info,
+					struct rocket_config __user *new_info)
 {
 	struct rocket_config new_serial;
 
@@ -1350,7 +1228,7 @@
 		if ((new_serial.flags & ~ROCKET_USR_MASK) != (info->flags & ~ROCKET_USR_MASK))
 			return -EPERM;
 		info->flags = ((info->flags & ~ROCKET_USR_MASK) | (new_serial.flags & ROCKET_USR_MASK));
-		configure_r_port(info, NULL);
+		configure_r_port(tty, info, NULL);
 		return 0;
 	}
 
@@ -1359,15 +1237,15 @@
 	info->port.closing_wait = new_serial.closing_wait;
 
 	if ((info->flags & ROCKET_SPD_MASK) == ROCKET_SPD_HI)
-		info->port.tty->alt_speed = 57600;
+		tty->alt_speed = 57600;
 	if ((info->flags & ROCKET_SPD_MASK) == ROCKET_SPD_VHI)
-		info->port.tty->alt_speed = 115200;
+		tty->alt_speed = 115200;
 	if ((info->flags & ROCKET_SPD_MASK) == ROCKET_SPD_SHI)
-		info->port.tty->alt_speed = 230400;
+		tty->alt_speed = 230400;
 	if ((info->flags & ROCKET_SPD_MASK) == ROCKET_SPD_WARP)
-		info->port.tty->alt_speed = 460800;
+		tty->alt_speed = 460800;
 
-	configure_r_port(info, NULL);
+	configure_r_port(tty, info, NULL);
 	return 0;
 }
 
@@ -1434,7 +1312,7 @@
 static int rp_ioctl(struct tty_struct *tty, struct file *file,
 		    unsigned int cmd, unsigned long arg)
 {
-	struct r_port *info = (struct r_port *) tty->driver_data;
+	struct r_port *info = tty->driver_data;
 	void __user *argp = (void __user *)arg;
 	int ret = 0;
 
@@ -1452,7 +1330,7 @@
 		ret = get_config(info, argp);
 		break;
 	case RCKP_SET_CONFIG:
-		ret = set_config(info, argp);
+		ret = set_config(tty, info, argp);
 		break;
 	case RCKP_GET_PORTS:
 		ret = get_ports(info, argp);
@@ -1472,7 +1350,7 @@
 
 static void rp_send_xchar(struct tty_struct *tty, char ch)
 {
-	struct r_port *info = (struct r_port *) tty->driver_data;
+	struct r_port *info = tty->driver_data;
 	CHANNEL_t *cp;
 
 	if (rocket_paranoia_check(info, "rp_send_xchar"))
@@ -1487,7 +1365,7 @@
 
 static void rp_throttle(struct tty_struct *tty)
 {
-	struct r_port *info = (struct r_port *) tty->driver_data;
+	struct r_port *info = tty->driver_data;
 	CHANNEL_t *cp;
 
 #ifdef ROCKET_DEBUG_THROTTLE
@@ -1507,7 +1385,7 @@
 
 static void rp_unthrottle(struct tty_struct *tty)
 {
-	struct r_port *info = (struct r_port *) tty->driver_data;
+	struct r_port *info = tty->driver_data;
 	CHANNEL_t *cp;
 #ifdef ROCKET_DEBUG_THROTTLE
 	printk(KERN_INFO "unthrottle %s: %d....\n", tty->name,
@@ -1534,7 +1412,7 @@
  */
 static void rp_stop(struct tty_struct *tty)
 {
-	struct r_port *info = (struct r_port *) tty->driver_data;
+	struct r_port *info = tty->driver_data;
 
 #ifdef ROCKET_DEBUG_FLOW
 	printk(KERN_INFO "stop %s: %d %d....\n", tty->name,
@@ -1550,7 +1428,7 @@
 
 static void rp_start(struct tty_struct *tty)
 {
-	struct r_port *info = (struct r_port *) tty->driver_data;
+	struct r_port *info = tty->driver_data;
 
 #ifdef ROCKET_DEBUG_FLOW
 	printk(KERN_INFO "start %s: %d %d....\n", tty->name,
@@ -1570,7 +1448,7 @@
  */
 static void rp_wait_until_sent(struct tty_struct *tty, int timeout)
 {
-	struct r_port *info = (struct r_port *) tty->driver_data;
+	struct r_port *info = tty->driver_data;
 	CHANNEL_t *cp;
 	unsigned long orig_jiffies;
 	int check_time, exit_time;
@@ -1627,7 +1505,7 @@
 static void rp_hangup(struct tty_struct *tty)
 {
 	CHANNEL_t *cp;
-	struct r_port *info = (struct r_port *) tty->driver_data;
+	struct r_port *info = tty->driver_data;
 
 	if (rocket_paranoia_check(info, "rp_hangup"))
 		return;
@@ -1636,15 +1514,13 @@
 	printk(KERN_INFO "rp_hangup of ttyR%d...\n", info->line);
 #endif
 	rp_flush_buffer(tty);
-	if (info->flags & ROCKET_CLOSING)
+	if (info->port.flags & ASYNC_CLOSING)
 		return;
 	if (info->port.count)
 		atomic_dec(&rp_num_ports_open);
 	clear_bit((info->aiop * 8) + info->chan, (void *) &xmit_flags[info->board]);
 
-	info->port.count = 0;
-	info->flags &= ~ROCKET_NORMAL_ACTIVE;
-	info->port.tty = NULL;
+	tty_port_hangup(&info->port);
 
 	cp = &info->channel;
 	sDisRxFIFO(cp);
@@ -1653,7 +1529,7 @@
 	sDisCTSFlowCtl(cp);
 	sDisTxSoftFlowCtl(cp);
 	sClrTxXOFF(cp);
-	info->flags &= ~ROCKET_INITIALIZED;
+	info->port.flags &= ~ASYNC_INITIALIZED;
 
 	wake_up_interruptible(&info->port.open_wait);
 }
@@ -1667,7 +1543,7 @@
  */
 static int rp_put_char(struct tty_struct *tty, unsigned char ch)
 {
-	struct r_port *info = (struct r_port *) tty->driver_data;
+	struct r_port *info = tty->driver_data;
 	CHANNEL_t *cp;
 	unsigned long flags;
 
@@ -1714,7 +1590,7 @@
 static int rp_write(struct tty_struct *tty,
 		    const unsigned char *buf, int count)
 {
-	struct r_port *info = (struct r_port *) tty->driver_data;
+	struct r_port *info = tty->driver_data;
 	CHANNEL_t *cp;
 	const unsigned char *b;
 	int c, retval = 0;
@@ -1764,7 +1640,8 @@
 
 	/*  Write remaining data into the port's xmit_buf */
 	while (1) {
-		if (!info->port.tty)		/* Seemingly obligatory check... */
+		/* Hung up ? */
+		if (!test_bit(ASYNC_NORMAL_ACTIVE, &info->port.flags))
 			goto end;
 		c = min(count, XMIT_BUF_SIZE - info->xmit_cnt - 1);
 		c = min(c, XMIT_BUF_SIZE - info->xmit_head);
@@ -1806,7 +1683,7 @@
  */
 static int rp_write_room(struct tty_struct *tty)
 {
-	struct r_port *info = (struct r_port *) tty->driver_data;
+	struct r_port *info = tty->driver_data;
 	int ret;
 
 	if (rocket_paranoia_check(info, "rp_write_room"))
@@ -1827,7 +1704,7 @@
  */
 static int rp_chars_in_buffer(struct tty_struct *tty)
 {
-	struct r_port *info = (struct r_port *) tty->driver_data;
+	struct r_port *info = tty->driver_data;
 	CHANNEL_t *cp;
 
 	if (rocket_paranoia_check(info, "rp_chars_in_buffer"))
@@ -1848,7 +1725,7 @@
  */
 static void rp_flush_buffer(struct tty_struct *tty)
 {
-	struct r_port *info = (struct r_port *) tty->driver_data;
+	struct r_port *info = tty->driver_data;
 	CHANNEL_t *cp;
 	unsigned long flags;
 
@@ -2371,6 +2248,11 @@
 	.tiocmset = rp_tiocmset,
 };
 
+static const struct tty_port_operations rocket_port_ops = {
+	.carrier_raised = carrier_raised,
+	.raise_dtr_rts = raise_dtr_rts,
+};
+
 /*
  * The module "startup" routine; it's run when the module is loaded.
  */
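
The rocket conversion above is the template the rest of this series follows: embed a struct tty_port in the driver's per-port state, register a tty_port_operations with carrier_raised()/raise_dtr_rts(), and let the tty core call them. A minimal sketch of that wiring, using hypothetical ex_ names (not part of the patch):

#include <linux/tty.h>

struct ex_port {
	struct tty_port port;		/* generic port state, embedded */
	int cd_status;			/* driver's view of carrier detect */
};

static int ex_carrier_raised(struct tty_port *port)
{
	/* recover the driver structure from the embedded tty_port */
	struct ex_port *ep = container_of(port, struct ex_port, port);
	return ep->cd_status;
}

static const struct tty_port_operations ex_port_ops = {
	.carrier_raised	= ex_carrier_raised,
};

static void ex_port_setup(struct ex_port *ep)
{
	tty_port_init(&ep->port);	/* wait queues, counts, default delays */
	ep->port.ops = &ex_port_ops;	/* hook the callbacks into the core */
}
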
diff --git a/drivers/char/rocket.h b/drivers/char/rocket.h
index a8b0919..ec863f3 100644
--- a/drivers/char/rocket.h
+++ b/drivers/char/rocket.h
@@ -39,7 +39,7 @@
 /*
  * Rocketport flags
  */
-#define ROCKET_CALLOUT_NOHUP    0x00000001
+/*#define ROCKET_CALLOUT_NOHUP    0x00000001 */
 #define ROCKET_FORCE_CD		0x00000002
 #define ROCKET_HUP_NOTIFY	0x00000004
 #define ROCKET_SPLIT_TERMIOS	0x00000008
diff --git a/drivers/char/rocket_int.h b/drivers/char/rocket_int.h
index 21f3ff5..67e0f1e 100644
--- a/drivers/char/rocket_int.h
+++ b/drivers/char/rocket_int.h
@@ -1162,11 +1162,6 @@
 /* number of characters left in xmit buffer before we ask for more */
 #define WAKEUP_CHARS 256
 
-/* Internal flags used only by the rocketport driver */
-#define ROCKET_INITIALIZED	0x80000000	/* Port is active */
-#define ROCKET_CLOSING		0x40000000	/* Serial port is closing */
-#define ROCKET_NORMAL_ACTIVE	0x20000000	/* Normal port is active */
-
 /*
  * Assigned major numbers for the Comtrol Rocketport
  */
diff --git a/drivers/char/selection.c b/drivers/char/selection.c
index 2978a49..f29fbe9 100644
--- a/drivers/char/selection.c
+++ b/drivers/char/selection.c
@@ -306,7 +306,7 @@
  */
 int paste_selection(struct tty_struct *tty)
 {
-	struct vc_data *vc = (struct vc_data *)tty->driver_data;
+	struct vc_data *vc = tty->driver_data;
 	int	pasted = 0;
 	unsigned int count;
 	struct  tty_ldisc *ld;
diff --git a/drivers/char/ser_a2232.c b/drivers/char/ser_a2232.c
index 7b0c352..33872a2 100644
--- a/drivers/char/ser_a2232.c
+++ b/drivers/char/ser_a2232.c
@@ -122,7 +122,7 @@
 static void a2232_enable_tx_interrupts(void *ptr);
 static void a2232_disable_rx_interrupts(void *ptr);
 static void a2232_enable_rx_interrupts(void *ptr);
-static int  a2232_get_CD(void *ptr);
+static int  a2232_carrier_raised(struct tty_port *port);
 static void a2232_shutdown_port(void *ptr);
 static int  a2232_set_real_termios(void *ptr);
 static int  a2232_chars_in_buffer(void *ptr);
@@ -148,7 +148,6 @@
         a2232_enable_tx_interrupts,
         a2232_disable_rx_interrupts,
         a2232_enable_rx_interrupts,
-        a2232_get_CD,
         a2232_shutdown_port,
         a2232_set_real_termios,
         a2232_chars_in_buffer,
@@ -260,9 +259,10 @@
 	port->disable_rx = 0;
 }
 
-static int  a2232_get_CD(void *ptr)
+static int  a2232_carrier_raised(struct tty_port *port)
 {
-	return ((struct a2232_port *) ptr)->cd_status;
+	struct a2232_port *ap = container_of(port, struct a2232_port, gs.port);
+	return ap->cd_status;
 }
 
 static void a2232_shutdown_port(void *ptr)
@@ -460,14 +460,14 @@
    if switched on. So the only thing we can do at this
    layer here is not taking any characters out of the
    A2232 buffer any more. */
-	struct a2232_port *port = (struct a2232_port *) tty->driver_data;
+	struct a2232_port *port = tty->driver_data;
 	port->throttle_input = -1;
 }
 
 static void a2232_unthrottle(struct tty_struct *tty)
 {
 /* Unthrottle: dual to "throttle()" above. */
-	struct a2232_port *port = (struct a2232_port *) tty->driver_data;
+	struct a2232_port *port = tty->driver_data;
 	port->throttle_input = 0;
 }
 
@@ -638,6 +638,10 @@
 	return IRQ_HANDLED;
 }
 
+static const struct tty_port_operations a2232_port_ops = {
+	.carrier_raised = a2232_carrier_raised,
+};
+
 static void a2232_init_portstructs(void)
 {
 	struct a2232_port *port;
@@ -645,6 +649,8 @@
 
 	for (i = 0; i < MAX_A2232_BOARDS*NUMLINES; i++) {
 		port = a2232_ports + i;
+		tty_port_init(&port->gs.port);
+		port->gs.port.ops = &a2232_port_ops;
 		port->which_a2232 = i/NUMLINES;
 		port->which_port_on_a2232 = i%NUMLINES;
 		port->disable_rx = port->throttle_input = port->cd_status = 0;
@@ -652,11 +658,6 @@
 		port->gs.close_delay = HZ/2;
 		port->gs.closing_wait = 30 * HZ;
 		port->gs.rd = &a2232_real_driver;
-#ifdef NEW_WRITE_LOCKING
-		mutex_init(&(port->gs.port_write_mutex));
-#endif
-		init_waitqueue_head(&port->gs.port.open_wait);
-		init_waitqueue_head(&port->gs.port.close_wait);
 	}
 }
 
diff --git a/drivers/char/serial167.c b/drivers/char/serial167.c
index a8f15e6..f1f24f0 100644
--- a/drivers/char/serial167.c
+++ b/drivers/char/serial167.c
@@ -315,7 +315,7 @@
 
 static void cy_stop(struct tty_struct *tty)
 {
-	struct cyclades_port *info = (struct cyclades_port *)tty->driver_data;
+	struct cyclades_port *info = tty->driver_data;
 	volatile unsigned char *base_addr = (unsigned char *)BASE_ADDR;
 	int channel;
 	unsigned long flags;
@@ -337,7 +337,7 @@
 
 static void cy_start(struct tty_struct *tty)
 {
-	struct cyclades_port *info = (struct cyclades_port *)tty->driver_data;
+	struct cyclades_port *info = tty->driver_data;
 	volatile unsigned char *base_addr = (unsigned char *)BASE_ADDR;
 	int channel;
 	unsigned long flags;
@@ -1062,7 +1062,7 @@
 
 static int cy_put_char(struct tty_struct *tty, unsigned char ch)
 {
-	struct cyclades_port *info = (struct cyclades_port *)tty->driver_data;
+	struct cyclades_port *info = tty->driver_data;
 	unsigned long flags;
 
 #ifdef SERIAL_DEBUG_IO
@@ -1090,7 +1090,7 @@
 
 static void cy_flush_chars(struct tty_struct *tty)
 {
-	struct cyclades_port *info = (struct cyclades_port *)tty->driver_data;
+	struct cyclades_port *info = tty->driver_data;
 	unsigned long flags;
 	volatile unsigned char *base_addr = (u_char *) BASE_ADDR;
 	int channel;
@@ -1122,7 +1122,7 @@
  */
 static int cy_write(struct tty_struct *tty, const unsigned char *buf, int count)
 {
-	struct cyclades_port *info = (struct cyclades_port *)tty->driver_data;
+	struct cyclades_port *info = tty->driver_data;
 	unsigned long flags;
 	int c, total = 0;
 
@@ -1166,7 +1166,7 @@
 
 static int cy_write_room(struct tty_struct *tty)
 {
-	struct cyclades_port *info = (struct cyclades_port *)tty->driver_data;
+	struct cyclades_port *info = tty->driver_data;
 	int ret;
 
 #ifdef SERIAL_DEBUG_IO
@@ -1183,7 +1183,7 @@
 
 static int cy_chars_in_buffer(struct tty_struct *tty)
 {
-	struct cyclades_port *info = (struct cyclades_port *)tty->driver_data;
+	struct cyclades_port *info = tty->driver_data;
 
 #ifdef SERIAL_DEBUG_IO
 	printk("cy_chars_in_buffer %s %d\n", tty->name, info->xmit_cnt);	/* */
@@ -1197,7 +1197,7 @@
 
 static void cy_flush_buffer(struct tty_struct *tty)
 {
-	struct cyclades_port *info = (struct cyclades_port *)tty->driver_data;
+	struct cyclades_port *info = tty->driver_data;
 	unsigned long flags;
 
 #ifdef SERIAL_DEBUG_IO
@@ -1218,7 +1218,7 @@
  */
 static void cy_throttle(struct tty_struct *tty)
 {
-	struct cyclades_port *info = (struct cyclades_port *)tty->driver_data;
+	struct cyclades_port *info = tty->driver_data;
 	unsigned long flags;
 	volatile unsigned char *base_addr = (u_char *) BASE_ADDR;
 	int channel;
@@ -1250,7 +1250,7 @@
 
 static void cy_unthrottle(struct tty_struct *tty)
 {
-	struct cyclades_port *info = (struct cyclades_port *)tty->driver_data;
+	struct cyclades_port *info = tty->driver_data;
 	unsigned long flags;
 	volatile unsigned char *base_addr = (u_char *) BASE_ADDR;
 	int channel;
@@ -1345,7 +1345,7 @@
 
 static int cy_tiocmget(struct tty_struct *tty, struct file *file)
 {
-	struct cyclades_port *info = (struct cyclades_port *)tty->driver_data;
+	struct cyclades_port *info = tty->driver_data;
 	int channel;
 	volatile unsigned char *base_addr = (u_char *) BASE_ADDR;
 	unsigned long flags;
@@ -1369,7 +1369,7 @@
 cy_tiocmset(struct tty_struct *tty, struct file *file,
 	    unsigned int set, unsigned int clear)
 {
-	struct cyclades_port *info = (struct cyclades_port *)tty->driver_data;
+	struct cyclades_port *info = tty->driver_data;
 	int channel;
 	volatile unsigned char *base_addr = (u_char *) BASE_ADDR;
 	unsigned long flags;
@@ -1532,7 +1532,7 @@
 	 unsigned int cmd, unsigned long arg)
 {
 	unsigned long val;
-	struct cyclades_port *info = (struct cyclades_port *)tty->driver_data;
+	struct cyclades_port *info = tty->driver_data;
 	int ret_val = 0;
 	void __user *argp = (void __user *)arg;
 
@@ -1607,7 +1607,7 @@
 
 static void cy_set_termios(struct tty_struct *tty, struct ktermios *old_termios)
 {
-	struct cyclades_port *info = (struct cyclades_port *)tty->driver_data;
+	struct cyclades_port *info = tty->driver_data;
 
 #ifdef SERIAL_DEBUG_OTHER
 	printk("cy_set_termios %s\n", tty->name);
@@ -1631,7 +1631,7 @@
 
 static void cy_close(struct tty_struct *tty, struct file *filp)
 {
-	struct cyclades_port *info = (struct cyclades_port *)tty->driver_data;
+	struct cyclades_port *info = tty->driver_data;
 
 /* CP('C'); */
 #ifdef SERIAL_DEBUG_OTHER
@@ -1698,7 +1698,7 @@
  */
 void cy_hangup(struct tty_struct *tty)
 {
-	struct cyclades_port *info = (struct cyclades_port *)tty->driver_data;
+	struct cyclades_port *info = tty->driver_data;
 
 #ifdef SERIAL_DEBUG_OTHER
 	printk("cy_hangup %s\n", tty->name);	/* */
diff --git a/drivers/char/specialix.c b/drivers/char/specialix.c
index a16b94f..3c67c3d 100644
--- a/drivers/char/specialix.c
+++ b/drivers/char/specialix.c
@@ -1450,7 +1450,7 @@
 
 static void sx_flush_buffer(struct tty_struct *tty)
 {
-	struct specialix_port *port = (struct specialix_port *)tty->driver_data;
+	struct specialix_port *port = tty->driver_data;
 	unsigned long flags;
 	struct specialix_board  *bp;
 
@@ -1472,7 +1472,7 @@
 
 static void sx_close(struct tty_struct *tty, struct file *filp)
 {
-	struct specialix_port *port = (struct specialix_port *)tty->driver_data;
+	struct specialix_port *port = tty->driver_data;
 	struct specialix_board *bp;
 	unsigned long flags;
 	unsigned long timeout;
@@ -1585,7 +1585,7 @@
 static int sx_write(struct tty_struct *tty,
 					const unsigned char *buf, int count)
 {
-	struct specialix_port *port = (struct specialix_port *)tty->driver_data;
+	struct specialix_port *port = tty->driver_data;
 	struct specialix_board *bp;
 	int c, total = 0;
 	unsigned long flags;
@@ -1637,7 +1637,7 @@
 
 static int sx_put_char(struct tty_struct *tty, unsigned char ch)
 {
-	struct specialix_port *port = (struct specialix_port *)tty->driver_data;
+	struct specialix_port *port = tty->driver_data;
 	unsigned long flags;
 	struct specialix_board  *bp;
 
@@ -1676,7 +1676,7 @@
 
 static void sx_flush_chars(struct tty_struct *tty)
 {
-	struct specialix_port *port = (struct specialix_port *)tty->driver_data;
+	struct specialix_port *port = tty->driver_data;
 	unsigned long flags;
 	struct specialix_board  *bp = port_Board(port);
 
@@ -1703,7 +1703,7 @@
 
 static int sx_write_room(struct tty_struct *tty)
 {
-	struct specialix_port *port = (struct specialix_port *)tty->driver_data;
+	struct specialix_port *port = tty->driver_data;
 	int	ret;
 
 	func_enter();
@@ -1724,7 +1724,7 @@
 
 static int sx_chars_in_buffer(struct tty_struct *tty)
 {
-	struct specialix_port *port = (struct specialix_port *)tty->driver_data;
+	struct specialix_port *port = tty->driver_data;
 
 	func_enter();
 
@@ -1738,7 +1738,7 @@
 
 static int sx_tiocmget(struct tty_struct *tty, struct file *file)
 {
-	struct specialix_port *port = (struct specialix_port *)tty->driver_data;
+	struct specialix_port *port = tty->driver_data;
 	struct specialix_board *bp;
 	unsigned char status;
 	unsigned int result;
@@ -1780,7 +1780,7 @@
 static int sx_tiocmset(struct tty_struct *tty, struct file *file,
 		       unsigned int set, unsigned int clear)
 {
-	struct specialix_port *port = (struct specialix_port *)tty->driver_data;
+	struct specialix_port *port = tty->driver_data;
 	unsigned long flags;
 	struct specialix_board *bp;
 
@@ -1820,7 +1820,7 @@
 
 static int sx_send_break(struct tty_struct *tty, int length)
 {
-	struct specialix_port *port = (struct specialix_port *)tty->driver_data;
+	struct specialix_port *port = tty->driver_data;
 	struct specialix_board *bp = port_Board(port);
 	unsigned long flags;
 
@@ -1931,7 +1931,7 @@
 static int sx_ioctl(struct tty_struct *tty, struct file *filp,
 				unsigned int cmd, unsigned long arg)
 {
-	struct specialix_port *port = (struct specialix_port *)tty->driver_data;
+	struct specialix_port *port = tty->driver_data;
 	void __user *argp = (void __user *)arg;
 
 	func_enter();
@@ -1959,7 +1959,7 @@
 
 static void sx_throttle(struct tty_struct *tty)
 {
-	struct specialix_port *port = (struct specialix_port *)tty->driver_data;
+	struct specialix_port *port = tty->driver_data;
 	struct specialix_board *bp;
 	unsigned long flags;
 
@@ -2004,7 +2004,7 @@
 
 static void sx_unthrottle(struct tty_struct *tty)
 {
-	struct specialix_port *port = (struct specialix_port *)tty->driver_data;
+	struct specialix_port *port = tty->driver_data;
 	struct specialix_board *bp;
 	unsigned long flags;
 
@@ -2045,7 +2045,7 @@
 
 static void sx_stop(struct tty_struct *tty)
 {
-	struct specialix_port *port = (struct specialix_port *)tty->driver_data;
+	struct specialix_port *port = tty->driver_data;
 	struct specialix_board *bp;
 	unsigned long flags;
 
@@ -2072,7 +2072,7 @@
 
 static void sx_start(struct tty_struct *tty)
 {
-	struct specialix_port *port = (struct specialix_port *)tty->driver_data;
+	struct specialix_port *port = tty->driver_data;
 	struct specialix_board *bp;
 	unsigned long flags;
 
@@ -2100,7 +2100,7 @@
 
 static void sx_hangup(struct tty_struct *tty)
 {
-	struct specialix_port *port = (struct specialix_port *)tty->driver_data;
+	struct specialix_port *port = tty->driver_data;
 	struct specialix_board *bp;
 	unsigned long flags;
 
@@ -2135,7 +2135,7 @@
 static void sx_set_termios(struct tty_struct *tty,
 					struct ktermios *old_termios)
 {
-	struct specialix_port *port = (struct specialix_port *)tty->driver_data;
+	struct specialix_port *port = tty->driver_data;
 	unsigned long flags;
 	struct specialix_board  *bp;
 
diff --git a/drivers/char/stallion.c b/drivers/char/stallion.c
index 963b03f..e1e0dd8 100644
--- a/drivers/char/stallion.c
+++ b/drivers/char/stallion.c
@@ -130,6 +130,8 @@
 static DEFINE_MUTEX(stl_brdslock);
 static struct stlbrd		*stl_brds[STL_MAXBRDS];
 
+static const struct tty_port_operations stl_port_ops;
+
 /*
  *	Per board state flags. Used with the state field of the board struct.
  *	Not really much here!
@@ -407,7 +409,6 @@
 static int	stl_brdinit(struct stlbrd *brdp);
 static int	stl_getportstats(struct tty_struct *tty, struct stlport *portp, comstats_t __user *cp);
 static int	stl_clrportstats(struct stlport *portp, comstats_t __user *cp);
-static int	stl_waitcarrier(struct tty_struct *tty, struct stlport *portp, struct file *filp);
 
 /*
  *	CD1400 uart specific handling functions.
@@ -703,8 +704,9 @@
 {
 	struct stlport	*portp;
 	struct stlbrd	*brdp;
+	struct tty_port *port;
 	unsigned int	minordev, brdnr, panelnr;
-	int		portnr, rc;
+	int		portnr;
 
 	pr_debug("stl_open(tty=%p,filp=%p): device=%s\n", tty, filp, tty->name);
 
@@ -715,6 +717,7 @@
 	brdp = stl_brds[brdnr];
 	if (brdp == NULL)
 		return -ENODEV;
+
 	minordev = MINOR2PORT(minordev);
 	for (portnr = -1, panelnr = 0; panelnr < STL_MAXPANELS; panelnr++) {
 		if (brdp->panels[panelnr] == NULL)
@@ -731,16 +734,17 @@
 	portp = brdp->panels[panelnr]->ports[portnr];
 	if (portp == NULL)
 		return -ENODEV;
+	port = &portp->port;
 
 /*
  *	On the first open of the device setup the port hardware, and
  *	initialize the per port data structure.
  */
-	tty_port_tty_set(&portp->port, tty);
+	tty_port_tty_set(port, tty);
 	tty->driver_data = portp;
-	portp->port.count++;
+	port->count++;
 
-	if ((portp->port.flags & ASYNC_INITIALIZED) == 0) {
+	if ((port->flags & ASYNC_INITIALIZED) == 0) {
 		if (!portp->tx.buf) {
 			portp->tx.buf = kmalloc(STL_TXBUFSIZE, GFP_KERNEL);
 			if (!portp->tx.buf)
@@ -754,91 +758,24 @@
 		stl_enablerxtx(portp, 1, 1);
 		stl_startrxtx(portp, 1, 0);
 		clear_bit(TTY_IO_ERROR, &tty->flags);
-		portp->port.flags |= ASYNC_INITIALIZED;
+		port->flags |= ASYNC_INITIALIZED;
 	}
-
-/*
- *	Check if this port is in the middle of closing. If so then wait
- *	until it is closed then return error status, based on flag settings.
- *	The sleep here does not need interrupt protection since the wakeup
- *	for it is done with the same context.
- */
-	if (portp->port.flags & ASYNC_CLOSING) {
-		interruptible_sleep_on(&portp->port.close_wait);
-		if (portp->port.flags & ASYNC_HUP_NOTIFY)
-			return -EAGAIN;
-		return -ERESTARTSYS;
-	}
-
-/*
- *	Based on type of open being done check if it can overlap with any
- *	previous opens still in effect. If we are a normal serial device
- *	then also we might have to wait for carrier.
- */
-	if (!(filp->f_flags & O_NONBLOCK))
-		if ((rc = stl_waitcarrier(tty, portp, filp)) != 0)
-			return rc;
-
-	portp->port.flags |= ASYNC_NORMAL_ACTIVE;
-
-	return 0;
+	return tty_port_block_til_ready(port, tty, filp);
 }
 
 /*****************************************************************************/
 
-/*
- *	Possibly need to wait for carrier (DCD signal) to come high. Say
- *	maybe because if we are clocal then we don't need to wait...
- */
-
-static int stl_waitcarrier(struct tty_struct *tty, struct stlport *portp,
-							struct file *filp)
+static int stl_carrier_raised(struct tty_port *port)
 {
-	unsigned long	flags;
-	int		rc, doclocal;
+	struct stlport *portp = container_of(port, struct stlport, port);
+	return (portp->sigs & TIOCM_CD) ? 1 : 0;
+}
 
-	pr_debug("stl_waitcarrier(portp=%p,filp=%p)\n", portp, filp);
-
-	rc = 0;
-	doclocal = 0;
-
-	spin_lock_irqsave(&stallion_lock, flags);
-
-	if (tty->termios->c_cflag & CLOCAL)
-		doclocal++;
-
-	portp->openwaitcnt++;
-	if (! tty_hung_up_p(filp))
-		portp->port.count--;
-
-	for (;;) {
-		/* Takes brd_lock internally */
-		stl_setsignals(portp, 1, 1);
-		if (tty_hung_up_p(filp) ||
-		    ((portp->port.flags & ASYNC_INITIALIZED) == 0)) {
-			if (portp->port.flags & ASYNC_HUP_NOTIFY)
-				rc = -EBUSY;
-			else
-				rc = -ERESTARTSYS;
-			break;
-		}
-		if (((portp->port.flags & ASYNC_CLOSING) == 0) &&
-		    (doclocal || (portp->sigs & TIOCM_CD)))
-			break;
-		if (signal_pending(current)) {
-			rc = -ERESTARTSYS;
-			break;
-		}
-		/* FIXME */
-		interruptible_sleep_on(&portp->port.open_wait);
-	}
-
-	if (! tty_hung_up_p(filp))
-		portp->port.count++;
-	portp->openwaitcnt--;
-	spin_unlock_irqrestore(&stallion_lock, flags);
-
-	return rc;
+static void stl_raise_dtr_rts(struct tty_port *port)
+{
+	struct stlport *portp = container_of(port, struct stlport, port);
+	/* Takes brd_lock internally */
+	stl_setsignals(portp, 1, 1);
 }
 
 /*****************************************************************************/
@@ -890,47 +827,29 @@
 static void stl_close(struct tty_struct *tty, struct file *filp)
 {
 	struct stlport	*portp;
+	struct tty_port *port;
 	unsigned long	flags;
 
 	pr_debug("stl_close(tty=%p,filp=%p)\n", tty, filp);
 
 	portp = tty->driver_data;
-	if (portp == NULL)
-		return;
+	BUG_ON(portp == NULL);
 
-	spin_lock_irqsave(&stallion_lock, flags);
-	if (tty_hung_up_p(filp)) {
-		spin_unlock_irqrestore(&stallion_lock, flags);
-		return;
-	}
-	if ((tty->count == 1) && (portp->port.count != 1))
-		portp->port.count = 1;
-	if (portp->port.count-- > 1) {
-		spin_unlock_irqrestore(&stallion_lock, flags);
-		return;
-	}
+	port = &portp->port;
 
-	portp->port.count = 0;
-	portp->port.flags |= ASYNC_CLOSING;
-
+	if (tty_port_close_start(port, tty, filp) == 0)
+		return;
 /*
  *	May want to wait for any data to drain before closing. The BUSY
  *	flag keeps track of whether we are still sending or not - it is
  *	very accurate for the cd1400, not quite so for the sc26198.
  *	(The sc26198 has no "end-of-data" interrupt only empty FIFO)
  */
-	tty->closing = 1;
-
-	spin_unlock_irqrestore(&stallion_lock, flags);
-
-	if (portp->closing_wait != ASYNC_CLOSING_WAIT_NONE)
-		tty_wait_until_sent(tty, portp->closing_wait);
 	stl_waituntilsent(tty, (HZ / 2));
 
-
-	spin_lock_irqsave(&stallion_lock, flags);
+	spin_lock_irqsave(&port->lock, flags);
 	portp->port.flags &= ~ASYNC_INITIALIZED;
-	spin_unlock_irqrestore(&stallion_lock, flags);
+	spin_unlock_irqrestore(&port->lock, flags);
 
 	stl_disableintrs(portp);
 	if (tty->termios->c_cflag & HUPCL)
@@ -944,20 +863,9 @@
 		portp->tx.head = NULL;
 		portp->tx.tail = NULL;
 	}
-	set_bit(TTY_IO_ERROR, &tty->flags);
-	tty_ldisc_flush(tty);
 
-	tty->closing = 0;
-	tty_port_tty_set(&portp->port, NULL);
-
-	if (portp->openwaitcnt) {
-		if (portp->close_delay)
-			msleep_interruptible(jiffies_to_msecs(portp->close_delay));
-		wake_up_interruptible(&portp->port.open_wait);
-	}
-
-	portp->port.flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING);
-	wake_up_interruptible(&portp->port.close_wait);
+	tty_port_close_end(port, tty);
+	tty_port_tty_set(port, NULL);
 }
 
 /*****************************************************************************/
@@ -1405,14 +1313,20 @@
 static void stl_hangup(struct tty_struct *tty)
 {
 	struct stlport	*portp;
+	struct tty_port *port;
+	unsigned long flags;
 
 	pr_debug("stl_hangup(tty=%p)\n", tty);
 
 	portp = tty->driver_data;
 	if (portp == NULL)
 		return;
+	port = &portp->port;
 
-	portp->port.flags &= ~ASYNC_INITIALIZED;
+	spin_lock_irqsave(&port->lock, flags);
+	port->flags &= ~ASYNC_INITIALIZED;
+	spin_unlock_irqrestore(&port->lock, flags);
+
 	stl_disableintrs(portp);
 	if (tty->termios->c_cflag & HUPCL)
 		stl_setsignals(portp, 0, 0);
@@ -1426,10 +1340,7 @@
 		portp->tx.head = NULL;
 		portp->tx.tail = NULL;
 	}
-	tty_port_tty_set(&portp->port, NULL);
-	portp->port.flags &= ~ASYNC_NORMAL_ACTIVE;
-	portp->port.count = 0;
-	wake_up_interruptible(&portp->port.open_wait);
+	tty_port_hangup(port);
 }
 
 /*****************************************************************************/
@@ -1776,6 +1687,7 @@
 			break;
 		}
 		tty_port_init(&portp->port);
+		portp->port.ops = &stl_port_ops;
 		portp->magic = STL_PORTMAGIC;
 		portp->portnr = i;
 		portp->brdnr = panelp->brdnr;
@@ -2659,6 +2571,11 @@
 	.tiocmset = stl_tiocmset,
 };
 
+static const struct tty_port_operations stl_port_ops = {
+	.carrier_raised = stl_carrier_raised,
+	.raise_dtr_rts = stl_raise_dtr_rts,
+};
+
 /*****************************************************************************/
 /*                       CD1400 HARDWARE FUNCTIONS                           */
 /*****************************************************************************/
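
The stallion hunks above show the driver-side usage of the new helpers: open() ends in tty_port_block_til_ready() instead of a hand-rolled carrier wait, and close() brackets its hardware shutdown with tty_port_close_start()/tty_port_close_end(). A condensed sketch of that flow (the ex_ names and hardware hooks are hypothetical):

static int ex_open(struct tty_struct *tty, struct file *filp)
{
	struct ex_port *ep = ex_port_from_tty(tty);	/* driver-specific lookup */
	struct tty_port *port = &ep->port;

	tty->driver_data = ep;
	tty_port_tty_set(port, tty);
	port->count++;

	if (!(port->flags & ASYNC_INITIALIZED)) {
		ex_hw_startup(ep);			/* driver-specific */
		port->flags |= ASYNC_INITIALIZED;
	}
	/* blocks until carrier_raised() reports DCD, unless CLOCAL/O_NONBLOCK */
	return tty_port_block_til_ready(port, tty, filp);
}

static void ex_close(struct tty_struct *tty, struct file *filp)
{
	struct ex_port *ep = tty->driver_data;
	struct tty_port *port = &ep->port;

	if (tty_port_close_start(port, tty, filp) == 0)
		return;				/* hung up, or not the last close */

	ex_hw_shutdown(ep);			/* driver-specific */
	tty_port_close_end(port, tty);		/* close delay, wake blocked openers */
	tty_port_tty_set(port, NULL);
}
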
diff --git a/drivers/char/sx.c b/drivers/char/sx.c
index ba4e862..b60be7b 100644
--- a/drivers/char/sx.c
+++ b/drivers/char/sx.c
@@ -279,7 +279,7 @@
 static void sx_enable_tx_interrupts(void *ptr);
 static void sx_disable_rx_interrupts(void *ptr);
 static void sx_enable_rx_interrupts(void *ptr);
-static int sx_get_CD(void *ptr);
+static int sx_carrier_raised(struct tty_port *port);
 static void sx_shutdown_port(void *ptr);
 static int sx_set_real_termios(void *ptr);
 static void sx_close(void *ptr);
@@ -360,7 +360,6 @@
 	sx_enable_tx_interrupts,
 	sx_disable_rx_interrupts,
 	sx_enable_rx_interrupts,
-	sx_get_CD,
 	sx_shutdown_port,
 	sx_set_real_termios,
 	sx_chars_in_buffer,
@@ -791,7 +790,7 @@
 	sx_dprintk(SX_DEBUG_MODEMSIGNALS, "getsignals: %d/%d  (%d/%d) "
 			"%02x/%02x\n",
 			(o_stat & OP_DTR) != 0, (o_stat & OP_RTS) != 0,
-			port->c_dcd, sx_get_CD(port),
+			port->c_dcd, tty_port_carrier_raised(&port->gs.port),
 			sx_read_channel_byte(port, hi_ip),
 			sx_read_channel_byte(port, hi_state));
 
@@ -1190,7 +1189,7 @@
 
 	hi_state = sx_read_channel_byte(port, hi_state);
 	sx_dprintk(SX_DEBUG_MODEMSIGNALS, "Checking modem signals (%d/%d)\n",
-			port->c_dcd, sx_get_CD(port));
+			port->c_dcd, tty_port_carrier_raised(&port->gs.port));
 
 	if (hi_state & ST_BREAK) {
 		hi_state &= ~ST_BREAK;
@@ -1202,11 +1201,11 @@
 		hi_state &= ~ST_DCD;
 		sx_dprintk(SX_DEBUG_MODEMSIGNALS, "got a DCD change.\n");
 		sx_write_channel_byte(port, hi_state, hi_state);
-		c_dcd = sx_get_CD(port);
+		c_dcd = tty_port_carrier_raised(&port->gs.port);
 		sx_dprintk(SX_DEBUG_MODEMSIGNALS, "DCD is now %d\n", c_dcd);
 		if (c_dcd != port->c_dcd) {
 			port->c_dcd = c_dcd;
-			if (sx_get_CD(port)) {
+			if (tty_port_carrier_raised(&port->gs.port)) {
 				/* DCD went UP */
 				if ((sx_read_channel_byte(port, hi_hstat) !=
 						HS_IDLE_CLOSED) &&
@@ -1415,13 +1414,10 @@
 }
 
 /* Jeez. Isn't this simple? */
-static int sx_get_CD(void *ptr)
+static int sx_carrier_raised(struct tty_port *port)
 {
-	struct sx_port *port = ptr;
-	func_enter2();
-
-	func_exit();
-	return ((sx_read_channel_byte(port, hi_ip) & IP_DCD) != 0);
+	struct sx_port *sp = container_of(port, struct sx_port, gs.port);
+	return ((sx_read_channel_byte(sp, hi_ip) & IP_DCD) != 0);
 }
 
 /* Jeez. Isn't this simple? */
@@ -1536,7 +1532,7 @@
 	}
 	/* tty->low_latency = 1; */
 
-	port->c_dcd = sx_get_CD(port);
+	port->c_dcd = sx_carrier_raised(&port->gs.port);
 	sx_dprintk(SX_DEBUG_OPEN, "at open: cd=%d\n", port->c_dcd);
 
 	func_exit();
@@ -1945,7 +1941,7 @@
 
 static void sx_throttle(struct tty_struct *tty)
 {
-	struct sx_port *port = (struct sx_port *)tty->driver_data;
+	struct sx_port *port = tty->driver_data;
 
 	func_enter2();
 	/* If the port is using any type of input flow
@@ -1959,7 +1955,7 @@
 
 static void sx_unthrottle(struct tty_struct *tty)
 {
-	struct sx_port *port = (struct sx_port *)tty->driver_data;
+	struct sx_port *port = tty->driver_data;
 
 	func_enter2();
 	/* Always unthrottle even if flow control is not enabled on
@@ -2354,6 +2350,10 @@
 	.tiocmset = sx_tiocmset,
 };
 
+static const struct tty_port_operations sx_port_ops = {
+	.carrier_raised = sx_carrier_raised,
+};
+
 static int sx_init_drivers(void)
 {
 	int error;
@@ -2410,6 +2410,7 @@
 		for (j = 0; j < boards[i].nports; j++) {
 			sx_dprintk(SX_DEBUG_INIT, "initing port %d\n", j);
 			tty_port_init(&port->gs.port);
+			port->gs.port.ops = &sx_port_ops;
 			port->gs.magic = SX_MAGIC;
 			port->gs.close_delay = HZ / 2;
 			port->gs.closing_wait = 30 * HZ;
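
sx.c now queries carrier through tty_port_carrier_raised() rather than calling its own sx_get_CD(). As a rough sketch (an assumption about the generic helper, not code from this patch), the wrapper simply dispatches to the registered op and defaults to "carrier present" when a driver supplies none:

int ex_tty_port_carrier_raised(struct tty_port *port)
{
	if (port->ops->carrier_raised == NULL)
		return 1;			/* no op: assume carrier is up */
	return port->ops->carrier_raised(port);
}
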
diff --git a/drivers/char/synclink.c b/drivers/char/synclink.c
index 500f517..b8063d4 100644
--- a/drivers/char/synclink.c
+++ b/drivers/char/synclink.c
@@ -977,7 +977,7 @@
  */
 static void mgsl_stop(struct tty_struct *tty)
 {
-	struct mgsl_struct *info = (struct mgsl_struct *)tty->driver_data;
+	struct mgsl_struct *info = tty->driver_data;
 	unsigned long flags;
 	
 	if (mgsl_paranoia_check(info, tty->name, "mgsl_stop"))
@@ -1000,7 +1000,7 @@
  */
 static void mgsl_start(struct tty_struct *tty)
 {
-	struct mgsl_struct *info = (struct mgsl_struct *)tty->driver_data;
+	struct mgsl_struct *info = tty->driver_data;
 	unsigned long flags;
 	
 	if (mgsl_paranoia_check(info, tty->name, "mgsl_start"))
@@ -2057,7 +2057,7 @@
  */
 static void mgsl_flush_chars(struct tty_struct *tty)
 {
-	struct mgsl_struct *info = (struct mgsl_struct *)tty->driver_data;
+	struct mgsl_struct *info = tty->driver_data;
 	unsigned long flags;
 				
 	if ( debug_level >= DEBUG_LEVEL_INFO )
@@ -2109,7 +2109,7 @@
 		    const unsigned char *buf, int count)
 {
 	int	c, ret = 0;
-	struct mgsl_struct *info = (struct mgsl_struct *)tty->driver_data;
+	struct mgsl_struct *info = tty->driver_data;
 	unsigned long flags;
 	
 	if ( debug_level >= DEBUG_LEVEL_INFO )
@@ -2232,7 +2232,7 @@
  */
 static int mgsl_write_room(struct tty_struct *tty)
 {
-	struct mgsl_struct *info = (struct mgsl_struct *)tty->driver_data;
+	struct mgsl_struct *info = tty->driver_data;
 	int	ret;
 				
 	if (mgsl_paranoia_check(info, tty->name, "mgsl_write_room"))
@@ -2267,7 +2267,7 @@
  */
 static int mgsl_chars_in_buffer(struct tty_struct *tty)
 {
-	struct mgsl_struct *info = (struct mgsl_struct *)tty->driver_data;
+	struct mgsl_struct *info = tty->driver_data;
 			 
 	if (debug_level >= DEBUG_LEVEL_INFO)
 		printk("%s(%d):mgsl_chars_in_buffer(%s)\n",
@@ -2301,7 +2301,7 @@
  */
 static void mgsl_flush_buffer(struct tty_struct *tty)
 {
-	struct mgsl_struct *info = (struct mgsl_struct *)tty->driver_data;
+	struct mgsl_struct *info = tty->driver_data;
 	unsigned long flags;
 	
 	if (debug_level >= DEBUG_LEVEL_INFO)
@@ -2329,7 +2329,7 @@
  */
 static void mgsl_send_xchar(struct tty_struct *tty, char ch)
 {
-	struct mgsl_struct *info = (struct mgsl_struct *)tty->driver_data;
+	struct mgsl_struct *info = tty->driver_data;
 	unsigned long flags;
 
 	if (debug_level >= DEBUG_LEVEL_INFO)
@@ -2358,7 +2358,7 @@
  */
 static void mgsl_throttle(struct tty_struct * tty)
 {
-	struct mgsl_struct *info = (struct mgsl_struct *)tty->driver_data;
+	struct mgsl_struct *info = tty->driver_data;
 	unsigned long flags;
 	
 	if (debug_level >= DEBUG_LEVEL_INFO)
@@ -2388,7 +2388,7 @@
  */
 static void mgsl_unthrottle(struct tty_struct * tty)
 {
-	struct mgsl_struct *info = (struct mgsl_struct *)tty->driver_data;
+	struct mgsl_struct *info = tty->driver_data;
 	unsigned long flags;
 	
 	if (debug_level >= DEBUG_LEVEL_INFO)
@@ -2841,7 +2841,7 @@
  */
 static int tiocmget(struct tty_struct *tty, struct file *file)
 {
-	struct mgsl_struct *info = (struct mgsl_struct *)tty->driver_data;
+	struct mgsl_struct *info = tty->driver_data;
 	unsigned int result;
  	unsigned long flags;
 
@@ -2867,7 +2867,7 @@
 static int tiocmset(struct tty_struct *tty, struct file *file,
 		    unsigned int set, unsigned int clear)
 {
-	struct mgsl_struct *info = (struct mgsl_struct *)tty->driver_data;
+	struct mgsl_struct *info = tty->driver_data;
  	unsigned long flags;
 
 	if (debug_level >= DEBUG_LEVEL_INFO)
@@ -2898,7 +2898,7 @@
  */
 static int mgsl_break(struct tty_struct *tty, int break_state)
 {
-	struct mgsl_struct * info = (struct mgsl_struct *)tty->driver_data;
+	struct mgsl_struct * info = tty->driver_data;
 	unsigned long flags;
 	
 	if (debug_level >= DEBUG_LEVEL_INFO)
@@ -2932,7 +2932,7 @@
 static int mgsl_ioctl(struct tty_struct *tty, struct file * file,
 		    unsigned int cmd, unsigned long arg)
 {
-	struct mgsl_struct * info = (struct mgsl_struct *)tty->driver_data;
+	struct mgsl_struct * info = tty->driver_data;
 	int ret;
 	
 	if (debug_level >= DEBUG_LEVEL_INFO)
@@ -3042,7 +3042,7 @@
  */
 static void mgsl_set_termios(struct tty_struct *tty, struct ktermios *old_termios)
 {
-	struct mgsl_struct *info = (struct mgsl_struct *)tty->driver_data;
+	struct mgsl_struct *info = tty->driver_data;
 	unsigned long flags;
 	
 	if (debug_level >= DEBUG_LEVEL_INFO)
@@ -3096,7 +3096,7 @@
  */
 static void mgsl_close(struct tty_struct *tty, struct file * filp)
 {
-	struct mgsl_struct * info = (struct mgsl_struct *)tty->driver_data;
+	struct mgsl_struct * info = tty->driver_data;
 
 	if (mgsl_paranoia_check(info, tty->name, "mgsl_close"))
 		return;
@@ -3104,70 +3104,18 @@
 	if (debug_level >= DEBUG_LEVEL_INFO)
 		printk("%s(%d):mgsl_close(%s) entry, count=%d\n",
 			 __FILE__,__LINE__, info->device_name, info->port.count);
-			 
-	if (!info->port.count)
-		return;
 
-	if (tty_hung_up_p(filp))
+	if (tty_port_close_start(&info->port, tty, filp) == 0)			 
 		goto cleanup;
 			
-	if ((tty->count == 1) && (info->port.count != 1)) {
-		/*
-		 * tty->count is 1 and the tty structure will be freed.
-		 * info->port.count should be one in this case.
-		 * if it's not, correct it so that the port is shutdown.
-		 */
-		printk("mgsl_close: bad refcount; tty->count is 1, "
-		       "info->port.count is %d\n", info->port.count);
-		info->port.count = 1;
-	}
-	
-	info->port.count--;
-	
-	/* if at least one open remaining, leave hardware active */
-	if (info->port.count)
-		goto cleanup;
-	
-	info->port.flags |= ASYNC_CLOSING;
-	
-	/* set tty->closing to notify line discipline to 
-	 * only process XON/XOFF characters. Only the N_TTY
-	 * discipline appears to use this (ppp does not).
-	 */
-	tty->closing = 1;
-	
-	/* wait for transmit data to clear all layers */
-	
-	if (info->port.closing_wait != ASYNC_CLOSING_WAIT_NONE) {
-		if (debug_level >= DEBUG_LEVEL_INFO)
-			printk("%s(%d):mgsl_close(%s) calling tty_wait_until_sent\n",
-				 __FILE__,__LINE__, info->device_name );
-		tty_wait_until_sent(tty, info->port.closing_wait);
-	}
-		
  	if (info->port.flags & ASYNC_INITIALIZED)
  		mgsl_wait_until_sent(tty, info->timeout);
-
 	mgsl_flush_buffer(tty);
-
 	tty_ldisc_flush(tty);
-		
 	shutdown(info);
-	
-	tty->closing = 0;
+
+	tty_port_close_end(&info->port, tty);	
 	info->port.tty = NULL;
-	
-	if (info->port.blocked_open) {
-		if (info->port.close_delay) {
-			msleep_interruptible(jiffies_to_msecs(info->port.close_delay));
-		}
-		wake_up_interruptible(&info->port.open_wait);
-	}
-	
-	info->port.flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING);
-			 
-	wake_up_interruptible(&info->port.close_wait);
-	
 cleanup:			
 	if (debug_level >= DEBUG_LEVEL_INFO)
 		printk("%s(%d):mgsl_close(%s) exit, count=%d\n", __FILE__,__LINE__,
@@ -3188,7 +3136,7 @@
  */
 static void mgsl_wait_until_sent(struct tty_struct *tty, int timeout)
 {
-	struct mgsl_struct * info = (struct mgsl_struct *)tty->driver_data;
+	struct mgsl_struct * info = tty->driver_data;
 	unsigned long orig_jiffies, char_time;
 
 	if (!info )
@@ -3261,7 +3209,7 @@
  */
 static void mgsl_hangup(struct tty_struct *tty)
 {
-	struct mgsl_struct * info = (struct mgsl_struct *)tty->driver_data;
+	struct mgsl_struct * info = tty->driver_data;
 	
 	if (debug_level >= DEBUG_LEVEL_INFO)
 		printk("%s(%d):mgsl_hangup(%s)\n",
@@ -3281,6 +3229,35 @@
 	
 }	/* end of mgsl_hangup() */
 
+/*
+ * carrier_raised()
+ *
+ *	Return true if carrier is raised
+ */
+
+static int carrier_raised(struct tty_port *port)
+{
+	unsigned long flags;
+	struct mgsl_struct *info = container_of(port, struct mgsl_struct, port);
+	
+	spin_lock_irqsave(&info->irq_spinlock, flags);
+ 	usc_get_serial_signals(info);
+	spin_unlock_irqrestore(&info->irq_spinlock, flags);
+	return (info->serial_signals & SerialSignal_DCD) ? 1 : 0;
+}
+
+static void raise_dtr_rts(struct tty_port *port)
+{
+	struct mgsl_struct *info = container_of(port, struct mgsl_struct, port);
+	unsigned long flags;
+
+	spin_lock_irqsave(&info->irq_spinlock,flags);
+	info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR;
+ 	usc_set_serial_signals(info);
+	spin_unlock_irqrestore(&info->irq_spinlock,flags);
+}
+
+
 /* block_til_ready()
  * 
  * 	Block the current process until the specified port
@@ -3302,6 +3279,8 @@
 	bool		do_clocal = false;
 	bool		extra_count = false;
 	unsigned long	flags;
+	int		dcd;
+	struct tty_port *port = &info->port;
 	
 	if (debug_level >= DEBUG_LEVEL_INFO)
 		printk("%s(%d):block_til_ready on %s\n",
@@ -3309,7 +3288,7 @@
 
 	if (filp->f_flags & O_NONBLOCK || tty->flags & (1 << TTY_IO_ERROR)){
 		/* nonblock mode is set or port is not enabled */
-		info->port.flags |= ASYNC_NORMAL_ACTIVE;
+		port->flags |= ASYNC_NORMAL_ACTIVE;
 		return 0;
 	}
 
@@ -3318,50 +3297,42 @@
 
 	/* Wait for carrier detect and the line to become
 	 * free (i.e., not in use by the callout).  While we are in
-	 * this loop, info->port.count is dropped by one, so that
+	 * this loop, port->count is dropped by one, so that
 	 * mgsl_close() knows when to free things.  We restore it upon
 	 * exit, either normal or abnormal.
 	 */
 	 
 	retval = 0;
-	add_wait_queue(&info->port.open_wait, &wait);
+	add_wait_queue(&port->open_wait, &wait);
 	
 	if (debug_level >= DEBUG_LEVEL_INFO)
 		printk("%s(%d):block_til_ready before block on %s count=%d\n",
-			 __FILE__,__LINE__, tty->driver->name, info->port.count );
+			 __FILE__,__LINE__, tty->driver->name, port->count );
 
 	spin_lock_irqsave(&info->irq_spinlock, flags);
 	if (!tty_hung_up_p(filp)) {
 		extra_count = true;
-		info->port.count--;
+		port->count--;
 	}
 	spin_unlock_irqrestore(&info->irq_spinlock, flags);
-	info->port.blocked_open++;
+	port->blocked_open++;
 	
 	while (1) {
-		if (tty->termios->c_cflag & CBAUD) {
-			spin_lock_irqsave(&info->irq_spinlock,flags);
-			info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR;
-		 	usc_set_serial_signals(info);
-			spin_unlock_irqrestore(&info->irq_spinlock,flags);
-		}
+		if (tty->termios->c_cflag & CBAUD)
+			tty_port_raise_dtr_rts(port);
 		
 		set_current_state(TASK_INTERRUPTIBLE);
 		
-		if (tty_hung_up_p(filp) || !(info->port.flags & ASYNC_INITIALIZED)){
-			retval = (info->port.flags & ASYNC_HUP_NOTIFY) ?
+		if (tty_hung_up_p(filp) || !(port->flags & ASYNC_INITIALIZED)){
+			retval = (port->flags & ASYNC_HUP_NOTIFY) ?
 					-EAGAIN : -ERESTARTSYS;
 			break;
 		}
 		
-		spin_lock_irqsave(&info->irq_spinlock,flags);
-	 	usc_get_serial_signals(info);
-		spin_unlock_irqrestore(&info->irq_spinlock,flags);
+		dcd = tty_port_carrier_raised(&info->port);
 		
- 		if (!(info->port.flags & ASYNC_CLOSING) &&
- 		    (do_clocal || (info->serial_signals & SerialSignal_DCD)) ) {
+ 		if (!(port->flags & ASYNC_CLOSING) && (do_clocal || dcd))
  			break;
-		}
 			
 		if (signal_pending(current)) {
 			retval = -ERESTARTSYS;
@@ -3370,24 +3341,25 @@
 		
 		if (debug_level >= DEBUG_LEVEL_INFO)
 			printk("%s(%d):block_til_ready blocking on %s count=%d\n",
-				 __FILE__,__LINE__, tty->driver->name, info->port.count );
+				 __FILE__,__LINE__, tty->driver->name, port->count );
 				 
 		schedule();
 	}
 	
 	set_current_state(TASK_RUNNING);
-	remove_wait_queue(&info->port.open_wait, &wait);
+	remove_wait_queue(&port->open_wait, &wait);
 	
+	/* FIXME: Racy on hangup during close wait */
 	if (extra_count)
-		info->port.count++;
-	info->port.blocked_open--;
+		port->count++;
+	port->blocked_open--;
 	
 	if (debug_level >= DEBUG_LEVEL_INFO)
 		printk("%s(%d):block_til_ready after blocking on %s count=%d\n",
-			 __FILE__,__LINE__, tty->driver->name, info->port.count );
+			 __FILE__,__LINE__, tty->driver->name, port->count );
 			 
 	if (!retval)
-		info->port.flags |= ASYNC_NORMAL_ACTIVE;
+		port->flags |= ASYNC_NORMAL_ACTIVE;
 		
 	return retval;
 	
@@ -4304,6 +4276,12 @@
 
 }	/* end of mgsl_add_device() */
 
+static const struct tty_port_operations mgsl_port_ops = {
+	.carrier_raised = carrier_raised,
+	.raise_dtr_rts = raise_dtr_rts,
+};
+
+
 /* mgsl_allocate_device()
  * 
  * 	Allocate and initialize a device instance structure
@@ -4322,6 +4300,7 @@
 		printk("Error can't allocate device instance data\n");
 	} else {
 		tty_port_init(&info->port);
+		info->port.ops = &mgsl_port_ops;
 		info->magic = MGSL_MAGIC;
 		INIT_WORK(&info->task, mgsl_bh_handler);
 		info->max_frame_size = 4096;
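
synclink.c keeps its own block_til_ready() but now leans on tty_port_raise_dtr_rts() and tty_port_carrier_raised() inside the loop. Stripped of the count/blocked_open bookkeeping, the loop reduces to roughly the following sketch (the ex_ name is hypothetical and the hangup/INITIALIZED checks are abbreviated):

static int ex_wait_for_carrier(struct tty_port *port, struct tty_struct *tty)
{
	DECLARE_WAITQUEUE(wait, current);
	int retval = 0;

	add_wait_queue(&port->open_wait, &wait);
	while (1) {
		if (tty->termios->c_cflag & CBAUD)
			tty_port_raise_dtr_rts(port);	/* assert DTR/RTS via the op */
		set_current_state(TASK_INTERRUPTIBLE);
		if (!(port->flags & ASYNC_CLOSING) &&
		    tty_port_carrier_raised(port))
			break;				/* DCD is up, open may proceed */
		if (signal_pending(current)) {
			retval = -ERESTARTSYS;
			break;
		}
		schedule();
	}
	set_current_state(TASK_RUNNING);
	remove_wait_queue(&port->open_wait, &wait);
	return retval;
}
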
diff --git a/drivers/char/synclink_gt.c b/drivers/char/synclink_gt.c
index 08911ed..53544e2 100644
--- a/drivers/char/synclink_gt.c
+++ b/drivers/char/synclink_gt.c
@@ -720,44 +720,9 @@
 		return;
 	DBGINFO(("%s close entry, count=%d\n", info->device_name, info->port.count));
 
-	if (!info->port.count)
-		return;
-
-	if (tty_hung_up_p(filp))
+	if (tty_port_close_start(&info->port, tty, filp) == 0)
 		goto cleanup;
 
-	if ((tty->count == 1) && (info->port.count != 1)) {
-		/*
-		 * tty->count is 1 and the tty structure will be freed.
-		 * info->port.count should be one in this case.
-		 * if it's not, correct it so that the port is shutdown.
-		 */
-		DBGERR(("%s close: bad refcount; tty->count=1, "
-		       "info->port.count=%d\n", info->device_name, info->port.count));
-		info->port.count = 1;
-	}
-
-	info->port.count--;
-
-	/* if at least one open remaining, leave hardware active */
-	if (info->port.count)
-		goto cleanup;
-
-	info->port.flags |= ASYNC_CLOSING;
-
-	/* set tty->closing to notify line discipline to
-	 * only process XON/XOFF characters. Only the N_TTY
-	 * discipline appears to use this (ppp does not).
-	 */
-	tty->closing = 1;
-
-	/* wait for transmit data to clear all layers */
-
-	if (info->port.closing_wait != ASYNC_CLOSING_WAIT_NONE) {
-		DBGINFO(("%s call tty_wait_until_sent\n", info->device_name));
-		tty_wait_until_sent(tty, info->port.closing_wait);
-	}
-
  	if (info->port.flags & ASYNC_INITIALIZED)
  		wait_until_sent(tty, info->timeout);
 	flush_buffer(tty);
@@ -765,20 +730,8 @@
 
 	shutdown(info);
 
-	tty->closing = 0;
+	tty_port_close_end(&info->port, tty);
 	info->port.tty = NULL;
-
-	if (info->port.blocked_open) {
-		if (info->port.close_delay) {
-			msleep_interruptible(jiffies_to_msecs(info->port.close_delay));
-		}
-		wake_up_interruptible(&info->port.open_wait);
-	}
-
-	info->port.flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING);
-
-	wake_up_interruptible(&info->port.close_wait);
-
 cleanup:
 	DBGINFO(("%s close exit, count=%d\n", tty->driver->name, info->port.count));
 }
@@ -3132,6 +3085,29 @@
 	return 0;
 }
 
+static int carrier_raised(struct tty_port *port)
+{
+	unsigned long flags;
+	struct slgt_info *info = container_of(port, struct slgt_info, port);
+
+	spin_lock_irqsave(&info->lock,flags);
+ 	get_signals(info);
+	spin_unlock_irqrestore(&info->lock,flags);
+	return (info->signals & SerialSignal_DCD) ? 1 : 0;
+}
+
+static void raise_dtr_rts(struct tty_port *port)
+{
+	unsigned long flags;
+	struct slgt_info *info = container_of(port, struct slgt_info, port);
+
+	spin_lock_irqsave(&info->lock,flags);
+	info->signals |= SerialSignal_RTS + SerialSignal_DTR;
+ 	set_signals(info);
+	spin_unlock_irqrestore(&info->lock,flags);
+}
+
+
 /*
  *  block current process until the device is ready to open
  */
@@ -3143,12 +3119,14 @@
 	bool		do_clocal = false;
 	bool		extra_count = false;
 	unsigned long	flags;
+	int		cd;
+	struct tty_port *port = &info->port;
 
 	DBGINFO(("%s block_til_ready\n", tty->driver->name));
 
 	if (filp->f_flags & O_NONBLOCK || tty->flags & (1 << TTY_IO_ERROR)){
 		/* nonblock mode is set or port is not enabled */
-		info->port.flags |= ASYNC_NORMAL_ACTIVE;
+		port->flags |= ASYNC_NORMAL_ACTIVE;
 		return 0;
 	}
 
@@ -3157,46 +3135,38 @@
 
 	/* Wait for carrier detect and the line to become
 	 * free (i.e., not in use by the callout).  While we are in
-	 * this loop, info->port.count is dropped by one, so that
+	 * this loop, port->count is dropped by one, so that
 	 * close() knows when to free things.  We restore it upon
 	 * exit, either normal or abnormal.
 	 */
 
 	retval = 0;
-	add_wait_queue(&info->port.open_wait, &wait);
+	add_wait_queue(&port->open_wait, &wait);
 
 	spin_lock_irqsave(&info->lock, flags);
 	if (!tty_hung_up_p(filp)) {
 		extra_count = true;
-		info->port.count--;
+		port->count--;
 	}
 	spin_unlock_irqrestore(&info->lock, flags);
-	info->port.blocked_open++;
+	port->blocked_open++;
 
 	while (1) {
-		if ((tty->termios->c_cflag & CBAUD)) {
-			spin_lock_irqsave(&info->lock,flags);
-			info->signals |= SerialSignal_RTS + SerialSignal_DTR;
-		 	set_signals(info);
-			spin_unlock_irqrestore(&info->lock,flags);
-		}
+		if ((tty->termios->c_cflag & CBAUD))
+			tty_port_raise_dtr_rts(port);
 
 		set_current_state(TASK_INTERRUPTIBLE);
 
-		if (tty_hung_up_p(filp) || !(info->port.flags & ASYNC_INITIALIZED)){
-			retval = (info->port.flags & ASYNC_HUP_NOTIFY) ?
+		if (tty_hung_up_p(filp) || !(port->flags & ASYNC_INITIALIZED)){
+			retval = (port->flags & ASYNC_HUP_NOTIFY) ?
 					-EAGAIN : -ERESTARTSYS;
 			break;
 		}
 
-		spin_lock_irqsave(&info->lock,flags);
-	 	get_signals(info);
-		spin_unlock_irqrestore(&info->lock,flags);
+		cd = tty_port_carrier_raised(port);
 
- 		if (!(info->port.flags & ASYNC_CLOSING) &&
- 		    (do_clocal || (info->signals & SerialSignal_DCD)) ) {
+ 		if (!(port->flags & ASYNC_CLOSING) && (do_clocal || cd ))
  			break;
-		}
 
 		if (signal_pending(current)) {
 			retval = -ERESTARTSYS;
@@ -3208,14 +3178,14 @@
 	}
 
 	set_current_state(TASK_RUNNING);
-	remove_wait_queue(&info->port.open_wait, &wait);
+	remove_wait_queue(&port->open_wait, &wait);
 
 	if (extra_count)
-		info->port.count++;
-	info->port.blocked_open--;
+		port->count++;
+	port->blocked_open--;
 
 	if (!retval)
-		info->port.flags |= ASYNC_NORMAL_ACTIVE;
+		port->flags |= ASYNC_NORMAL_ACTIVE;
 
 	DBGINFO(("%s block_til_ready ready, rc=%d\n", tty->driver->name, retval));
 	return retval;
@@ -3444,6 +3414,11 @@
 #endif
 }
 
+static const struct tty_port_operations slgt_port_ops = {
+	.carrier_raised = carrier_raised,
+	.raise_dtr_rts = raise_dtr_rts,
+};
+
 /*
  *  allocate device instance structure, return NULL on failure
  */
@@ -3458,6 +3433,7 @@
 			driver_name, adapter_num, port_num));
 	} else {
 		tty_port_init(&info->port);
+		info->port.ops = &slgt_port_ops;
 		info->magic = MGSL_MAGIC;
 		INIT_WORK(&info->task, bh_handler);
 		info->max_frame_size = 4096;
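
synclink_gt's close() is reduced to the same start/end helper pair. Approximately (this is an assumption about the helpers' behaviour, simplified, and not part of this patch), the two calls split the old close logic like this:

/* Returns 0 when the caller has nothing more to do (hangup race, or
 * other opens remain); returns 1 when this was the final close and the
 * driver should shut the hardware down. */
int ex_tty_port_close_start(struct tty_port *port, struct tty_struct *tty,
			    struct file *filp)
{
	unsigned long flags;

	spin_lock_irqsave(&port->lock, flags);
	if (tty_hung_up_p(filp) || --port->count > 0) {
		spin_unlock_irqrestore(&port->lock, flags);
		return 0;
	}
	port->count = 0;
	port->flags |= ASYNC_CLOSING;
	tty->closing = 1;
	spin_unlock_irqrestore(&port->lock, flags);

	if (port->closing_wait != ASYNC_CLOSING_WAIT_NONE)
		tty_wait_until_sent(tty, port->closing_wait);
	return 1;
}

/* Counterpart: close delay, wake any blocked opener, drop CLOSING/ACTIVE. */
void ex_tty_port_close_end(struct tty_port *port, struct tty_struct *tty)
{
	tty_ldisc_flush(tty);
	tty->closing = 0;

	if (port->blocked_open) {
		if (port->close_delay)
			msleep_interruptible(jiffies_to_msecs(port->close_delay));
		wake_up_interruptible(&port->open_wait);
	}
	port->flags &= ~(ASYNC_NORMAL_ACTIVE | ASYNC_CLOSING);
	wake_up_interruptible(&port->close_wait);
}
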
diff --git a/drivers/char/synclinkmp.c b/drivers/char/synclinkmp.c
index 6bdb44f..7b0c5b2 100644
--- a/drivers/char/synclinkmp.c
+++ b/drivers/char/synclinkmp.c
@@ -558,6 +558,7 @@
 
 static int  startup(SLMP_INFO *info);
 static int  block_til_ready(struct tty_struct *tty, struct file * filp,SLMP_INFO *info);
+static int carrier_raised(struct tty_port *port);
 static void shutdown(SLMP_INFO *info);
 static void program_hw(SLMP_INFO *info);
 static void change_params(SLMP_INFO *info);
@@ -800,7 +801,7 @@
  */
 static void close(struct tty_struct *tty, struct file *filp)
 {
-	SLMP_INFO * info = (SLMP_INFO *)tty->driver_data;
+	SLMP_INFO * info = tty->driver_data;
 
 	if (sanity_check(info, tty->name, "close"))
 		return;
@@ -809,70 +810,18 @@
 		printk("%s(%d):%s close() entry, count=%d\n",
 			 __FILE__,__LINE__, info->device_name, info->port.count);
 
-	if (!info->port.count)
-		return;
-
-	if (tty_hung_up_p(filp))
+	if (tty_port_close_start(&info->port, tty, filp) == 0)
 		goto cleanup;
-
-	if ((tty->count == 1) && (info->port.count != 1)) {
-		/*
-		 * tty->count is 1 and the tty structure will be freed.
-		 * info->port.count should be one in this case.
-		 * if it's not, correct it so that the port is shutdown.
-		 */
-		printk("%s(%d):%s close: bad refcount; tty->count is 1, "
-		       "info->port.count is %d\n",
-			 __FILE__,__LINE__, info->device_name, info->port.count);
-		info->port.count = 1;
-	}
-
-	info->port.count--;
-
-	/* if at least one open remaining, leave hardware active */
-	if (info->port.count)
-		goto cleanup;
-
-	info->port.flags |= ASYNC_CLOSING;
-
-	/* set tty->closing to notify line discipline to
-	 * only process XON/XOFF characters. Only the N_TTY
-	 * discipline appears to use this (ppp does not).
-	 */
-	tty->closing = 1;
-
-	/* wait for transmit data to clear all layers */
-
-	if (info->port.closing_wait != ASYNC_CLOSING_WAIT_NONE) {
-		if (debug_level >= DEBUG_LEVEL_INFO)
-			printk("%s(%d):%s close() calling tty_wait_until_sent\n",
-				 __FILE__,__LINE__, info->device_name );
-		tty_wait_until_sent(tty, info->port.closing_wait);
-	}
-
+		
  	if (info->port.flags & ASYNC_INITIALIZED)
  		wait_until_sent(tty, info->timeout);
 
 	flush_buffer(tty);
-
 	tty_ldisc_flush(tty);
-
 	shutdown(info);
 
-	tty->closing = 0;
+	tty_port_close_end(&info->port, tty);
 	info->port.tty = NULL;
-
-	if (info->port.blocked_open) {
-		if (info->port.close_delay) {
-			msleep_interruptible(jiffies_to_msecs(info->port.close_delay));
-		}
-		wake_up_interruptible(&info->port.open_wait);
-	}
-
-	info->port.flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING);
-
-	wake_up_interruptible(&info->port.close_wait);
-
 cleanup:
 	if (debug_level >= DEBUG_LEVEL_INFO)
 		printk("%s(%d):%s close() exit, count=%d\n", __FILE__,__LINE__,
@@ -884,7 +833,7 @@
  */
 static void hangup(struct tty_struct *tty)
 {
-	SLMP_INFO *info = (SLMP_INFO *)tty->driver_data;
+	SLMP_INFO *info = tty->driver_data;
 
 	if (debug_level >= DEBUG_LEVEL_INFO)
 		printk("%s(%d):%s hangup()\n",
@@ -907,7 +856,7 @@
  */
 static void set_termios(struct tty_struct *tty, struct ktermios *old_termios)
 {
-	SLMP_INFO *info = (SLMP_INFO *)tty->driver_data;
+	SLMP_INFO *info = tty->driver_data;
 	unsigned long flags;
 
 	if (debug_level >= DEBUG_LEVEL_INFO)
@@ -960,7 +909,7 @@
 		 const unsigned char *buf, int count)
 {
 	int	c, ret = 0;
-	SLMP_INFO *info = (SLMP_INFO *)tty->driver_data;
+	SLMP_INFO *info = tty->driver_data;
 	unsigned long flags;
 
 	if (debug_level >= DEBUG_LEVEL_INFO)
@@ -1038,7 +987,7 @@
  */
 static int put_char(struct tty_struct *tty, unsigned char ch)
 {
-	SLMP_INFO *info = (SLMP_INFO *)tty->driver_data;
+	SLMP_INFO *info = tty->driver_data;
 	unsigned long flags;
 	int ret = 0;
 
@@ -1075,7 +1024,7 @@
  */
 static void send_xchar(struct tty_struct *tty, char ch)
 {
-	SLMP_INFO *info = (SLMP_INFO *)tty->driver_data;
+	SLMP_INFO *info = tty->driver_data;
 	unsigned long flags;
 
 	if (debug_level >= DEBUG_LEVEL_INFO)
@@ -1099,7 +1048,7 @@
  */
 static void wait_until_sent(struct tty_struct *tty, int timeout)
 {
-	SLMP_INFO * info = (SLMP_INFO *)tty->driver_data;
+	SLMP_INFO * info = tty->driver_data;
 	unsigned long orig_jiffies, char_time;
 
 	if (!info )
@@ -1166,7 +1115,7 @@
  */
 static int write_room(struct tty_struct *tty)
 {
-	SLMP_INFO *info = (SLMP_INFO *)tty->driver_data;
+	SLMP_INFO *info = tty->driver_data;
 	int ret;
 
 	if (sanity_check(info, tty->name, "write_room"))
@@ -1193,7 +1142,7 @@
  */
 static void flush_chars(struct tty_struct *tty)
 {
-	SLMP_INFO *info = (SLMP_INFO *)tty->driver_data;
+	SLMP_INFO *info = tty->driver_data;
 	unsigned long flags;
 
 	if ( debug_level >= DEBUG_LEVEL_INFO )
@@ -1232,7 +1181,7 @@
  */
 static void flush_buffer(struct tty_struct *tty)
 {
-	SLMP_INFO *info = (SLMP_INFO *)tty->driver_data;
+	SLMP_INFO *info = tty->driver_data;
 	unsigned long flags;
 
 	if (debug_level >= DEBUG_LEVEL_INFO)
@@ -1254,7 +1203,7 @@
  */
 static void tx_hold(struct tty_struct *tty)
 {
-	SLMP_INFO *info = (SLMP_INFO *)tty->driver_data;
+	SLMP_INFO *info = tty->driver_data;
 	unsigned long flags;
 
 	if (sanity_check(info, tty->name, "tx_hold"))
@@ -1274,7 +1223,7 @@
  */
 static void tx_release(struct tty_struct *tty)
 {
-	SLMP_INFO *info = (SLMP_INFO *)tty->driver_data;
+	SLMP_INFO *info = tty->driver_data;
 	unsigned long flags;
 
 	if (sanity_check(info, tty->name, "tx_release"))
@@ -1304,7 +1253,7 @@
 static int do_ioctl(struct tty_struct *tty, struct file *file,
 		 unsigned int cmd, unsigned long arg)
 {
-	SLMP_INFO *info = (SLMP_INFO *)tty->driver_data;
+	SLMP_INFO *info = tty->driver_data;
 	int error;
 	struct mgsl_icount cnow;	/* kernel counter temps */
 	struct serial_icounter_struct __user *p_cuser;	/* user space */
@@ -1515,7 +1464,7 @@
  */
 static int chars_in_buffer(struct tty_struct *tty)
 {
-	SLMP_INFO *info = (SLMP_INFO *)tty->driver_data;
+	SLMP_INFO *info = tty->driver_data;
 
 	if (sanity_check(info, tty->name, "chars_in_buffer"))
 		return 0;
@@ -1531,7 +1480,7 @@
  */
 static void throttle(struct tty_struct * tty)
 {
-	SLMP_INFO *info = (SLMP_INFO *)tty->driver_data;
+	SLMP_INFO *info = tty->driver_data;
 	unsigned long flags;
 
 	if (debug_level >= DEBUG_LEVEL_INFO)
@@ -1556,7 +1505,7 @@
  */
 static void unthrottle(struct tty_struct * tty)
 {
-	SLMP_INFO *info = (SLMP_INFO *)tty->driver_data;
+	SLMP_INFO *info = tty->driver_data;
 	unsigned long flags;
 
 	if (debug_level >= DEBUG_LEVEL_INFO)
@@ -1587,7 +1536,7 @@
 static int set_break(struct tty_struct *tty, int break_state)
 {
 	unsigned char RegValue;
-	SLMP_INFO * info = (SLMP_INFO *)tty->driver_data;
+	SLMP_INFO * info = tty->driver_data;
 	unsigned long flags;
 
 	if (debug_level >= DEBUG_LEVEL_INFO)
@@ -3269,7 +3218,7 @@
  */
 static int tiocmget(struct tty_struct *tty, struct file *file)
 {
-	SLMP_INFO *info = (SLMP_INFO *)tty->driver_data;
+	SLMP_INFO *info = tty->driver_data;
 	unsigned int result;
  	unsigned long flags;
 
@@ -3295,7 +3244,7 @@
 static int tiocmset(struct tty_struct *tty, struct file *file,
 		    unsigned int set, unsigned int clear)
 {
-	SLMP_INFO *info = (SLMP_INFO *)tty->driver_data;
+	SLMP_INFO *info = tty->driver_data;
  	unsigned long flags;
 
 	if (debug_level >= DEBUG_LEVEL_INFO)
@@ -3318,7 +3267,28 @@
 	return 0;
 }
 
+static int carrier_raised(struct tty_port *port)
+{
+	SLMP_INFO *info = container_of(port, SLMP_INFO, port);
+	unsigned long flags;
 
+	spin_lock_irqsave(&info->lock,flags);
+ 	get_signals(info);
+	spin_unlock_irqrestore(&info->lock,flags);
+
+	return (info->serial_signals & SerialSignal_DCD) ? 1 : 0;
+}
+
+static void raise_dtr_rts(struct tty_port *port)
+{
+	SLMP_INFO *info = container_of(port, SLMP_INFO, port);
+	unsigned long flags;
+
+	spin_lock_irqsave(&info->lock,flags);
+	info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR;
+ 	set_signals(info);
+	spin_unlock_irqrestore(&info->lock,flags);
+}
 
 /* Block the current process until the specified port is ready to open.
  */
@@ -3330,6 +3300,8 @@
 	bool		do_clocal = false;
 	bool		extra_count = false;
 	unsigned long	flags;
+	int		cd;
+	struct tty_port *port = &info->port;
 
 	if (debug_level >= DEBUG_LEVEL_INFO)
 		printk("%s(%d):%s block_til_ready()\n",
@@ -3338,7 +3310,7 @@
 	if (filp->f_flags & O_NONBLOCK || tty->flags & (1 << TTY_IO_ERROR)){
 		/* nonblock mode is set or port is not enabled */
 		/* just verify that callout device is not active */
-		info->port.flags |= ASYNC_NORMAL_ACTIVE;
+		port->flags |= ASYNC_NORMAL_ACTIVE;
 		return 0;
 	}
 
@@ -3347,50 +3319,42 @@
 
 	/* Wait for carrier detect and the line to become
 	 * free (i.e., not in use by the callout).  While we are in
-	 * this loop, info->port.count is dropped by one, so that
+	 * this loop, port->count is dropped by one, so that
 	 * close() knows when to free things.  We restore it upon
 	 * exit, either normal or abnormal.
 	 */
 
 	retval = 0;
-	add_wait_queue(&info->port.open_wait, &wait);
+	add_wait_queue(&port->open_wait, &wait);
 
 	if (debug_level >= DEBUG_LEVEL_INFO)
 		printk("%s(%d):%s block_til_ready() before block, count=%d\n",
-			 __FILE__,__LINE__, tty->driver->name, info->port.count );
+			 __FILE__,__LINE__, tty->driver->name, port->count );
 
 	spin_lock_irqsave(&info->lock, flags);
 	if (!tty_hung_up_p(filp)) {
 		extra_count = true;
-		info->port.count--;
+		port->count--;
 	}
 	spin_unlock_irqrestore(&info->lock, flags);
-	info->port.blocked_open++;
+	port->blocked_open++;
 
 	while (1) {
-		if ((tty->termios->c_cflag & CBAUD)) {
-			spin_lock_irqsave(&info->lock,flags);
-			info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR;
-		 	set_signals(info);
-			spin_unlock_irqrestore(&info->lock,flags);
-		}
+		if (tty->termios->c_cflag & CBAUD)
+			tty_port_raise_dtr_rts(port);
 
 		set_current_state(TASK_INTERRUPTIBLE);
 
-		if (tty_hung_up_p(filp) || !(info->port.flags & ASYNC_INITIALIZED)){
-			retval = (info->port.flags & ASYNC_HUP_NOTIFY) ?
+		if (tty_hung_up_p(filp) || !(port->flags & ASYNC_INITIALIZED)){
+			retval = (port->flags & ASYNC_HUP_NOTIFY) ?
 					-EAGAIN : -ERESTARTSYS;
 			break;
 		}
 
-		spin_lock_irqsave(&info->lock,flags);
-	 	get_signals(info);
-		spin_unlock_irqrestore(&info->lock,flags);
+		cd = tty_port_carrier_raised(port);
 
- 		if (!(info->port.flags & ASYNC_CLOSING) &&
- 		    (do_clocal || (info->serial_signals & SerialSignal_DCD)) ) {
+ 		if (!(port->flags & ASYNC_CLOSING) && (do_clocal || cd))
  			break;
-		}
 
 		if (signal_pending(current)) {
 			retval = -ERESTARTSYS;
@@ -3399,24 +3363,24 @@
 
 		if (debug_level >= DEBUG_LEVEL_INFO)
 			printk("%s(%d):%s block_til_ready() count=%d\n",
-				 __FILE__,__LINE__, tty->driver->name, info->port.count );
+				 __FILE__,__LINE__, tty->driver->name, port->count );
 
 		schedule();
 	}
 
 	set_current_state(TASK_RUNNING);
-	remove_wait_queue(&info->port.open_wait, &wait);
+	remove_wait_queue(&port->open_wait, &wait);
 
 	if (extra_count)
-		info->port.count++;
-	info->port.blocked_open--;
+		port->count++;
+	port->blocked_open--;
 
 	if (debug_level >= DEBUG_LEVEL_INFO)
 		printk("%s(%d):%s block_til_ready() after, count=%d\n",
-			 __FILE__,__LINE__, tty->driver->name, info->port.count );
+			 __FILE__,__LINE__, tty->driver->name, port->count );
 
 	if (!retval)
-		info->port.flags |= ASYNC_NORMAL_ACTIVE;
+		port->flags |= ASYNC_NORMAL_ACTIVE;
 
 	return retval;
 }
@@ -3782,6 +3746,11 @@
 #endif
 }
 
+static const struct tty_port_operations port_ops = {
+	.carrier_raised = carrier_raised,
+	.raise_dtr_rts = raise_dtr_rts,
+};
+
 /* Allocate and initialize a device instance structure
  *
  * Return Value:	pointer to SLMP_INFO if success, otherwise NULL
@@ -3798,6 +3767,7 @@
 			__FILE__,__LINE__, adapter_num, port_num);
 	} else {
 		tty_port_init(&info->port);
+		info->port.ops = &port_ops;
 		info->magic = MGSL_MAGIC;
 		INIT_WORK(&info->task, bh_handler);
 		info->max_frame_size = 4096;
@@ -3940,6 +3910,7 @@
 	.tiocmset = tiocmset,
 };
 
+
 static void synclinkmp_cleanup(void)
 {
 	int rc;
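
The synclinkmp hunks above are a template for the new tty_port callback scheme: the driver embeds a struct tty_port, points port.ops at a tty_port_operations table, and each callback recovers the per-port state with container_of(). A minimal sketch of that pattern, assuming a hypothetical "foo" driver (foo_port, the cached signals field and the TIOCM_* bookkeeping are illustrative, not part of this patch):

#include <linux/kernel.h>	/* container_of() */
#include <linux/tty.h>
#include <linux/tty_driver.h>

struct foo_port {
	struct tty_port port;		/* must be embedded, not pointed to */
	unsigned int signals;		/* driver's cached modem-signal state */
};

static int foo_carrier_raised(struct tty_port *port)
{
	struct foo_port *fp = container_of(port, struct foo_port, port);

	/* 1 = carrier present, 0 = no carrier */
	return (fp->signals & TIOCM_CD) ? 1 : 0;
}

static void foo_raise_dtr_rts(struct tty_port *port)
{
	struct foo_port *fp = container_of(port, struct foo_port, port);

	fp->signals |= TIOCM_DTR | TIOCM_RTS;
	/* ...write the new signal state out to the hardware here... */
}

static const struct tty_port_operations foo_port_ops = {
	.carrier_raised	= foo_carrier_raised,
	.raise_dtr_rts	= foo_raise_dtr_rts,
};

/* done once per device instance, next to the existing tty_port_init() call */
static void foo_port_setup(struct foo_port *fp)
{
	tty_port_init(&fp->port);
	fp->port.ops = &foo_port_ops;
}

The core then reaches these callbacks through tty_port_carrier_raised() and tty_port_raise_dtr_rts() (added in the tty_port.c hunks below), so drivers no longer open-code DCD/DTR handling in their block_til_ready loops.
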
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index db15f9b..d33e5ab 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -1111,9 +1111,7 @@
  *		Locks the line discipline as required
  *		Writes to the tty driver are serialized by the atomic_write_lock
  *	and are then processed in chunks to the device. The line discipline
- *	write method will not be involked in parallel for each device
- *		The line discipline write method is called under the big
- *	kernel lock for historical reasons. New code should not rely on this.
+ *	write method will not be invoked in parallel for each device.
  */
 
 static ssize_t tty_write(struct file *file, const char __user *buf,
@@ -1213,7 +1211,7 @@
  *	be held until the 'fast-open' is also done. Will change once we
  *	have refcounting in the driver and per driver locking
  */
-struct tty_struct *tty_driver_lookup_tty(struct tty_driver *driver,
+static struct tty_struct *tty_driver_lookup_tty(struct tty_driver *driver,
 		struct inode *inode, int idx)
 {
 	struct tty_struct *tty;
@@ -2050,7 +2048,6 @@
 /**
  *	tty_do_resize		-	resize event
  *	@tty: tty being resized
- *	@real_tty: real tty (not the same as tty if using a pty/tty pair)
  *	@rows: rows (character)
  *	@cols: cols (character)
  *
@@ -2058,41 +2055,34 @@
  *	peform a terminal resize correctly
  */
 
-int tty_do_resize(struct tty_struct *tty, struct tty_struct *real_tty,
-					struct winsize *ws)
+int tty_do_resize(struct tty_struct *tty, struct winsize *ws)
 {
-	struct pid *pgrp, *rpgrp;
+	struct pid *pgrp;
 	unsigned long flags;
 
-	/* For a PTY we need to lock the tty side */
-	mutex_lock(&real_tty->termios_mutex);
-	if (!memcmp(ws, &real_tty->winsize, sizeof(*ws)))
+	/* Lock the tty */
+	mutex_lock(&tty->termios_mutex);
+	if (!memcmp(ws, &tty->winsize, sizeof(*ws)))
 		goto done;
 	/* Get the PID values and reference them so we can
 	   avoid holding the tty ctrl lock while sending signals */
 	spin_lock_irqsave(&tty->ctrl_lock, flags);
 	pgrp = get_pid(tty->pgrp);
-	rpgrp = get_pid(real_tty->pgrp);
 	spin_unlock_irqrestore(&tty->ctrl_lock, flags);
 
 	if (pgrp)
 		kill_pgrp(pgrp, SIGWINCH, 1);
-	if (rpgrp != pgrp && rpgrp)
-		kill_pgrp(rpgrp, SIGWINCH, 1);
-
 	put_pid(pgrp);
-	put_pid(rpgrp);
 
 	tty->winsize = *ws;
-	real_tty->winsize = *ws;
 done:
-	mutex_unlock(&real_tty->termios_mutex);
+	mutex_unlock(&tty->termios_mutex);
 	return 0;
 }
 
 /**
  *	tiocswinsz		-	implement window size set ioctl
- *	@tty; tty
+ *	@tty: tty side of the tty/pty pair
  *	@arg: user buffer for result
  *
  *	Copies the user idea of the window size to the kernel. Traditionally
@@ -2105,17 +2095,16 @@
  *	then calls into the default method.
  */
 
-static int tiocswinsz(struct tty_struct *tty, struct tty_struct *real_tty,
-	struct winsize __user *arg)
+static int tiocswinsz(struct tty_struct *tty, struct winsize __user *arg)
 {
 	struct winsize tmp_ws;
 	if (copy_from_user(&tmp_ws, arg, sizeof(*arg)))
 		return -EFAULT;
 
 	if (tty->ops->resize)
-		return tty->ops->resize(tty, real_tty, &tmp_ws);
+		return tty->ops->resize(tty, &tmp_ws);
 	else
-		return tty_do_resize(tty, real_tty, &tmp_ws);
+		return tty_do_resize(tty, &tmp_ws);
 }
 
 /**
@@ -2540,7 +2529,7 @@
 	case TIOCGWINSZ:
 		return tiocgwinsz(real_tty, p);
 	case TIOCSWINSZ:
-		return tiocswinsz(tty, real_tty, p);
+		return tiocswinsz(real_tty, p);
 	case TIOCCONS:
 		return real_tty != tty ? -EINVAL : tioccons(file);
 	case FIONBIO:
@@ -2785,6 +2774,8 @@
 	INIT_WORK(&tty->hangup_work, do_tty_hangup);
 	mutex_init(&tty->atomic_read_lock);
 	mutex_init(&tty->atomic_write_lock);
+	mutex_init(&tty->output_lock);
+	mutex_init(&tty->echo_lock);
 	spin_lock_init(&tty->read_lock);
 	spin_lock_init(&tty->ctrl_lock);
 	INIT_LIST_HEAD(&tty->tty_files);
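
With real_tty dropped from the resize path, a driver's resize hook now acts only on the tty it is handed, and tty_do_resize() takes care of storing the winsize and signalling the foreground process group. A hedged sketch of a driver-side resize method under the new signature (foo_resize and the 132x60 limit are hypothetical):

static int foo_resize(struct tty_struct *tty, struct winsize *ws)
{
	/* refuse geometries the hardware cannot display */
	if (ws->ws_col > 132 || ws->ws_row > 60)
		return -EINVAL;

	/* fall back to the generic bookkeeping and SIGWINCH delivery */
	return tty_do_resize(tty, ws);
}
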
diff --git a/drivers/char/tty_ldisc.c b/drivers/char/tty_ldisc.c
index f307f13..7a84b40 100644
--- a/drivers/char/tty_ldisc.c
+++ b/drivers/char/tty_ldisc.c
@@ -316,8 +316,7 @@
 {
 	/* wait_event is a macro */
 	wait_event(tty_ldisc_wait, tty_ldisc_try(tty));
-	if (tty->ldisc.refcount == 0)
-		printk(KERN_ERR "tty_ldisc_ref_wait\n");
+	WARN_ON(tty->ldisc.refcount == 0);
 	return &tty->ldisc;
 }
 
@@ -376,15 +375,17 @@
  *	@tty: terminal to activate ldisc on
  *
  *	Set the TTY_LDISC flag when the line discipline can be called
- *	again. Do necessary wakeups for existing sleepers.
+ *	again. Do necessary wakeups for existing sleepers. Clear the
+ *	TTY_LDISC_CHANGING flag to indicate any ldisc change is now over.
  *
- *	Note: nobody should set this bit except via this function. Clearing
- *	directly is allowed.
+ *	Note: nobody should set the TTY_LDISC bit except via this function.
+ *	Clearing directly is allowed.
  */
 
 void tty_ldisc_enable(struct tty_struct *tty)
 {
 	set_bit(TTY_LDISC, &tty->flags);
+	clear_bit(TTY_LDISC_CHANGING, &tty->flags);
 	wake_up(&tty_ldisc_wait);
 }
 
@@ -496,7 +497,14 @@
 	 *	reference to the line discipline. The TTY_LDISC bit
 	 *	prevents anyone taking a reference once it is clear.
 	 *	We need the lock to avoid racing reference takers.
+	 *
+	 *	We must clear the TTY_LDISC bit here to avoid a livelock
+	 *	with a userspace app continually trying to use the tty in
+	 *	parallel to the change and re-referencing the tty.
 	 */
+	clear_bit(TTY_LDISC, &tty->flags);
+	if (o_tty)
+		clear_bit(TTY_LDISC, &o_tty->flags);
 
 	spin_lock_irqsave(&tty_ldisc_lock, flags);
 	if (tty->ldisc.refcount || (o_tty && o_tty->ldisc.refcount)) {
@@ -528,7 +536,7 @@
 	 *	If the TTY_LDISC bit is set, then we are racing against
 	 *	another ldisc change
 	 */
-	if (!test_bit(TTY_LDISC, &tty->flags)) {
+	if (test_bit(TTY_LDISC_CHANGING, &tty->flags)) {
 		struct tty_ldisc *ld;
 		spin_unlock_irqrestore(&tty_ldisc_lock, flags);
 		tty_ldisc_put(new_ldisc.ops);
@@ -536,10 +544,14 @@
 		tty_ldisc_deref(ld);
 		goto restart;
 	}
-
-	clear_bit(TTY_LDISC, &tty->flags);
+	/*
+	 *	This flag is used to avoid two parallel ldisc changes. Once
+	 *	open and close are fine-grained locked, this may work better
+	 *	as a mutex shared with the open/close/hup paths.
+	 */
+	set_bit(TTY_LDISC_CHANGING, &tty->flags);
 	if (o_tty)
-		clear_bit(TTY_LDISC, &o_tty->flags);
+		set_bit(TTY_LDISC_CHANGING, &o_tty->flags);
 	spin_unlock_irqrestore(&tty_ldisc_lock, flags);
 	
 	/*
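
Taken together, the tty_ldisc.c hunks replace the single overloaded TTY_LDISC bit with a two-flag protocol: TTY_LDISC still gates new references, the new TTY_LDISC_CHANGING marks a change in flight, and tty_ldisc_enable() now flips both back in one place. A condensed, comment-level view of the ordering (not a drop-in: the locking, reference draining and the o_tty side are elided):

	/* changer: stop new ldisc references being taken */
	clear_bit(TTY_LDISC, &tty->flags);

	/* under tty_ldisc_lock: back off if another change is already
	   in flight, otherwise claim this one */
	if (test_bit(TTY_LDISC_CHANGING, &tty->flags))
		goto restart;			/* as in the hunk above */
	set_bit(TTY_LDISC_CHANGING, &tty->flags);

	/* ...wait for existing references to drain, install the new ldisc... */

	tty_ldisc_enable(tty);	/* sets TTY_LDISC, clears TTY_LDISC_CHANGING
				   and wakes tty_ldisc_ref_wait() sleepers */
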
diff --git a/drivers/char/tty_port.c b/drivers/char/tty_port.c
index c8f8024..9b8004c 100644
--- a/drivers/char/tty_port.c
+++ b/drivers/char/tty_port.c
@@ -7,6 +7,7 @@
 #include <linux/tty.h>
 #include <linux/tty_driver.h>
 #include <linux/tty_flip.h>
+#include <linux/serial.h>
 #include <linux/timer.h>
 #include <linux/string.h>
 #include <linux/slab.h>
@@ -94,3 +95,227 @@
 	spin_unlock_irqrestore(&port->lock, flags);
 }
 EXPORT_SYMBOL(tty_port_tty_set);
+
+/**
+ *	tty_port_hangup		-	hangup helper
+ *	@port: tty port
+ *
+ *	Perform port level tty hangup flag and count changes. Drop the tty
+ *	reference.
+ */
+
+void tty_port_hangup(struct tty_port *port)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&port->lock, flags);
+	port->count = 0;
+	port->flags &= ~ASYNC_NORMAL_ACTIVE;
+	if (port->tty)
+		tty_kref_put(port->tty);
+	port->tty = NULL;
+	spin_unlock_irqrestore(&port->lock, flags);
+	wake_up_interruptible(&port->open_wait);
+}
+EXPORT_SYMBOL(tty_port_hangup);
+
+/**
+ *	tty_port_carrier_raised	-	carrier raised check
+ *	@port: tty port
+ *
+ *	Wrapper for the carrier detect logic. For the moment this is used
+ *	to hide some internal details. This will eventually become entirely
+ *	internal to the tty port.
+ */
+
+int tty_port_carrier_raised(struct tty_port *port)
+{
+	if (port->ops->carrier_raised == NULL)
+		return 1;
+	return port->ops->carrier_raised(port);
+}
+EXPORT_SYMBOL(tty_port_carrier_raised);
+
+/**
+ *	tty_port_raise_dtr_rts	-	Raise DTR/RTS
+ *	@port: tty port
+ *
+ *	Wrapper for the DTR/RTS raise logic. For the moment this is used
+ *	to hide some internal details. This will eventually become entirely
+ *	internal to the tty port.
+ */
+
+void tty_port_raise_dtr_rts(struct tty_port *port)
+{
+	if (port->ops->raise_dtr_rts)
+		port->ops->raise_dtr_rts(port);
+}
+EXPORT_SYMBOL(tty_port_raise_dtr_rts);
+
+/**
+ *	tty_port_block_til_ready	-	Waiting logic for tty open
+ *	@port: the tty port being opened
+ *	@tty: the tty device being bound
+ *	@filp: the file pointer of the opener
+ *
+ *	Implement the core POSIX/SuS tty behaviour when opening a tty device.
+ *	Handles:
+ *		- hangup (both before and during)
+ *		- non-blocking open
+ *		- rts/dtr/dcd
+ *		- signals
+ *		- port flags and counts
+ *
+ *	The passed tty_port must implement the carrier_raised method if it can
+ *	do carrier detect and the raise_dtr_rts method if it supports software
+ *	management of these lines. Note that the dtr/rts raise is done each
+ *	iteration as a hangup may have previously dropped them while we wait.
+ */
+ 
+int tty_port_block_til_ready(struct tty_port *port,
+				struct tty_struct *tty, struct file *filp)
+{
+	int do_clocal = 0, retval;
+	unsigned long flags;
+	DECLARE_WAITQUEUE(wait, current);
+	int cd;
+
+	/* block if port is in the process of being closed */
+	if (tty_hung_up_p(filp) || port->flags & ASYNC_CLOSING) {
+		interruptible_sleep_on(&port->close_wait);
+		if (port->flags & ASYNC_HUP_NOTIFY)
+			return -EAGAIN;
+		else
+			return -ERESTARTSYS;
+	}
+
+	/* if non-blocking mode is set we can pass directly to open unless
+	   the port has just hung up or is in another error state */
+	if ((filp->f_flags & O_NONBLOCK) ||
+			(tty->flags & (1 << TTY_IO_ERROR))) {
+		port->flags |= ASYNC_NORMAL_ACTIVE;
+		return 0;
+	}
+
+	if (C_CLOCAL(tty))
+		do_clocal = 1;
+
+	/* Block waiting until we can proceed. We may need to wait for the
+	   carrier, but we must also wait for any close that is in progress
+	   before the next open may complete */
+
+	retval = 0;
+	add_wait_queue(&port->open_wait, &wait);
+
+	/* The port lock protects the port counts */
+	spin_lock_irqsave(&port->lock, flags);
+	if (!tty_hung_up_p(filp))
+		port->count--;
+	port->blocked_open++;
+	spin_unlock_irqrestore(&port->lock, flags);
+
+	while (1) {
+		/* Indicate we are open */
+		if (tty->termios->c_cflag & CBAUD)
+			tty_port_raise_dtr_rts(port);
+
+		set_current_state(TASK_INTERRUPTIBLE);
+		/* Check for a hangup or uninitialised port. Return accordingly */
+		if (tty_hung_up_p(filp) || !(port->flags & ASYNC_INITIALIZED)) {
+			if (port->flags & ASYNC_HUP_NOTIFY)
+				retval = -EAGAIN;
+			else
+				retval = -ERESTARTSYS;
+			break;
+		}
+		/* Probe the carrier. For devices with no carrier detect this
+		   will always return true */
+		cd = tty_port_carrier_raised(port);
+		if (!(port->flags & ASYNC_CLOSING) &&
+				(do_clocal || cd))
+			break;
+		if (signal_pending(current)) {
+			retval = -ERESTARTSYS;
+			break;
+		}
+		schedule();
+	}
+	set_current_state(TASK_RUNNING);
+	remove_wait_queue(&port->open_wait, &wait);
+
+	/* Update counts. A parallel hangup will have set count to zero and
+	   we must not mess that up further */
+	spin_lock_irqsave(&port->lock, flags);
+	if (!tty_hung_up_p(filp))
+		port->count++;
+	port->blocked_open--;
+	if (retval == 0)
+		port->flags |= ASYNC_NORMAL_ACTIVE;
+	spin_unlock_irqrestore(&port->lock, flags);
+	return retval;
+	
+}
+EXPORT_SYMBOL(tty_port_block_til_ready);
+
+int tty_port_close_start(struct tty_port *port, struct tty_struct *tty, struct file *filp)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&port->lock, flags);
+	if (tty_hung_up_p(filp)) {
+		spin_unlock_irqrestore(&port->lock, flags);
+		return 0;
+	}
+
+	if (tty->count == 1 && port->count != 1) {
+		printk(KERN_WARNING
+		    "tty_port_close_start: tty->count = 1 port count = %d.\n",
+								port->count);
+		port->count = 1;
+	}
+	if (--port->count < 0) {
+		printk(KERN_WARNING "tty_port_close_start: count = %d\n",
+								port->count);
+		port->count = 0;
+	}
+
+	if (port->count) {
+		spin_unlock_irqrestore(&port->lock, flags);
+		return 0;
+	}
+	port->flags |= ASYNC_CLOSING;
+	tty->closing = 1;
+	spin_unlock_irqrestore(&port->lock, flags);
+	/* Don't block on a stalled port, just pull the chain */
+	if (tty->flow_stopped)
+		tty_driver_flush_buffer(tty);
+	if (port->flags & ASYNC_INITIALIZED &&
+			port->closing_wait != ASYNC_CLOSING_WAIT_NONE)
+		tty_wait_until_sent(tty, port->closing_wait);
+	return 1;
+}
+EXPORT_SYMBOL(tty_port_close_start);
+
+void tty_port_close_end(struct tty_port *port, struct tty_struct *tty)
+{
+	unsigned long flags;
+
+	tty_ldisc_flush(tty);
+
+	spin_lock_irqsave(&port->lock, flags);
+	tty->closing = 0;
+
+	if (port->blocked_open) {
+		spin_unlock_irqrestore(&port->lock, flags);
+		if (port->close_delay) {
+			msleep_interruptible(
+				jiffies_to_msecs(port->close_delay));
+		}
+		spin_lock_irqsave(&port->lock, flags);
+		wake_up_interruptible(&port->open_wait);
+	}
+	port->flags &= ~(ASYNC_NORMAL_ACTIVE | ASYNC_CLOSING);
+	wake_up_interruptible(&port->close_wait);
+	spin_unlock_irqrestore(&port->lock, flags);
+}
+EXPORT_SYMBOL(tty_port_close_end);
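
The helpers exported above carry the open/close boilerplate that serial drivers currently duplicate. A hedged sketch of a driver sitting on top of them, reusing the hypothetical struct foo_port from the earlier sketch (a struct tty_port embedded in the driver's per-port state); the hardware start/stop hooks are elided:

static int foo_open(struct tty_struct *tty, struct file *filp)
{
	struct foo_port *fp = tty->driver_data;
	int retval;

	/* ...per-open count bookkeeping and hardware startup elided... */

	retval = tty_port_block_til_ready(&fp->port, tty, filp);
	if (retval)
		return retval;	/* -EAGAIN or -ERESTARTSYS from the wait loop */
	return 0;
}

static void foo_close(struct tty_struct *tty, struct file *filp)
{
	struct foo_port *fp = tty->driver_data;

	if (tty_port_close_start(&fp->port, tty, filp) == 0)
		return;		/* not the final close, keep the port running */

	/* ...stop the transmitter/receiver and free buffers here... */

	tty_port_close_end(&fp->port, tty);
}
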
diff --git a/drivers/char/vme_scc.c b/drivers/char/vme_scc.c
index 1718b3c..0e8234b 100644
--- a/drivers/char/vme_scc.c
+++ b/drivers/char/vme_scc.c
@@ -69,7 +69,7 @@
 static void scc_enable_tx_interrupts(void * ptr);
 static void scc_disable_rx_interrupts(void * ptr);
 static void scc_enable_rx_interrupts(void * ptr);
-static int  scc_get_CD(void * ptr);
+static int  scc_carrier_raised(struct tty_port *port);
 static void scc_shutdown_port(void * ptr);
 static int scc_set_real_termios(void  *ptr);
 static void scc_hungup(void  *ptr);
@@ -100,7 +100,6 @@
         scc_enable_tx_interrupts,
         scc_disable_rx_interrupts,
         scc_enable_rx_interrupts,
-        scc_get_CD,
         scc_shutdown_port,
         scc_set_real_termios,
         scc_chars_in_buffer,
@@ -129,6 +128,10 @@
 	.break_ctl = scc_break_ctl,
 };
 
+static const struct tty_port_operations scc_port_ops = {
+	.carrier_raised = scc_carrier_raised,
+};
+
 /*----------------------------------------------------------------------------
  * vme_scc_init() and support functions
  *---------------------------------------------------------------------------*/
@@ -176,6 +179,8 @@
 
 	for (i = 0; i < 2; i++) {
 		port = scc_ports + i;
+		tty_port_init(&port->gs.port);
+		port->gs.port.ops = &scc_port_ops;
 		port->gs.magic = SCC_MAGIC;
 		port->gs.close_delay = HZ/2;
 		port->gs.closing_wait = 30 * HZ;
@@ -624,10 +629,10 @@
 }
 
 
-static int scc_get_CD(void *ptr)
+static int scc_carrier_raised(struct tty_port *port)
 {
-	struct scc_port *port = ptr;
-	unsigned channel = port->channel;
+	struct scc_port *sc = container_of(port, struct scc_port, gs.port);
+	unsigned channel = sc->channel;
 
 	return !!(scc_last_status_reg[channel] & SR_DCD);
 }
@@ -638,7 +643,7 @@
 	struct scc_port *port = ptr;
 
 	port->gs.port.flags &= ~ GS_ACTIVE;
-	if (port->gs.port.tty && port->gs.port.tty->termios->c_cflag & HUPCL) {
+	if (port->gs.port.tty && (port->gs.port.tty->termios->c_cflag & HUPCL)) {
 		scc_setsignals (port, 0, 0);
 	}
 }
@@ -779,7 +784,7 @@
 
 static void scc_send_xchar(struct tty_struct *tty, char ch)
 {
-	struct scc_port *port = (struct scc_port *)tty->driver_data;
+	struct scc_port *port = tty->driver_data;
 
 	port->x_char = ch;
 	if (ch)
@@ -896,7 +901,7 @@
 		return retval;
 	}
 
-	port->c_dcd = scc_get_CD (port);
+	port->c_dcd = tty_port_carrier_raised(&port->gs.port);
 
 	scc_enable_rx_interrupts(port);
 
@@ -906,7 +911,7 @@
 
 static void scc_throttle (struct tty_struct * tty)
 {
-	struct scc_port *port = (struct scc_port *)tty->driver_data;
+	struct scc_port *port = tty->driver_data;
 	unsigned long	flags;
 	SCC_ACCESS_INIT(port);
 
@@ -922,7 +927,7 @@
 
 static void scc_unthrottle (struct tty_struct * tty)
 {
-	struct scc_port *port = (struct scc_port *)tty->driver_data;
+	struct scc_port *port = tty->driver_data;
 	unsigned long	flags;
 	SCC_ACCESS_INIT(port);
 
@@ -945,7 +950,7 @@
 
 static int scc_break_ctl(struct tty_struct *tty, int break_state)
 {
-	struct scc_port *port = (struct scc_port *)tty->driver_data;
+	struct scc_port *port = tty->driver_data;
 	unsigned long	flags;
 	SCC_ACCESS_INIT(port);
 
diff --git a/drivers/char/vt.c b/drivers/char/vt.c
index 008176e..8001421 100644
--- a/drivers/char/vt.c
+++ b/drivers/char/vt.c
@@ -819,8 +819,8 @@
  *	ctrl_lock of the tty IFF a tty is passed.
  */
 
-static int vc_do_resize(struct tty_struct *tty, struct tty_struct *real_tty,
-		struct vc_data *vc, unsigned int cols, unsigned int lines)
+static int vc_do_resize(struct tty_struct *tty, struct vc_data *vc,
+				unsigned int cols, unsigned int lines)
 {
 	unsigned long old_origin, new_origin, new_scr_end, rlth, rrem, err = 0;
 	unsigned int old_cols, old_rows, old_row_size, old_screen_size;
@@ -932,7 +932,7 @@
 		ws.ws_row = vc->vc_rows;
 		ws.ws_col = vc->vc_cols;
 		ws.ws_ypixel = vc->vc_scan_lines;
-		tty_do_resize(tty, real_tty, &ws);
+		tty_do_resize(tty, &ws);
 	}
 
 	if (CON_IS_VISIBLE(vc))
@@ -954,13 +954,12 @@
 
 int vc_resize(struct vc_data *vc, unsigned int cols, unsigned int rows)
 {
-	return vc_do_resize(vc->vc_tty, vc->vc_tty, vc, cols, rows);
+	return vc_do_resize(vc->vc_tty, vc, cols, rows);
 }
 
 /**
  *	vt_resize		-	resize a VT
  *	@tty: tty to resize
- *	@real_tty: tty if a pty/tty pair
  *	@ws: winsize attributes
  *
  *	Resize a virtual terminal. This is called by the tty layer as we
@@ -971,14 +970,13 @@
  *	termios_mutex and the tty ctrl_lock in that order.
  */
 
-int vt_resize(struct tty_struct *tty, struct tty_struct *real_tty,
-	struct winsize *ws)
+int vt_resize(struct tty_struct *tty, struct winsize *ws)
 {
 	struct vc_data *vc = tty->driver_data;
 	int ret;
 
 	acquire_console_sem();
-	ret = vc_do_resize(tty, real_tty, vc, ws->ws_col, ws->ws_row);
+	ret = vc_do_resize(tty, vc, ws->ws_col, ws->ws_row);
 	release_console_sem();
 	return ret;
 }
@@ -2679,7 +2677,7 @@
 {
 	if (tty->stopped)
 		return 0;
-	return 4096;		/* No limit, really; we're not buffering */
+	return 32768;		/* No limit, really; we're not buffering */
 }
 
 static int con_chars_in_buffer(struct tty_struct *tty)
diff --git a/drivers/char/vt_ioctl.c b/drivers/char/vt_ioctl.c
index 8944ce5..a2dee0e 100644
--- a/drivers/char/vt_ioctl.c
+++ b/drivers/char/vt_ioctl.c
@@ -366,7 +366,7 @@
 int vt_ioctl(struct tty_struct *tty, struct file * file,
 	     unsigned int cmd, unsigned long arg)
 {
-	struct vc_data *vc = (struct vc_data *)tty->driver_data;
+	struct vc_data *vc = tty->driver_data;
 	struct console_font_op op;	/* used in multiple places here */
 	struct kbd_struct * kbd;
 	unsigned int console;
diff --git a/drivers/clocksource/tcb_clksrc.c b/drivers/clocksource/tcb_clksrc.c
index f450588..254f106 100644
--- a/drivers/clocksource/tcb_clksrc.c
+++ b/drivers/clocksource/tcb_clksrc.c
@@ -154,7 +154,6 @@
 		.shift		= 32,
 		/* Should be lower than at91rm9200's system timer */
 		.rating		= 125,
-		.cpumask	= CPU_MASK_CPU0,
 		.set_next_event	= tc_next_event,
 		.set_mode	= tc_mode,
 	},
@@ -195,6 +194,7 @@
 	clkevt.clkevt.max_delta_ns
 		= clockevent_delta2ns(0xffff, &clkevt.clkevt);
 	clkevt.clkevt.min_delta_ns = clockevent_delta2ns(1, &clkevt.clkevt) + 1;
+	clkevt.clkevt.cpumask = cpumask_of(0);
 
 	setup_irq(irq, &tc_irqaction);
 
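
The tcb_clksrc change follows the cpumask rework: the static CPU_MASK_CPU0 initializer goes away and the clock_event_device is bound to a CPU at setup time through cpumask_of(). A minimal sketch of the same idiom (my_* names are hypothetical; the callback prototypes match this kernel's clock_event_device):

#include <linux/clockchips.h>

static int my_next_event(unsigned long delta, struct clock_event_device *d);
static void my_set_mode(enum clock_event_mode mode, struct clock_event_device *d);

static struct clock_event_device my_clkevt = {
	.name		= "my-timer",
	.features	= CLOCK_EVT_FEAT_ONESHOT,
	.rating		= 125,
	.set_next_event	= my_next_event,
	.set_mode	= my_set_mode,
	/* no static .cpumask initializer any more */
};

static void my_clkevt_register(void)
{
	my_clkevt.cpumask = cpumask_of(0);	/* bind to CPU 0 at runtime */
	clockevents_register_device(&my_clkevt);
}
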
diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig
index c9f21e3..4ee85fc 100644
--- a/drivers/ide/Kconfig
+++ b/drivers/ide/Kconfig
@@ -137,6 +137,7 @@
 
 config BLK_DEV_IDECD
 	tristate "Include IDE/ATAPI CDROM support"
+	select IDE_ATAPI
 	---help---
 	  If you have a CD-ROM drive using the ATAPI protocol, say Y. ATAPI is
 	  a newer protocol used by IDE CD-ROM and TAPE drives, similar to the
@@ -185,23 +186,6 @@
 	  To compile this driver as a module, choose M here: the
 	  module will be called ide-tape.
 
-config BLK_DEV_IDESCSI
-	tristate "SCSI emulation support (DEPRECATED)"
-	depends on SCSI
-	select IDE_ATAPI
-	---help---
-	  WARNING: ide-scsi is no longer needed for cd writing applications!
-	  The 2.6 kernel supports direct writing to ide-cd, which eliminates
-	  the need for ide-scsi + the entire scsi stack just for writing a
-	  cd. The new method is more efficient in every way.
-
-	  This will provide SCSI host adapter emulation for IDE ATAPI devices,
-	  and will allow you to use a SCSI device driver instead of a native
-	  ATAPI driver.
-
-	  If both this SCSI emulation and native ATAPI support are compiled
-	  into the kernel, the native support will be used.
-
 config BLK_DEV_IDEACPI
 	bool "IDE ACPI support"
 	depends on ACPI
diff --git a/drivers/ide/Makefile b/drivers/ide/Makefile
index 177e3f8..4107289 100644
--- a/drivers/ide/Makefile
+++ b/drivers/ide/Makefile
@@ -5,7 +5,7 @@
 EXTRA_CFLAGS				+= -Idrivers/ide
 
 ide-core-y += ide.o ide-ioctls.o ide-io.o ide-iops.o ide-lib.o ide-probe.o \
-	      ide-taskfile.o ide-pm.o ide-park.o ide-pio-blacklist.o
+	      ide-taskfile.o ide-pm.o ide-park.o ide-pio-blacklist.o ide-sysfs.o
 
 # core IDE code
 ide-core-$(CONFIG_IDE_TIMINGS)		+= ide-timings.o
diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index 4e58b9e..e8688c0 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -3,6 +3,7 @@
  */
 
 #include <linux/kernel.h>
+#include <linux/cdrom.h>
 #include <linux/delay.h>
 #include <linux/ide.h>
 #include <scsi/scsi.h>
@@ -14,6 +15,13 @@
 #define debug_log(fmt, args...) do {} while (0)
 #endif
 
+#define ATAPI_MIN_CDB_BYTES	12
+
+static inline int dev_is_idecd(ide_drive_t *drive)
+{
+	return drive->media == ide_cdrom || drive->media == ide_optical;
+}
+
 /*
  * Check whether we can support a device,
  * based on the ATAPI IDENTIFY command results.
@@ -233,18 +241,49 @@
 }
 EXPORT_SYMBOL_GPL(ide_retry_pc);
 
-int ide_scsi_expiry(ide_drive_t *drive)
+int ide_cd_expiry(ide_drive_t *drive)
 {
-	struct ide_atapi_pc *pc = drive->pc;
+	struct request *rq = HWGROUP(drive)->rq;
+	unsigned long wait = 0;
 
-	debug_log("%s called for %lu at %lu\n", __func__,
-		  pc->scsi_cmd->serial_number, jiffies);
+	debug_log("%s: rq->cmd[0]: 0x%x\n", __func__, rq->cmd[0]);
 
-	pc->flags |= PC_FLAG_TIMEDOUT;
-
-	return 0; /* we do not want the IDE subsystem to retry */
+	/*
+	 * Some commands are *slow* and normally take a long time to complete.
+	 * Usually we can use the ATAPI "disconnect" to bypass this, but not all
+	 * commands/drives support that. Let ide_timer_expiry keep polling us
+	 * for these.
+	 */
+	switch (rq->cmd[0]) {
+	case GPCMD_BLANK:
+	case GPCMD_FORMAT_UNIT:
+	case GPCMD_RESERVE_RZONE_TRACK:
+	case GPCMD_CLOSE_TRACK:
+	case GPCMD_FLUSH_CACHE:
+		wait = ATAPI_WAIT_PC;
+		break;
+	default:
+		if (!(rq->cmd_flags & REQ_QUIET))
+			printk(KERN_INFO "cmd 0x%x timed out\n",
+					 rq->cmd[0]);
+		wait = 0;
+		break;
+	}
+	return wait;
 }
-EXPORT_SYMBOL_GPL(ide_scsi_expiry);
+EXPORT_SYMBOL_GPL(ide_cd_expiry);
+
+int ide_cd_get_xferlen(struct request *rq)
+{
+	if (blk_fs_request(rq))
+		return 32768;
+	else if (blk_sense_request(rq) || blk_pc_request(rq) ||
+			 rq->cmd_type == REQ_TYPE_ATA_PC)
+		return rq->data_len;
+	else
+		return 0;
+}
+EXPORT_SYMBOL_GPL(ide_cd_get_xferlen);
 
 /*
  * This is the usual interrupt handler which will be called during a packet
@@ -258,21 +297,14 @@
 	struct request *rq = hwif->hwgroup->rq;
 	const struct ide_tp_ops *tp_ops = hwif->tp_ops;
 	xfer_func_t *xferfunc;
-	ide_expiry_t *expiry;
 	unsigned int timeout, temp;
 	u16 bcount;
-	u8 stat, ireason, scsi = !!(drive->dev_flags & IDE_DFLAG_SCSI), dsc = 0;
+	u8 stat, ireason, dsc = 0;
 
 	debug_log("Enter %s - interrupt handler\n", __func__);
 
-	if (scsi) {
-		timeout = ide_scsi_get_timeout(pc);
-		expiry = ide_scsi_expiry;
-	} else {
-		timeout = (drive->media == ide_floppy) ? WAIT_FLOPPY_CMD
-						       : WAIT_TAPE_CMD;
-		expiry = NULL;
-	}
+	timeout = (drive->media == ide_floppy) ? WAIT_FLOPPY_CMD
+					       : WAIT_TAPE_CMD;
 
 	if (pc->flags & PC_FLAG_TIMEDOUT) {
 		drive->pc_callback(drive, 0);
@@ -284,8 +316,8 @@
 
 	if (pc->flags & PC_FLAG_DMA_IN_PROGRESS) {
 		if (hwif->dma_ops->dma_end(drive) ||
-		    (drive->media == ide_tape && !scsi && (stat & ATA_ERR))) {
-			if (drive->media == ide_floppy && !scsi)
+		    (drive->media == ide_tape && (stat & ATA_ERR))) {
+			if (drive->media == ide_floppy)
 				printk(KERN_ERR "%s: DMA %s error\n",
 					drive->name, rq_data_dir(pc->rq)
 						     ? "write" : "read");
@@ -307,7 +339,7 @@
 
 		local_irq_enable_in_hardirq();
 
-		if (drive->media == ide_tape && !scsi &&
+		if (drive->media == ide_tape &&
 		    (stat & ATA_ERR) && rq->cmd[0] == REQUEST_SENSE)
 			stat &= ~ATA_ERR;
 
@@ -315,11 +347,8 @@
 			/* Error detected */
 			debug_log("%s: I/O error\n", drive->name);
 
-			if (drive->media != ide_tape || scsi) {
+			if (drive->media != ide_tape)
 				pc->rq->errors++;
-				if (scsi)
-					goto cmd_finished;
-			}
 
 			if (rq->cmd[0] == REQUEST_SENSE) {
 				printk(KERN_ERR "%s: I/O error in request sense"
@@ -335,7 +364,6 @@
 			/* queued, but not started */
 			return ide_stopped;
 		}
-cmd_finished:
 		pc->error = 0;
 
 		if ((pc->flags & PC_FLAG_WAIT_FOR_DSC) && (stat & ATA_DSC) == 0)
@@ -382,25 +410,8 @@
 						"us more data than expected - "
 						"discarding data\n",
 						drive->name);
-				if (scsi)
-					temp = pc->buf_size - pc->xferred;
-				else
-					temp = 0;
-				if (temp) {
-					if (pc->sg)
-						drive->pc_io_buffers(drive, pc,
-								     temp, 0);
-					else
-						tp_ops->input_data(drive, NULL,
-							pc->cur_pos, temp);
-					printk(KERN_ERR "%s: transferred %d of "
-							"%d bytes\n",
-							drive->name,
-							temp, bcount);
-				}
-				pc->xferred += temp;
-				pc->cur_pos += temp;
-				ide_pad_transfer(drive, 0, bcount - temp);
+
+				ide_pad_transfer(drive, 0, bcount);
 				goto next_irq;
 			}
 			debug_log("The device wants to send us more data than "
@@ -410,14 +421,13 @@
 	} else
 		xferfunc = tp_ops->output_data;
 
-	if ((drive->media == ide_floppy && !scsi && !pc->buf) ||
-	    (drive->media == ide_tape && !scsi && pc->bh) ||
-	    (scsi && pc->sg)) {
+	if ((drive->media == ide_floppy && !pc->buf) ||
+	    (drive->media == ide_tape && pc->bh)) {
 		int done = drive->pc_io_buffers(drive, pc, bcount,
 				  !!(pc->flags & PC_FLAG_WRITING));
 
 		/* FIXME: don't do partial completions */
-		if (drive->media == ide_floppy && !scsi)
+		if (drive->media == ide_floppy)
 			ide_end_request(drive, 1, done >> 9);
 	} else
 		xferfunc(drive, NULL, pc->cur_pos, bcount);
@@ -430,7 +440,7 @@
 		  rq->cmd[0], bcount);
 next_irq:
 	/* And set the interrupt handler again */
-	ide_set_handler(drive, ide_pc_intr, timeout, expiry);
+	ide_set_handler(drive, ide_pc_intr, timeout, NULL);
 	return ide_started;
 }
 
@@ -479,11 +489,12 @@
 
 static ide_startstop_t ide_transfer_pc(ide_drive_t *drive)
 {
-	struct ide_atapi_pc *pc = drive->pc;
+	struct ide_atapi_pc *uninitialized_var(pc);
 	ide_hwif_t *hwif = drive->hwif;
 	struct request *rq = hwif->hwgroup->rq;
 	ide_expiry_t *expiry;
 	unsigned int timeout;
+	int cmd_len;
 	ide_startstop_t startstop;
 	u8 ireason;
 
@@ -493,101 +504,124 @@
 		return startstop;
 	}
 
-	ireason = ide_read_ireason(drive);
-	if (drive->media == ide_tape &&
-	    (drive->dev_flags & IDE_DFLAG_SCSI) == 0)
-		ireason = ide_wait_ireason(drive, ireason);
-
-	if ((ireason & ATAPI_COD) == 0 || (ireason & ATAPI_IO)) {
-		printk(KERN_ERR "%s: (IO,CoD) != (0,1) while issuing "
-				"a packet command\n", drive->name);
-		return ide_do_reset(drive);
+	if (drive->atapi_flags & IDE_AFLAG_DRQ_INTERRUPT) {
+		if (drive->dma)
+			drive->waiting_for_dma = 1;
 	}
 
-	/*
-	 * If necessary schedule the packet transfer to occur 'timeout'
-	 * miliseconds later in ide_delayed_transfer_pc() after the device
-	 * says it's ready for a packet.
-	 */
-	if (drive->atapi_flags & IDE_AFLAG_ZIP_DRIVE) {
-		timeout = drive->pc_delay;
-		expiry = &ide_delayed_transfer_pc;
+	if (dev_is_idecd(drive)) {
+		/* ATAPI commands get padded out to 12 bytes minimum */
+		cmd_len = COMMAND_SIZE(rq->cmd[0]);
+		if (cmd_len < ATAPI_MIN_CDB_BYTES)
+			cmd_len = ATAPI_MIN_CDB_BYTES;
+
+		timeout = rq->timeout;
+		expiry  = ide_cd_expiry;
 	} else {
-		if (drive->dev_flags & IDE_DFLAG_SCSI) {
-			timeout = ide_scsi_get_timeout(pc);
-			expiry = ide_scsi_expiry;
+		pc = drive->pc;
+
+		cmd_len = ATAPI_MIN_CDB_BYTES;
+
+		/*
+		 * If necessary schedule the packet transfer to occur 'timeout'
+		 * milliseconds later in ide_delayed_transfer_pc() after the
+		 * device says it's ready for a packet.
+		 */
+		if (drive->atapi_flags & IDE_AFLAG_ZIP_DRIVE) {
+			timeout = drive->pc_delay;
+			expiry = &ide_delayed_transfer_pc;
 		} else {
 			timeout = (drive->media == ide_floppy) ? WAIT_FLOPPY_CMD
 							       : WAIT_TAPE_CMD;
 			expiry = NULL;
 		}
+
+		ireason = ide_read_ireason(drive);
+		if (drive->media == ide_tape)
+			ireason = ide_wait_ireason(drive, ireason);
+
+		if ((ireason & ATAPI_COD) == 0 || (ireason & ATAPI_IO)) {
+			printk(KERN_ERR "%s: (IO,CoD) != (0,1) while issuing "
+					"a packet command\n", drive->name);
+
+			return ide_do_reset(drive);
+		}
 	}
 
 	/* Set the interrupt routine */
 	ide_set_handler(drive, ide_pc_intr, timeout, expiry);
 
 	/* Begin DMA, if necessary */
-	if (pc->flags & PC_FLAG_DMA_OK) {
-		pc->flags |= PC_FLAG_DMA_IN_PROGRESS;
-		hwif->dma_ops->dma_start(drive);
+	if (dev_is_idecd(drive)) {
+		if (drive->dma)
+			hwif->dma_ops->dma_start(drive);
+	} else {
+		if (pc->flags & PC_FLAG_DMA_OK) {
+			pc->flags |= PC_FLAG_DMA_IN_PROGRESS;
+			hwif->dma_ops->dma_start(drive);
+		}
 	}
 
 	/* Send the actual packet */
 	if ((drive->atapi_flags & IDE_AFLAG_ZIP_DRIVE) == 0)
-		hwif->tp_ops->output_data(drive, NULL, rq->cmd, 12);
+		hwif->tp_ops->output_data(drive, NULL, rq->cmd, cmd_len);
 
 	return ide_started;
 }
 
-ide_startstop_t ide_issue_pc(ide_drive_t *drive, unsigned int timeout,
-			     ide_expiry_t *expiry)
+ide_startstop_t ide_issue_pc(ide_drive_t *drive)
 {
-	struct ide_atapi_pc *pc = drive->pc;
+	struct ide_atapi_pc *pc;
 	ide_hwif_t *hwif = drive->hwif;
+	ide_expiry_t *expiry = NULL;
+	unsigned int timeout;
 	u32 tf_flags;
 	u16 bcount;
-	u8 scsi = !!(drive->dev_flags & IDE_DFLAG_SCSI);
 
-	/* We haven't transferred any data yet */
-	pc->xferred = 0;
-	pc->cur_pos = pc->buf;
-
-	/* Request to transfer the entire buffer at once */
-	if (drive->media == ide_tape && scsi == 0)
-		bcount = pc->req_xfer;
-	else
-		bcount = min(pc->req_xfer, 63 * 1024);
-
-	if (pc->flags & PC_FLAG_DMA_ERROR) {
-		pc->flags &= ~PC_FLAG_DMA_ERROR;
-		ide_dma_off(drive);
-	}
-
-	if ((pc->flags & PC_FLAG_DMA_OK) &&
-	    (drive->dev_flags & IDE_DFLAG_USING_DMA)) {
-		if (scsi)
-			hwif->sg_mapped = 1;
-		drive->dma = !hwif->dma_ops->dma_setup(drive);
-		if (scsi)
-			hwif->sg_mapped = 0;
-	}
-
-	if (!drive->dma)
-		pc->flags &= ~PC_FLAG_DMA_OK;
-
-	if (scsi)
-		tf_flags = 0;
-	else if (drive->media == ide_cdrom || drive->media == ide_optical)
+	if (dev_is_idecd(drive)) {
 		tf_flags = IDE_TFLAG_OUT_NSECT | IDE_TFLAG_OUT_LBAL;
-	else
+		bcount = ide_cd_get_xferlen(hwif->hwgroup->rq);
+		expiry = ide_cd_expiry;
+		timeout = ATAPI_WAIT_PC;
+
+		if (drive->dma)
+			drive->dma = !hwif->dma_ops->dma_setup(drive);
+	} else {
+		pc = drive->pc;
+
+		/* We haven't transferred any data yet */
+		pc->xferred = 0;
+		pc->cur_pos = pc->buf;
+
 		tf_flags = IDE_TFLAG_OUT_DEVICE;
+		bcount = ((drive->media == ide_tape) ?
+				pc->req_xfer :
+				min(pc->req_xfer, 63 * 1024));
+
+		if (pc->flags & PC_FLAG_DMA_ERROR) {
+			pc->flags &= ~PC_FLAG_DMA_ERROR;
+			ide_dma_off(drive);
+		}
+
+		if ((pc->flags & PC_FLAG_DMA_OK) &&
+		     (drive->dev_flags & IDE_DFLAG_USING_DMA))
+			drive->dma = !hwif->dma_ops->dma_setup(drive);
+
+		if (!drive->dma)
+			pc->flags &= ~PC_FLAG_DMA_OK;
+
+		timeout = (drive->media == ide_floppy) ? WAIT_FLOPPY_CMD
+						       : WAIT_TAPE_CMD;
+	}
 
 	ide_pktcmd_tf_load(drive, tf_flags, bcount, drive->dma);
 
 	/* Issue the packet command */
 	if (drive->atapi_flags & IDE_AFLAG_DRQ_INTERRUPT) {
+		if (drive->dma)
+			drive->waiting_for_dma = 0;
 		ide_execute_command(drive, ATA_CMD_PACKET, ide_transfer_pc,
-				    timeout, NULL);
+				    timeout, expiry);
 		return ide_started;
 	} else {
 		ide_execute_pkt_cmd(drive);
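
ide_cd_expiry() above follows the standard ide_expiry_t contract: a positive return value re-arms the timer for that many more jiffies (keep polling a command that is slow but legitimate), while 0 lets ide_timer_expiry() treat the request as timed out. An illustrative hook written to that contract (foo_expiry is hypothetical; the constants are the ones used in the hunk above):

static int foo_expiry(ide_drive_t *drive)
{
	struct request *rq = HWGROUP(drive)->rq;

	if (rq->cmd[0] == GPCMD_FLUSH_CACHE)
		return ATAPI_WAIT_PC;	/* slow but expected: keep waiting */

	return 0;			/* anything else has really timed out */
}
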
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 5daa4dd..1a7410f 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -53,14 +53,6 @@
 
 #include "ide-cd.h"
 
-#define IDECD_DEBUG_LOG		1
-
-#if IDECD_DEBUG_LOG
-#define ide_debug_log(lvl, fmt, args...) __ide_debug_log(lvl, fmt, args)
-#else
-#define ide_debug_log(lvl, fmt, args...) do {} while (0)
-#endif
-
 static DEFINE_MUTEX(idecd_ref_mutex);
 
 static void ide_cd_release(struct kref *);
@@ -519,37 +511,8 @@
 	return 1;
 }
 
-static int cdrom_timer_expiry(ide_drive_t *drive)
-{
-	struct request *rq = HWGROUP(drive)->rq;
-	unsigned long wait = 0;
-
-	ide_debug_log(IDE_DBG_RQ, "Call %s: rq->cmd[0]: 0x%x\n", __func__,
-		      rq->cmd[0]);
-
-	/*
-	 * Some commands are *slow* and normally take a long time to complete.
-	 * Usually we can use the ATAPI "disconnect" to bypass this, but not all
-	 * commands/drives support that. Let ide_timer_expiry keep polling us
-	 * for these.
-	 */
-	switch (rq->cmd[0]) {
-	case GPCMD_BLANK:
-	case GPCMD_FORMAT_UNIT:
-	case GPCMD_RESERVE_RZONE_TRACK:
-	case GPCMD_CLOSE_TRACK:
-	case GPCMD_FLUSH_CACHE:
-		wait = ATAPI_WAIT_PC;
-		break;
-	default:
-		if (!(rq->cmd_flags & REQ_QUIET))
-			printk(KERN_INFO PFX "cmd 0x%x timed out\n",
-					 rq->cmd[0]);
-		wait = 0;
-		break;
-	}
-	return wait;
-}
+static ide_startstop_t cdrom_transfer_packet_command(ide_drive_t *);
+static ide_startstop_t cdrom_newpc_intr(ide_drive_t *);
 
 /*
  * Set up the device registers for transferring a packet command on DEV,
@@ -559,11 +522,13 @@
  * called when the interrupt from the drive arrives.  Otherwise, HANDLER
  * will be called immediately after the drive is prepared for the transfer.
  */
-static ide_startstop_t cdrom_start_packet_command(ide_drive_t *drive,
-						  int xferlen,
-						  ide_handler_t *handler)
+static ide_startstop_t cdrom_start_packet_command(ide_drive_t *drive)
 {
 	ide_hwif_t *hwif = drive->hwif;
+	struct request *rq = hwif->hwgroup->rq;
+	int xferlen;
+
+	xferlen = ide_cd_get_xferlen(rq);
 
 	ide_debug_log(IDE_DBG_PC, "Call %s, xferlen: %d\n", __func__, xferlen);
 
@@ -581,13 +546,14 @@
 			drive->waiting_for_dma = 0;
 
 		/* packet command */
-		ide_execute_command(drive, ATA_CMD_PACKET, handler,
-				    ATAPI_WAIT_PC, cdrom_timer_expiry);
+		ide_execute_command(drive, ATA_CMD_PACKET,
+				    cdrom_transfer_packet_command,
+				    ATAPI_WAIT_PC, ide_cd_expiry);
 		return ide_started;
 	} else {
 		ide_execute_pkt_cmd(drive);
 
-		return (*handler) (drive);
+		return cdrom_transfer_packet_command(drive);
 	}
 }
 
@@ -598,11 +564,10 @@
  * there's data ready.
  */
 #define ATAPI_MIN_CDB_BYTES 12
-static ide_startstop_t cdrom_transfer_packet_command(ide_drive_t *drive,
-					  struct request *rq,
-					  ide_handler_t *handler)
+static ide_startstop_t cdrom_transfer_packet_command(ide_drive_t *drive)
 {
 	ide_hwif_t *hwif = drive->hwif;
+	struct request *rq = hwif->hwgroup->rq;
 	int cmd_len;
 	ide_startstop_t startstop;
 
@@ -629,7 +594,7 @@
 	}
 
 	/* arm the interrupt handler */
-	ide_set_handler(drive, handler, rq->timeout, cdrom_timer_expiry);
+	ide_set_handler(drive, cdrom_newpc_intr, rq->timeout, ide_cd_expiry);
 
 	/* ATAPI commands get padded out to 12 bytes minimum */
 	cmd_len = COMMAND_SIZE(rq->cmd[0]);
@@ -717,8 +682,6 @@
 	return 1;
 }
 
-static ide_startstop_t cdrom_newpc_intr(ide_drive_t *);
-
 static ide_startstop_t ide_cd_prepare_rw_request(ide_drive_t *drive,
 						 struct request *rq)
 {
@@ -761,20 +724,6 @@
 }
 
 /*
- * Routine to send a read/write packet command to the drive. This is usually
- * called directly from cdrom_start_{read,write}(). However, for drq_interrupt
- * devices, it is called from an interrupt when the drive is ready to accept
- * the command.
- */
-static ide_startstop_t cdrom_start_rw_cont(ide_drive_t *drive)
-{
-	struct request *rq = drive->hwif->hwgroup->rq;
-
-	/* send the command to the drive and return */
-	return cdrom_transfer_packet_command(drive, rq, cdrom_newpc_intr);
-}
-
-/*
  * Fix up a possibly partially-processed request so that we can start it over
  * entirely, or even put it back on the request queue.
  */
@@ -1096,7 +1045,7 @@
 	} else {
 		timeout = ATAPI_WAIT_PC;
 		if (!blk_fs_request(rq))
-			expiry = cdrom_timer_expiry;
+			expiry = ide_cd_expiry;
 	}
 
 	ide_set_handler(drive, cdrom_newpc_intr, timeout, expiry);
@@ -1163,13 +1112,6 @@
 	return ide_started;
 }
 
-static ide_startstop_t cdrom_do_newpc_cont(ide_drive_t *drive)
-{
-	struct request *rq = HWGROUP(drive)->rq;
-
-	return cdrom_transfer_packet_command(drive, rq, cdrom_newpc_intr);
-}
-
 static void cdrom_do_block_pc(ide_drive_t *drive, struct request *rq)
 {
 
@@ -1214,18 +1156,12 @@
 static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq,
 					sector_t block)
 {
-	ide_handler_t *fn;
-	int xferlen;
-
 	ide_debug_log(IDE_DBG_RQ, "Call %s, rq->cmd[0]: 0x%x, "
 		      "rq->cmd_type: 0x%x, block: %llu\n",
 		      __func__, rq->cmd[0], rq->cmd_type,
 		      (unsigned long long)block);
 
 	if (blk_fs_request(rq)) {
-		xferlen = 32768;
-		fn = cdrom_start_rw_cont;
-
 		if (cdrom_start_rw(drive, rq) == ide_stopped)
 			return ide_stopped;
 
@@ -1233,9 +1169,6 @@
 			return ide_stopped;
 	} else if (blk_sense_request(rq) || blk_pc_request(rq) ||
 		   rq->cmd_type == REQ_TYPE_ATA_PC) {
-		xferlen = rq->data_len;
-		fn = cdrom_do_newpc_cont;
-
 		if (!rq->timeout)
 			rq->timeout = ATAPI_WAIT_PC;
 
@@ -1250,7 +1183,7 @@
 		return ide_stopped;
 	}
 
-	return cdrom_start_packet_command(drive, xferlen, fn);
+	return cdrom_start_packet_command(drive);
 }
 
 /*
diff --git a/drivers/ide/ide-cd.h b/drivers/ide/ide-cd.h
index d5ce336..bf676b2 100644
--- a/drivers/ide/ide-cd.h
+++ b/drivers/ide/ide-cd.h
@@ -8,10 +8,14 @@
 #include <linux/cdrom.h>
 #include <asm/byteorder.h>
 
-/*
- * typical timeout for packet command
- */
-#define ATAPI_WAIT_PC		(60 * HZ)
+#define IDECD_DEBUG_LOG		0
+
+#if IDECD_DEBUG_LOG
+#define ide_debug_log(lvl, fmt, args...) __ide_debug_log(lvl, fmt, args)
+#else
+#define ide_debug_log(lvl, fmt, args...) do {} while (0)
+#endif
+
 #define ATAPI_WAIT_WRITE_BUSY	(10 * HZ)
 
 /************************************************************************/
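
With IDECD_DEBUG_LOG moved into the header and defaulting to 0, the existing trace calls in ide-cd.c compile away to an empty statement; flipping the define to 1 in a local build re-enables them. For example (an existing-style call, shown only to illustrate the toggle):

	ide_debug_log(IDE_DBG_RQ, "Call %s, rq->cmd[0]: 0x%x\n",
		      __func__, rq->cmd[0]);

expands to do {} while (0) when IDECD_DEBUG_LOG is 0 and to __ide_debug_log(...) when it is 1.
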
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index aeb1ad7..0a48e2d 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -197,7 +197,7 @@
 
 	pc->retries++;
 
-	return ide_issue_pc(drive, WAIT_FLOPPY_CMD, NULL);
+	return ide_issue_pc(drive);
 }
 
 void ide_floppy_create_read_capacity_cmd(struct ide_atapi_pc *pc)
@@ -342,38 +342,38 @@
  * Look at the flexible disk page parameters. We ignore the CHS capacity
  * parameters and use the LBA parameters instead.
  */
-static int ide_floppy_get_flexible_disk_page(ide_drive_t *drive)
+static int ide_floppy_get_flexible_disk_page(ide_drive_t *drive,
+					     struct ide_atapi_pc *pc)
 {
 	struct ide_disk_obj *floppy = drive->driver_data;
 	struct gendisk *disk = floppy->disk;
-	struct ide_atapi_pc pc;
 	u8 *page;
 	int capacity, lba_capacity;
 	u16 transfer_rate, sector_size, cyls, rpm;
 	u8 heads, sectors;
 
-	ide_floppy_create_mode_sense_cmd(&pc, IDEFLOPPY_FLEXIBLE_DISK_PAGE);
+	ide_floppy_create_mode_sense_cmd(pc, IDEFLOPPY_FLEXIBLE_DISK_PAGE);
 
-	if (ide_queue_pc_tail(drive, disk, &pc)) {
+	if (ide_queue_pc_tail(drive, disk, pc)) {
 		printk(KERN_ERR PFX "Can't get flexible disk page params\n");
 		return 1;
 	}
 
-	if (pc.buf[3] & 0x80)
+	if (pc->buf[3] & 0x80)
 		drive->dev_flags |= IDE_DFLAG_WP;
 	else
 		drive->dev_flags &= ~IDE_DFLAG_WP;
 
 	set_disk_ro(disk, !!(drive->dev_flags & IDE_DFLAG_WP));
 
-	page = &pc.buf[8];
+	page = &pc->buf[8];
 
-	transfer_rate = be16_to_cpup((__be16 *)&pc.buf[8 + 2]);
-	sector_size   = be16_to_cpup((__be16 *)&pc.buf[8 + 6]);
-	cyls          = be16_to_cpup((__be16 *)&pc.buf[8 + 8]);
-	rpm           = be16_to_cpup((__be16 *)&pc.buf[8 + 28]);
-	heads         = pc.buf[8 + 4];
-	sectors       = pc.buf[8 + 5];
+	transfer_rate = be16_to_cpup((__be16 *)&pc->buf[8 + 2]);
+	sector_size   = be16_to_cpup((__be16 *)&pc->buf[8 + 6]);
+	cyls          = be16_to_cpup((__be16 *)&pc->buf[8 + 8]);
+	rpm           = be16_to_cpup((__be16 *)&pc->buf[8 + 28]);
+	heads         = pc->buf[8 + 4];
+	sectors       = pc->buf[8 + 5];
 
 	capacity = cyls * heads * sectors * sector_size;
 
@@ -499,7 +499,7 @@
 
 	/* Clik! disk does not support get_flexible_disk_page */
 	if (!(drive->atapi_flags & IDE_AFLAG_CLIK_DRIVE))
-		(void) ide_floppy_get_flexible_disk_page(drive);
+		(void) ide_floppy_get_flexible_disk_page(drive, &pc);
 
 	return rc;
 }
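
This conversion (continued in the ide-floppy_ioctl.c hunks below) replaces the on-stack struct ide_atapi_pc in each helper with one allocated by the caller, so the ioctl path can reuse a single packet command. A sketch of a helper written to that convention (foo_query is hypothetical; the create/queue interfaces are the ones used in these hunks):

static int foo_query(ide_drive_t *drive, struct ide_atapi_pc *pc)
{
	struct ide_disk_obj *floppy = drive->driver_data;

	ide_floppy_create_mode_sense_cmd(pc, IDEFLOPPY_CAPABILITIES_PAGE);
	if (ide_queue_pc_tail(drive, floppy->disk, pc))
		return -EIO;

	/* results come back in pc->buf[]; the bit tested here is only an example */
	return (pc->buf[8 + 2] & 0x40) ? 1 : 0;
}
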
diff --git a/drivers/ide/ide-floppy_ioctl.c b/drivers/ide/ide-floppy_ioctl.c
index 2bc51ff..8f8be85 100644
--- a/drivers/ide/ide-floppy_ioctl.c
+++ b/drivers/ide/ide-floppy_ioctl.c
@@ -31,10 +31,11 @@
  * On exit we set nformats to the number of records we've actually initialized.
  */
 
-static int ide_floppy_get_format_capacities(ide_drive_t *drive, int __user *arg)
+static int ide_floppy_get_format_capacities(ide_drive_t *drive,
+					    struct ide_atapi_pc *pc,
+					    int __user *arg)
 {
 	struct ide_disk_obj *floppy = drive->driver_data;
-	struct ide_atapi_pc pc;
 	u8 header_len, desc_cnt;
 	int i, blocks, length, u_array_size, u_index;
 	int __user *argp;
@@ -45,13 +46,13 @@
 	if (u_array_size <= 0)
 		return -EINVAL;
 
-	ide_floppy_create_read_capacity_cmd(&pc);
-	if (ide_queue_pc_tail(drive, floppy->disk, &pc)) {
+	ide_floppy_create_read_capacity_cmd(pc);
+	if (ide_queue_pc_tail(drive, floppy->disk, pc)) {
 		printk(KERN_ERR "ide-floppy: Can't get floppy parameters\n");
 		return -EIO;
 	}
 
-	header_len = pc.buf[3];
+	header_len = pc->buf[3];
 	desc_cnt = header_len / 8; /* capacity descriptor of 8 bytes */
 
 	u_index = 0;
@@ -68,8 +69,8 @@
 		if (u_index >= u_array_size)
 			break;	/* User-supplied buffer too small */
 
-		blocks = be32_to_cpup((__be32 *)&pc.buf[desc_start]);
-		length = be16_to_cpup((__be16 *)&pc.buf[desc_start + 6]);
+		blocks = be32_to_cpup((__be32 *)&pc->buf[desc_start]);
+		length = be16_to_cpup((__be16 *)&pc->buf[desc_start + 6]);
 
 		if (put_user(blocks, argp))
 			return -EFAULT;
@@ -111,29 +112,28 @@
 	pc->flags |= PC_FLAG_WRITING;
 }
 
-static int ide_floppy_get_sfrp_bit(ide_drive_t *drive)
+static int ide_floppy_get_sfrp_bit(ide_drive_t *drive, struct ide_atapi_pc *pc)
 {
 	struct ide_disk_obj *floppy = drive->driver_data;
-	struct ide_atapi_pc pc;
 
 	drive->atapi_flags &= ~IDE_AFLAG_SRFP;
 
-	ide_floppy_create_mode_sense_cmd(&pc, IDEFLOPPY_CAPABILITIES_PAGE);
-	pc.flags |= PC_FLAG_SUPPRESS_ERROR;
+	ide_floppy_create_mode_sense_cmd(pc, IDEFLOPPY_CAPABILITIES_PAGE);
+	pc->flags |= PC_FLAG_SUPPRESS_ERROR;
 
-	if (ide_queue_pc_tail(drive, floppy->disk, &pc))
+	if (ide_queue_pc_tail(drive, floppy->disk, pc))
 		return 1;
 
-	if (pc.buf[8 + 2] & 0x40)
+	if (pc->buf[8 + 2] & 0x40)
 		drive->atapi_flags |= IDE_AFLAG_SRFP;
 
 	return 0;
 }
 
-static int ide_floppy_format_unit(ide_drive_t *drive, int __user *arg)
+static int ide_floppy_format_unit(ide_drive_t *drive, struct ide_atapi_pc *pc,
+				  int __user *arg)
 {
 	struct ide_disk_obj *floppy = drive->driver_data;
-	struct ide_atapi_pc pc;
 	int blocks, length, flags, err = 0;
 
 	if (floppy->openers > 1) {
@@ -166,10 +166,10 @@
 		goto out;
 	}
 
-	(void)ide_floppy_get_sfrp_bit(drive);
-	ide_floppy_create_format_unit_cmd(&pc, blocks, length, flags);
+	ide_floppy_get_sfrp_bit(drive, pc);
+	ide_floppy_create_format_unit_cmd(pc, blocks, length, flags);
 
-	if (ide_queue_pc_tail(drive, floppy->disk, &pc))
+	if (ide_queue_pc_tail(drive, floppy->disk, pc))
 		err = -EIO;
 
 out:
@@ -188,15 +188,16 @@
  * the dsc bit, and return either 0 or 65536.
  */
 
-static int ide_floppy_get_format_progress(ide_drive_t *drive, int __user *arg)
+static int ide_floppy_get_format_progress(ide_drive_t *drive,
+					  struct ide_atapi_pc *pc,
+					  int __user *arg)
 {
 	struct ide_disk_obj *floppy = drive->driver_data;
-	struct ide_atapi_pc pc;
 	int progress_indication = 0x10000;
 
 	if (drive->atapi_flags & IDE_AFLAG_SRFP) {
-		ide_create_request_sense_cmd(drive, &pc);
-		if (ide_queue_pc_tail(drive, floppy->disk, &pc))
+		ide_create_request_sense_cmd(drive, pc);
+		if (ide_queue_pc_tail(drive, floppy->disk, pc))
 			return -EIO;
 
 		if (floppy->sense_key == 2 &&
@@ -241,20 +242,21 @@
 	return 0;
 }
 
-static int ide_floppy_format_ioctl(ide_drive_t *drive, fmode_t mode,
-				   unsigned int cmd, void __user *argp)
+static int ide_floppy_format_ioctl(ide_drive_t *drive, struct ide_atapi_pc *pc,
+				   fmode_t mode, unsigned int cmd,
+				   void __user *argp)
 {
 	switch (cmd) {
 	case IDEFLOPPY_IOCTL_FORMAT_SUPPORTED:
 		return 0;
 	case IDEFLOPPY_IOCTL_FORMAT_GET_CAPACITY:
-		return ide_floppy_get_format_capacities(drive, argp);
+		return ide_floppy_get_format_capacities(drive, pc, argp);
 	case IDEFLOPPY_IOCTL_FORMAT_START:
 		if (!(mode & FMODE_WRITE))
 			return -EPERM;
-		return ide_floppy_format_unit(drive, (int __user *)argp);
+		return ide_floppy_format_unit(drive, pc, (int __user *)argp);
 	case IDEFLOPPY_IOCTL_FORMAT_GET_PROGRESS:
-		return ide_floppy_get_format_progress(drive, argp);
+		return ide_floppy_get_format_progress(drive, pc, argp);
 	default:
 		return -ENOTTY;
 	}
@@ -270,7 +272,7 @@
 	if (cmd == CDROMEJECT || cmd == CDROM_LOCKDOOR)
 		return ide_floppy_lockdoor(drive, &pc, arg, cmd);
 
-	err = ide_floppy_format_ioctl(drive, mode, cmd, argp);
+	err = ide_floppy_format_ioctl(drive, &pc, mode, cmd, argp);
 	if (err != -ENOTTY)
 		return err;
 
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index ecacc00..1c36a8e 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -426,9 +426,6 @@
 	ide_hwif_t *hwif = drive->hwif;
 	struct scatterlist *sg = hwif->sg_table;
 
-	if (hwif->sg_mapped)	/* needed by ide-scsi */
-		return;
-
 	if (rq->cmd_type != REQ_TYPE_ATA_TASKFILE) {
 		hwif->sg_nents = blk_rq_map_sg(drive->queue, rq, sg);
 	} else {
@@ -667,85 +664,10 @@
 	drive->sleep = timeout + jiffies;
 	drive->dev_flags |= IDE_DFLAG_SLEEPING;
 }
-
 EXPORT_SYMBOL(ide_stall_queue);
 
-#define WAKEUP(drive)	((drive)->service_start + 2 * (drive)->service_time)
-
-/**
- *	choose_drive		-	select a drive to service
- *	@hwgroup: hardware group to select on
- *
- *	choose_drive() selects the next drive which will be serviced.
- *	This is necessary because the IDE layer can't issue commands
- *	to both drives on the same cable, unlike SCSI.
- */
- 
-static inline ide_drive_t *choose_drive (ide_hwgroup_t *hwgroup)
-{
-	ide_drive_t *drive, *best;
-
-repeat:	
-	best = NULL;
-	drive = hwgroup->drive;
-
-	/*
-	 * drive is doing pre-flush, ordered write, post-flush sequence. even
-	 * though that is 3 requests, it must be seen as a single transaction.
-	 * we must not preempt this drive until that is complete
-	 */
-	if (blk_queue_flushing(drive->queue)) {
-		/*
-		 * small race where queue could get replugged during
-		 * the 3-request flush cycle, just yank the plug since
-		 * we want it to finish asap
-		 */
-		blk_remove_plug(drive->queue);
-		return drive;
-	}
-
-	do {
-		u8 dev_s = !!(drive->dev_flags & IDE_DFLAG_SLEEPING);
-		u8 best_s = (best && !!(best->dev_flags & IDE_DFLAG_SLEEPING));
-
-		if ((dev_s == 0 || time_after_eq(jiffies, drive->sleep)) &&
-		    !elv_queue_empty(drive->queue)) {
-			if (best == NULL ||
-			    (dev_s && (best_s == 0 || time_before(drive->sleep, best->sleep))) ||
-			    (best_s == 0 && time_before(WAKEUP(drive), WAKEUP(best)))) {
-				if (!blk_queue_plugged(drive->queue))
-					best = drive;
-			}
-		}
-	} while ((drive = drive->next) != hwgroup->drive);
-
-	if (best && (best->dev_flags & IDE_DFLAG_NICE1) &&
-	    (best->dev_flags & IDE_DFLAG_SLEEPING) == 0 &&
-	    best != hwgroup->drive && best->service_time > WAIT_MIN_SLEEP) {
-		long t = (signed long)(WAKEUP(best) - jiffies);
-		if (t >= WAIT_MIN_SLEEP) {
-		/*
-		 * We *may* have some time to spare, but first let's see if
-		 * someone can potentially benefit from our nice mood today..
-		 */
-			drive = best->next;
-			do {
-				if ((drive->dev_flags & IDE_DFLAG_SLEEPING) == 0
-				 && time_before(jiffies - best->service_time, WAKEUP(drive))
-				 && time_before(WAKEUP(drive), jiffies + t))
-				{
-					ide_stall_queue(best, min_t(long, t, 10 * WAIT_MIN_SLEEP));
-					goto repeat;
-				}
-			} while ((drive = drive->next) != best);
-		}
-	}
-	return best;
-}
-
 /*
  * Issue a new request to a drive from hwgroup
- * Caller must have already done spin_lock_irqsave(&hwgroup->lock, ..);
  *
  * A hwgroup is a serialized group of IDE interfaces.  Usually there is
  * exactly one hwif (interface) per hwgroup, but buggy controllers (eg. CMD640)
@@ -757,8 +679,7 @@
  * possibly along with many other devices.  This is especially common in
  * PCI-based systems with off-board IDE controller cards.
  *
- * The IDE driver uses a per-hwgroup spinlock to protect
- * access to the request queues, and to protect the hwgroup->busy flag.
+ * The IDE driver uses a per-hwgroup lock to protect the hwgroup->busy flag.
  *
  * The first thread into the driver for a particular hwgroup sets the
  * hwgroup->busy flag to indicate that this hwgroup is now active,
@@ -778,69 +699,41 @@
  * the driver.  This makes the driver much more friendlier to shared IRQs
  * than previous designs, while remaining 100% (?) SMP safe and capable.
  */
-static void ide_do_request (ide_hwgroup_t *hwgroup, int masked_irq)
+void do_ide_request(struct request_queue *q)
 {
-	ide_drive_t	*drive;
-	ide_hwif_t	*hwif;
+	ide_drive_t	*drive = q->queuedata;
+	ide_hwif_t	*hwif = drive->hwif;
+	ide_hwgroup_t	*hwgroup = hwif->hwgroup;
 	struct request	*rq;
 	ide_startstop_t	startstop;
-	int             loops = 0;
 
-	/* caller must own hwgroup->lock */
-	BUG_ON(!irqs_disabled());
-
-	while (!hwgroup->busy) {
-		hwgroup->busy = 1;
-		/* for atari only */
-		ide_get_lock(ide_intr, hwgroup);
-		drive = choose_drive(hwgroup);
-		if (drive == NULL) {
-			int sleeping = 0;
-			unsigned long sleep = 0; /* shut up, gcc */
-			hwgroup->rq = NULL;
-			drive = hwgroup->drive;
-			do {
-				if ((drive->dev_flags & IDE_DFLAG_SLEEPING) &&
-				    (sleeping == 0 ||
-				     time_before(drive->sleep, sleep))) {
-					sleeping = 1;
-					sleep = drive->sleep;
-				}
-			} while ((drive = drive->next) != hwgroup->drive);
-			if (sleeping) {
+	/*
+	 * drive is doing pre-flush, ordered write, post-flush sequence. even
+	 * though that is 3 requests, it must be seen as a single transaction.
+	 * we must not preempt this drive until that is complete
+	 */
+	if (blk_queue_flushing(q))
 		/*
-		 * Take a short snooze, and then wake up this hwgroup again.
-		 * This gives other hwgroups on the same a chance to
-		 * play fairly with us, just in case there are big differences
-		 * in relative throughputs.. don't want to hog the cpu too much.
+		 * small race where queue could get replugged during
+		 * the 3-request flush cycle, just yank the plug since
+		 * we want it to finish asap
 		 */
-				if (time_before(sleep, jiffies + WAIT_MIN_SLEEP))
-					sleep = jiffies + WAIT_MIN_SLEEP;
-#if 1
-				if (timer_pending(&hwgroup->timer))
-					printk(KERN_CRIT "ide_set_handler: timer already active\n");
-#endif
-				/* so that ide_timer_expiry knows what to do */
-				hwgroup->sleeping = 1;
-				hwgroup->req_gen_timer = hwgroup->req_gen;
-				mod_timer(&hwgroup->timer, sleep);
-				/* we purposely leave hwgroup->busy==1
-				 * while sleeping */
-			} else {
-				/* Ugly, but how can we sleep for the lock
-				 * otherwise? perhaps from tq_disk?
-				 */
+		blk_remove_plug(q);
 
-				/* for atari only */
-				ide_release_lock();
-				hwgroup->busy = 0;
+	spin_unlock_irq(q->queue_lock);
+	spin_lock_irq(&hwgroup->lock);
+
+	if (!ide_lock_hwgroup(hwgroup)) {
+repeat:
+		hwgroup->rq = NULL;
+
+		if (drive->dev_flags & IDE_DFLAG_SLEEPING) {
+			if (time_before(drive->sleep, jiffies)) {
+				ide_unlock_hwgroup(hwgroup);
+				goto plug_device;
 			}
-
-			/* no more work for this hwgroup (for now) */
-			return;
 		}
-	again:
-		hwif = HWIF(drive);
+
 		if (hwif != hwgroup->hwif) {
 			/*
 			 * set nIEN for previous hwif, drives in the
@@ -852,16 +745,20 @@
 		hwgroup->hwif = hwif;
 		hwgroup->drive = drive;
 		drive->dev_flags &= ~(IDE_DFLAG_SLEEPING | IDE_DFLAG_PARKED);
-		drive->service_start = jiffies;
 
+		spin_unlock_irq(&hwgroup->lock);
+		spin_lock_irq(q->queue_lock);
 		/*
 		 * we know that the queue isn't empty, but this can happen
 		 * if the q->prep_rq_fn() decides to kill a request
 		 */
 		rq = elv_next_request(drive->queue);
+		spin_unlock_irq(q->queue_lock);
+		spin_lock_irq(&hwgroup->lock);
+
 		if (!rq) {
-			hwgroup->busy = 0;
-			break;
+			ide_unlock_hwgroup(hwgroup);
+			goto out;
 		}
 
 		/*
@@ -876,53 +773,36 @@
 		 * though. I hope that doesn't happen too much, hopefully not
 		 * unless the subdriver triggers such a thing in its own PM
 		 * state machine.
-		 *
-		 * We count how many times we loop here to make sure we service
-		 * all drives in the hwgroup without looping for ever
 		 */
 		if ((drive->dev_flags & IDE_DFLAG_BLOCKED) &&
 		    blk_pm_request(rq) == 0 &&
 		    (rq->cmd_flags & REQ_PREEMPT) == 0) {
-			drive = drive->next ? drive->next : hwgroup->drive;
-			if (loops++ < 4 && !blk_queue_plugged(drive->queue))
-				goto again;
-			/* We clear busy, there should be no pending ATA command at this point. */
-			hwgroup->busy = 0;
-			break;
+			/* there should be no pending command at this point */
+			ide_unlock_hwgroup(hwgroup);
+			goto plug_device;
 		}
 
 		hwgroup->rq = rq;
 
-		/*
-		 * Some systems have trouble with IDE IRQs arriving while
-		 * the driver is still setting things up.  So, here we disable
-		 * the IRQ used by this interface while the request is being started.
-		 * This may look bad at first, but pretty much the same thing
-		 * happens anyway when any interrupt comes in, IDE or otherwise
-		 *  -- the kernel masks the IRQ while it is being handled.
-		 */
-		if (masked_irq != IDE_NO_IRQ && hwif->irq != masked_irq)
-			disable_irq_nosync(hwif->irq);
-		spin_unlock(&hwgroup->lock);
-		local_irq_enable_in_hardirq();
-			/* allow other IRQs while we start this request */
+		spin_unlock_irq(&hwgroup->lock);
 		startstop = start_request(drive, rq);
 		spin_lock_irq(&hwgroup->lock);
-		if (masked_irq != IDE_NO_IRQ && hwif->irq != masked_irq)
-			enable_irq(hwif->irq);
+
 		if (startstop == ide_stopped)
-			hwgroup->busy = 0;
-	}
-}
+			goto repeat;
+	} else
+		goto plug_device;
+out:
+	spin_unlock_irq(&hwgroup->lock);
+	spin_lock_irq(q->queue_lock);
+	return;
 
-/*
- * Passes the stuff to ide_do_request
- */
-void do_ide_request(struct request_queue *q)
-{
-	ide_drive_t *drive = q->queuedata;
+plug_device:
+	spin_unlock_irq(&hwgroup->lock);
+	spin_lock_irq(q->queue_lock);
 
-	ide_do_request(HWGROUP(drive), IDE_NO_IRQ);
+	if (!elv_queue_empty(q))
+		blk_plug_device(q);
 }
 
 /*
@@ -983,6 +863,17 @@
 	return ret;
 }
 
+static void ide_plug_device(ide_drive_t *drive)
+{
+	struct request_queue *q = drive->queue;
+	unsigned long flags;
+
+	spin_lock_irqsave(q->queue_lock, flags);
+	if (!elv_queue_empty(q))
+		blk_plug_device(q);
+	spin_unlock_irqrestore(q->queue_lock, flags);
+}
+
 /**
  *	ide_timer_expiry	-	handle lack of an IDE interrupt
  *	@data: timer callback magic (hwgroup)
@@ -1000,10 +891,12 @@
 void ide_timer_expiry (unsigned long data)
 {
 	ide_hwgroup_t	*hwgroup = (ide_hwgroup_t *) data;
+	ide_drive_t	*uninitialized_var(drive);
 	ide_handler_t	*handler;
 	ide_expiry_t	*expiry;
 	unsigned long	flags;
 	unsigned long	wait = -1;
+	int		plug_device = 0;
 
 	spin_lock_irqsave(&hwgroup->lock, flags);
 
@@ -1015,22 +908,15 @@
 		 * or we were "sleeping" to give other devices a chance.
 		 * Either way, we don't really want to complain about anything.
 		 */
-		if (hwgroup->sleeping) {
-			hwgroup->sleeping = 0;
-			hwgroup->busy = 0;
-		}
 	} else {
-		ide_drive_t *drive = hwgroup->drive;
+		drive = hwgroup->drive;
 		if (!drive) {
 			printk(KERN_ERR "ide_timer_expiry: hwgroup->drive was NULL\n");
 			hwgroup->handler = NULL;
 		} else {
 			ide_hwif_t *hwif;
 			ide_startstop_t startstop = ide_stopped;
-			if (!hwgroup->busy) {
-				hwgroup->busy = 1;	/* paranoia */
-				printk(KERN_ERR "%s: ide_timer_expiry: hwgroup->busy was 0 ??\n", drive->name);
-			}
+
 			if ((expiry = hwgroup->expiry) != NULL) {
 				/* continue */
 				if ((wait = expiry(drive)) > 0) {
@@ -1071,15 +957,18 @@
 					ide_error(drive, "irq timeout",
 						  hwif->tp_ops->read_status(hwif));
 			}
-			drive->service_time = jiffies - drive->service_start;
 			spin_lock_irq(&hwgroup->lock);
 			enable_irq(hwif->irq);
-			if (startstop == ide_stopped)
-				hwgroup->busy = 0;
+			if (startstop == ide_stopped) {
+				ide_unlock_hwgroup(hwgroup);
+				plug_device = 1;
+			}
 		}
 	}
-	ide_do_request(hwgroup, IDE_NO_IRQ);
 	spin_unlock_irqrestore(&hwgroup->lock, flags);
+
+	if (plug_device)
+		ide_plug_device(drive);
 }
 
 /**
@@ -1173,10 +1062,11 @@
 	unsigned long flags;
 	ide_hwgroup_t *hwgroup = (ide_hwgroup_t *)dev_id;
 	ide_hwif_t *hwif = hwgroup->hwif;
-	ide_drive_t *drive;
+	ide_drive_t *uninitialized_var(drive);
 	ide_handler_t *handler;
 	ide_startstop_t startstop;
 	irqreturn_t irq_ret = IRQ_NONE;
+	int plug_device = 0;
 
 	spin_lock_irqsave(&hwgroup->lock, flags);
 
@@ -1241,10 +1131,6 @@
 		 */
 		goto out;
 
-	if (!hwgroup->busy) {
-		hwgroup->busy = 1;	/* paranoia */
-		printk(KERN_ERR "%s: ide_intr: hwgroup->busy was 0 ??\n", drive->name);
-	}
 	hwgroup->handler = NULL;
 	hwgroup->req_gen++;
 	del_timer(&hwgroup->timer);
@@ -1267,20 +1153,22 @@
 	 * same irq as is currently being serviced here, and Linux
 	 * won't allow another of the same (on any CPU) until we return.
 	 */
-	drive->service_time = jiffies - drive->service_start;
 	if (startstop == ide_stopped) {
 		if (hwgroup->handler == NULL) {	/* paranoia */
-			hwgroup->busy = 0;
-			ide_do_request(hwgroup, hwif->irq);
-		} else {
-			printk(KERN_ERR "%s: ide_intr: huh? expected NULL handler "
-				"on exit\n", drive->name);
-		}
+			ide_unlock_hwgroup(hwgroup);
+			plug_device = 1;
+		} else
+			printk(KERN_ERR "%s: %s: huh? expected NULL handler "
+					"on exit\n", __func__, drive->name);
 	}
 out_handled:
 	irq_ret = IRQ_HANDLED;
 out:
 	spin_unlock_irqrestore(&hwgroup->lock, flags);
+
+	if (plug_device)
+		ide_plug_device(drive);
+
 	return irq_ret;
 }
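
The ide-io.c rework above drops the old hwgroup->busy/sleeping bookkeeping in favour of ide_lock_hwgroup()/ide_unlock_hwgroup(), and never plugs the request queue while the hwgroup lock is held. A rough user-space model of that discipline (pthread mutexes instead of spinlocks, hypothetical model_* names; this is a sketch of the pattern, not the driver code):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct model_hwgroup {
	pthread_mutex_t lock;		/* stands in for hwgroup->lock */
	bool busy;			/* replaces the old hwgroup->busy flag */
};

struct model_queue {
	pthread_mutex_t queue_lock;	/* stands in for q->queue_lock */
	bool plugged;			/* "come back later" marker */
};

static struct model_hwgroup hwg = { PTHREAD_MUTEX_INITIALIZER, false };
static struct model_queue q = { PTHREAD_MUTEX_INITIALIZER, false };

/* Try to claim the port group; 0 on success, 1 if another drive owns it. */
static int model_lock_hwgroup(struct model_hwgroup *h)
{
	if (h->busy)
		return 1;
	h->busy = true;
	return 0;
}

static void model_unlock_hwgroup(struct model_hwgroup *h)
{
	h->busy = false;
}

/* Request function: entered with queue_lock held, returns with it held. */
static void model_do_request(void)
{
	pthread_mutex_unlock(&q.queue_lock);	/* never hold both locks at once */
	pthread_mutex_lock(&hwg.lock);

	if (model_lock_hwgroup(&hwg)) {
		/* another drive owns the port: plug the queue and retry later */
		pthread_mutex_unlock(&hwg.lock);
		pthread_mutex_lock(&q.queue_lock);
		q.plugged = true;
		return;
	}

	/* ... dispatch one request to the hardware here ... */

	model_unlock_hwgroup(&hwg);
	pthread_mutex_unlock(&hwg.lock);
	pthread_mutex_lock(&q.queue_lock);	/* restore the caller's locking state */
}

int main(void)
{
	pthread_mutex_lock(&q.queue_lock);
	model_do_request();
	pthread_mutex_unlock(&q.queue_lock);
	printf("queue plugged: %d\n", q.plugged);
	return 0;
}

The point of the model is the lock ordering: queue_lock is released before the hwgroup lock is taken, and re-acquired only after the hwgroup lock is dropped, mirroring do_ide_request() and the plug_device path above.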
 
diff --git a/drivers/ide/ide-ioctls.c b/drivers/ide/ide-ioctls.c
index 28232c6..1be263e 100644
--- a/drivers/ide/ide-ioctls.c
+++ b/drivers/ide/ide-ioctls.c
@@ -95,8 +95,7 @@
 		return -EPERM;
 
 	if (((arg >> IDE_NICE_DSC_OVERLAP) & 1) &&
-	    (drive->media != ide_tape ||
-	     (drive->dev_flags & IDE_DFLAG_SCSI)))
+	    (drive->media != ide_tape))
 		return -EPERM;
 
 	if ((arg >> IDE_NICE_DSC_OVERLAP) & 1)
diff --git a/drivers/ide/ide-park.c b/drivers/ide/ide-park.c
index 63d01c5..678454a 100644
--- a/drivers/ide/ide-park.c
+++ b/drivers/ide/ide-park.c
@@ -16,16 +16,19 @@
 	spin_lock_irq(&hwgroup->lock);
 	if (drive->dev_flags & IDE_DFLAG_PARKED) {
 		int reset_timer = time_before(timeout, drive->sleep);
+		int start_queue = 0;
 
 		drive->sleep = timeout;
 		wake_up_all(&ide_park_wq);
-		if (reset_timer && hwgroup->sleeping &&
-		    del_timer(&hwgroup->timer)) {
-			hwgroup->sleeping = 0;
-			hwgroup->busy = 0;
-			blk_start_queueing(q);
-		}
+		if (reset_timer && del_timer(&hwgroup->timer))
+			start_queue = 1;
 		spin_unlock_irq(&hwgroup->lock);
+
+		if (start_queue) {
+			spin_lock_irq(q->queue_lock);
+			blk_start_queueing(q);
+			spin_unlock_irq(q->queue_lock);
+		}
 		return;
 	}
 	spin_unlock_irq(&hwgroup->lock);
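
The ide-park.c hunk above records the decision in start_queue while still under the hwgroup lock and only calls blk_start_queueing() afterwards, under q->queue_lock. A minimal user-space sketch of that decide-then-act pattern (made-up names, mutexes standing in for spinlocks):

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t hwgroup_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER;

static bool cancel_sleep_timer(void) { return true; }	/* stands in for del_timer() */
static void restart_queue(void) { }			/* stands in for blk_start_queueing() */

static void unpark(bool reset_timer)
{
	int start_queue = 0;

	pthread_mutex_lock(&hwgroup_lock);
	if (reset_timer && cancel_sleep_timer())
		start_queue = 1;		/* remember the decision... */
	pthread_mutex_unlock(&hwgroup_lock);

	if (start_queue) {			/* ...and act on it under the queue lock */
		pthread_mutex_lock(&queue_lock);
		restart_queue();
		pthread_mutex_unlock(&queue_lock);
	}
}

int main(void) { unpark(true); return 0; }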
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index a64ec25..c5adb7b 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -101,6 +101,82 @@
 	}
 }
 
+static void ide_classify_ata_dev(ide_drive_t *drive)
+{
+	u16 *id = drive->id;
+	char *m = (char *)&id[ATA_ID_PROD];
+	int is_cfa = ata_id_is_cfa(id);
+
+	/* CF devices are *not* removable in Linux definition of the term */
+	if (is_cfa == 0 && (id[ATA_ID_CONFIG] & (1 << 7)))
+		drive->dev_flags |= IDE_DFLAG_REMOVABLE;
+
+	drive->media = ide_disk;
+
+	if (!ata_id_has_unload(drive->id))
+		drive->dev_flags |= IDE_DFLAG_NO_UNLOAD;
+
+	printk(KERN_INFO "%s: %s, %s DISK drive\n", drive->name, m,
+		is_cfa ? "CFA" : "ATA");
+}
+
+static void ide_classify_atapi_dev(ide_drive_t *drive)
+{
+	u16 *id = drive->id;
+	char *m = (char *)&id[ATA_ID_PROD];
+	u8 type = (id[ATA_ID_CONFIG] >> 8) & 0x1f;
+
+	printk(KERN_INFO "%s: %s, ATAPI ", drive->name, m);
+	switch (type) {
+	case ide_floppy:
+		if (!strstr(m, "CD-ROM")) {
+			if (!strstr(m, "oppy") &&
+			    !strstr(m, "poyp") &&
+			    !strstr(m, "ZIP"))
+				printk(KERN_CONT "cdrom or floppy?, assuming ");
+			if (drive->media != ide_cdrom) {
+				printk(KERN_CONT "FLOPPY");
+				drive->dev_flags |= IDE_DFLAG_REMOVABLE;
+				break;
+			}
+		}
+		/* Early cdrom models used zero */
+		type = ide_cdrom;
+	case ide_cdrom:
+		drive->dev_flags |= IDE_DFLAG_REMOVABLE;
+#ifdef CONFIG_PPC
+		/* kludge for Apple PowerBook internal zip */
+		if (!strstr(m, "CD-ROM") && strstr(m, "ZIP")) {
+			printk(KERN_CONT "FLOPPY");
+			type = ide_floppy;
+			break;
+		}
+#endif
+		printk(KERN_CONT "CD/DVD-ROM");
+		break;
+	case ide_tape:
+		printk(KERN_CONT "TAPE");
+		break;
+	case ide_optical:
+		printk(KERN_CONT "OPTICAL");
+		drive->dev_flags |= IDE_DFLAG_REMOVABLE;
+		break;
+	default:
+		printk(KERN_CONT "UNKNOWN (type %d)", type);
+		break;
+	}
+
+	printk(KERN_CONT " drive\n");
+	drive->media = type;
+	/* an ATAPI device ignores DRDY */
+	drive->ready_stat = 0;
+	if (ata_id_cdb_intr(id))
+		drive->atapi_flags |= IDE_AFLAG_DRQ_INTERRUPT;
+	drive->dev_flags |= IDE_DFLAG_DOORLOCKING;
+	/* we don't do head unloading on ATAPI devices */
+	drive->dev_flags |= IDE_DFLAG_NO_UNLOAD;
+}
+
 /**
  *	do_identify	-	identify a drive
  *	@drive: drive to identify 
@@ -117,7 +193,7 @@
 	u16 *id = drive->id;
 	char *m = (char *)&id[ATA_ID_PROD];
 	unsigned long flags;
-	int bswap = 1, is_cfa;
+	int bswap = 1;
 
 	/* local CPU only; some systems need this */
 	local_irq_save(flags);
@@ -154,91 +230,23 @@
 	if (strstr(m, "E X A B Y T E N E S T"))
 		goto err_misc;
 
-	printk(KERN_INFO "%s: %s, ", drive->name, m);
-
 	drive->dev_flags |= IDE_DFLAG_PRESENT;
 	drive->dev_flags &= ~IDE_DFLAG_DEAD;
 
 	/*
 	 * Check for an ATAPI device
 	 */
-	if (cmd == ATA_CMD_ID_ATAPI) {
-		u8 type = (id[ATA_ID_CONFIG] >> 8) & 0x1f;
-
-		printk(KERN_CONT "ATAPI ");
-		switch (type) {
-			case ide_floppy:
-				if (!strstr(m, "CD-ROM")) {
-					if (!strstr(m, "oppy") &&
-					    !strstr(m, "poyp") &&
-					    !strstr(m, "ZIP"))
-						printk(KERN_CONT "cdrom or floppy?, assuming ");
-					if (drive->media != ide_cdrom) {
-						printk(KERN_CONT "FLOPPY");
-						drive->dev_flags |= IDE_DFLAG_REMOVABLE;
-						break;
-					}
-				}
-				/* Early cdrom models used zero */
-				type = ide_cdrom;
-			case ide_cdrom:
-				drive->dev_flags |= IDE_DFLAG_REMOVABLE;
-#ifdef CONFIG_PPC
-				/* kludge for Apple PowerBook internal zip */
-				if (!strstr(m, "CD-ROM") && strstr(m, "ZIP")) {
-					printk(KERN_CONT "FLOPPY");
-					type = ide_floppy;
-					break;
-				}
-#endif
-				printk(KERN_CONT "CD/DVD-ROM");
-				break;
-			case ide_tape:
-				printk(KERN_CONT "TAPE");
-				break;
-			case ide_optical:
-				printk(KERN_CONT "OPTICAL");
-				drive->dev_flags |= IDE_DFLAG_REMOVABLE;
-				break;
-			default:
-				printk(KERN_CONT "UNKNOWN (type %d)", type);
-				break;
-		}
-		printk(KERN_CONT " drive\n");
-		drive->media = type;
-		/* an ATAPI device ignores DRDY */
-		drive->ready_stat = 0;
-		if (ata_id_cdb_intr(id))
-			drive->atapi_flags |= IDE_AFLAG_DRQ_INTERRUPT;
-		drive->dev_flags |= IDE_DFLAG_DOORLOCKING;
-		/* we don't do head unloading on ATAPI devices */
-		drive->dev_flags |= IDE_DFLAG_NO_UNLOAD;
-		return;
-	}
-
+	if (cmd == ATA_CMD_ID_ATAPI)
+		ide_classify_atapi_dev(drive);
+	else
 	/*
 	 * Not an ATAPI device: looks like a "regular" hard disk
 	 */
-
-	is_cfa = ata_id_is_cfa(id);
-
-	/* CF devices are *not* removable in Linux definition of the term */
-	if (is_cfa == 0 && (id[ATA_ID_CONFIG] & (1 << 7)))
-		drive->dev_flags |= IDE_DFLAG_REMOVABLE;
-
-	drive->media = ide_disk;
-
-	if (!ata_id_has_unload(drive->id))
-		drive->dev_flags |= IDE_DFLAG_NO_UNLOAD;
-
-	printk(KERN_CONT "%s DISK drive\n", is_cfa ? "CFA" : "ATA");
-
+		ide_classify_ata_dev(drive);
 	return;
-
 err_misc:
 	kfree(id);
 	drive->dev_flags &= ~IDE_DFLAG_PRESENT;
-	return;
 }
 
 /**
@@ -641,14 +649,9 @@
 	/* register with global device tree */
 	dev_set_name(&hwif->gendev, hwif->name);
 	hwif->gendev.driver_data = hwif;
-	if (hwif->gendev.parent == NULL) {
-		if (hwif->dev)
-			hwif->gendev.parent = hwif->dev;
-		else
-			/* Would like to do = &device_legacy */
-			hwif->gendev.parent = NULL;
-	}
+	hwif->gendev.parent = hwif->dev;
 	hwif->gendev.release = hwif_release_dev;
+
 	ret = device_register(&hwif->gendev);
 	if (ret < 0) {
 		printk(KERN_WARNING "IDE: %s: device_register error: %d\n",
@@ -878,8 +881,7 @@
 	 *	do not.
 	 */
 
-	q = blk_init_queue_node(do_ide_request, &hwif->hwgroup->lock,
-				hwif_to_node(hwif));
+	q = blk_init_queue_node(do_ide_request, NULL, hwif_to_node(hwif));
 	if (!q)
 		return 1;
 
@@ -1139,8 +1141,6 @@
 
 	if (drive->media == ide_disk)
 		request_module("ide-disk");
-	if (drive->dev_flags & IDE_DFLAG_SCSI)
-		request_module("ide-scsi");
 	if (drive->media == ide_cdrom || drive->media == ide_optical)
 		request_module("ide-cd");
 	if (drive->media == ide_tape)
@@ -1417,58 +1417,6 @@
 	}
 }
 
-static ssize_t store_delete_devices(struct device *portdev,
-				    struct device_attribute *attr,
-				    const char *buf, size_t n)
-{
-	ide_hwif_t *hwif = dev_get_drvdata(portdev);
-
-	if (strncmp(buf, "1", n))
-		return -EINVAL;
-
-	ide_port_unregister_devices(hwif);
-
-	return n;
-};
-
-static DEVICE_ATTR(delete_devices, S_IWUSR, NULL, store_delete_devices);
-
-static ssize_t store_scan(struct device *portdev,
-			  struct device_attribute *attr,
-			  const char *buf, size_t n)
-{
-	ide_hwif_t *hwif = dev_get_drvdata(portdev);
-
-	if (strncmp(buf, "1", n))
-		return -EINVAL;
-
-	ide_port_unregister_devices(hwif);
-	ide_port_scan(hwif);
-
-	return n;
-};
-
-static DEVICE_ATTR(scan, S_IWUSR, NULL, store_scan);
-
-static struct device_attribute *ide_port_attrs[] = {
-	&dev_attr_delete_devices,
-	&dev_attr_scan,
-	NULL
-};
-
-static int ide_sysfs_register_port(ide_hwif_t *hwif)
-{
-	int i, uninitialized_var(rc);
-
-	for (i = 0; ide_port_attrs[i]; i++) {
-		rc = device_create_file(hwif->portdev, ide_port_attrs[i]);
-		if (rc)
-			break;
-	}
-
-	return rc;
-}
-
 static unsigned int ide_indexes;
 
 /**
@@ -1655,9 +1603,6 @@
 		if (hwif == NULL)
 			continue;
 
-		if (hwif->chipset == ide_unknown)
-			hwif->chipset = ide_generic;
-
 		if (hwif->present)
 			hwif_register_devices(hwif);
 	}
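
ide_classify_atapi_dev() above keys off bits 8..12 of IDENTIFY word 0 (ATA_ID_CONFIG), which carry the ATAPI peripheral device type: 0 direct-access (floppies, and early CD-ROMs), 1 tape, 5 CD/DVD-ROM, 7 optical. A standalone illustration of that decode, with a made-up identify word:

#include <stdio.h>

int main(void)
{
	unsigned short config_word = 0x8580;		/* hypothetical IDENTIFY word 0 */
	unsigned char type = (config_word >> 8) & 0x1f;	/* bits 8..12: device type */

	switch (type) {
	case 0:  puts("floppy (early CD-ROMs also reported 0)"); break;
	case 1:  puts("tape"); break;
	case 5:  puts("CD/DVD-ROM"); break;
	case 7:  puts("optical"); break;
	default: printf("unknown type %u\n", type); break;
	}
	return 0;
}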
diff --git a/drivers/ide/ide-sysfs.c b/drivers/ide/ide-sysfs.c
new file mode 100644
index 0000000..883ffac
--- /dev/null
+++ b/drivers/ide/ide-sysfs.c
@@ -0,0 +1,125 @@
+#include <linux/kernel.h>
+#include <linux/ide.h>
+
+char *ide_media_string(ide_drive_t *drive)
+{
+	switch (drive->media) {
+	case ide_disk:
+		return "disk";
+	case ide_cdrom:
+		return "cdrom";
+	case ide_tape:
+		return "tape";
+	case ide_floppy:
+		return "floppy";
+	case ide_optical:
+		return "optical";
+	default:
+		return "UNKNOWN";
+	}
+}
+
+static ssize_t media_show(struct device *dev, struct device_attribute *attr,
+			  char *buf)
+{
+	ide_drive_t *drive = to_ide_device(dev);
+	return sprintf(buf, "%s\n", ide_media_string(drive));
+}
+
+static ssize_t drivename_show(struct device *dev, struct device_attribute *attr,
+			      char *buf)
+{
+	ide_drive_t *drive = to_ide_device(dev);
+	return sprintf(buf, "%s\n", drive->name);
+}
+
+static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
+			     char *buf)
+{
+	ide_drive_t *drive = to_ide_device(dev);
+	return sprintf(buf, "ide:m-%s\n", ide_media_string(drive));
+}
+
+static ssize_t model_show(struct device *dev, struct device_attribute *attr,
+			  char *buf)
+{
+	ide_drive_t *drive = to_ide_device(dev);
+	return sprintf(buf, "%s\n", (char *)&drive->id[ATA_ID_PROD]);
+}
+
+static ssize_t firmware_show(struct device *dev, struct device_attribute *attr,
+			     char *buf)
+{
+	ide_drive_t *drive = to_ide_device(dev);
+	return sprintf(buf, "%s\n", (char *)&drive->id[ATA_ID_FW_REV]);
+}
+
+static ssize_t serial_show(struct device *dev, struct device_attribute *attr,
+			   char *buf)
+{
+	ide_drive_t *drive = to_ide_device(dev);
+	return sprintf(buf, "%s\n", (char *)&drive->id[ATA_ID_SERNO]);
+}
+
+struct device_attribute ide_dev_attrs[] = {
+	__ATTR_RO(media),
+	__ATTR_RO(drivename),
+	__ATTR_RO(modalias),
+	__ATTR_RO(model),
+	__ATTR_RO(firmware),
+	__ATTR(serial, 0400, serial_show, NULL),
+	__ATTR(unload_heads, 0644, ide_park_show, ide_park_store),
+	__ATTR_NULL
+};
+
+static ssize_t store_delete_devices(struct device *portdev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t n)
+{
+	ide_hwif_t *hwif = dev_get_drvdata(portdev);
+
+	if (strncmp(buf, "1", n))
+		return -EINVAL;
+
+	ide_port_unregister_devices(hwif);
+
+	return n;
+};
+
+static DEVICE_ATTR(delete_devices, S_IWUSR, NULL, store_delete_devices);
+
+static ssize_t store_scan(struct device *portdev,
+			  struct device_attribute *attr,
+			  const char *buf, size_t n)
+{
+	ide_hwif_t *hwif = dev_get_drvdata(portdev);
+
+	if (strncmp(buf, "1", n))
+		return -EINVAL;
+
+	ide_port_unregister_devices(hwif);
+	ide_port_scan(hwif);
+
+	return n;
+};
+
+static DEVICE_ATTR(scan, S_IWUSR, NULL, store_scan);
+
+static struct device_attribute *ide_port_attrs[] = {
+	&dev_attr_delete_devices,
+	&dev_attr_scan,
+	NULL
+};
+
+int ide_sysfs_register_port(ide_hwif_t *hwif)
+{
+	int i, uninitialized_var(rc);
+
+	for (i = 0; ide_port_attrs[i]; i++) {
+		rc = device_create_file(hwif->portdev, ide_port_attrs[i]);
+		if (rc)
+			break;
+	}
+
+	return rc;
+}
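
The port attributes moved into ide-sysfs.c are write-only triggers: the handlers accept only the string "1", and writing it to scan re-probes the port after unregistering its devices. A small user-space sketch of driving it; the sysfs path below is illustrative only and depends on where the port device sits:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* path is hypothetical; locate the real port device under /sys first */
	int fd = open("/sys/devices/.../ide0/scan", O_WRONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (write(fd, "1", 1) != 1)	/* the handler accepts only "1" */
		perror("write");
	close(fd);
	return 0;
}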
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index a2d470e..5d2aa22 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -694,7 +694,7 @@
 
 	pc->retries++;
 
-	return ide_issue_pc(drive, WAIT_TAPE_CMD, NULL);
+	return ide_issue_pc(drive);
 }
 
 /* A mode sense command is used to "sense" tape parameters. */
diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c
index f0f09f7..46a2d4c 100644
--- a/drivers/ide/ide.c
+++ b/drivers/ide/ide.c
@@ -440,81 +440,13 @@
 	return 1;
 }
 
-static char *media_string(ide_drive_t *drive)
-{
-	switch (drive->media) {
-	case ide_disk:
-		return "disk";
-	case ide_cdrom:
-		return "cdrom";
-	case ide_tape:
-		return "tape";
-	case ide_floppy:
-		return "floppy";
-	case ide_optical:
-		return "optical";
-	default:
-		return "UNKNOWN";
-	}
-}
-
-static ssize_t media_show(struct device *dev, struct device_attribute *attr, char *buf)
-{
-	ide_drive_t *drive = to_ide_device(dev);
-	return sprintf(buf, "%s\n", media_string(drive));
-}
-
-static ssize_t drivename_show(struct device *dev, struct device_attribute *attr, char *buf)
-{
-	ide_drive_t *drive = to_ide_device(dev);
-	return sprintf(buf, "%s\n", drive->name);
-}
-
-static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, char *buf)
-{
-	ide_drive_t *drive = to_ide_device(dev);
-	return sprintf(buf, "ide:m-%s\n", media_string(drive));
-}
-
-static ssize_t model_show(struct device *dev, struct device_attribute *attr,
-			  char *buf)
-{
-	ide_drive_t *drive = to_ide_device(dev);
-	return sprintf(buf, "%s\n", (char *)&drive->id[ATA_ID_PROD]);
-}
-
-static ssize_t firmware_show(struct device *dev, struct device_attribute *attr,
-			     char *buf)
-{
-	ide_drive_t *drive = to_ide_device(dev);
-	return sprintf(buf, "%s\n", (char *)&drive->id[ATA_ID_FW_REV]);
-}
-
-static ssize_t serial_show(struct device *dev, struct device_attribute *attr,
-			   char *buf)
-{
-	ide_drive_t *drive = to_ide_device(dev);
-	return sprintf(buf, "%s\n", (char *)&drive->id[ATA_ID_SERNO]);
-}
-
-static struct device_attribute ide_dev_attrs[] = {
-	__ATTR_RO(media),
-	__ATTR_RO(drivename),
-	__ATTR_RO(modalias),
-	__ATTR_RO(model),
-	__ATTR_RO(firmware),
-	__ATTR(serial, 0400, serial_show, NULL),
-	__ATTR(unload_heads, 0644, ide_park_show, ide_park_store),
-	__ATTR_NULL
-};
-
 static int ide_uevent(struct device *dev, struct kobj_uevent_env *env)
 {
 	ide_drive_t *drive = to_ide_device(dev);
 
-	add_uevent_var(env, "MEDIA=%s", media_string(drive));
+	add_uevent_var(env, "MEDIA=%s", ide_media_string(drive));
 	add_uevent_var(env, "DRIVENAME=%s", drive->name);
-	add_uevent_var(env, "MODALIAS=ide:m-%s", media_string(drive));
+	add_uevent_var(env, "MODALIAS=ide:m-%s", ide_media_string(drive));
 	return 0;
 }
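
ide_media_string() is now shared between the sysfs attributes and the uevent handler, which emits MODALIAS=ide:m-<media> so userspace can autoload the matching driver module. A trivial sketch of the string format (name table taken from the helper above):

#include <stdio.h>

static const char *media_names[] = { "disk", "cdrom", "tape", "floppy", "optical" };

int main(void)
{
	char modalias[32];

	snprintf(modalias, sizeof(modalias), "ide:m-%s", media_names[1]);
	puts(modalias);			/* prints "ide:m-cdrom" */
	return 0;
}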
 
diff --git a/drivers/ide/tx4938ide.c b/drivers/ide/tx4938ide.c
index 13b63e7..b4ef218 100644
--- a/drivers/ide/tx4938ide.c
+++ b/drivers/ide/tx4938ide.c
@@ -216,16 +216,17 @@
 #endif	/* __BIG_ENDIAN */
 
 static const struct ide_port_ops tx4938ide_port_ops = {
-	.set_pio_mode = tx4938ide_set_pio_mode,
+	.set_pio_mode		= tx4938ide_set_pio_mode,
 };
 
 static const struct ide_port_info tx4938ide_port_info __initdata = {
-	.port_ops = &tx4938ide_port_ops,
+	.port_ops		= &tx4938ide_port_ops,
 #ifdef __BIG_ENDIAN
-	.tp_ops = &tx4938ide_tp_ops,
+	.tp_ops			= &tx4938ide_tp_ops,
 #endif
-	.host_flags = IDE_HFLAG_MMIO | IDE_HFLAG_NO_DMA,
-	.pio_mask = ATA_PIO5,
+	.host_flags		= IDE_HFLAG_MMIO | IDE_HFLAG_NO_DMA,
+	.pio_mask		= ATA_PIO5,
+	.chipset		= ide_generic,
 };
 
 static int __init tx4938ide_probe(struct platform_device *pdev)
diff --git a/drivers/ide/tx4939ide.c b/drivers/ide/tx4939ide.c
index 97cd9e0..4a8c5a2 100644
--- a/drivers/ide/tx4939ide.c
+++ b/drivers/ide/tx4939ide.c
@@ -623,33 +623,34 @@
 #endif	/* __LITTLE_ENDIAN */
 
 static const struct ide_port_ops tx4939ide_port_ops = {
-	.set_pio_mode = tx4939ide_set_pio_mode,
-	.set_dma_mode = tx4939ide_set_dma_mode,
-	.clear_irq = tx4939ide_clear_irq,
-	.cable_detect = tx4939ide_cable_detect,
+	.set_pio_mode		= tx4939ide_set_pio_mode,
+	.set_dma_mode		= tx4939ide_set_dma_mode,
+	.clear_irq		= tx4939ide_clear_irq,
+	.cable_detect		= tx4939ide_cable_detect,
 };
 
 static const struct ide_dma_ops tx4939ide_dma_ops = {
-	.dma_host_set = tx4939ide_dma_host_set,
-	.dma_setup = tx4939ide_dma_setup,
-	.dma_exec_cmd = ide_dma_exec_cmd,
-	.dma_start = ide_dma_start,
-	.dma_end = tx4939ide_dma_end,
-	.dma_test_irq = tx4939ide_dma_test_irq,
-	.dma_lost_irq = ide_dma_lost_irq,
-	.dma_timeout = ide_dma_timeout,
+	.dma_host_set		= tx4939ide_dma_host_set,
+	.dma_setup		= tx4939ide_dma_setup,
+	.dma_exec_cmd		= ide_dma_exec_cmd,
+	.dma_start		= ide_dma_start,
+	.dma_end		= tx4939ide_dma_end,
+	.dma_test_irq		= tx4939ide_dma_test_irq,
+	.dma_lost_irq		= ide_dma_lost_irq,
+	.dma_timeout		= ide_dma_timeout,
 };
 
 static const struct ide_port_info tx4939ide_port_info __initdata = {
-	.init_hwif = tx4939ide_init_hwif,
-	.init_dma = tx4939ide_init_dma,
-	.port_ops = &tx4939ide_port_ops,
-	.dma_ops = &tx4939ide_dma_ops,
-	.tp_ops = &tx4939ide_tp_ops,
-	.host_flags = IDE_HFLAG_MMIO,
-	.pio_mask = ATA_PIO4,
-	.mwdma_mask = ATA_MWDMA2,
-	.udma_mask = ATA_UDMA5,
+	.init_hwif		= tx4939ide_init_hwif,
+	.init_dma		= tx4939ide_init_dma,
+	.port_ops		= &tx4939ide_port_ops,
+	.dma_ops		= &tx4939ide_dma_ops,
+	.tp_ops			= &tx4939ide_tp_ops,
+	.host_flags		= IDE_HFLAG_MMIO,
+	.pio_mask		= ATA_PIO4,
+	.mwdma_mask		= ATA_MWDMA2,
+	.udma_mask		= ATA_UDMA5,
+	.chipset		= ide_generic,
 };
 
 static int __init tx4939ide_probe(struct platform_device *pdev)
diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c
index a103906..415fab0 100644
--- a/drivers/lguest/interrupts_and_traps.c
+++ b/drivers/lguest/interrupts_and_traps.c
@@ -222,11 +222,16 @@
 int init_interrupts(void)
 {
 	/* If they want some strange system call vector, reserve it now */
-	if (syscall_vector != SYSCALL_VECTOR
-	    && test_and_set_bit(syscall_vector, used_vectors)) {
-		printk("lg: couldn't reserve syscall %u\n", syscall_vector);
-		return -EBUSY;
+	if (syscall_vector != SYSCALL_VECTOR) {
+		if (test_bit(syscall_vector, used_vectors) ||
+		    vector_used_by_percpu_irq(syscall_vector)) {
+			printk(KERN_ERR "lg: couldn't reserve syscall %u\n",
+				 syscall_vector);
+			return -EBUSY;
+		}
+		set_bit(syscall_vector, used_vectors);
 	}
+
 	return 0;
 }
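
init_interrupts() above now checks both the used_vectors bitmap and vector_used_by_percpu_irq() before claiming the vector, instead of a single test_and_set_bit(). A user-space model of that reserve-or-EBUSY logic (the per-cpu check is stubbed out; names are made up):

#include <stdbool.h>
#include <stdio.h>

#define NR_VECTORS 256
static bool used_vectors[NR_VECTORS];

/* Stand-in for vector_used_by_percpu_irq(); always false in this model. */
static bool percpu_irq_uses_vector(unsigned int vec) { (void)vec; return false; }

static int reserve_vector(unsigned int vec)
{
	if (used_vectors[vec] || percpu_irq_uses_vector(vec))
		return -1;			/* -EBUSY in the kernel */
	used_vectors[vec] = true;		/* claim it only when both checks pass */
	return 0;
}

int main(void)
{
	printf("%d\n", reserve_vector(0x80));	/* 0: reserved */
	printf("%d\n", reserve_vector(0x80));	/* -1: already taken */
	return 0;
}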
 
diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c
index 9f7896a..c4918b8 100644
--- a/drivers/net/usb/hso.c
+++ b/drivers/net/usb/hso.c
@@ -3,6 +3,8 @@
  * Driver for Option High Speed Mobile Devices.
  *
  *  Copyright (C) 2008 Option International
+ *                     Filip Aben <f.aben@option.com>
+ *                     Denis Joseph Barrow <d.barow@option.com>
  *  Copyright (C) 2007 Andrew Bird (Sphere Systems Ltd)
  *  			<ajb@spheresystems.co.uk>
  *  Copyright (C) 2008 Greg Kroah-Hartman <gregkh@suse.de>
@@ -39,8 +41,11 @@
  *		port is opened, as this have a huge impact on the network port
  *		throughput.
  *
- * Interface 2:	Standard modem interface - circuit switched interface, should
- *		not be used.
+ * Interface 2:	Standard modem interface - circuit switched interface.
+ *		This can be used to make a standard PPP connection; however,
+ *		it should not be used in conjunction with the IP network
+ *		interface, for USB performance reasons, i.e. if using this,
+ *		ideally set disable_net=1.
  *
  *****************************************************************************/
 
@@ -63,6 +68,8 @@
 #include <linux/usb/cdc.h>
 #include <net/arp.h>
 #include <asm/byteorder.h>
+#include <linux/serial_core.h>
+#include <linux/serial.h>
 
 
 #define DRIVER_VERSION			"1.2"
@@ -182,6 +189,41 @@
 	RX_PENDING
 };
 
+#define BM_REQUEST_TYPE (0xa1)
+#define B_NOTIFICATION  (0x20)
+#define W_VALUE         (0x0)
+#define W_INDEX         (0x2)
+#define W_LENGTH        (0x2)
+
+#define B_OVERRUN       (0x1<<6)
+#define B_PARITY        (0x1<<5)
+#define B_FRAMING       (0x1<<4)
+#define B_RING_SIGNAL   (0x1<<3)
+#define B_BREAK         (0x1<<2)
+#define B_TX_CARRIER    (0x1<<1)
+#define B_RX_CARRIER    (0x1<<0)
+
+struct hso_serial_state_notification {
+	u8 bmRequestType;
+	u8 bNotification;
+	u16 wValue;
+	u16 wIndex;
+	u16 wLength;
+	u16 UART_state_bitmap;
+} __attribute__((packed));
+
+struct hso_tiocmget {
+	struct mutex mutex;
+	wait_queue_head_t waitq;
+	int    intr_completed;
+	struct usb_endpoint_descriptor *endp;
+	struct urb *urb;
+	struct hso_serial_state_notification serial_state_notification;
+	u16    prev_UART_state_bitmap;
+	struct uart_icount icount;
+};
+
+
 struct hso_serial {
 	struct hso_device *parent;
 	int magic;
@@ -219,6 +261,7 @@
 	spinlock_t serial_lock;
 
 	int (*write_data) (struct hso_serial *serial);
+	struct hso_tiocmget  *tiocmget;
 	/* Hacks required to get flow control
 	 * working on the serial receive buffers
 	 * so as not to drop characters on the floor.
@@ -305,7 +348,7 @@
 static void async_put_intf(struct work_struct *data);
 static int hso_put_activity(struct hso_device *hso_dev);
 static int hso_get_activity(struct hso_device *hso_dev);
-
+static void tiocmget_intr_callback(struct urb *urb);
 /*****************************************************************************/
 /* Helping functions                                                         */
 /*****************************************************************************/
@@ -362,8 +405,6 @@
 static struct hso_device *serial_table[HSO_SERIAL_TTY_MINORS];
 static struct hso_device *network_table[HSO_MAX_NET_DEVICES];
 static spinlock_t serial_table_lock;
-static struct ktermios *hso_serial_termios[HSO_SERIAL_TTY_MINORS];
-static struct ktermios *hso_serial_termios_locked[HSO_SERIAL_TTY_MINORS];
 
 static const s32 default_port_spec[] = {
 	HSO_INTF_MUX | HSO_PORT_NETWORK,
@@ -1009,23 +1050,11 @@
 
 /* Serial driver functions */
 
-static void _hso_serial_set_termios(struct tty_struct *tty,
-				    struct ktermios *old)
+static void hso_init_termios(struct ktermios *termios)
 {
-	struct hso_serial *serial = get_serial_by_tty(tty);
-	struct ktermios *termios;
-
-	if ((!tty) || (!tty->termios) || (!serial)) {
-		printk(KERN_ERR "%s: no tty structures", __func__);
-		return;
-	}
-
-	D4("port %d", serial->minor);
-
 	/*
 	 * The default requirements for this device are:
 	 */
-	termios = tty->termios;
 	termios->c_iflag &=
 		~(IGNBRK	/* disable ignore break */
 		| BRKINT	/* disable break causes interrupt */
@@ -1057,15 +1086,38 @@
 	termios->c_cflag |= CS8;	/* character size 8 bits */
 
 	/* baud rate 115200 */
-	tty_encode_baud_rate(serial->tty, 115200, 115200);
+	tty_termios_encode_baud_rate(termios, 115200, 115200);
+}
+
+static void _hso_serial_set_termios(struct tty_struct *tty,
+				    struct ktermios *old)
+{
+	struct hso_serial *serial = get_serial_by_tty(tty);
+	struct ktermios *termios;
+
+	if (!serial) {
+		printk(KERN_ERR "%s: no tty structures", __func__);
+		return;
+	}
+
+	D4("port %d", serial->minor);
 
 	/*
-	 * Force low_latency on; otherwise the pushes are scheduled;
-	 * this is bad as it opens up the possibility of dropping bytes
-	 * on the floor.  We don't want to drop bytes on the floor. :)
+	 *	Fix up unsupported bits
 	 */
-	serial->tty->low_latency = 1;
-	return;
+	termios = tty->termios;
+	termios->c_iflag &= ~IXON; /* disable enable XON/XOFF flow control */
+
+	termios->c_cflag &=
+		~(CSIZE		/* no size */
+		| PARENB	/* disable parity bit */
+		| CBAUD		/* clear current baud rate */
+		| CBAUDEX);	/* clear current baud rate */
+
+	termios->c_cflag |= CS8;	/* character size 8 bits */
+
+	/* baud rate 115200 */
+	tty_encode_baud_rate(tty, 115200, 115200);
 }
 
 static void hso_resubmit_rx_bulk_urb(struct hso_serial *serial, struct urb *urb)
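
hso_init_termios() above sets the driver-wide default termios to a raw 8-bit, 115200 configuration once, rather than rewriting tty->termios on every set_termios call. For reference, a user-space sketch of a comparable raw 8N1/115200 setup (generic termios flags, not the driver's exact list):

#include <termios.h>
#include <stdio.h>

int main(void)
{
	struct termios t = { 0 };

	t.c_iflag &= ~(IGNBRK | BRKINT | ICRNL | IXON);	/* raw input */
	t.c_oflag &= ~OPOST;				/* raw output */
	t.c_lflag &= ~(ECHO | ICANON | ISIG);		/* no line discipline cooking */
	t.c_cflag &= ~(CSIZE | PARENB);			/* clear size and parity bits... */
	t.c_cflag |= CS8;				/* ...then select 8 data bits */
	cfsetispeed(&t, B115200);
	cfsetospeed(&t, B115200);

	printf("c_cflag=%lx\n", (unsigned long)t.c_cflag);
	return 0;
}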
@@ -1228,6 +1280,7 @@
 
 	/* sanity check */
 	if (serial == NULL || serial->magic != HSO_SERIAL_MAGIC) {
+		WARN_ON(1);
 		tty->driver_data = NULL;
 		D1("Failed to open port");
 		return -ENODEV;
@@ -1242,8 +1295,10 @@
 	kref_get(&serial->parent->ref);
 
 	/* setup */
+	spin_lock_irq(&serial->serial_lock);
 	tty->driver_data = serial;
-	serial->tty = tty;
+	serial->tty = tty_kref_get(tty);
+	spin_unlock_irq(&serial->serial_lock);
 
 	/* check for port already opened, if not set the termios */
 	serial->open_count++;
@@ -1285,6 +1340,10 @@
 
 	D1("Closing serial port");
 
+	/* Open failed, no close cleanup required */
+	if (serial == NULL)
+		return;
+
 	mutex_lock(&serial->parent->mutex);
 	usb_gone = serial->parent->usb_gone;
 
@@ -1297,10 +1356,13 @@
 	kref_put(&serial->parent->ref, hso_serial_ref_free);
 	if (serial->open_count <= 0) {
 		serial->open_count = 0;
-		if (serial->tty) {
+		spin_lock_irq(&serial->serial_lock);
+		if (serial->tty == tty) {
 			serial->tty->driver_data = NULL;
 			serial->tty = NULL;
+			tty_kref_put(tty);
 		}
+		spin_unlock_irq(&serial->serial_lock);
 		if (!usb_gone)
 			hso_stop_serial_device(serial->parent);
 		tasklet_kill(&serial->unthrottle_tasklet);
@@ -1400,25 +1462,217 @@
 
 	return chars;
 }
+int tiocmget_submit_urb(struct hso_serial *serial,
+			struct hso_tiocmget  *tiocmget,
+			struct usb_device *usb)
+{
+	int result;
+
+	if (serial->parent->usb_gone)
+		return -ENODEV;
+	usb_fill_int_urb(tiocmget->urb, usb,
+			 usb_rcvintpipe(usb,
+					tiocmget->endp->
+					bEndpointAddress & 0x7F),
+			 &tiocmget->serial_state_notification,
+			 sizeof(struct hso_serial_state_notification),
+			 tiocmget_intr_callback, serial,
+			 tiocmget->endp->bInterval);
+	result = usb_submit_urb(tiocmget->urb, GFP_ATOMIC);
+	if (result) {
+		dev_warn(&usb->dev, "%s usb_submit_urb failed %d\n", __func__,
+			 result);
+	}
+	return result;
+
+}
+
+static void tiocmget_intr_callback(struct urb *urb)
+{
+	struct hso_serial *serial = urb->context;
+	struct hso_tiocmget *tiocmget;
+	int status = urb->status;
+	u16 UART_state_bitmap, prev_UART_state_bitmap;
+	struct uart_icount *icount;
+	struct hso_serial_state_notification *serial_state_notification;
+	struct usb_device *usb;
+
+	/* Sanity checks */
+	if (!serial)
+		return;
+	if (status) {
+		log_usb_status(status, __func__);
+		return;
+	}
+	tiocmget = serial->tiocmget;
+	if (!tiocmget)
+		return;
+	usb = serial->parent->usb;
+	serial_state_notification = &tiocmget->serial_state_notification;
+	if (serial_state_notification->bmRequestType != BM_REQUEST_TYPE ||
+	    serial_state_notification->bNotification != B_NOTIFICATION ||
+	    le16_to_cpu(serial_state_notification->wValue) != W_VALUE ||
+	    le16_to_cpu(serial_state_notification->wIndex) != W_INDEX ||
+	    le16_to_cpu(serial_state_notification->wLength) != W_LENGTH) {
+		dev_warn(&usb->dev,
+			 "hso received invalid serial state notification\n");
+		DUMP(serial_state_notification,
+		     sizeof(struct hso_serial_state_notification));
+	} else {
+
+		UART_state_bitmap = le16_to_cpu(serial_state_notification->
+						UART_state_bitmap);
+		prev_UART_state_bitmap = tiocmget->prev_UART_state_bitmap;
+		icount = &tiocmget->icount;
+		spin_lock(&serial->serial_lock);
+		if ((UART_state_bitmap & B_OVERRUN) !=
+		   (prev_UART_state_bitmap & B_OVERRUN))
+			icount->parity++;
+		if ((UART_state_bitmap & B_PARITY) !=
+		   (prev_UART_state_bitmap & B_PARITY))
+			icount->parity++;
+		if ((UART_state_bitmap & B_FRAMING) !=
+		   (prev_UART_state_bitmap & B_FRAMING))
+			icount->frame++;
+		if ((UART_state_bitmap & B_RING_SIGNAL) &&
+		   !(prev_UART_state_bitmap & B_RING_SIGNAL))
+			icount->rng++;
+		if ((UART_state_bitmap & B_BREAK) !=
+		   (prev_UART_state_bitmap & B_BREAK))
+			icount->brk++;
+		if ((UART_state_bitmap & B_TX_CARRIER) !=
+		   (prev_UART_state_bitmap & B_TX_CARRIER))
+			icount->dsr++;
+		if ((UART_state_bitmap & B_RX_CARRIER) !=
+		   (prev_UART_state_bitmap & B_RX_CARRIER))
+			icount->dcd++;
+		tiocmget->prev_UART_state_bitmap = UART_state_bitmap;
+		spin_unlock(&serial->serial_lock);
+		tiocmget->intr_completed = 1;
+		wake_up_interruptible(&tiocmget->waitq);
+	}
+	memset(serial_state_notification, 0,
+	       sizeof(struct hso_serial_state_notification));
+	tiocmget_submit_urb(serial,
+			    tiocmget,
+			    serial->parent->usb);
+}
+
+/*
+ * next few functions largely stolen from drivers/serial/serial_core.c
+ */
+/* Wait for any of the 4 modem inputs (DCD,RI,DSR,CTS) to change
+ * - mask passed in arg for lines of interest
+ *   (use |'ed TIOCM_RNG/DSR/CD/CTS for masking)
+ * Caller should use TIOCGICOUNT to see which one it was
+ */
+static int
+hso_wait_modem_status(struct hso_serial *serial, unsigned long arg)
+{
+	DECLARE_WAITQUEUE(wait, current);
+	struct uart_icount cprev, cnow;
+	struct hso_tiocmget  *tiocmget;
+	int ret;
+
+	tiocmget = serial->tiocmget;
+	if (!tiocmget)
+		return -ENOENT;
+	/*
+	 * note the counters on entry
+	 */
+	spin_lock_irq(&serial->serial_lock);
+	memcpy(&cprev, &tiocmget->icount, sizeof(struct uart_icount));
+	spin_unlock_irq(&serial->serial_lock);
+	add_wait_queue(&tiocmget->waitq, &wait);
+	for (;;) {
+		spin_lock_irq(&serial->serial_lock);
+		memcpy(&cnow, &tiocmget->icount, sizeof(struct uart_icount));
+		spin_unlock_irq(&serial->serial_lock);
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (((arg & TIOCM_RNG) && (cnow.rng != cprev.rng)) ||
+		    ((arg & TIOCM_DSR) && (cnow.dsr != cprev.dsr)) ||
+		    ((arg & TIOCM_CD)  && (cnow.dcd != cprev.dcd))) {
+			ret = 0;
+			break;
+		}
+		schedule();
+		/* see if a signal did it */
+		if (signal_pending(current)) {
+			ret = -ERESTARTSYS;
+			break;
+		}
+		cprev = cnow;
+	}
+	current->state = TASK_RUNNING;
+	remove_wait_queue(&tiocmget->waitq, &wait);
+
+	return ret;
+}
+
+/*
+ * Get counter of input serial line interrupts (DCD,RI,DSR,CTS)
+ * Return: write counters to the user passed counter struct
+ * NB: both 1->0 and 0->1 transitions are counted except for
+ *     RI where only 0->1 is counted.
+ */
+static int hso_get_count(struct hso_serial *serial,
+			  struct serial_icounter_struct __user *icnt)
+{
+	struct serial_icounter_struct icount;
+	struct uart_icount cnow;
+	struct hso_tiocmget  *tiocmget = serial->tiocmget;
+
+	if (!tiocmget)
+		 return -ENOENT;
+	spin_lock_irq(&serial->serial_lock);
+	memcpy(&cnow, &tiocmget->icount, sizeof(struct uart_icount));
+	spin_unlock_irq(&serial->serial_lock);
+
+	icount.cts         = cnow.cts;
+	icount.dsr         = cnow.dsr;
+	icount.rng         = cnow.rng;
+	icount.dcd         = cnow.dcd;
+	icount.rx          = cnow.rx;
+	icount.tx          = cnow.tx;
+	icount.frame       = cnow.frame;
+	icount.overrun     = cnow.overrun;
+	icount.parity      = cnow.parity;
+	icount.brk         = cnow.brk;
+	icount.buf_overrun = cnow.buf_overrun;
+
+	return copy_to_user(icnt, &icount, sizeof(icount)) ? -EFAULT : 0;
+}
+
 
 static int hso_serial_tiocmget(struct tty_struct *tty, struct file *file)
 {
-	unsigned int value;
+	int retval;
 	struct hso_serial *serial = get_serial_by_tty(tty);
-	unsigned long flags;
+	struct hso_tiocmget  *tiocmget;
+	u16 UART_state_bitmap;
 
 	/* sanity check */
 	if (!serial) {
 		D1("no tty structures");
 		return -EINVAL;
 	}
-
-	spin_lock_irqsave(&serial->serial_lock, flags);
-	value = ((serial->rts_state) ? TIOCM_RTS : 0) |
+	spin_lock_irq(&serial->serial_lock);
+	retval = ((serial->rts_state) ? TIOCM_RTS : 0) |
 	    ((serial->dtr_state) ? TIOCM_DTR : 0);
-	spin_unlock_irqrestore(&serial->serial_lock, flags);
+	tiocmget = serial->tiocmget;
+	if (tiocmget) {
 
-	return value;
+		UART_state_bitmap = le16_to_cpu(
+			tiocmget->prev_UART_state_bitmap);
+		if (UART_state_bitmap & B_RING_SIGNAL)
+			retval |=  TIOCM_RNG;
+		if (UART_state_bitmap & B_RX_CARRIER)
+			retval |=  TIOCM_CD;
+		if (UART_state_bitmap & B_TX_CARRIER)
+			retval |=  TIOCM_DSR;
+	}
+	spin_unlock_irq(&serial->serial_lock);
+	return retval;
 }
 
 static int hso_serial_tiocmset(struct tty_struct *tty, struct file *file,
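
hso_wait_modem_status() above snapshots the icount counters under serial_lock and sleeps on tiocmget->waitq until one of RNG/DSR/DCD moves, with tiocmget_intr_callback() doing the counting and the wake-up. A condition-variable model of the same handshake (user-space, hypothetical names; not the driver's waitqueue code):

#include <pthread.h>
#include <stdio.h>

struct icount_model { unsigned int rng, dsr, dcd; };

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t changed = PTHREAD_COND_INITIALIZER;
static struct icount_model icount;

/* "Interrupt" side: count a RING transition and wake any waiter. */
static void *report_ring(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock);
	icount.rng++;
	pthread_cond_broadcast(&changed);
	pthread_mutex_unlock(&lock);
	return NULL;
}

static struct icount_model snapshot(void)
{
	struct icount_model c;

	pthread_mutex_lock(&lock);
	c = icount;
	pthread_mutex_unlock(&lock);
	return c;
}

/* "ioctl" side: block until RNG/DSR/DCD differs from the entry snapshot. */
static void wait_modem_change(struct icount_model prev)
{
	pthread_mutex_lock(&lock);
	while (icount.rng == prev.rng && icount.dsr == prev.dsr &&
	       icount.dcd == prev.dcd)
		pthread_cond_wait(&changed, &lock);
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	struct icount_model prev = snapshot();	/* note the counters on entry */
	pthread_t t;

	pthread_create(&t, NULL, report_ring, NULL);
	wait_modem_change(prev);
	pthread_join(t, NULL);
	puts("modem signal changed");
	return 0;
}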
@@ -1460,6 +1714,32 @@
 			       USB_CTRL_SET_TIMEOUT);
 }
 
+static int hso_serial_ioctl(struct tty_struct *tty, struct file *file,
+			    unsigned int cmd, unsigned long arg)
+{
+	struct hso_serial *serial =  get_serial_by_tty(tty);
+	void __user *uarg = (void __user *)arg;
+	int ret = 0;
+	D4("IOCTL cmd: %d, arg: %ld", cmd, arg);
+
+	if (!serial)
+		return -ENODEV;
+	switch (cmd) {
+	case TIOCMIWAIT:
+		ret = hso_wait_modem_status(serial, arg);
+		break;
+
+	case TIOCGICOUNT:
+		ret = hso_get_count(serial, uarg);
+		break;
+	default:
+		ret = -ENOIOCTLCMD;
+		break;
+	}
+	return ret;
+}
+
+
 /* starts a transmit */
 static void hso_kick_transmit(struct hso_serial *serial)
 {
@@ -1653,6 +1933,7 @@
 {
 	struct hso_serial *serial = urb->context;
 	int status = urb->status;
+	struct tty_struct *tty;
 
 	/* sanity check */
 	if (!serial) {
@@ -1662,14 +1943,18 @@
 
 	spin_lock(&serial->serial_lock);
 	serial->tx_urb_used = 0;
+	tty = tty_kref_get(serial->tty);
 	spin_unlock(&serial->serial_lock);
 	if (status) {
 		log_usb_status(status, __func__);
+		tty_kref_put(tty);
 		return;
 	}
 	hso_put_activity(serial->parent);
-	if (serial->tty)
-		tty_wakeup(serial->tty);
+	if (tty) {
+		tty_wakeup(tty);
+		tty_kref_put(tty);
+	}
 	hso_kick_transmit(serial);
 
 	D1(" ");
@@ -1706,6 +1991,7 @@
 	struct hso_serial *serial = urb->context;
 	struct usb_ctrlrequest *req;
 	int status = urb->status;
+	struct tty_struct *tty;
 
 	/* sanity check */
 	if (!serial)
@@ -1713,9 +1999,11 @@
 
 	spin_lock(&serial->serial_lock);
 	serial->tx_urb_used = 0;
+	tty = tty_kref_get(serial->tty);
 	spin_unlock(&serial->serial_lock);
 	if (status) {
 		log_usb_status(status, __func__);
+		tty_kref_put(tty);
 		return;
 	}
 
@@ -1734,25 +2022,31 @@
 		spin_unlock(&serial->serial_lock);
 	} else {
 		hso_put_activity(serial->parent);
-		if (serial->tty)
-			tty_wakeup(serial->tty);
+		if (tty)
+			tty_wakeup(tty);
 		/* response to a write command */
 		hso_kick_transmit(serial);
 	}
+	tty_kref_put(tty);
 }
 
 /* handle RX data for serial port */
 static int put_rxbuf_data(struct urb *urb, struct hso_serial *serial)
 {
-	struct tty_struct *tty = serial->tty;
+	struct tty_struct *tty;
 	int write_length_remaining = 0;
 	int curr_write_len;
+
 	/* Sanity check */
 	if (urb == NULL || serial == NULL) {
 		D1("serial = NULL");
 		return -2;
 	}
 
+	spin_lock(&serial->serial_lock);
+	tty = tty_kref_get(serial->tty);
+	spin_unlock(&serial->serial_lock);
+
 	/* Push data to tty */
 	if (tty) {
 		write_length_remaining = urb->actual_length -
@@ -1774,6 +2068,7 @@
 		serial->curr_rx_urb_offset = 0;
 		serial->rx_urb_filled[hso_urb_to_index(serial, urb)] = 0;
 	}
+	tty_kref_put(tty);
 	return write_length_remaining;
 }
 
@@ -1922,7 +2217,10 @@
 		serial->shared_int->use_count++;
 		mutex_unlock(&serial->shared_int->shared_int_lock);
 	}
-
+	if (serial->tiocmget)
+		tiocmget_submit_urb(serial,
+				    serial->tiocmget,
+				    serial->parent->usb);
 	return result;
 }
 
@@ -1930,6 +2228,7 @@
 {
 	int i;
 	struct hso_serial *serial = dev2ser(hso_dev);
+	struct hso_tiocmget  *tiocmget;
 
 	if (!serial)
 		return -ENODEV;
@@ -1958,6 +2257,11 @@
 		}
 		mutex_unlock(&serial->shared_int->shared_int_lock);
 	}
+	tiocmget = serial->tiocmget;
+	if (tiocmget) {
+		wake_up_interruptible(&tiocmget->waitq);
+		usb_kill_urb(tiocmget->urb);
+	}
 
 	return 0;
 }
@@ -2304,6 +2608,20 @@
 	return NULL;
 }
 
+static void hso_free_tiomget(struct hso_serial *serial)
+{
+	struct hso_tiocmget *tiocmget = serial->tiocmget;
+	if (tiocmget) {
+		if (tiocmget->urb) {
+			usb_free_urb(tiocmget->urb);
+			tiocmget->urb = NULL;
+		}
+		serial->tiocmget = NULL;
+		/* free the container only after its urb has been released */
+		kfree(tiocmget);
+	}
+}
+
 /* Frees an AT channel ( goes for both mux and non-mux ) */
 static void hso_free_serial_device(struct hso_device *hso_dev)
 {
@@ -2322,6 +2640,7 @@
 		else
 			mutex_unlock(&serial->shared_int->shared_int_lock);
 	}
+	hso_free_tiomget(serial);
 	kfree(serial);
 	hso_free_device(hso_dev);
 }
@@ -2333,6 +2652,7 @@
 	struct hso_device *hso_dev;
 	struct hso_serial *serial;
 	int num_urbs;
+	struct hso_tiocmget *tiocmget;
 
 	hso_dev = hso_create_device(interface, port);
 	if (!hso_dev)
@@ -2345,8 +2665,27 @@
 	serial->parent = hso_dev;
 	hso_dev->port_data.dev_serial = serial;
 
-	if (port & HSO_PORT_MODEM)
+	if ((port & HSO_PORT_MASK) == HSO_PORT_MODEM) {
 		num_urbs = 2;
+		serial->tiocmget = kzalloc(sizeof(struct hso_tiocmget),
+					   GFP_KERNEL);
+		/* It isn't going to break our heart if serial->tiocmget
+		 * allocation fails; don't bother checking this.
+		 */
+		if (serial->tiocmget) {
+			tiocmget = serial->tiocmget;
+			tiocmget->urb = usb_alloc_urb(0, GFP_KERNEL);
+			if (tiocmget->urb) {
+				mutex_init(&tiocmget->mutex);
+				init_waitqueue_head(&tiocmget->waitq);
+				tiocmget->endp = hso_get_ep(
+					interface,
+					USB_ENDPOINT_XFER_INT,
+					USB_DIR_IN);
+			} else
+				hso_free_tiomget(serial);
+		}
+	}
 	else
 		num_urbs = 1;
 
@@ -2382,6 +2721,7 @@
 exit2:
 	hso_serial_common_free(serial);
 exit:
+	hso_free_tiomget(serial);
 	kfree(serial);
 	hso_free_device(hso_dev);
 	return NULL;
@@ -2786,15 +3126,20 @@
 static void hso_free_interface(struct usb_interface *interface)
 {
 	struct hso_serial *hso_dev;
+	struct tty_struct *tty;
 	int i;
 
 	for (i = 0; i < HSO_SERIAL_TTY_MINORS; i++) {
 		if (serial_table[i]
 		    && (serial_table[i]->interface == interface)) {
 			hso_dev = dev2ser(serial_table[i]);
-			if (hso_dev->tty)
-				tty_hangup(hso_dev->tty);
+			spin_lock_irq(&hso_dev->serial_lock);
+			tty = tty_kref_get(hso_dev->tty);
+			spin_unlock_irq(&hso_dev->serial_lock);
+			if (tty)
+				tty_hangup(tty);
 			mutex_lock(&hso_dev->parent->mutex);
+			tty_kref_put(tty);
 			hso_dev->parent->usb_gone = 1;
 			mutex_unlock(&hso_dev->parent->mutex);
 			kref_put(&serial_table[i]->ref, hso_serial_ref_free);
@@ -2887,6 +3232,7 @@
 	.close = hso_serial_close,
 	.write = hso_serial_write,
 	.write_room = hso_serial_write_room,
+	.ioctl = hso_serial_ioctl,
 	.set_termios = hso_serial_set_termios,
 	.chars_in_buffer = hso_serial_chars_in_buffer,
 	.tiocmget = hso_serial_tiocmget,
@@ -2939,9 +3285,7 @@
 	tty_drv->subtype = SERIAL_TYPE_NORMAL;
 	tty_drv->flags = TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_DEV;
 	tty_drv->init_termios = tty_std_termios;
-	tty_drv->init_termios.c_cflag = B9600 | CS8 | CREAD | HUPCL | CLOCAL;
-	tty_drv->termios = hso_serial_termios;
-	tty_drv->termios_locked = hso_serial_termios_locked;
+	hso_init_termios(&tty_drv->init_termios);
 	tty_set_operations(tty_drv, &hso_serial_ops);
 
 	/* register the tty driver */
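
Throughout the hso changes, serial->tty is now sampled and pinned with tty_kref_get() while serial_lock is held, and released with tty_kref_put() once the completion path is done with it. A reference-counting model of that pattern (user-space, simplified; not the tty layer itself):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct model_tty {
	int refcount;
};

static pthread_mutex_t serial_lock = PTHREAD_MUTEX_INITIALIZER;
static struct model_tty *serial_tty;	/* may be cleared by "close" concurrently */

static struct model_tty *tty_get(struct model_tty *t)
{
	if (t)
		t->refcount++;			/* tty_kref_get() stand-in */
	return t;
}

static void tty_put(struct model_tty *t)
{
	if (t && --t->refcount == 0)
		free(t);			/* final put releases the object */
}

static void tx_complete(void)
{
	struct model_tty *tty;

	pthread_mutex_lock(&serial_lock);
	tty = tty_get(serial_tty);		/* pin the tty while the lock is held */
	pthread_mutex_unlock(&serial_lock);

	if (tty) {
		puts("wake up writer");		/* tty_wakeup() would go here */
		tty_put(tty);
	}
}

int main(void)
{
	struct model_tty *t;

	serial_tty = calloc(1, sizeof(*serial_tty));
	serial_tty->refcount = 1;		/* reference owned by "open" */
	tx_complete();

	t = serial_tty;
	serial_tty = NULL;			/* "close": clear the pointer... */
	tty_put(t);				/* ...then drop the last reference */
	return 0;
}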
diff --git a/drivers/parisc/iosapic.c b/drivers/parisc/iosapic.c
index 7beffca..9dedbbd 100644
--- a/drivers/parisc/iosapic.c
+++ b/drivers/parisc/iosapic.c
@@ -704,16 +704,17 @@
 }
 
 #ifdef CONFIG_SMP
-static void iosapic_set_affinity_irq(unsigned int irq, cpumask_t dest)
+static void iosapic_set_affinity_irq(unsigned int irq,
+				     const struct cpumask *dest)
 {
 	struct vector_info *vi = iosapic_get_vector(irq);
 	u32 d0, d1, dummy_d0;
 	unsigned long flags;
 
-	if (cpu_check_affinity(irq, &dest))
+	if (cpu_check_affinity(irq, dest))
 		return;
 
-	vi->txn_addr = txn_affinity_addr(irq, first_cpu(dest));
+	vi->txn_addr = txn_affinity_addr(irq, cpumask_first(dest));
 
 	spin_lock_irqsave(&iosapic_lock, flags);
 	/* d1 contains the destination CPU, so only want to set that
diff --git a/drivers/pci/hotplug/cpqphp_core.c b/drivers/pci/hotplug/cpqphp_core.c
index 8514c3a..c2e1bcb 100644
--- a/drivers/pci/hotplug/cpqphp_core.c
+++ b/drivers/pci/hotplug/cpqphp_core.c
@@ -45,7 +45,7 @@
 
 #include "cpqphp.h"
 #include "cpqphp_nvram.h"
-#include "../../../arch/x86/pci/pci.h"	/* horrible hack showing how processor dependent we are... */
+#include <asm/pci_x86.h>
 
 
 /* Global variables */
diff --git a/drivers/pci/hotplug/cpqphp_pci.c b/drivers/pci/hotplug/cpqphp_pci.c
index 0902193..df146be 100644
--- a/drivers/pci/hotplug/cpqphp_pci.c
+++ b/drivers/pci/hotplug/cpqphp_pci.c
@@ -37,7 +37,7 @@
 #include "../pci.h"
 #include "cpqphp.h"
 #include "cpqphp_nvram.h"
-#include "../../../arch/x86/pci/pci.h"	/* horrible hack showing how processor dependent we are... */
+#include <asm/pci_x86.h>
 
 
 u8 cpqhp_nic_irq;
diff --git a/drivers/pci/hotplug/ibmphp_core.c b/drivers/pci/hotplug/ibmphp_core.c
index 633e743..dd18f85 100644
--- a/drivers/pci/hotplug/ibmphp_core.c
+++ b/drivers/pci/hotplug/ibmphp_core.c
@@ -35,7 +35,7 @@
 #include <linux/delay.h>
 #include <linux/wait.h>
 #include "../pci.h"
-#include "../../../arch/x86/pci/pci.h"	/* for struct irq_routing_table */
+#include <asm/pci_x86.h>		/* for struct irq_routing_table */
 #include "ibmphp.h"
 
 #define attn_on(sl)  ibmphp_hpc_writeslot (sl, HPC_SLOT_ATTNON)
diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index 5d72866..c884858 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -74,7 +74,7 @@
 	int len;
 
 	mask = pcibus_to_cpumask(to_pci_dev(dev)->bus);
-	len = cpumask_scnprintf(buf, PAGE_SIZE-2, mask);
+	len = cpumask_scnprintf(buf, PAGE_SIZE-2, &mask);
 	buf[len++] = '\n';
 	buf[len] = '\0';
 	return len;
@@ -88,7 +88,7 @@
 	int len;
 
 	mask = pcibus_to_cpumask(to_pci_dev(dev)->bus);
-	len = cpulist_scnprintf(buf, PAGE_SIZE-2, mask);
+	len = cpulist_scnprintf(buf, PAGE_SIZE-2, &mask);
 	buf[len++] = '\n';
 	buf[len] = '\0';
 	return len;
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 003a9b3..5b3f593 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -55,8 +55,8 @@
 
 	cpumask = pcibus_to_cpumask(to_pci_bus(dev));
 	ret = type?
-		cpulist_scnprintf(buf, PAGE_SIZE-2, cpumask):
-		cpumask_scnprintf(buf, PAGE_SIZE-2, cpumask);
+		cpulist_scnprintf(buf, PAGE_SIZE-2, &cpumask) :
+		cpumask_scnprintf(buf, PAGE_SIZE-2, &cpumask);
 	buf[ret++] = '\n';
 	buf[ret] = '\0';
 	return ret;
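
The iosapic and PCI sysfs hunks above are part of the cpumask API migration: affinity callbacks take a const struct cpumask * instead of a cpumask_t by value, first_cpu() becomes cpumask_first(), and the scnprintf helpers now take a pointer. A standalone sketch of the by-reference style (plain bitmask stand-in, made-up names):

#include <stdio.h>

struct cpumask_model { unsigned long bits; };

/* cpumask_first() stand-in: index of the lowest set bit, or -1 if empty. */
static int cpumask_first_model(const struct cpumask_model *mask)
{
	for (int cpu = 0; cpu < (int)(8 * sizeof(mask->bits)); cpu++)
		if (mask->bits & (1UL << cpu))
			return cpu;
	return -1;
}

/* New-style callback: the mask is passed by reference, never copied. */
static void set_affinity(unsigned int irq, const struct cpumask_model *dest)
{
	printf("irq %u -> cpu %d\n", irq, cpumask_first_model(dest));
}

int main(void)
{
	struct cpumask_model dest = { .bits = 0xc };	/* CPUs 2 and 3 */

	set_affinity(30, &dest);			/* prints "irq 30 -> cpu 2" */
	return 0;
}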
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index 152d4aa..b732297 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -21,7 +21,7 @@
 	  You also need to say Y here if you have a device which speaks
 	  the SCSI protocol.  Examples of this include the parallel port
 	  version of the IOMEGA ZIP drive, USB storage devices, Fibre
-	  Channel, FireWire storage and the IDE-SCSI emulation driver.
+	  Channel, and FireWire storage.
 
 	  To compile this driver as a module, choose M here and read
 	  <file:Documentation/scsi/scsi.txt>.
@@ -101,9 +101,9 @@
 	---help---
 	  The OnStream SC-x0 SCSI tape drives cannot be driven by the
 	  standard st driver, but instead need this special osst driver and
-	  use the  /dev/osstX char device nodes (major 206).  Via usb-storage
-	  and ide-scsi, you may be able to drive the USB-x0 and DI-x0 drives
-	  as well.  Note that there is also a second generation of OnStream
+	  use the  /dev/osstX char device nodes (major 206).  Via usb-storage,
+	  you may be able to drive the USB-x0 and DI-x0 drives as well.
+	  Note that there is also a second generation of OnStream
 	  tape drives (ADR-x0) that supports the standard SCSI-2 commands for
 	  tapes (QIC-157) and can be driven by the standard driver st.
 	  For more information, you may have a look at the SCSI-HOWTO
diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile
index 1410697..7461eb0 100644
--- a/drivers/scsi/Makefile
+++ b/drivers/scsi/Makefile
@@ -105,7 +105,6 @@
 obj-$(CONFIG_SCSI_INITIO)	+= initio.o
 obj-$(CONFIG_SCSI_INIA100)	+= a100u2w.o
 obj-$(CONFIG_SCSI_QLOGICPTI)	+= qlogicpti.o
-obj-$(CONFIG_BLK_DEV_IDESCSI)	+= ide-scsi.o
 obj-$(CONFIG_SCSI_MESH)		+= mesh.o
 obj-$(CONFIG_SCSI_MAC53C94)	+= mac53c94.o
 obj-$(CONFIG_BLK_DEV_3W_XXXX_RAID) += 3w-xxxx.o
diff --git a/drivers/scsi/ide-scsi.c b/drivers/scsi/ide-scsi.c
deleted file mode 100644
index c24140a..0000000
--- a/drivers/scsi/ide-scsi.c
+++ /dev/null
@@ -1,840 +0,0 @@
-/*
- * Copyright (C) 1996-1999  Gadi Oxman <gadio@netvision.net.il>
- * Copyright (C) 2004-2005  Bartlomiej Zolnierkiewicz
- */
-
-/*
- * Emulation of a SCSI host adapter for IDE ATAPI devices.
- *
- * With this driver, one can use the Linux SCSI drivers instead of the
- * native IDE ATAPI drivers.
- *
- * Ver 0.1   Dec  3 96   Initial version.
- * Ver 0.2   Jan 26 97   Fixed bug in cleanup_module() and added emulation
- *                        of MODE_SENSE_6/MODE_SELECT_6 for cdroms. Thanks
- *                        to Janos Farkas for pointing this out.
- *                       Avoid using bitfields in structures for m68k.
- *                       Added Scatter/Gather and DMA support.
- * Ver 0.4   Dec  7 97   Add support for ATAPI PD/CD drives.
- *                       Use variable timeout for each command.
- * Ver 0.5   Jan  2 98   Fix previous PD/CD support.
- *                       Allow disabling of SCSI-6 to SCSI-10 transformation.
- * Ver 0.6   Jan 27 98   Allow disabling of SCSI command translation layer
- *                        for access through /dev/sg.
- *                       Fix MODE_SENSE_6/MODE_SELECT_6/INQUIRY translation.
- * Ver 0.7   Dec 04 98   Ignore commands where lun != 0 to avoid multiple
- *                        detection of devices with CONFIG_SCSI_MULTI_LUN
- * Ver 0.8   Feb 05 99   Optical media need translation too. Reverse 0.7.
- * Ver 0.9   Jul 04 99   Fix a bug in SG_SET_TRANSFORM.
- * Ver 0.91  Jun 10 02   Fix "off by one" error in transforms
- * Ver 0.92  Dec 31 02   Implement new SCSI mid level API
- */
-
-#define IDESCSI_VERSION "0.92"
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/ioport.h>
-#include <linux/blkdev.h>
-#include <linux/errno.h>
-#include <linux/slab.h>
-#include <linux/ide.h>
-#include <linux/scatterlist.h>
-#include <linux/delay.h>
-#include <linux/mutex.h>
-#include <linux/bitops.h>
-
-#include <asm/io.h>
-#include <asm/uaccess.h>
-
-#include <scsi/scsi.h>
-#include <scsi/scsi_cmnd.h>
-#include <scsi/scsi_device.h>
-#include <scsi/scsi_host.h>
-#include <scsi/scsi_tcq.h>
-#include <scsi/sg.h>
-
-#define IDESCSI_DEBUG_LOG		0
-
-#if IDESCSI_DEBUG_LOG
-#define debug_log(fmt, args...) \
-	printk(KERN_INFO "ide-scsi: " fmt, ## args)
-#else
-#define debug_log(fmt, args...) do {} while (0)
-#endif
-
-/*
- *	SCSI command transformation layer
- */
-#define IDESCSI_SG_TRANSFORM		1	/* /dev/sg transformation */
-
-/*
- *	Log flags
- */
-#define IDESCSI_LOG_CMD			0	/* Log SCSI commands */
-
-typedef struct ide_scsi_obj {
-	ide_drive_t		*drive;
-	ide_driver_t		*driver;
-	struct gendisk		*disk;
-	struct Scsi_Host	*host;
-
-	unsigned long transform;		/* SCSI cmd translation layer */
-	unsigned long log;			/* log flags */
-} idescsi_scsi_t;
-
-static DEFINE_MUTEX(idescsi_ref_mutex);
-/* Set by module param to skip cd */
-static int idescsi_nocd;
-
-#define ide_scsi_g(disk) \
-	container_of((disk)->private_data, struct ide_scsi_obj, driver)
-
-static struct ide_scsi_obj *ide_scsi_get(struct gendisk *disk)
-{
-	struct ide_scsi_obj *scsi = NULL;
-
-	mutex_lock(&idescsi_ref_mutex);
-	scsi = ide_scsi_g(disk);
-	if (scsi) {
-		if (ide_device_get(scsi->drive))
-			scsi = NULL;
-		else
-			scsi_host_get(scsi->host);
-	}
-	mutex_unlock(&idescsi_ref_mutex);
-	return scsi;
-}
-
-static void ide_scsi_put(struct ide_scsi_obj *scsi)
-{
-	ide_drive_t *drive = scsi->drive;
-
-	mutex_lock(&idescsi_ref_mutex);
-	scsi_host_put(scsi->host);
-	ide_device_put(drive);
-	mutex_unlock(&idescsi_ref_mutex);
-}
-
-static inline idescsi_scsi_t *scsihost_to_idescsi(struct Scsi_Host *host)
-{
-	return (idescsi_scsi_t*) (&host[1]);
-}
-
-static inline idescsi_scsi_t *drive_to_idescsi(ide_drive_t *ide_drive)
-{
-	return scsihost_to_idescsi(ide_drive->driver_data);
-}
-
-static void ide_scsi_hex_dump(u8 *data, int len)
-{
-	print_hex_dump(KERN_CONT, "", DUMP_PREFIX_NONE, 16, 1, data, len, 0);
-}
-
-static int idescsi_end_request(ide_drive_t *, int, int);
-
-static void ide_scsi_callback(ide_drive_t *drive, int dsc)
-{
-	idescsi_scsi_t *scsi = drive_to_idescsi(drive);
-	struct ide_atapi_pc *pc = drive->pc;
-
-	if (pc->flags & PC_FLAG_TIMEDOUT)
-		debug_log("%s: got timed out packet %lu at %lu\n", __func__,
-			  pc->scsi_cmd->serial_number, jiffies);
-		/* end this request now - scsi should retry it*/
-	else if (test_bit(IDESCSI_LOG_CMD, &scsi->log))
-		printk(KERN_INFO "Packet command completed, %d bytes"
-				 " transferred\n", pc->xferred);
-
-	idescsi_end_request(drive, 1, 0);
-}
-
-static int idescsi_check_condition(ide_drive_t *drive,
-		struct request *failed_cmd)
-{
-	idescsi_scsi_t *scsi = drive_to_idescsi(drive);
-	struct ide_atapi_pc   *pc;
-	struct request *rq;
-	u8             *buf;
-
-	/* stuff a sense request in front of our current request */
-	pc = kzalloc(sizeof(struct ide_atapi_pc), GFP_ATOMIC);
-	rq = blk_get_request(drive->queue, READ, GFP_ATOMIC);
-	buf = kzalloc(SCSI_SENSE_BUFFERSIZE, GFP_ATOMIC);
-	if (!pc || !rq || !buf) {
-		kfree(buf);
-		if (rq)
-			blk_put_request(rq);
-		kfree(pc);
-		return -ENOMEM;
-	}
-	rq->special = (char *) pc;
-	pc->rq = rq;
-	pc->buf = buf;
-	pc->c[0] = REQUEST_SENSE;
-	pc->c[4] = pc->req_xfer = pc->buf_size = SCSI_SENSE_BUFFERSIZE;
-	rq->cmd_type = REQ_TYPE_SENSE;
-	rq->cmd_flags |= REQ_PREEMPT;
-	pc->timeout = jiffies + WAIT_READY;
-	/* NOTE! Save the failed packet command in "rq->buffer" */
-	rq->buffer = (void *) failed_cmd->special;
-	pc->scsi_cmd = ((struct ide_atapi_pc *) failed_cmd->special)->scsi_cmd;
-	if (test_bit(IDESCSI_LOG_CMD, &scsi->log)) {
-		printk ("ide-scsi: %s: queue cmd = ", drive->name);
-		ide_scsi_hex_dump(pc->c, 6);
-	}
-	rq->rq_disk = scsi->disk;
-	rq->ref_count++;
-	memcpy(rq->cmd, pc->c, 12);
-	ide_do_drive_cmd(drive, rq);
-	return 0;
-}
-
-static ide_startstop_t
-idescsi_atapi_error(ide_drive_t *drive, struct request *rq, u8 stat, u8 err)
-{
-	ide_hwif_t *hwif = drive->hwif;
-
-	if (hwif->tp_ops->read_status(hwif) & (ATA_BUSY | ATA_DRQ))
-		/* force an abort */
-		hwif->tp_ops->exec_command(hwif, ATA_CMD_IDLEIMMEDIATE);
-
-	rq->errors++;
-
-	idescsi_end_request(drive, 0, 0);
-
-	return ide_stopped;
-}
-
-static int idescsi_end_request (ide_drive_t *drive, int uptodate, int nrsecs)
-{
-	idescsi_scsi_t *scsi = drive_to_idescsi(drive);
-	struct request *rq = HWGROUP(drive)->rq;
-	struct ide_atapi_pc *pc = (struct ide_atapi_pc *) rq->special;
-	int log = test_bit(IDESCSI_LOG_CMD, &scsi->log);
-	struct Scsi_Host *host;
-	int errors = rq->errors;
-	unsigned long flags;
-
-	if (!blk_special_request(rq) && !blk_sense_request(rq)) {
-		ide_end_request(drive, uptodate, nrsecs);
-		return 0;
-	}
-	ide_end_drive_cmd (drive, 0, 0);
-	if (blk_sense_request(rq)) {
-		struct ide_atapi_pc *opc = (struct ide_atapi_pc *) rq->buffer;
-		if (log) {
-			printk ("ide-scsi: %s: wrap up check %lu, rst = ", drive->name, opc->scsi_cmd->serial_number);
-			ide_scsi_hex_dump(pc->buf, 16);
-		}
-		memcpy((void *) opc->scsi_cmd->sense_buffer, pc->buf,
-			SCSI_SENSE_BUFFERSIZE);
-		kfree(pc->buf);
-		kfree(pc);
-		blk_put_request(rq);
-		pc = opc;
-		rq = pc->rq;
-		pc->scsi_cmd->result = (CHECK_CONDITION << 1) |
-				(((pc->flags & PC_FLAG_TIMEDOUT) ?
-				  DID_TIME_OUT :
-				  DID_OK) << 16);
-	} else if (pc->flags & PC_FLAG_TIMEDOUT) {
-		if (log)
-			printk (KERN_WARNING "ide-scsi: %s: timed out for %lu\n",
-					drive->name, pc->scsi_cmd->serial_number);
-		pc->scsi_cmd->result = DID_TIME_OUT << 16;
-	} else if (errors >= ERROR_MAX) {
-		pc->scsi_cmd->result = DID_ERROR << 16;
-		if (log)
-			printk ("ide-scsi: %s: I/O error for %lu\n", drive->name, pc->scsi_cmd->serial_number);
-	} else if (errors) {
-		if (log)
-			printk ("ide-scsi: %s: check condition for %lu\n", drive->name, pc->scsi_cmd->serial_number);
-		if (!idescsi_check_condition(drive, rq))
-			/* we started a request sense, so we'll be back, exit for now */
-			return 0;
-		pc->scsi_cmd->result = (CHECK_CONDITION << 1) | (DID_OK << 16);
-	} else {
-		pc->scsi_cmd->result = DID_OK << 16;
-	}
-	host = pc->scsi_cmd->device->host;
-	spin_lock_irqsave(host->host_lock, flags);
-	pc->done(pc->scsi_cmd);
-	spin_unlock_irqrestore(host->host_lock, flags);
-	kfree(pc);
-	blk_put_request(rq);
-	drive->pc = NULL;
-	return 0;
-}
-
-static inline int idescsi_set_direction(struct ide_atapi_pc *pc)
-{
-	switch (pc->c[0]) {
-		case READ_6: case READ_10: case READ_12:
-			pc->flags &= ~PC_FLAG_WRITING;
-			return 0;
-		case WRITE_6: case WRITE_10: case WRITE_12:
-			pc->flags |= PC_FLAG_WRITING;
-			return 0;
-		default:
-			return 1;
-	}
-}
-
-static int idescsi_map_sg(ide_drive_t *drive, struct ide_atapi_pc *pc)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	struct scatterlist *sg, *scsi_sg;
-	int segments;
-
-	if (!pc->req_xfer || pc->req_xfer % 1024)
-		return 1;
-
-	if (idescsi_set_direction(pc))
-		return 1;
-
-	sg = hwif->sg_table;
-	scsi_sg = scsi_sglist(pc->scsi_cmd);
-	segments = scsi_sg_count(pc->scsi_cmd);
-
-	if (segments > hwif->sg_max_nents)
-		return 1;
-
-	hwif->sg_nents = segments;
-	memcpy(sg, scsi_sg, sizeof(*sg) * segments);
-
-	return 0;
-}
-
-static ide_startstop_t idescsi_issue_pc(ide_drive_t *drive,
-		struct ide_atapi_pc *pc)
-{
-	/* Set the current packet command */
-	drive->pc = pc;
-
-	return ide_issue_pc(drive, ide_scsi_get_timeout(pc), ide_scsi_expiry);
-}
-
-/*
- *	idescsi_do_request is our request handling function.
- */
-static ide_startstop_t idescsi_do_request (ide_drive_t *drive, struct request *rq, sector_t block)
-{
-	debug_log("dev: %s, cmd: %x, errors: %d\n", rq->rq_disk->disk_name,
-		  rq->cmd[0], rq->errors);
-	debug_log("sector: %ld, nr_sectors: %ld, current_nr_sectors: %d\n",
-		  rq->sector, rq->nr_sectors, rq->current_nr_sectors);
-
-	if (blk_sense_request(rq) || blk_special_request(rq)) {
-		struct ide_atapi_pc *pc = (struct ide_atapi_pc *)rq->special;
-
-		if ((drive->dev_flags & IDE_DFLAG_USING_DMA) &&
-		    idescsi_map_sg(drive, pc) == 0)
-			pc->flags |= PC_FLAG_DMA_OK;
-
-		return idescsi_issue_pc(drive, pc);
-	}
-	blk_dump_rq_flags(rq, "ide-scsi: unsup command");
-	idescsi_end_request (drive, 0, 0);
-	return ide_stopped;
-}
-
-#ifdef CONFIG_IDE_PROC_FS
-static ide_proc_entry_t idescsi_proc[] = {
-	{ "capacity", S_IFREG|S_IRUGO, proc_ide_read_capacity, NULL },
-	{ NULL, 0, NULL, NULL }
-};
-
-#define ide_scsi_devset_get(name, field) \
-static int get_##name(ide_drive_t *drive) \
-{ \
-	idescsi_scsi_t *scsi = drive_to_idescsi(drive); \
-	return scsi->field; \
-}
-
-#define ide_scsi_devset_set(name, field) \
-static int set_##name(ide_drive_t *drive, int arg) \
-{ \
-	idescsi_scsi_t *scsi = drive_to_idescsi(drive); \
-	scsi->field = arg; \
-	return 0; \
-}
-
-#define ide_scsi_devset_rw_field(_name, _field) \
-ide_scsi_devset_get(_name, _field); \
-ide_scsi_devset_set(_name, _field); \
-IDE_DEVSET(_name, DS_SYNC, get_##_name, set_##_name);
-
-ide_devset_rw_field(bios_cyl, bios_cyl);
-ide_devset_rw_field(bios_head, bios_head);
-ide_devset_rw_field(bios_sect, bios_sect);
-
-ide_scsi_devset_rw_field(transform, transform);
-ide_scsi_devset_rw_field(log, log);
-
-static const struct ide_proc_devset idescsi_settings[] = {
-	IDE_PROC_DEVSET(bios_cyl,  0, 1023),
-	IDE_PROC_DEVSET(bios_head, 0,  255),
-	IDE_PROC_DEVSET(bios_sect, 0,	63),
-	IDE_PROC_DEVSET(log,	   0,	 1),
-	IDE_PROC_DEVSET(transform, 0,	 3),
-	{ 0 },
-};
-
-static ide_proc_entry_t *ide_scsi_proc_entries(ide_drive_t *drive)
-{
-	return idescsi_proc;
-}
-
-static const struct ide_proc_devset *ide_scsi_proc_devsets(ide_drive_t *drive)
-{
-	return idescsi_settings;
-}
-#endif
-
-/*
- *	Driver initialization.
- */
-static void idescsi_setup (ide_drive_t *drive, idescsi_scsi_t *scsi)
-{
-	clear_bit(IDESCSI_SG_TRANSFORM, &scsi->transform);
-#if IDESCSI_DEBUG_LOG
-	set_bit(IDESCSI_LOG_CMD, &scsi->log);
-#endif /* IDESCSI_DEBUG_LOG */
-
-	drive->pc_callback	 = ide_scsi_callback;
-	drive->pc_update_buffers = NULL;
-	drive->pc_io_buffers	 = ide_io_buffers;
-
-	ide_proc_register_driver(drive, scsi->driver);
-}
-
-static void ide_scsi_remove(ide_drive_t *drive)
-{
-	struct Scsi_Host *scsihost = drive->driver_data;
-	struct ide_scsi_obj *scsi = scsihost_to_idescsi(scsihost);
-	struct gendisk *g = scsi->disk;
-
-	scsi_remove_host(scsihost);
-	ide_proc_unregister_driver(drive, scsi->driver);
-
-	ide_unregister_region(g);
-
-	drive->driver_data = NULL;
-	g->private_data = NULL;
-	put_disk(g);
-
-	ide_scsi_put(scsi);
-
-	drive->dev_flags &= ~IDE_DFLAG_SCSI;
-}
-
-static int ide_scsi_probe(ide_drive_t *);
-
-static ide_driver_t idescsi_driver = {
-	.gen_driver = {
-		.owner		= THIS_MODULE,
-		.name		= "ide-scsi",
-		.bus		= &ide_bus_type,
-	},
-	.probe			= ide_scsi_probe,
-	.remove			= ide_scsi_remove,
-	.version		= IDESCSI_VERSION,
-	.do_request		= idescsi_do_request,
-	.end_request		= idescsi_end_request,
-	.error                  = idescsi_atapi_error,
-#ifdef CONFIG_IDE_PROC_FS
-	.proc_entries		= ide_scsi_proc_entries,
-	.proc_devsets		= ide_scsi_proc_devsets,
-#endif
-};
-
-static int idescsi_ide_open(struct block_device *bdev, fmode_t mode)
-{
-	struct ide_scsi_obj *scsi = ide_scsi_get(bdev->bd_disk);
-
-	if (!scsi)
-		return -ENXIO;
-
-	return 0;
-}
-
-static int idescsi_ide_release(struct gendisk *disk, fmode_t mode)
-{
-	ide_scsi_put(ide_scsi_g(disk));
-	return 0;
-}
-
-static int idescsi_ide_ioctl(struct block_device *bdev, fmode_t mode,
-			unsigned int cmd, unsigned long arg)
-{
-	struct ide_scsi_obj *scsi = ide_scsi_g(bdev->bd_disk);
-	return generic_ide_ioctl(scsi->drive, bdev, cmd, arg);
-}
-
-static struct block_device_operations idescsi_ops = {
-	.owner		= THIS_MODULE,
-	.open		= idescsi_ide_open,
-	.release	= idescsi_ide_release,
-	.locked_ioctl	= idescsi_ide_ioctl,
-};
-
-static int idescsi_slave_configure(struct scsi_device * sdp)
-{
-	/* Configure detected device */
-	sdp->use_10_for_rw = 1;
-	sdp->use_10_for_ms = 1;
-	scsi_adjust_queue_depth(sdp, MSG_SIMPLE_TAG, sdp->host->cmd_per_lun);
-	return 0;
-}
-
-static const char *idescsi_info (struct Scsi_Host *host)
-{
-	return "SCSI host adapter emulation for IDE ATAPI devices";
-}
-
-static int idescsi_ioctl (struct scsi_device *dev, int cmd, void __user *arg)
-{
-	idescsi_scsi_t *scsi = scsihost_to_idescsi(dev->host);
-
-	if (cmd == SG_SET_TRANSFORM) {
-		if (arg)
-			set_bit(IDESCSI_SG_TRANSFORM, &scsi->transform);
-		else
-			clear_bit(IDESCSI_SG_TRANSFORM, &scsi->transform);
-		return 0;
-	} else if (cmd == SG_GET_TRANSFORM)
-		return put_user(test_bit(IDESCSI_SG_TRANSFORM, &scsi->transform), (int __user *) arg);
-	return -EINVAL;
-}
-
-static int idescsi_queue (struct scsi_cmnd *cmd,
-		void (*done)(struct scsi_cmnd *))
-{
-	struct Scsi_Host *host = cmd->device->host;
-	idescsi_scsi_t *scsi = scsihost_to_idescsi(host);
-	ide_drive_t *drive = scsi->drive;
-	struct request *rq = NULL;
-	struct ide_atapi_pc *pc = NULL;
-	int write = cmd->sc_data_direction == DMA_TO_DEVICE;
-
-	if (!drive) {
-		scmd_printk (KERN_ERR, cmd, "drive not present\n");
-		goto abort;
-	}
-	scsi = drive_to_idescsi(drive);
-	pc = kmalloc(sizeof(struct ide_atapi_pc), GFP_ATOMIC);
-	rq = blk_get_request(drive->queue, write, GFP_ATOMIC);
-	if (rq == NULL || pc == NULL) {
-		printk (KERN_ERR "ide-scsi: %s: out of memory\n", drive->name);
-		goto abort;
-	}
-
-	memset (pc->c, 0, 12);
-	pc->flags = 0;
-	if (cmd->sc_data_direction == DMA_TO_DEVICE)
-		pc->flags |= PC_FLAG_WRITING;
-	pc->rq = rq;
-	memcpy (pc->c, cmd->cmnd, cmd->cmd_len);
-	pc->buf = NULL;
-	pc->sg = scsi_sglist(cmd);
-	pc->sg_cnt = scsi_sg_count(cmd);
-	pc->b_count = 0;
-	pc->req_xfer = pc->buf_size = scsi_bufflen(cmd);
-	pc->scsi_cmd = cmd;
-	pc->done = done;
-	pc->timeout = jiffies + cmd->request->timeout;
-
-	if (test_bit(IDESCSI_LOG_CMD, &scsi->log)) {
-		printk ("ide-scsi: %s: que %lu, cmd = ", drive->name, cmd->serial_number);
-		ide_scsi_hex_dump(cmd->cmnd, cmd->cmd_len);
-		if (memcmp(pc->c, cmd->cmnd, cmd->cmd_len)) {
-			printk ("ide-scsi: %s: que %lu, tsl = ", drive->name, cmd->serial_number);
-			ide_scsi_hex_dump(pc->c, 12);
-		}
-	}
-
-	rq->special = (char *) pc;
-	rq->cmd_type = REQ_TYPE_SPECIAL;
-	spin_unlock_irq(host->host_lock);
-	rq->ref_count++;
-	memcpy(rq->cmd, pc->c, 12);
-	blk_execute_rq_nowait(drive->queue, scsi->disk, rq, 0, NULL);
-	spin_lock_irq(host->host_lock);
-	return 0;
-abort:
-	kfree (pc);
-	if (rq)
-		blk_put_request(rq);
-	cmd->result = DID_ERROR << 16;
-	done(cmd);
-	return 0;
-}
-
-static int idescsi_eh_abort (struct scsi_cmnd *cmd)
-{
-	idescsi_scsi_t *scsi  = scsihost_to_idescsi(cmd->device->host);
-	ide_drive_t    *drive = scsi->drive;
-	ide_hwif_t     *hwif;
-	ide_hwgroup_t  *hwgroup;
-	int		busy;
-	int             ret   = FAILED;
-
-	struct ide_atapi_pc *pc;
-
-	/* In idescsi_eh_abort we try to gently pry our command from the ide subsystem */
-
-	if (test_bit(IDESCSI_LOG_CMD, &scsi->log))
-		printk (KERN_WARNING "ide-scsi: abort called for %lu\n", cmd->serial_number);
-
-	if (!drive) {
-		printk (KERN_WARNING "ide-scsi: Drive not set in idescsi_eh_abort\n");
-		WARN_ON(1);
-		goto no_drive;
-	}
-
-	hwif = drive->hwif;
-	hwgroup = hwif->hwgroup;
-
-	/* First give it some more time, how much is "right" is hard to say :-(
-	   FIXME - uses mdelay which causes latency? */
-	busy = ide_wait_not_busy(hwif, 100);
-	if (test_bit(IDESCSI_LOG_CMD, &scsi->log))
-		printk (KERN_WARNING "ide-scsi: drive did%s become ready\n", busy?" not":"");
-
-	spin_lock_irq(&hwgroup->lock);
-
-	/* If there is no pc running we're done (our interrupt took care of it) */
-	pc = drive->pc;
-	if (pc == NULL) {
-		ret = SUCCESS;
-		goto ide_unlock;
-	}
-
-	/* It's somewhere in flight. Does ide subsystem agree? */
-	if (pc->scsi_cmd->serial_number == cmd->serial_number && !busy &&
-	    elv_queue_empty(drive->queue) && HWGROUP(drive)->rq != pc->rq) {
-		/*
-		 * FIXME - not sure this condition can ever occur
-		 */
-		printk (KERN_ERR "ide-scsi: cmd aborted!\n");
-
-		if (blk_sense_request(pc->rq))
-			kfree(pc->buf);
-		/* we need to call blk_put_request twice. */
-		blk_put_request(pc->rq);
-		blk_put_request(pc->rq);
-		kfree(pc);
-		drive->pc = NULL;
-
-		ret = SUCCESS;
-	}
-
-ide_unlock:
-	spin_unlock_irq(&hwgroup->lock);
-no_drive:
-	if (test_bit(IDESCSI_LOG_CMD, &scsi->log))
-		printk (KERN_WARNING "ide-scsi: abort returns %s\n", ret == SUCCESS?"success":"failed");
-
-	return ret;
-}
-
-static int idescsi_eh_reset (struct scsi_cmnd *cmd)
-{
-	struct request *req;
-	idescsi_scsi_t *scsi  = scsihost_to_idescsi(cmd->device->host);
-	ide_drive_t    *drive = scsi->drive;
-	ide_hwgroup_t  *hwgroup;
-	int             ready = 0;
-	int             ret   = SUCCESS;
-
-	struct ide_atapi_pc *pc;
-
-	/* In idescsi_eh_reset we forcefully remove the command from the ide subsystem and reset the device. */
-
-	if (test_bit(IDESCSI_LOG_CMD, &scsi->log))
-		printk (KERN_WARNING "ide-scsi: reset called for %lu\n", cmd->serial_number);
-
-	if (!drive) {
-		printk (KERN_WARNING "ide-scsi: Drive not set in idescsi_eh_reset\n");
-		WARN_ON(1);
-		return FAILED;
-	}
-
-	hwgroup = drive->hwif->hwgroup;
-
-	spin_lock_irq(cmd->device->host->host_lock);
-	spin_lock(&hwgroup->lock);
-
-	pc = drive->pc;
-	if (pc)
-		req = pc->rq;
-
-	if (pc == NULL || req != hwgroup->rq || hwgroup->handler == NULL) {
-		printk (KERN_WARNING "ide-scsi: No active request in idescsi_eh_reset\n");
-		spin_unlock(&hwgroup->lock);
-		spin_unlock_irq(cmd->device->host->host_lock);
-		return FAILED;
-	}
-
-	/* kill current request */
-	if (__blk_end_request(req, -EIO, 0))
-		BUG();
-	if (blk_sense_request(req))
-		kfree(pc->buf);
-	kfree(pc);
-	drive->pc = NULL;
-	blk_put_request(req);
-
-	/* now nuke the drive queue */
-	while ((req = elv_next_request(drive->queue))) {
-		if (__blk_end_request(req, -EIO, 0))
-			BUG();
-	}
-
-	hwgroup->rq = NULL;
-	hwgroup->handler = NULL;
-	hwgroup->busy = 1; /* will set this to zero when ide reset finished */
-	spin_unlock(&hwgroup->lock);
-
-	ide_do_reset(drive);
-
-	/* ide_do_reset starts a polling handler which restarts itself every 50ms until the reset finishes */
-
-	do {
-		spin_unlock_irq(cmd->device->host->host_lock);
-		msleep(50);
-		spin_lock_irq(cmd->device->host->host_lock);
-	} while ( HWGROUP(drive)->handler );
-
-	ready = drive_is_ready(drive);
-	HWGROUP(drive)->busy--;
-	if (!ready) {
-		printk (KERN_ERR "ide-scsi: reset failed!\n");
-		ret = FAILED;
-	}
-
-	spin_unlock_irq(cmd->device->host->host_lock);
-	return ret;
-}
-
-static int idescsi_bios(struct scsi_device *sdev, struct block_device *bdev,
-		sector_t capacity, int *parm)
-{
-	idescsi_scsi_t *idescsi = scsihost_to_idescsi(sdev->host);
-	ide_drive_t *drive = idescsi->drive;
-
-	if (drive->bios_cyl && drive->bios_head && drive->bios_sect) {
-		parm[0] = drive->bios_head;
-		parm[1] = drive->bios_sect;
-		parm[2] = drive->bios_cyl;
-	}
-	return 0;
-}
-
-static struct scsi_host_template idescsi_template = {
-	.module			= THIS_MODULE,
-	.name			= "idescsi",
-	.info			= idescsi_info,
-	.slave_configure        = idescsi_slave_configure,
-	.ioctl			= idescsi_ioctl,
-	.queuecommand		= idescsi_queue,
-	.eh_abort_handler	= idescsi_eh_abort,
-	.eh_host_reset_handler  = idescsi_eh_reset,
-	.bios_param		= idescsi_bios,
-	.can_queue		= 40,
-	.this_id		= -1,
-	.sg_tablesize		= 256,
-	.cmd_per_lun		= 5,
-	.max_sectors		= 128,
-	.use_clustering		= DISABLE_CLUSTERING,
-	.emulated		= 1,
-	.proc_name		= "ide-scsi",
-};
-
-static int ide_scsi_probe(ide_drive_t *drive)
-{
-	idescsi_scsi_t *idescsi;
-	struct Scsi_Host *host;
-	struct gendisk *g;
-	static int warned;
-	int err = -ENOMEM;
-	u16 last_lun;
-
-	if (!warned && drive->media == ide_cdrom) {
-		printk(KERN_WARNING "ide-scsi is deprecated for cd burning! Use ide-cd and give dev=/dev/hdX as device\n");
-		warned = 1;
-	}
-
-	if (idescsi_nocd && drive->media == ide_cdrom)
-		return -ENODEV;
-
-	if (!strstr("ide-scsi", drive->driver_req) ||
-	    drive->media == ide_disk ||
-	    !(host = scsi_host_alloc(&idescsi_template,sizeof(idescsi_scsi_t))))
-		return -ENODEV;
-
-	drive->dev_flags |= IDE_DFLAG_SCSI;
-
-	g = alloc_disk(1 << PARTN_BITS);
-	if (!g)
-		goto out_host_put;
-
-	ide_init_disk(g, drive);
-
-	host->max_id = 1;
-
-	last_lun = drive->id[ATA_ID_LAST_LUN];
-	if (last_lun)
-		debug_log("%s: last_lun=%u\n", drive->name, last_lun);
-
-	if ((last_lun & 7) != 7)
-		host->max_lun = (last_lun & 7) + 1;
-	else
-		host->max_lun = 1;
-
-	drive->driver_data = host;
-	idescsi = scsihost_to_idescsi(host);
-	idescsi->drive = drive;
-	idescsi->driver = &idescsi_driver;
-	idescsi->host = host;
-	idescsi->disk = g;
-	g->private_data = &idescsi->driver;
-	err = 0;
-	idescsi_setup(drive, idescsi);
-	g->fops = &idescsi_ops;
-	ide_register_region(g);
-	err = scsi_add_host(host, &drive->gendev);
-	if (!err) {
-		scsi_scan_host(host);
-		return 0;
-	}
-	/* fall through on error */
-	ide_unregister_region(g);
-	ide_proc_unregister_driver(drive, &idescsi_driver);
-
-	put_disk(g);
-out_host_put:
-	drive->dev_flags &= ~IDE_DFLAG_SCSI;
-	scsi_host_put(host);
-	return err;
-}
-
-static int __init init_idescsi_module(void)
-{
-	return driver_register(&idescsi_driver.gen_driver);
-}
-
-static void __exit exit_idescsi_module(void)
-{
-	driver_unregister(&idescsi_driver.gen_driver);
-}
-
-module_param(idescsi_nocd, int, 0600);
-MODULE_PARM_DESC(idescsi_nocd, "Disable handling of CD-ROMs so they may be driven by ide-cd");
-module_init(init_idescsi_module);
-module_exit(exit_idescsi_module);
-MODULE_LICENSE("GPL");
diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c
index 303272a..daa0056 100644
--- a/drivers/serial/8250.c
+++ b/drivers/serial/8250.c
@@ -279,6 +279,13 @@
 		.fcr		= UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10,
 		.flags		= UART_CAP_FIFO,
 	},
+	[PORT_OCTEON] = {
+		.name		= "OCTEON",
+		.fifo_size	= 64,
+		.tx_loadsz	= 64,
+		.fcr		= UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10,
+		.flags		= UART_CAP_FIFO,
+	},
 };
 
 #if defined (CONFIG_SERIAL_8250_AU1X00)
@@ -303,16 +310,16 @@
 };
 
 /* sane hardware needs no mapping */
-static inline int map_8250_in_reg(struct uart_8250_port *up, int offset)
+static inline int map_8250_in_reg(struct uart_port *p, int offset)
 {
-	if (up->port.iotype != UPIO_AU)
+	if (p->iotype != UPIO_AU)
 		return offset;
 	return au_io_in_map[offset];
 }
 
-static inline int map_8250_out_reg(struct uart_8250_port *up, int offset)
+static inline int map_8250_out_reg(struct uart_port *p, int offset)
 {
-	if (up->port.iotype != UPIO_AU)
+	if (p->iotype != UPIO_AU)
 		return offset;
 	return au_io_out_map[offset];
 }
@@ -341,16 +348,16 @@
 		[UART_SCR]	= 0x2c
 	};
 
-static inline int map_8250_in_reg(struct uart_8250_port *up, int offset)
+static inline int map_8250_in_reg(struct uart_port *p, int offset)
 {
-	if (up->port.iotype != UPIO_RM9000)
+	if (p->iotype != UPIO_RM9000)
 		return offset;
 	return regmap_in[offset];
 }
 
-static inline int map_8250_out_reg(struct uart_8250_port *up, int offset)
+static inline int map_8250_out_reg(struct uart_port *p, int offset)
 {
-	if (up->port.iotype != UPIO_RM9000)
+	if (p->iotype != UPIO_RM9000)
 		return offset;
 	return regmap_out[offset];
 }
@@ -363,108 +370,170 @@
 
 #endif
 
-static unsigned int serial_in(struct uart_8250_port *up, int offset)
+static unsigned int hub6_serial_in(struct uart_port *p, int offset)
 {
-	unsigned int tmp;
-	offset = map_8250_in_reg(up, offset) << up->port.regshift;
-
-	switch (up->port.iotype) {
-	case UPIO_HUB6:
-		outb(up->port.hub6 - 1 + offset, up->port.iobase);
-		return inb(up->port.iobase + 1);
-
-	case UPIO_MEM:
-	case UPIO_DWAPB:
-		return readb(up->port.membase + offset);
-
-	case UPIO_RM9000:
-	case UPIO_MEM32:
-		return readl(up->port.membase + offset);
-
-#ifdef CONFIG_SERIAL_8250_AU1X00
-	case UPIO_AU:
-		return __raw_readl(up->port.membase + offset);
-#endif
-
-	case UPIO_TSI:
-		if (offset == UART_IIR) {
-			tmp = readl(up->port.membase + (UART_IIR & ~3));
-			return (tmp >> 16) & 0xff; /* UART_IIR % 4 == 2 */
-		} else
-			return readb(up->port.membase + offset);
-
-	default:
-		return inb(up->port.iobase + offset);
-	}
+	offset = map_8250_in_reg(p, offset) << p->regshift;
+	outb(p->hub6 - 1 + offset, p->iobase);
+	return inb(p->iobase + 1);
 }
 
-static void
-serial_out(struct uart_8250_port *up, int offset, int value)
+static void hub6_serial_out(struct uart_port *p, int offset, int value)
 {
-	/* Save the offset before it's remapped */
-	int save_offset = offset;
-	offset = map_8250_out_reg(up, offset) << up->port.regshift;
+	offset = map_8250_out_reg(p, offset) << p->regshift;
+	outb(p->hub6 - 1 + offset, p->iobase);
+	outb(value, p->iobase + 1);
+}
 
-	switch (up->port.iotype) {
+static unsigned int mem_serial_in(struct uart_port *p, int offset)
+{
+	offset = map_8250_in_reg(p, offset) << p->regshift;
+	return readb(p->membase + offset);
+}
+
+static void mem_serial_out(struct uart_port *p, int offset, int value)
+{
+	offset = map_8250_out_reg(p, offset) << p->regshift;
+	writeb(value, p->membase + offset);
+}
+
+static void mem32_serial_out(struct uart_port *p, int offset, int value)
+{
+	offset = map_8250_out_reg(p, offset) << p->regshift;
+	writel(value, p->membase + offset);
+}
+
+static unsigned int mem32_serial_in(struct uart_port *p, int offset)
+{
+	offset = map_8250_in_reg(p, offset) << p->regshift;
+	return readl(p->membase + offset);
+}
+
+#ifdef CONFIG_SERIAL_8250_AU1X00
+static unsigned int au_serial_in(struct uart_port *p, int offset)
+{
+	offset = map_8250_in_reg(p, offset) << p->regshift;
+	return __raw_readl(p->membase + offset);
+}
+
+static void au_serial_out(struct uart_port *p, int offset, int value)
+{
+	offset = map_8250_out_reg(p, offset) << p->regshift;
+	__raw_writel(value, p->membase + offset);
+}
+#endif
+
+static unsigned int tsi_serial_in(struct uart_port *p, int offset)
+{
+	unsigned int tmp;
+	offset = map_8250_in_reg(p, offset) << p->regshift;
+	if (offset == UART_IIR) {
+		tmp = readl(p->membase + (UART_IIR & ~3));
+		return (tmp >> 16) & 0xff; /* UART_IIR % 4 == 2 */
+	} else
+		return readb(p->membase + offset);
+}
+
+static void tsi_serial_out(struct uart_port *p, int offset, int value)
+{
+	offset = map_8250_out_reg(p, offset) << p->regshift;
+	if (!((offset == UART_IER) && (value & UART_IER_UUE)))
+		writeb(value, p->membase + offset);
+}
+
+static void dwapb_serial_out(struct uart_port *p, int offset, int value)
+{
+	int save_offset = offset;
+	offset = map_8250_out_reg(p, offset) << p->regshift;
+	/* Save the LCR value so it can be re-written when a
+	 * Busy Detect interrupt occurs. */
+	if (save_offset == UART_LCR) {
+		struct uart_8250_port *up = (struct uart_8250_port *)p;
+		up->lcr = value;
+	}
+	writeb(value, p->membase + offset);
+	/* Read the IER to ensure any interrupt is cleared before
+	 * returning from ISR. */
+	if (save_offset == UART_TX || save_offset == UART_IER)
+		value = p->serial_in(p, UART_IER);
+}
+
+static unsigned int io_serial_in(struct uart_port *p, int offset)
+{
+	offset = map_8250_in_reg(p, offset) << p->regshift;
+	return inb(p->iobase + offset);
+}
+
+static void io_serial_out(struct uart_port *p, int offset, int value)
+{
+	offset = map_8250_out_reg(p, offset) << p->regshift;
+	outb(value, p->iobase + offset);
+}
+
+static void set_io_from_upio(struct uart_port *p)
+{
+	switch (p->iotype) {
 	case UPIO_HUB6:
-		outb(up->port.hub6 - 1 + offset, up->port.iobase);
-		outb(value, up->port.iobase + 1);
+		p->serial_in = hub6_serial_in;
+		p->serial_out = hub6_serial_out;
 		break;
 
 	case UPIO_MEM:
-		writeb(value, up->port.membase + offset);
+		p->serial_in = mem_serial_in;
+		p->serial_out = mem_serial_out;
 		break;
 
 	case UPIO_RM9000:
 	case UPIO_MEM32:
-		writel(value, up->port.membase + offset);
+		p->serial_in = mem32_serial_in;
+		p->serial_out = mem32_serial_out;
 		break;
 
 #ifdef CONFIG_SERIAL_8250_AU1X00
 	case UPIO_AU:
-		__raw_writel(value, up->port.membase + offset);
+		p->serial_in = au_serial_in;
+		p->serial_out = au_serial_out;
 		break;
 #endif
 	case UPIO_TSI:
-		if (!((offset == UART_IER) && (value & UART_IER_UUE)))
-			writeb(value, up->port.membase + offset);
+		p->serial_in = tsi_serial_in;
+		p->serial_out = tsi_serial_out;
 		break;
 
 	case UPIO_DWAPB:
-		/* Save the LCR value so it can be re-written when a
-		 * Busy Detect interrupt occurs. */
-		if (save_offset == UART_LCR)
-			up->lcr = value;
-		writeb(value, up->port.membase + offset);
-		/* Read the IER to ensure any interrupt is cleared before
-		 * returning from ISR. */
-		if (save_offset == UART_TX || save_offset == UART_IER)
-			value = serial_in(up, UART_IER);
+		p->serial_in = mem_serial_in;
+		p->serial_out = dwapb_serial_out;
 		break;
 
 	default:
-		outb(value, up->port.iobase + offset);
+		p->serial_in = io_serial_in;
+		p->serial_out = io_serial_out;
+		break;
 	}
 }
 
 static void
 serial_out_sync(struct uart_8250_port *up, int offset, int value)
 {
-	switch (up->port.iotype) {
+	struct uart_port *p = &up->port;
+	switch (p->iotype) {
 	case UPIO_MEM:
 	case UPIO_MEM32:
 #ifdef CONFIG_SERIAL_8250_AU1X00
 	case UPIO_AU:
 #endif
 	case UPIO_DWAPB:
-		serial_out(up, offset, value);
-		serial_in(up, UART_LCR);	/* safe, no side-effects */
+		p->serial_out(p, offset, value);
+		p->serial_in(p, UART_LCR);	/* safe, no side-effects */
 		break;
 	default:
-		serial_out(up, offset, value);
+		p->serial_out(p, offset, value);
 	}
 }
 
+#define serial_in(up, offset)		\
+	(up->port.serial_in(&(up)->port, (offset)))
+#define serial_out(up, offset, value)	\
+	(up->port.serial_out(&(up)->port, (offset), (value)))
 /*
  * We used to support using pause I/O for certain machines.  We
  * haven't supported this for a while, but just in case it's badly
@@ -2576,6 +2645,7 @@
 		up->port.membase  = old_serial_port[i].iomem_base;
 		up->port.iotype   = old_serial_port[i].io_type;
 		up->port.regshift = old_serial_port[i].iomem_reg_shift;
+		set_io_from_upio(&up->port);
 		if (share_irqs)
 			up->port.flags |= UPF_SHARE_IRQ;
 	}
@@ -2752,12 +2822,30 @@
  */
 int __init early_serial_setup(struct uart_port *port)
 {
+	struct uart_port *p;
+
 	if (port->line >= ARRAY_SIZE(serial8250_ports))
 		return -ENODEV;
 
 	serial8250_isa_init_ports();
-	serial8250_ports[port->line].port	= *port;
-	serial8250_ports[port->line].port.ops	= &serial8250_pops;
+	p = &serial8250_ports[port->line].port;
+	p->iobase       = port->iobase;
+	p->membase      = port->membase;
+	p->irq          = port->irq;
+	p->uartclk      = port->uartclk;
+	p->fifosize     = port->fifosize;
+	p->regshift     = port->regshift;
+	p->iotype       = port->iotype;
+	p->flags        = port->flags;
+	p->mapbase      = port->mapbase;
+	p->private_data = port->private_data;
+
+	set_io_from_upio(p);
+	if (port->serial_in)
+		p->serial_in = port->serial_in;
+	if (port->serial_out)
+		p->serial_out = port->serial_out;
+
 	return 0;
 }
 
@@ -2822,6 +2910,9 @@
 		port.mapbase		= p->mapbase;
 		port.hub6		= p->hub6;
 		port.private_data	= p->private_data;
+		port.type		= p->type;
+		port.serial_in		= p->serial_in;
+		port.serial_out		= p->serial_out;
 		port.dev		= &dev->dev;
 		if (share_irqs)
 			port.flags |= UPF_SHARE_IRQ;
@@ -2976,6 +3067,20 @@
 		if (port->dev)
 			uart->port.dev = port->dev;
 
+		if (port->flags & UPF_FIXED_TYPE) {
+			uart->port.type = port->type;
+			uart->port.fifosize = uart_config[port->type].fifo_size;
+			uart->capabilities = uart_config[port->type].flags;
+			uart->tx_loadsz = uart_config[port->type].tx_loadsz;
+		}
+
+		set_io_from_upio(&uart->port);
+		/* Possibly override default I/O functions.  */
+		if (port->serial_in)
+			uart->port.serial_in = port->serial_in;
+		if (port->serial_out)
+			uart->port.serial_out = port->serial_out;
+
 		ret = uart_add_one_port(&serial8250_reg, &uart->port);
 		if (ret == 0)
 			ret = uart->port.line;
diff --git a/drivers/serial/8250_pci.c b/drivers/serial/8250_pci.c
index 5450a0e..c088146 100644
--- a/drivers/serial/8250_pci.c
+++ b/drivers/serial/8250_pci.c
@@ -42,7 +42,8 @@
 	u32	subvendor;
 	u32	subdevice;
 	int	(*init)(struct pci_dev *dev);
-	int	(*setup)(struct serial_private *, struct pciserial_board *,
+	int	(*setup)(struct serial_private *,
+			 const struct pciserial_board *,
 			 struct uart_port *, int);
 	void	(*exit)(struct pci_dev *dev);
 };
@@ -107,7 +108,7 @@
  * ADDI-DATA GmbH communication cards <info@addi-data.com>
  */
 static int addidata_apci7800_setup(struct serial_private *priv,
-				struct pciserial_board *board,
+				const struct pciserial_board *board,
 				struct uart_port *port, int idx)
 {
 	unsigned int bar = 0, offset = board->first_offset;
@@ -134,7 +135,7 @@
  * Not that ugly ;) -- HW
  */
 static int
-afavlab_setup(struct serial_private *priv, struct pciserial_board *board,
+afavlab_setup(struct serial_private *priv, const struct pciserial_board *board,
 	      struct uart_port *port, int idx)
 {
 	unsigned int bar, offset = board->first_offset;
@@ -188,8 +189,9 @@
  * some serial ports are supposed to be hidden on certain models.
  */
 static int
-pci_hp_diva_setup(struct serial_private *priv, struct pciserial_board *board,
-	      struct uart_port *port, int idx)
+pci_hp_diva_setup(struct serial_private *priv,
+		const struct pciserial_board *board,
+		struct uart_port *port, int idx)
 {
 	unsigned int offset = board->first_offset;
 	unsigned int bar = FL_GET_BASE(board->flags);
@@ -306,7 +308,7 @@
 
 /* SBS Technologies Inc. PMC-OCTPRO and P-OCTAL cards */
 static int
-sbs_setup(struct serial_private *priv, struct pciserial_board *board,
+sbs_setup(struct serial_private *priv, const struct pciserial_board *board,
 		struct uart_port *port, int idx)
 {
 	unsigned int bar, offset = board->first_offset;
@@ -463,7 +465,7 @@
 }
 
 static int pci_siig_setup(struct serial_private *priv,
-			  struct pciserial_board *board,
+			  const struct pciserial_board *board,
 			  struct uart_port *port, int idx)
 {
 	unsigned int bar = FL_GET_BASE(board->flags) + idx, offset = 0;
@@ -534,7 +536,8 @@
  * Ugh, this is ugly as all hell --- TYT
  */
 static int
-pci_timedia_setup(struct serial_private *priv, struct pciserial_board *board,
+pci_timedia_setup(struct serial_private *priv,
+		  const struct pciserial_board *board,
 		  struct uart_port *port, int idx)
 {
 	unsigned int bar = 0, offset = board->first_offset;
@@ -568,7 +571,7 @@
  */
 static int
 titan_400l_800l_setup(struct serial_private *priv,
-		      struct pciserial_board *board,
+		      const struct pciserial_board *board,
 		      struct uart_port *port, int idx)
 {
 	unsigned int bar, offset = board->first_offset;
@@ -737,8 +740,41 @@
 	release_region(ioport, ITE_887x_IOSIZE);
 }
 
+/*
+ * Oxford Semiconductor Inc.
+ * Check that the device is part of the Tornado range of devices, then determine
+ * the number of ports available on the device.
+ */
+static int pci_oxsemi_tornado_init(struct pci_dev *dev)
+{
+	u8 __iomem *p;
+	unsigned long deviceID;
+	unsigned int  number_uarts = 0;
+
+	/* OxSemi Tornado devices are all 0xCxxx */
+	if (dev->vendor == PCI_VENDOR_ID_OXSEMI &&
+	    (dev->device & 0xF000) != 0xC000)
+		return 0;
+
+	p = pci_iomap(dev, 0, 5);
+	if (p == NULL)
+		return -ENOMEM;
+
+	deviceID = ioread32(p);
+	/* Tornado device */
+	if (deviceID == 0x07000200) {
+		number_uarts = ioread8(p + 4);
+		printk(KERN_DEBUG
+			"%d ports detected on Oxford PCI Express device\n",
+								number_uarts);
+	}
+	pci_iounmap(dev, p);
+	return number_uarts;
+}
+
 static int
-pci_default_setup(struct serial_private *priv, struct pciserial_board *board,
+pci_default_setup(struct serial_private *priv,
+		  const struct pciserial_board *board,
 		  struct uart_port *port, int idx)
 {
 	unsigned int bar, offset = board->first_offset, maxnr;
@@ -1018,6 +1054,25 @@
 		.setup		= pci_default_setup,
 	},
 	/*
+	 * For Oxford Semiconductor and Mainpine
+	 */
+	{
+		.vendor		= PCI_VENDOR_ID_OXSEMI,
+		.device		= PCI_ANY_ID,
+		.subvendor	= PCI_ANY_ID,
+		.subdevice	= PCI_ANY_ID,
+		.init		= pci_oxsemi_tornado_init,
+		.setup		= pci_default_setup,
+	},
+	{
+		.vendor		= PCI_VENDOR_ID_MAINPINE,
+		.device		= PCI_ANY_ID,
+		.subvendor	= PCI_ANY_ID,
+		.subdevice	= PCI_ANY_ID,
+		.init		= pci_oxsemi_tornado_init,
+		.setup		= pci_default_setup,
+	},
+	/*
 	 * Default "match everything" terminator entry
 	 */
 	{
@@ -1048,7 +1103,7 @@
 }
 
 static inline int get_pci_irq(struct pci_dev *dev,
-				struct pciserial_board *board)
+				const struct pciserial_board *board)
 {
 	if (board->flags & FL_NOIRQ)
 		return 0;
@@ -1843,8 +1898,8 @@
 }
 
 static inline int
-serial_pci_matches(struct pciserial_board *board,
-		   struct pciserial_board *guessed)
+serial_pci_matches(const struct pciserial_board *board,
+		   const struct pciserial_board *guessed)
 {
 	return
 	    board->num_ports == guessed->num_ports &&
@@ -1854,54 +1909,14 @@
 	    board->first_offset == guessed->first_offset;
 }
 
-/*
- * Oxford Semiconductor Inc.
- * Check that device is part of the Tornado range of devices, then determine
- * the number of ports available on the device.
- */
-static int pci_oxsemi_tornado_init(struct pci_dev *dev, struct pciserial_board *board)
-{
-	u8 __iomem *p;
-	unsigned long deviceID;
-	unsigned int  number_uarts;
-
-	/* OxSemi Tornado devices are all 0xCxxx */
-	if (dev->vendor == PCI_VENDOR_ID_OXSEMI &&
-	    (dev->device & 0xF000) != 0xC000)
-		return 0;
-
-	p = pci_iomap(dev, 0, 5);
-	if (p == NULL)
-		return -ENOMEM;
-
-	deviceID = ioread32(p);
-	/* Tornado device */
-	if (deviceID == 0x07000200) {
-		number_uarts = ioread8(p + 4);
-		board->num_ports = number_uarts;
-		printk(KERN_DEBUG
-			"%d ports detected on Oxford PCI Express device\n",
-								number_uarts);
-	}
-	pci_iounmap(dev, p);
-	return 0;
-}
-
 struct serial_private *
-pciserial_init_ports(struct pci_dev *dev, struct pciserial_board *board)
+pciserial_init_ports(struct pci_dev *dev, const struct pciserial_board *board)
 {
 	struct uart_port serial_port;
 	struct serial_private *priv;
 	struct pci_serial_quirk *quirk;
 	int rc, nr_ports, i;
 
-	/*
-	 * Find number of ports on board
-	 */
-	if (dev->vendor == PCI_VENDOR_ID_OXSEMI ||
-	    dev->vendor == PCI_VENDOR_ID_MAINPINE)
-		pci_oxsemi_tornado_init(dev, board);
-
 	nr_ports = board->num_ports;
 
 	/*
@@ -2028,7 +2043,8 @@
 pciserial_init_one(struct pci_dev *dev, const struct pci_device_id *ent)
 {
 	struct serial_private *priv;
-	struct pciserial_board *board, tmp;
+	const struct pciserial_board *board;
+	struct pciserial_board tmp;
 	int rc;
 
 	if (ent->driver_data >= ARRAY_SIZE(pci_boards)) {
@@ -2055,7 +2071,7 @@
 		 * We matched one of our class entries.  Try to
 		 * determine the parameters of this board.
 		 */
-		rc = serial_pci_guess_board(dev, board);
+		rc = serial_pci_guess_board(dev, &tmp);
 		if (rc)
 			goto disable;
 	} else {
@@ -2271,6 +2287,9 @@
 	{	PCI_VENDOR_ID_SEALEVEL, PCI_DEVICE_ID_SEALEVEL_COMM8,
 		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
 		pbn_b2_8_115200 },
+	{	PCI_VENDOR_ID_SEALEVEL, PCI_DEVICE_ID_SEALEVEL_7803,
+		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+		pbn_b2_8_460800 },
 	{	PCI_VENDOR_ID_SEALEVEL, PCI_DEVICE_ID_SEALEVEL_UCOMM8,
 		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
 		pbn_b2_8_115200 },
@@ -2372,6 +2391,9 @@
 		 * For now just used the hex ID 0x950a.
 		 */
 	{	PCI_VENDOR_ID_OXSEMI, 0x950a,
+		PCI_SUBVENDOR_ID_SIIG, PCI_SUBDEVICE_ID_SIIG_DUAL_SERIAL, 0, 0,
+		pbn_b0_2_115200 },
+	{	PCI_VENDOR_ID_OXSEMI, 0x950a,
 		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
 		pbn_b0_2_1130000 },
 	{	PCI_VENDOR_ID_OXSEMI, PCI_DEVICE_ID_OXSEMI_16PCI954,
diff --git a/drivers/serial/bfin_5xx.c b/drivers/serial/bfin_5xx.c
index 569f0e2..318d69d 100644
--- a/drivers/serial/bfin_5xx.c
+++ b/drivers/serial/bfin_5xx.c
@@ -22,7 +22,8 @@
 #include <linux/tty_flip.h>
 #include <linux/serial_core.h>
 
-#ifdef CONFIG_KGDB_UART
+#if defined(CONFIG_KGDB_SERIAL_CONSOLE) || \
+	defined(CONFIG_KGDB_SERIAL_CONSOLE_MODULE)
 #include <linux/kgdb.h>
 #include <asm/irq_regs.h>
 #endif
@@ -45,6 +46,16 @@
 static struct bfin_serial_port bfin_serial_ports[BFIN_UART_NR_PORTS];
 static int nr_active_ports = ARRAY_SIZE(bfin_serial_resource);
 
+#if defined(CONFIG_KGDB_SERIAL_CONSOLE) || \
+	defined(CONFIG_KGDB_SERIAL_CONSOLE_MODULE)
+
+# ifndef CONFIG_SERIAL_BFIN_PIO
+#  error KGDB only supports UART in PIO mode.
+# endif
+
+static int kgdboc_port_line;
+static int kgdboc_break_enabled;
+#endif
 /*
  * Setup for console. Argument comes from the menuconfig
  */
@@ -62,13 +73,17 @@
 
 static void bfin_serial_mctrl_check(struct bfin_serial_port *uart);
 
+static void bfin_serial_reset_irda(struct uart_port *port);
+
 /*
  * interrupts are disabled on entry
  */
 static void bfin_serial_stop_tx(struct uart_port *port)
 {
 	struct bfin_serial_port *uart = (struct bfin_serial_port *)port;
+#ifdef CONFIG_SERIAL_BFIN_DMA
 	struct circ_buf *xmit = &uart->port.info->xmit;
+#endif
 
 	while (!(UART_GET_LSR(uart) & TEMT))
 		cpu_relax();
@@ -94,6 +109,14 @@
 static void bfin_serial_start_tx(struct uart_port *port)
 {
 	struct bfin_serial_port *uart = (struct bfin_serial_port *)port;
+	struct tty_struct *tty = uart->port.info->port.tty;
+
+	/*
+	 * To avoid losing the RX interrupt, we reset the IR function
+	 * before sending data.
+	 */
+	if (tty->termios->c_line == N_IRDA)
+		bfin_serial_reset_irda(port);
 
 #ifdef CONFIG_SERIAL_BFIN_DMA
 	if (uart->tx_done)
@@ -110,9 +133,7 @@
 static void bfin_serial_stop_rx(struct uart_port *port)
 {
 	struct bfin_serial_port *uart = (struct bfin_serial_port *)port;
-#ifdef CONFIG_KGDB_UART
-	if (uart->port.line != CONFIG_KGDB_UART_PORT)
-#endif
+
 	UART_CLEAR_IER(uart, ERBFI);
 }
 
@@ -123,49 +144,6 @@
 {
 }
 
-#ifdef CONFIG_KGDB_UART
-static int kgdb_entry_state;
-
-void kgdb_put_debug_char(int chr)
-{
-	struct bfin_serial_port *uart;
-
-	if (CONFIG_KGDB_UART_PORT < 0
-		|| CONFIG_KGDB_UART_PORT >= BFIN_UART_NR_PORTS)
-		uart = &bfin_serial_ports[0];
-	else
-		uart = &bfin_serial_ports[CONFIG_KGDB_UART_PORT];
-
-	while (!(UART_GET_LSR(uart) & THRE)) {
-		SSYNC();
-	}
-
-	UART_CLEAR_DLAB(uart);
-	UART_PUT_CHAR(uart, (unsigned char)chr);
-	SSYNC();
-}
-
-int kgdb_get_debug_char(void)
-{
-	struct bfin_serial_port *uart;
-	unsigned char chr;
-
-	if (CONFIG_KGDB_UART_PORT < 0
-		|| CONFIG_KGDB_UART_PORT >= BFIN_UART_NR_PORTS)
-		uart = &bfin_serial_ports[0];
-	else
-		uart = &bfin_serial_ports[CONFIG_KGDB_UART_PORT];
-
-	while(!(UART_GET_LSR(uart) & DR)) {
-		SSYNC();
-	}
-	UART_CLEAR_DLAB(uart);
-	chr = UART_GET_CHAR(uart);
-	SSYNC();
-
-	return chr;
-}
-#endif
 
 #if ANOMALY_05000363 && defined(CONFIG_SERIAL_BFIN_PIO)
 # define UART_GET_ANOMALY_THRESHOLD(uart)    ((uart)->anomaly_threshold)
@@ -178,7 +156,7 @@
 #ifdef CONFIG_SERIAL_BFIN_PIO
 static void bfin_serial_rx_chars(struct bfin_serial_port *uart)
 {
-	struct tty_struct *tty = uart->port.info->port.tty;
+	struct tty_struct *tty = NULL;
 	unsigned int status, ch, flg;
 	static struct timeval anomaly_start = { .tv_sec = 0 };
 
@@ -188,27 +166,18 @@
  	ch = UART_GET_CHAR(uart);
  	uart->port.icount.rx++;
 
-#ifdef CONFIG_KGDB_UART
-	if (uart->port.line == CONFIG_KGDB_UART_PORT) {
-		struct pt_regs *regs = get_irq_regs();
-		if (uart->port.cons->index == CONFIG_KGDB_UART_PORT && ch == 0x1) { /* Ctrl + A */
-			kgdb_breakkey_pressed(regs);
+#if defined(CONFIG_KGDB_SERIAL_CONSOLE) || \
+	defined(CONFIG_KGDB_SERIAL_CONSOLE_MODULE)
+	if (kgdb_connected && kgdboc_port_line == uart->port.line)
+		if (ch == 0x3) {/* Ctrl + C */
+			kgdb_breakpoint();
 			return;
-		} else if (kgdb_entry_state == 0 && ch == '$') {/* connection from KGDB */
-			kgdb_entry_state = 1;
-		} else if (kgdb_entry_state == 1 && ch == 'q') {
-			kgdb_entry_state = 0;
-			kgdb_breakkey_pressed(regs);
-			return;
-		} else if (ch == 0x3) {/* Ctrl + C */
-			kgdb_entry_state = 0;
-			kgdb_breakkey_pressed(regs);
-			return;
-		} else {
-			kgdb_entry_state = 0;
 		}
-	}
+
+	if (!uart->port.info || !uart->port.info->tty)
+		return;
 #endif
+	tty = uart->port.info->tty;
 
 	if (ANOMALY_05000363) {
 		/* The BF533 (and BF561) family of processors have a nice anomaly
@@ -250,6 +219,7 @@
 			return;
 
  known_good_char:
+			status &= ~BI;
 			anomaly_start.tv_sec = 0;
 		}
 	}
@@ -445,7 +415,9 @@
 
 void bfin_serial_rx_dma_timeout(struct bfin_serial_port *uart)
 {
-	int x_pos, pos;
+	int x_pos, pos, flags;
+
+	spin_lock_irqsave(&uart->port.lock, flags);
 
 	uart->rx_dma_nrows = get_dma_curr_ycount(uart->rx_dma_channel);
 	x_pos = get_dma_curr_xcount(uart->rx_dma_channel);
@@ -463,6 +435,8 @@
 		uart->rx_dma_buf.tail = uart->rx_dma_buf.head;
 	}
 
+	spin_unlock_irqrestore(&uart->port.lock, flags);
+
 	mod_timer(&(uart->rx_dma_timer), jiffies + DMA_RX_FLUSH_JIFFIES);
 }
 
@@ -497,10 +471,9 @@
 	spin_lock(&uart->port.lock);
 	irqstat = get_dma_curr_irqstat(uart->rx_dma_channel);
 	clear_dma_irqstat(uart->rx_dma_channel);
+	bfin_serial_dma_rx_chars(uart);
 	spin_unlock(&uart->port.lock);
 
-	mod_timer(&(uart->rx_dma_timer), jiffies);
-
 	return IRQ_HANDLED;
 }
 #endif
@@ -630,16 +603,16 @@
 	uart->rx_dma_timer.expires = jiffies + DMA_RX_FLUSH_JIFFIES;
 	add_timer(&(uart->rx_dma_timer));
 #else
+#if defined(CONFIG_KGDB_SERIAL_CONSOLE) || \
+	defined(CONFIG_KGDB_SERIAL_CONSOLE_MODULE)
+	if (kgdboc_port_line == uart->port.line && kgdboc_break_enabled)
+		kgdboc_break_enabled = 0;
+	else {
+# endif
 	if (request_irq(uart->port.irq, bfin_serial_rx_int, IRQF_DISABLED,
 	     "BFIN_UART_RX", uart)) {
-# ifdef	CONFIG_KGDB_UART
-		if (uart->port.line != CONFIG_KGDB_UART_PORT) {
-# endif
 		printk(KERN_NOTICE "Unable to attach BlackFin UART RX interrupt\n");
 		return -EBUSY;
-# ifdef	CONFIG_KGDB_UART
-		}
-# endif
 	}
 
 	if (request_irq
@@ -685,6 +658,10 @@
 		}
 	}
 # endif
+#if defined(CONFIG_KGDB_SERIAL_CONSOLE) || \
+	defined(CONFIG_KGDB_SERIAL_CONSOLE_MODULE)
+	}
+# endif
 #endif
 	UART_SET_IER(uart, ERBFI);
 	return 0;
@@ -716,9 +693,6 @@
 		break;
 	};
 #endif
-#ifdef	CONFIG_KGDB_UART
-	if (uart->port.line != CONFIG_KGDB_UART_PORT)
-#endif
 	free_irq(uart->port.irq, uart);
 	free_irq(uart->port.irq+1, uart);
 #endif
@@ -887,6 +861,65 @@
 	}
 }
 
+#ifdef CONFIG_CONSOLE_POLL
+static void bfin_serial_poll_put_char(struct uart_port *port, unsigned char chr)
+{
+	struct bfin_serial_port *uart = (struct bfin_serial_port *)port;
+
+	while (!(UART_GET_LSR(uart) & THRE))
+		cpu_relax();
+
+	UART_CLEAR_DLAB(uart);
+	UART_PUT_CHAR(uart, (unsigned char)chr);
+}
+
+static int bfin_serial_poll_get_char(struct uart_port *port)
+{
+	struct bfin_serial_port *uart = (struct bfin_serial_port *)port;
+	unsigned char chr;
+
+	while (!(UART_GET_LSR(uart) & DR))
+		cpu_relax();
+
+	UART_CLEAR_DLAB(uart);
+	chr = UART_GET_CHAR(uart);
+
+	return chr;
+}
+#endif
+
+#if defined(CONFIG_KGDB_SERIAL_CONSOLE) || \
+	defined(CONFIG_KGDB_SERIAL_CONSOLE_MODULE)
+static void bfin_kgdboc_port_shutdown(struct uart_port *port)
+{
+	if (kgdboc_break_enabled) {
+		kgdboc_break_enabled = 0;
+		bfin_serial_shutdown(port);
+	}
+}
+
+static int bfin_kgdboc_port_startup(struct uart_port *port)
+{
+	kgdboc_port_line = port->line;
+	kgdboc_break_enabled = !bfin_serial_startup(port);
+	return 0;
+}
+#endif
+
+static void bfin_serial_reset_irda(struct uart_port *port)
+{
+	int line = port->line;
+	unsigned short val;
+
+	val = UART_GET_GCTL(&bfin_serial_ports[line]);
+	val &= ~(IREN | RPOLC);
+	UART_PUT_GCTL(&bfin_serial_ports[line], val);
+	SSYNC();
+	val |= (IREN | RPOLC);
+	UART_PUT_GCTL(&bfin_serial_ports[line], val);
+	SSYNC();
+}
+
 static struct uart_ops bfin_serial_pops = {
 	.tx_empty	= bfin_serial_tx_empty,
 	.set_mctrl	= bfin_serial_set_mctrl,
@@ -905,6 +938,15 @@
 	.request_port	= bfin_serial_request_port,
 	.config_port	= bfin_serial_config_port,
 	.verify_port	= bfin_serial_verify_port,
+#if defined(CONFIG_KGDB_SERIAL_CONSOLE) || \
+	defined(CONFIG_KGDB_SERIAL_CONSOLE_MODULE)
+	.kgdboc_port_startup	= bfin_kgdboc_port_startup,
+	.kgdboc_port_shutdown	= bfin_kgdboc_port_shutdown,
+#endif
+#ifdef CONFIG_CONSOLE_POLL
+	.poll_put_char	= bfin_serial_poll_put_char,
+	.poll_get_char	= bfin_serial_poll_get_char,
+#endif
 };
 
 static void __init bfin_serial_init_ports(void)
@@ -950,7 +992,7 @@
 
 }
 
-#ifdef CONFIG_SERIAL_BFIN_CONSOLE
+#if defined(CONFIG_SERIAL_BFIN_CONSOLE) || defined(CONFIG_EARLY_PRINTK)
 /*
  * If the port was already initialised (eg, by a boot loader),
  * try to determine the current setup.
@@ -994,24 +1036,20 @@
 	}
 	pr_debug("%s:baud = %d, parity = %c, bits= %d\n", __func__, *baud, *parity, *bits);
 }
-#endif
 
-#if defined(CONFIG_SERIAL_BFIN_CONSOLE) || defined(CONFIG_EARLY_PRINTK)
 static struct uart_driver bfin_serial_reg;
 
 static int __init
 bfin_serial_console_setup(struct console *co, char *options)
 {
 	struct bfin_serial_port *uart;
-# ifdef CONFIG_SERIAL_BFIN_CONSOLE
 	int baud = 57600;
 	int bits = 8;
 	int parity = 'n';
-#  ifdef CONFIG_SERIAL_BFIN_CTSRTS
+# ifdef CONFIG_SERIAL_BFIN_CTSRTS
 	int flow = 'r';
-#  else
+# else
 	int flow = 'n';
-#  endif
 # endif
 
 	/*
@@ -1023,16 +1061,12 @@
 		co->index = 0;
 	uart = &bfin_serial_ports[co->index];
 
-# ifdef CONFIG_SERIAL_BFIN_CONSOLE
 	if (options)
 		uart_parse_options(options, &baud, &parity, &bits, &flow);
 	else
 		bfin_serial_console_get_options(uart, &baud, &parity, &bits);
 
 	return uart_set_options(&uart->port, co, baud, parity, bits, flow);
-# else
-	return 0;
-# endif
 }
 #endif /* defined (CONFIG_SERIAL_BFIN_CONSOLE) ||
 				 defined (CONFIG_EARLY_PRINTK) */
@@ -1076,10 +1110,7 @@
 {
 	bfin_serial_init_ports();
 	register_console(&bfin_serial_console);
-#ifdef CONFIG_KGDB_UART
-	kgdb_entry_state = 0;
-	init_kgdb_uart();
-#endif
+
 	return 0;
 }
 console_initcall(bfin_serial_rs_console_init);
@@ -1144,7 +1175,7 @@
 	return &bfin_early_serial_console;
 }
 
-#endif /* CONFIG_SERIAL_BFIN_CONSOLE */
+#endif /* CONFIG_EARLY_PRINTK */
 
 static struct uart_driver bfin_serial_reg = {
 	.owner			= THIS_MODULE,
@@ -1235,10 +1266,6 @@
 static int __init bfin_serial_init(void)
 {
 	int ret;
-#ifdef CONFIG_KGDB_UART
-	struct bfin_serial_port *uart = &bfin_serial_ports[CONFIG_KGDB_UART_PORT];
-	struct ktermios t;
-#endif
 
 	pr_info("Serial: Blackfin serial driver\n");
 
@@ -1252,21 +1279,6 @@
 			uart_unregister_driver(&bfin_serial_reg);
 		}
 	}
-#ifdef CONFIG_KGDB_UART
-	if (uart->port.cons->index != CONFIG_KGDB_UART_PORT) {
-		request_irq(uart->port.irq, bfin_serial_rx_int,
-			IRQF_DISABLED, "BFIN_UART_RX", uart);
-		pr_info("Request irq for kgdb uart port\n");
-		UART_SET_IER(uart, ERBFI);
-		SSYNC();
-		t.c_cflag = CS8|B57600;
-		t.c_iflag = 0;
-		t.c_oflag = 0;
-		t.c_lflag = ICANON;
-		t.c_line = CONFIG_KGDB_UART_PORT;
-		bfin_serial_set_termios(&uart->port, &t, &t);
-	}
-#endif
 	return ret;
 }
 
@@ -1276,6 +1288,7 @@
 	uart_unregister_driver(&bfin_serial_reg);
 }
 
+
 module_init(bfin_serial_init);
 module_exit(bfin_serial_exit);
 
diff --git a/drivers/serial/bfin_sport_uart.c b/drivers/serial/bfin_sport_uart.c
index dd8564d..529c0ff 100644
--- a/drivers/serial/bfin_sport_uart.c
+++ b/drivers/serial/bfin_sport_uart.c
@@ -99,7 +99,7 @@
 
 static inline void tx_one_byte(struct sport_uart_port *up, unsigned int value)
 {
-	pr_debug("%s value:%x\n", __FUNCTION__, value);
+	pr_debug("%s value:%x\n", __func__, value);
 	/* Place a Start and Stop bit */
 	__asm__ volatile (
 		"R2 = b#01111111100;\n\t"
@@ -110,7 +110,7 @@
 		:"=r"(value)
 		:"0"(value)
 		:"R2", "R3");
-	pr_debug("%s value:%x\n", __FUNCTION__, value);
+	pr_debug("%s value:%x\n", __func__, value);
 
 	SPORT_PUT_TX(up, value);
 }
@@ -120,7 +120,7 @@
 	unsigned int value, extract;
 
 	value = SPORT_GET_RX32(up);
-	pr_debug("%s value:%x\n", __FUNCTION__, value);
+	pr_debug("%s value:%x\n", __func__, value);
 
 	/* Extract 8 bits data */
 	__asm__ volatile (
@@ -151,12 +151,12 @@
 	/* Set TCR1 and TCR2 */
 	SPORT_PUT_TCR1(up, (LTFS | ITFS | TFSR | TLSBIT | ITCLK));
 	SPORT_PUT_TCR2(up, 10);
-	pr_debug("%s TCR1:%x, TCR2:%x\n", __FUNCTION__, SPORT_GET_TCR1(up), SPORT_GET_TCR2(up));
+	pr_debug("%s TCR1:%x, TCR2:%x\n", __func__, SPORT_GET_TCR1(up), SPORT_GET_TCR2(up));
 
 	/* Set RCR1 and RCR2 */
 	SPORT_PUT_RCR1(up, (RCKFE | LARFS | LRFS | RFSR | IRCLK));
 	SPORT_PUT_RCR2(up, 28);
-	pr_debug("%s RCR1:%x, RCR2:%x\n", __FUNCTION__, SPORT_GET_RCR1(up), SPORT_GET_RCR2(up));
+	pr_debug("%s RCR1:%x, RCR2:%x\n", __func__, SPORT_GET_RCR1(up), SPORT_GET_RCR2(up));
 
 	tclkdiv = sclk/(2 * baud_rate) - 1;
 	tfsdiv = 12;
@@ -166,7 +166,7 @@
 	SPORT_PUT_RCLKDIV(up, rclkdiv);
 	SSYNC();
 	pr_debug("%s sclk:%d, baud_rate:%d, tclkdiv:%d, tfsdiv:%d, rclkdiv:%d\n",
-			__FUNCTION__, sclk, baud_rate, tclkdiv, tfsdiv, rclkdiv);
+			__func__, sclk, baud_rate, tclkdiv, tfsdiv, rclkdiv);
 
 	return 0;
 }
@@ -231,7 +231,7 @@
 	char buffer[20];
 	int retval;
 
-	pr_debug("%s enter\n", __FUNCTION__);
+	pr_debug("%s enter\n", __func__);
 	memset(buffer, 20, '\0');
 	snprintf(buffer, 20, "%s rx", up->name);
 	retval = request_irq(up->rx_irq, sport_uart_rx_irq, IRQF_SAMPLE_RANDOM, buffer, up);
@@ -320,7 +320,7 @@
 	unsigned int stat;
 
 	stat = SPORT_GET_STAT(up);
-	pr_debug("%s stat:%04x\n", __FUNCTION__, stat);
+	pr_debug("%s stat:%04x\n", __func__, stat);
 	if (stat & TXHRE) {
 		return TIOCSER_TEMT;
 	} else
@@ -329,13 +329,13 @@
 
 static unsigned int sport_get_mctrl(struct uart_port *port)
 {
-	pr_debug("%s enter\n", __FUNCTION__);
+	pr_debug("%s enter\n", __func__);
 	return (TIOCM_CTS | TIOCM_CD | TIOCM_DSR);
 }
 
 static void sport_set_mctrl(struct uart_port *port, unsigned int mctrl)
 {
-	pr_debug("%s enter\n", __FUNCTION__);
+	pr_debug("%s enter\n", __func__);
 }
 
 static void sport_stop_tx(struct uart_port *port)
@@ -343,7 +343,7 @@
 	struct sport_uart_port *up = (struct sport_uart_port *)port;
 	unsigned int stat;
 
-	pr_debug("%s enter\n", __FUNCTION__);
+	pr_debug("%s enter\n", __func__);
 
 	stat = SPORT_GET_STAT(up);
 	while(!(stat & TXHRE)) {
@@ -366,21 +366,21 @@
 {
 	struct sport_uart_port *up = (struct sport_uart_port *)port;
 
-	pr_debug("%s enter\n", __FUNCTION__);
+	pr_debug("%s enter\n", __func__);
 	/* Write data into SPORT FIFO before enable SPROT to transmit */
 	sport_uart_tx_chars(up);
 
 	/* Enable transmit, then an interrupt will generated */
 	SPORT_PUT_TCR1(up, (SPORT_GET_TCR1(up) | TSPEN));
 	SSYNC();
-	pr_debug("%s exit\n", __FUNCTION__);
+	pr_debug("%s exit\n", __func__);
 }
 
 static void sport_stop_rx(struct uart_port *port)
 {
 	struct sport_uart_port *up = (struct sport_uart_port *)port;
 
-	pr_debug("%s enter\n", __FUNCTION__);
+	pr_debug("%s enter\n", __func__);
 	/* Disable sport to stop rx */
 	SPORT_PUT_RCR1(up, (SPORT_GET_RCR1(up) & ~RSPEN));
 	SSYNC();
@@ -388,19 +388,19 @@
 
 static void sport_enable_ms(struct uart_port *port)
 {
-	pr_debug("%s enter\n", __FUNCTION__);
+	pr_debug("%s enter\n", __func__);
 }
 
 static void sport_break_ctl(struct uart_port *port, int break_state)
 {
-	pr_debug("%s enter\n", __FUNCTION__);
+	pr_debug("%s enter\n", __func__);
 }
 
 static void sport_shutdown(struct uart_port *port)
 {
 	struct sport_uart_port *up = (struct sport_uart_port *)port;
 
-	pr_debug("%s enter\n", __FUNCTION__);
+	pr_debug("%s enter\n", __func__);
 
 	/* Disable sport */
 	SPORT_PUT_TCR1(up, (SPORT_GET_TCR1(up) & ~TSPEN));
@@ -421,7 +421,7 @@
 static void sport_set_termios(struct uart_port *port,
 		struct termios *termios, struct termios *old)
 {
-	pr_debug("%s enter, c_cflag:%08x\n", __FUNCTION__, termios->c_cflag);
+	pr_debug("%s enter, c_cflag:%08x\n", __func__, termios->c_cflag);
 	uart_update_timeout(port, CS8 ,port->uartclk);
 }
 
@@ -429,18 +429,18 @@
 {
 	struct sport_uart_port *up = (struct sport_uart_port *)port;
 
-	pr_debug("%s enter\n", __FUNCTION__);
+	pr_debug("%s enter\n", __func__);
 	return up->name;
 }
 
 static void sport_release_port(struct uart_port *port)
 {
-	pr_debug("%s enter\n", __FUNCTION__);
+	pr_debug("%s enter\n", __func__);
 }
 
 static int sport_request_port(struct uart_port *port)
 {
-	pr_debug("%s enter\n", __FUNCTION__);
+	pr_debug("%s enter\n", __func__);
 	return 0;
 }
 
@@ -448,13 +448,13 @@
 {
 	struct sport_uart_port *up = (struct sport_uart_port *)port;
 
-	pr_debug("%s enter\n", __FUNCTION__);
+	pr_debug("%s enter\n", __func__);
 	up->port.type = PORT_BFIN_SPORT;
 }
 
 static int sport_verify_port(struct uart_port *port, struct serial_struct *ser)
 {
-	pr_debug("%s enter\n", __FUNCTION__);
+	pr_debug("%s enter\n", __func__);
 	return 0;
 }
 
@@ -527,7 +527,7 @@
 {
 	struct sport_uart_port *sport = platform_get_drvdata(dev);
 
-	pr_debug("%s enter\n", __FUNCTION__);
+	pr_debug("%s enter\n", __func__);
 	if (sport)
 		uart_suspend_port(&sport_uart_reg, &sport->port);
 
@@ -538,7 +538,7 @@
 {
 	struct sport_uart_port *sport = platform_get_drvdata(dev);
 
-	pr_debug("%s enter\n", __FUNCTION__);
+	pr_debug("%s enter\n", __func__);
 	if (sport)
 		uart_resume_port(&sport_uart_reg, &sport->port);
 
@@ -547,7 +547,7 @@
 
 static int sport_uart_probe(struct platform_device *dev)
 {
-	pr_debug("%s enter\n", __FUNCTION__);
+	pr_debug("%s enter\n", __func__);
 	sport_uart_ports[dev->id].port.dev = &dev->dev;
 	uart_add_one_port(&sport_uart_reg, &sport_uart_ports[dev->id].port);
 	platform_set_drvdata(dev, &sport_uart_ports[dev->id]);
@@ -559,7 +559,7 @@
 {
 	struct sport_uart_port *sport = platform_get_drvdata(dev);
 
-	pr_debug("%s enter\n", __FUNCTION__);
+	pr_debug("%s enter\n", __func__);
 	platform_set_drvdata(dev, NULL);
 
 	if (sport)
@@ -582,7 +582,7 @@
 {
 	int ret;
 
-	pr_debug("%s enter\n", __FUNCTION__);
+	pr_debug("%s enter\n", __func__);
 	ret = uart_register_driver(&sport_uart_reg);
 	if (ret != 0) {
 		printk(KERN_ERR "Failed to register %s:%d\n",
@@ -597,13 +597,13 @@
 	}
 
 
-	pr_debug("%s exit\n", __FUNCTION__);
+	pr_debug("%s exit\n", __func__);
 	return ret;
 }
 
 static void __exit sport_uart_exit(void)
 {
-	pr_debug("%s enter\n", __FUNCTION__);
+	pr_debug("%s enter\n", __func__);
 	platform_driver_unregister(&sport_uart_driver);
 	uart_unregister_driver(&sport_uart_reg);
 }
diff --git a/drivers/serial/jsm/jsm_tty.c b/drivers/serial/jsm/jsm_tty.c
index a697914..3547558 100644
--- a/drivers/serial/jsm/jsm_tty.c
+++ b/drivers/serial/jsm/jsm_tty.c
@@ -272,7 +272,7 @@
 	jsm_printk(CLOSE, INFO, &channel->ch_bd->pci_dev, "start\n");
 
 	bd = channel->ch_bd;
-	ts = channel->uart_port.info->port.tty->termios;
+	ts = port->info->port.tty->termios;
 
 	channel->ch_flags &= ~(CH_STOPI);
 
diff --git a/drivers/serial/serial_core.c b/drivers/serial/serial_core.c
index 874786a..dc68b7e 100644
--- a/drivers/serial/serial_core.c
+++ b/drivers/serial/serial_core.c
@@ -50,7 +50,7 @@
 
 #define HIGH_BITS_OFFSET	((sizeof(long)-sizeof(int))*8)
 
-#define uart_users(state)	((state)->count + ((state)->info ? (state)->info->port.blocked_open : 0))
+#define uart_users(state)	((state)->count + (state)->info.port.blocked_open)
 
 #ifdef CONFIG_SERIAL_CORE_CONSOLE
 #define uart_console(port)	((port)->cons && (port)->cons->index == (port)->line)
@@ -94,7 +94,7 @@
 	struct uart_state *state = tty->driver_data;
 	struct uart_port *port = state->port;
 
-	if (!uart_circ_empty(&state->info->xmit) && state->info->xmit.buf &&
+	if (!uart_circ_empty(&state->info.xmit) && state->info.xmit.buf &&
 	    !tty->stopped && !tty->hw_stopped)
 		port->ops->start_tx(port);
 }
@@ -113,7 +113,7 @@
 static void uart_tasklet_action(unsigned long data)
 {
 	struct uart_state *state = (struct uart_state *)data;
-	tty_wakeup(state->info->port.tty);
+	tty_wakeup(state->info.port.tty);
 }
 
 static inline void
@@ -139,7 +139,7 @@
  */
 static int uart_startup(struct uart_state *state, int init_hw)
 {
-	struct uart_info *info = state->info;
+	struct uart_info *info = &state->info;
 	struct uart_port *port = state->port;
 	unsigned long page;
 	int retval = 0;
@@ -212,14 +212,15 @@
  */
 static void uart_shutdown(struct uart_state *state)
 {
-	struct uart_info *info = state->info;
+	struct uart_info *info = &state->info;
 	struct uart_port *port = state->port;
+	struct tty_struct *tty = info->port.tty;
 
 	/*
 	 * Set the TTY IO error marker
 	 */
-	if (info->port.tty)
-		set_bit(TTY_IO_ERROR, &info->port.tty->flags);
+	if (tty)
+		set_bit(TTY_IO_ERROR, &tty->flags);
 
 	if (info->flags & UIF_INITIALIZED) {
 		info->flags &= ~UIF_INITIALIZED;
@@ -227,7 +228,7 @@
 		/*
 		 * Turn off DTR and RTS early.
 		 */
-		if (!info->port.tty || (info->port.tty->termios->c_cflag & HUPCL))
+		if (!tty || (tty->termios->c_cflag & HUPCL))
 			uart_clear_mctrl(port, TIOCM_DTR | TIOCM_RTS);
 
 		/*
@@ -427,7 +428,7 @@
 static void
 uart_change_speed(struct uart_state *state, struct ktermios *old_termios)
 {
-	struct tty_struct *tty = state->info->port.tty;
+	struct tty_struct *tty = state->info.port.tty;
 	struct uart_port *port = state->port;
 	struct ktermios *termios;
 
@@ -444,14 +445,14 @@
 	 * Set flags based on termios cflag
 	 */
 	if (termios->c_cflag & CRTSCTS)
-		state->info->flags |= UIF_CTS_FLOW;
+		state->info.flags |= UIF_CTS_FLOW;
 	else
-		state->info->flags &= ~UIF_CTS_FLOW;
+		state->info.flags &= ~UIF_CTS_FLOW;
 
 	if (termios->c_cflag & CLOCAL)
-		state->info->flags &= ~UIF_CHECK_CD;
+		state->info.flags &= ~UIF_CHECK_CD;
 	else
-		state->info->flags |= UIF_CHECK_CD;
+		state->info.flags |= UIF_CHECK_CD;
 
 	port->ops->set_termios(port, termios, old_termios);
 }
@@ -479,7 +480,7 @@
 {
 	struct uart_state *state = tty->driver_data;
 
-	return __uart_put_char(state->port, &state->info->xmit, ch);
+	return __uart_put_char(state->port, &state->info.xmit, ch);
 }
 
 static void uart_flush_chars(struct tty_struct *tty)
@@ -500,13 +501,13 @@
 	 * This means you called this function _after_ the port was
 	 * closed.  No cookie for you.
 	 */
-	if (!state || !state->info) {
+	if (!state) {
 		WARN_ON(1);
 		return -EL3HLT;
 	}
 
 	port = state->port;
-	circ = &state->info->xmit;
+	circ = &state->info.xmit;
 
 	if (!circ->buf)
 		return 0;
@@ -537,7 +538,7 @@
 	int ret;
 
 	spin_lock_irqsave(&state->port->lock, flags);
-	ret = uart_circ_chars_free(&state->info->xmit);
+	ret = uart_circ_chars_free(&state->info.xmit);
 	spin_unlock_irqrestore(&state->port->lock, flags);
 	return ret;
 }
@@ -549,7 +550,7 @@
 	int ret;
 
 	spin_lock_irqsave(&state->port->lock, flags);
-	ret = uart_circ_chars_pending(&state->info->xmit);
+	ret = uart_circ_chars_pending(&state->info.xmit);
 	spin_unlock_irqrestore(&state->port->lock, flags);
 	return ret;
 }
@@ -564,7 +565,7 @@
 	 * This means you called this function _after_ the port was
 	 * closed.  No cookie for you.
 	 */
-	if (!state || !state->info) {
+	if (!state) {
 		WARN_ON(1);
 		return;
 	}
@@ -573,7 +574,7 @@
 	pr_debug("uart_flush_buffer(%d) called\n", tty->index);
 
 	spin_lock_irqsave(&port->lock, flags);
-	uart_circ_clear(&state->info->xmit);
+	uart_circ_clear(&state->info.xmit);
 	if (port->ops->flush_buffer)
 		port->ops->flush_buffer(port);
 	spin_unlock_irqrestore(&port->lock, flags);
@@ -837,15 +838,15 @@
 	state->closing_wait    = closing_wait;
 	if (new_serial.xmit_fifo_size)
 		port->fifosize = new_serial.xmit_fifo_size;
-	if (state->info->port.tty)
-		state->info->port.tty->low_latency =
+	if (state->info.port.tty)
+		state->info.port.tty->low_latency =
 			(port->flags & UPF_LOW_LATENCY) ? 1 : 0;
 
  check_and_exit:
 	retval = 0;
 	if (port->type == PORT_UNKNOWN)
 		goto exit;
-	if (state->info->flags & UIF_INITIALIZED) {
+	if (state->info.flags & UIF_INITIALIZED) {
 		if (((old_flags ^ port->flags) & UPF_SPD_MASK) ||
 		    old_custom_divisor != port->custom_divisor) {
 			/*
@@ -858,7 +859,7 @@
 				printk(KERN_NOTICE
 				       "%s sets custom speed on %s. This "
 				       "is deprecated.\n", current->comm,
-				       tty_name(state->info->port.tty, buf));
+				       tty_name(state->info.port.tty, buf));
 			}
 			uart_change_speed(state, NULL);
 		}
@@ -889,8 +890,8 @@
 	 * interrupt happens).
 	 */
 	if (port->x_char ||
-	    ((uart_circ_chars_pending(&state->info->xmit) > 0) &&
-	     !state->info->port.tty->stopped && !state->info->port.tty->hw_stopped))
+	    ((uart_circ_chars_pending(&state->info.xmit) > 0) &&
+	     !state->info.port.tty->stopped && !state->info.port.tty->hw_stopped))
 		result &= ~TIOCSER_TEMT;
 
 	return put_user(result, value);
@@ -1017,7 +1018,7 @@
 	port->ops->enable_ms(port);
 	spin_unlock_irq(&port->lock);
 
-	add_wait_queue(&state->info->delta_msr_wait, &wait);
+	add_wait_queue(&state->info.delta_msr_wait, &wait);
 	for (;;) {
 		spin_lock_irq(&port->lock);
 		memcpy(&cnow, &port->icount, sizeof(struct uart_icount));
@@ -1045,7 +1046,7 @@
 	}
 
 	current->state = TASK_RUNNING;
-	remove_wait_queue(&state->info->delta_msr_wait, &wait);
+	remove_wait_queue(&state->info.delta_msr_wait, &wait);
 
 	return ret;
 }
@@ -1241,7 +1242,7 @@
 	 */
 	if (!(old_termios->c_cflag & CLOCAL) &&
 	    (tty->termios->c_cflag & CLOCAL))
-		wake_up_interruptible(&state->info->port.open_wait);
+		wake_up_interruptible(&info->port.open_wait);
 #endif
 }
 
@@ -1303,7 +1304,7 @@
 	 * At this point, we stop accepting input.  To do this, we
 	 * disable the receive line status interrupts.
 	 */
-	if (state->info->flags & UIF_INITIALIZED) {
+	if (state->info.flags & UIF_INITIALIZED) {
 		unsigned long flags;
 		spin_lock_irqsave(&port->lock, flags);
 		port->ops->stop_rx(port);
@@ -1322,9 +1323,9 @@
 	tty_ldisc_flush(tty);
 
 	tty->closing = 0;
-	state->info->port.tty = NULL;
+	state->info.port.tty = NULL;
 
-	if (state->info->port.blocked_open) {
+	if (state->info.port.blocked_open) {
 		if (state->close_delay)
 			msleep_interruptible(state->close_delay);
 	} else if (!uart_console(port)) {
@@ -1334,8 +1335,8 @@
 	/*
 	 * Wake up anyone trying to open this port.
 	 */
-	state->info->flags &= ~UIF_NORMAL_ACTIVE;
-	wake_up_interruptible(&state->info->port.open_wait);
+	state->info.flags &= ~UIF_NORMAL_ACTIVE;
+	wake_up_interruptible(&state->info.port.open_wait);
 
  done:
 	mutex_unlock(&state->mutex);
@@ -1409,19 +1410,20 @@
 static void uart_hangup(struct tty_struct *tty)
 {
 	struct uart_state *state = tty->driver_data;
+	struct uart_info *info = &state->info;
 
 	BUG_ON(!kernel_locked());
 	pr_debug("uart_hangup(%d)\n", state->port->line);
 
 	mutex_lock(&state->mutex);
-	if (state->info && state->info->flags & UIF_NORMAL_ACTIVE) {
+	if (info->flags & UIF_NORMAL_ACTIVE) {
 		uart_flush_buffer(tty);
 		uart_shutdown(state);
 		state->count = 0;
-		state->info->flags &= ~UIF_NORMAL_ACTIVE;
-		state->info->port.tty = NULL;
-		wake_up_interruptible(&state->info->port.open_wait);
-		wake_up_interruptible(&state->info->delta_msr_wait);
+		info->flags &= ~UIF_NORMAL_ACTIVE;
+		info->port.tty = NULL;
+		wake_up_interruptible(&info->port.open_wait);
+		wake_up_interruptible(&info->delta_msr_wait);
 	}
 	mutex_unlock(&state->mutex);
 }
@@ -1434,7 +1436,7 @@
  */
 static void uart_update_termios(struct uart_state *state)
 {
-	struct tty_struct *tty = state->info->port.tty;
+	struct tty_struct *tty = state->info.port.tty;
 	struct uart_port *port = state->port;
 
 	if (uart_console(port) && port->cons->cflag) {
@@ -1469,7 +1471,7 @@
 uart_block_til_ready(struct file *filp, struct uart_state *state)
 {
 	DECLARE_WAITQUEUE(wait, current);
-	struct uart_info *info = state->info;
+	struct uart_info *info = &state->info;
 	struct uart_port *port = state->port;
 	unsigned int mctrl;
 
@@ -1563,28 +1565,6 @@
 		ret = -ENXIO;
 		goto err_unlock;
 	}
-
-	/* BKL: RACE HERE - LEAK */
-	/* We should move this into the uart_state structure and kill off
-	   this whole complexity */
-	if (!state->info) {
-		state->info = kzalloc(sizeof(struct uart_info), GFP_KERNEL);
-		if (state->info) {
-			init_waitqueue_head(&state->info->port.open_wait);
-			init_waitqueue_head(&state->info->delta_msr_wait);
-
-			/*
-			 * Link the info into the other structures.
-			 */
-			state->port->info = state->info;
-
-			tasklet_init(&state->info->tlet, uart_tasklet_action,
-				     (unsigned long)state);
-		} else {
-			ret = -ENOMEM;
-			goto err_unlock;
-		}
-	}
 	return state;
 
  err_unlock:
@@ -1641,9 +1621,10 @@
 	 * Any failures from here onwards should not touch the count.
 	 */
 	tty->driver_data = state;
+	state->port->info = &state->info;
 	tty->low_latency = (state->port->flags & UPF_LOW_LATENCY) ? 1 : 0;
 	tty->alt_speed = 0;
-	state->info->port.tty = tty;
+	state->info.port.tty = tty;
 
 	/*
 	 * If the port is in the middle of closing, bail out now.
@@ -1676,8 +1657,8 @@
 	/*
 	 * If this is the first open to succeed, adjust things to suit.
 	 */
-	if (retval == 0 && !(state->info->flags & UIF_NORMAL_ACTIVE)) {
-		state->info->flags |= UIF_NORMAL_ACTIVE;
+	if (retval == 0 && !(state->info.flags & UIF_NORMAL_ACTIVE)) {
+		state->info.flags |= UIF_NORMAL_ACTIVE;
 
 		uart_update_termios(state);
 	}
@@ -2028,11 +2009,11 @@
 	}
 	port->suspended = 1;
 
-	if (state->info && state->info->flags & UIF_INITIALIZED) {
+	if (state->info.flags & UIF_INITIALIZED) {
 		const struct uart_ops *ops = port->ops;
 		int tries;
 
-		state->info->flags = (state->info->flags & ~UIF_INITIALIZED)
+		state->info.flags = (state->info.flags & ~UIF_INITIALIZED)
 				     | UIF_SUSPENDED;
 
 		spin_lock_irq(&port->lock);
@@ -2107,15 +2088,15 @@
 		/*
 		 * If that's unset, use the tty termios setting.
 		 */
-		if (state->info && state->info->port.tty && termios.c_cflag == 0)
-			termios = *state->info->port.tty->termios;
+		if (state->info.port.tty && termios.c_cflag == 0)
+			termios = *state->info.port.tty->termios;
 
 		uart_change_pm(state, 0);
 		port->ops->set_termios(port, &termios, NULL);
 		console_start(port->cons);
 	}
 
-	if (state->info && state->info->flags & UIF_SUSPENDED) {
+	if (state->info.flags & UIF_SUSPENDED) {
 		const struct uart_ops *ops = port->ops;
 		int ret;
 
@@ -2130,7 +2111,7 @@
 			ops->set_mctrl(port, port->mctrl);
 			ops->start_tx(port);
 			spin_unlock_irq(&port->lock);
-			state->info->flags |= UIF_INITIALIZED;
+			state->info.flags |= UIF_INITIALIZED;
 		} else {
 			/*
 			 * Failed to resume - maybe hardware went away?
@@ -2140,7 +2121,7 @@
 			uart_shutdown(state);
 		}
 
-		state->info->flags &= ~UIF_SUSPENDED;
+		state->info.flags &= ~UIF_SUSPENDED;
 	}
 
 	mutex_unlock(&state->mutex);
@@ -2198,11 +2179,14 @@
 	 * Now do the auto configuration stuff.  Note that config_port
 	 * is expected to claim the resources and map the port for us.
 	 */
-	flags = UART_CONFIG_TYPE;
+	flags = 0;
 	if (port->flags & UPF_AUTO_IRQ)
 		flags |= UART_CONFIG_IRQ;
 	if (port->flags & UPF_BOOT_AUTOCONF) {
-		port->type = PORT_UNKNOWN;
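+		/*
+		 * If the driver set UPF_FIXED_TYPE the port type is already
+		 * known, so keep it and skip probing for it.
+		 */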
+		if (!(port->flags & UPF_FIXED_TYPE)) {
+			port->type = PORT_UNKNOWN;
+			flags |= UART_CONFIG_TYPE;
+		}
 		port->ops->config_port(port, flags);
 	}
 
@@ -2383,8 +2367,12 @@
 
 		state->close_delay     = 500;	/* .5 seconds */
 		state->closing_wait    = 30000;	/* 30 seconds */
-
 		mutex_init(&state->mutex);
+
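+		/*
+		 * The per-port info is now embedded in uart_state, so set up
+		 * its tty_port, waitqueues and tasklet once, at registration.
+		 */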
+		tty_port_init(&state->info.port);
+		init_waitqueue_head(&state->info.delta_msr_wait);
+		tasklet_init(&state->info.tlet, uart_tasklet_action,
+			     (unsigned long)state);
 	}
 
 	retval = tty_register_driver(normal);
@@ -2455,7 +2443,7 @@
 	state->pm_state = -1;
 
 	port->cons = drv->cons;
-	port->info = state->info;
+	port->info = &state->info;
 
 	/*
 	 * If this port is a console, then the spinlock is already
@@ -2527,18 +2515,11 @@
 	 */
 	tty_unregister_device(drv->tty_driver, port->line);
 
-	info = state->info;
+	info = &state->info;
 	if (info && info->port.tty)
 		tty_vhangup(info->port.tty);
 
 	/*
-	 * All users of this port should now be disconnected from
-	 * this driver, and the port shut down.  We should be the
-	 * only thread fiddling with this port from now on.
-	 */
-	state->info = NULL;
-
-	/*
 	 * Free the port IO and memory resources, if any.
 	 */
 	if (port->type != PORT_UNKNOWN)
@@ -2552,10 +2533,8 @@
 	/*
 	 * Kill the tasklet, and free resources.
 	 */
-	if (info) {
+	if (info)
 		tasklet_kill(&info->tlet);
-		kfree(info);
-	}
 
 	state->port = NULL;
 	mutex_unlock(&port_mutex);
diff --git a/drivers/usb/host/hwa-hc.c b/drivers/usb/host/hwa-hc.c
index 64be4d8..8582236 100644
--- a/drivers/usb/host/hwa-hc.c
+++ b/drivers/usb/host/hwa-hc.c
@@ -54,7 +54,6 @@
  *                      DWA).
  */
 #include <linux/kernel.h>
-#include <linux/version.h>
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/workqueue.h>
@@ -63,16 +62,12 @@
 #include "../wusbcore/wa-hc.h"
 #include "../wusbcore/wusbhc.h"
 
-#define D_LOCAL 0
-#include <linux/uwb/debug.h>
-
 struct hwahc {
 	struct wusbhc wusbhc;	/* has to be 1st */
 	struct wahc wa;
-	u8 buffer[16];		/* for misc usb transactions */
 };
 
-/**
+/*
  * FIXME should be wusbhc
  *
  * NOTE: we need to cache the Cluster ID because later...there is no
@@ -126,7 +121,6 @@
 	struct hwahc *hwahc = container_of(wusbhc, struct hwahc, wusbhc);
 	struct device *dev = &hwahc->wa.usb_iface->dev;
 
-	d_fnstart(4, dev, "(hwahc %p)\n", hwahc);
 	mutex_lock(&wusbhc->mutex);
 	wa_nep_disarm(&hwahc->wa);
 	result = __wa_set_feature(&hwahc->wa, WA_RESET);
@@ -134,7 +128,6 @@
 		dev_err(dev, "error commanding HC to reset: %d\n", result);
 		goto error_unlock;
 	}
-	d_printf(3, dev, "reset: waiting for device to change state\n");
 	result = __wa_wait_status(&hwahc->wa, WA_STATUS_RESETTING, 0);
 	if (result < 0) {
 		dev_err(dev, "error waiting for HC to reset: %d\n", result);
@@ -142,7 +135,6 @@
 	}
 error_unlock:
 	mutex_unlock(&wusbhc->mutex);
-	d_fnend(4, dev, "(hwahc %p) = %d\n", hwahc, result);
 	return result;
 }
 
@@ -155,15 +147,9 @@
 	int result;
 	struct wusbhc *wusbhc = usb_hcd_to_wusbhc(usb_hcd);
 	struct hwahc *hwahc = container_of(wusbhc, struct hwahc, wusbhc);
-	struct device *dev = &hwahc->wa.usb_iface->dev;
 
-	/* Set up a Host Info WUSB Information Element */
-	d_fnstart(4, dev, "(hwahc %p)\n", hwahc);
 	result = -ENOSPC;
 	mutex_lock(&wusbhc->mutex);
-	/* Start the numbering from the top so that the bottom
-	 * range of the unauth addr space is used for devices,
-	 * the top for HCs; use 0xfe - RC# */
 	addr = wusb_cluster_id_get();
 	if (addr == 0)
 		goto error_cluster_id_get;
@@ -171,22 +157,14 @@
 	if (result < 0)
 		goto error_set_cluster_id;
 
-	result = wa_nep_arm(&hwahc->wa, GFP_KERNEL);
-	if (result < 0) {
-		dev_err(dev, "cannot listen to notifications: %d\n", result);
-		goto error_stop;
-	}
 	usb_hcd->uses_new_polling = 1;
 	usb_hcd->poll_rh = 1;
 	usb_hcd->state = HC_STATE_RUNNING;
 	result = 0;
 out:
 	mutex_unlock(&wusbhc->mutex);
-	d_fnend(4, dev, "(hwahc %p) = %d\n", hwahc, result);
 	return result;
 
-error_stop:
-	__wa_stop(&hwahc->wa);
 error_set_cluster_id:
 	wusb_cluster_id_put(wusbhc->cluster_id);
 error_cluster_id_get:
@@ -194,39 +172,6 @@
 
 }
 
-/*
- * FIXME: break this function up
- */
-static int __hwahc_op_wusbhc_start(struct wusbhc *wusbhc)
-{
-	int result;
-	struct hwahc *hwahc = container_of(wusbhc, struct hwahc, wusbhc);
-	struct device *dev = &hwahc->wa.usb_iface->dev;
-
-	/* Set up a Host Info WUSB Information Element */
-	d_fnstart(4, dev, "(hwahc %p)\n", hwahc);
-	result = -ENOSPC;
-
-	result = __wa_set_feature(&hwahc->wa, WA_ENABLE);
-	if (result < 0) {
-		dev_err(dev, "error commanding HC to start: %d\n", result);
-		goto error_stop;
-	}
-	result = __wa_wait_status(&hwahc->wa, WA_ENABLE, WA_ENABLE);
-	if (result < 0) {
-		dev_err(dev, "error waiting for HC to start: %d\n", result);
-		goto error_stop;
-	}
-	result = 0;
-out:
-	d_fnend(4, dev, "(hwahc %p) = %d\n", hwahc, result);
-	return result;
-
-error_stop:
-	result = __wa_clear_feature(&hwahc->wa, WA_ENABLE);
-	goto out;
-}
-
 static int hwahc_op_suspend(struct usb_hcd *usb_hcd, pm_message_t msg)
 {
 	struct wusbhc *wusbhc = usb_hcd_to_wusbhc(usb_hcd);
@@ -246,18 +191,6 @@
 	return -ENOSYS;
 }
 
-static void __hwahc_op_wusbhc_stop(struct wusbhc *wusbhc)
-{
-	int result;
-	struct hwahc *hwahc = container_of(wusbhc, struct hwahc, wusbhc);
-	struct device *dev = &hwahc->wa.usb_iface->dev;
-
-	d_fnstart(4, dev, "(hwahc %p)\n", hwahc);
-	/* Nothing for now */
-	d_fnend(4, dev, "(hwahc %p) = %d\n", hwahc, result);
-	return;
-}
-
 /*
  * No need to abort pipes, as when this is called, all the children
  * have been disconnected and that has done it [through
@@ -266,21 +199,11 @@
  */
 static void hwahc_op_stop(struct usb_hcd *usb_hcd)
 {
-	int result;
 	struct wusbhc *wusbhc = usb_hcd_to_wusbhc(usb_hcd);
-	struct hwahc *hwahc = container_of(wusbhc, struct hwahc, wusbhc);
-	struct wahc *wa = &hwahc->wa;
-	struct device *dev = &wa->usb_iface->dev;
 
-	d_fnstart(4, dev, "(hwahc %p)\n", hwahc);
 	mutex_lock(&wusbhc->mutex);
-	wusbhc_stop(wusbhc);
-	wa_nep_disarm(&hwahc->wa);
-	result = __wa_stop(&hwahc->wa);
 	wusb_cluster_id_put(wusbhc->cluster_id);
 	mutex_unlock(&wusbhc->mutex);
-	d_fnend(4, dev, "(hwahc %p) = %d\n", hwahc, result);
-	return;
 }
 
 static int hwahc_op_get_frame_number(struct usb_hcd *usb_hcd)
@@ -325,6 +248,54 @@
 	rpipe_ep_disable(&hwahc->wa, ep);
 }
 
+static int __hwahc_op_wusbhc_start(struct wusbhc *wusbhc)
+{
+	int result;
+	struct hwahc *hwahc = container_of(wusbhc, struct hwahc, wusbhc);
+	struct device *dev = &hwahc->wa.usb_iface->dev;
+
+	result = __wa_set_feature(&hwahc->wa, WA_ENABLE);
+	if (result < 0) {
+		dev_err(dev, "error commanding HC to start: %d\n", result);
+		goto error_stop;
+	}
+	result = __wa_wait_status(&hwahc->wa, WA_ENABLE, WA_ENABLE);
+	if (result < 0) {
+		dev_err(dev, "error waiting for HC to start: %d\n", result);
+		goto error_stop;
+	}
+	result = wa_nep_arm(&hwahc->wa, GFP_KERNEL);
+	if (result < 0) {
+		dev_err(dev, "cannot listen to notifications: %d\n", result);
+		goto error_stop;
+	}
+	return result;
+
+error_stop:
+	__wa_clear_feature(&hwahc->wa, WA_ENABLE);
+	return result;
+}
+
+static void __hwahc_op_wusbhc_stop(struct wusbhc *wusbhc, int delay)
+{
+	struct hwahc *hwahc = container_of(wusbhc, struct hwahc, wusbhc);
+	struct wahc *wa = &hwahc->wa;
+	u8 iface_no = wa->usb_iface->cur_altsetting->desc.bInterfaceNumber;
+	int ret;
+
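+	/*
+	 * Ask the HWA to stop the WUSB channel after the requested delay;
+	 * if the request succeeds, wait the delay out before disarming
+	 * notifications and stopping the wire adapter.
+	 */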
+	ret = usb_control_msg(wa->usb_dev, usb_sndctrlpipe(wa->usb_dev, 0),
+			      WUSB_REQ_CHAN_STOP,
+			      USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE,
+			      delay * 1000,
+			      iface_no,
+			      NULL, 0, 1000 /* FIXME: arbitrary */);
+	if (ret == 0)
+		msleep(delay);
+
+	wa_nep_disarm(&hwahc->wa);
+	__wa_stop(&hwahc->wa);
+}
+
 /*
  * Set the UWB MAS allocation for the WUSB cluster
  *
@@ -581,11 +552,11 @@
 	itr_size = le16_to_cpu(usb_dev->actconfig->desc.wTotalLength);
 	while (itr_size >= sizeof(*hdr)) {
 		hdr = (struct usb_descriptor_header *) itr;
-		d_printf(3, dev, "Extra device descriptor: "
-			 "type %02x/%u bytes @ %zu (%zu left)\n",
-			 hdr->bDescriptorType, hdr->bLength,
-			 (itr - usb_dev->rawdescriptors[actconfig_idx]),
-			 itr_size);
+		dev_dbg(dev, "Extra device descriptor: "
+			"type %02x/%u bytes @ %zu (%zu left)\n",
+			hdr->bDescriptorType, hdr->bLength,
+			(itr - usb_dev->rawdescriptors[actconfig_idx]),
+			itr_size);
 		if (hdr->bDescriptorType == USB_DT_WIRE_ADAPTER)
 			goto found;
 		itr += hdr->bLength;
@@ -794,7 +765,6 @@
 {
 	struct wusbhc *wusbhc = &hwahc->wusbhc;
 
-	d_fnstart(1, NULL, "(hwahc %p)\n", hwahc);
 	mutex_lock(&wusbhc->mutex);
 	__wa_destroy(&hwahc->wa);
 	wusbhc_destroy(&hwahc->wusbhc);
@@ -804,7 +774,6 @@
 	usb_put_intf(hwahc->wa.usb_iface);
 	usb_put_dev(hwahc->wa.usb_dev);
 	mutex_unlock(&wusbhc->mutex);
-	d_fnend(1, NULL, "(hwahc %p) = void\n", hwahc);
 }
 
 static void hwahc_init(struct hwahc *hwahc)
@@ -821,7 +790,6 @@
 	struct hwahc *hwahc;
 	struct device *dev = &usb_iface->dev;
 
-	d_fnstart(4, dev, "(%p, %p)\n", usb_iface, id);
 	result = -ENOMEM;
 	usb_hcd = usb_create_hcd(&hwahc_hc_driver, &usb_iface->dev, "wusb-hwa");
 	if (usb_hcd == NULL) {
@@ -848,7 +816,6 @@
 		dev_err(dev, "Cannot setup phase B of WUSBHC: %d\n", result);
 		goto error_wusbhc_b_create;
 	}
-	d_fnend(4, dev, "(%p, %p) = 0\n", usb_iface, id);
 	return 0;
 
 error_wusbhc_b_create:
@@ -858,7 +825,6 @@
 error_hwahc_create:
 	usb_put_hcd(usb_hcd);
 error_alloc:
-	d_fnend(4, dev, "(%p, %p) = %d\n", usb_iface, id, result);
 	return result;
 }
 
@@ -872,16 +838,12 @@
 	wusbhc = usb_hcd_to_wusbhc(usb_hcd);
 	hwahc = container_of(wusbhc, struct hwahc, wusbhc);
 
-	d_fnstart(1, NULL, "(hwahc %p [usb_iface %p])\n", hwahc, usb_iface);
 	wusbhc_b_destroy(&hwahc->wusbhc);
 	usb_remove_hcd(usb_hcd);
 	hwahc_destroy(hwahc);
 	usb_put_hcd(usb_hcd);
-	d_fnend(1, NULL, "(hwahc %p [usb_iface %p]) = void\n", hwahc,
-		usb_iface);
 }
 
-/** USB device ID's that we handle */
 static struct usb_device_id hwahc_id_table[] = {
 	/* FIXME: use class labels for this */
 	{ USB_INTERFACE_INFO(0xe0, 0x02, 0x01), },
@@ -898,18 +860,7 @@
 
 static int __init hwahc_driver_init(void)
 {
-	int result;
-	result = usb_register(&hwahc_driver);
-	if (result < 0) {
-		printk(KERN_ERR "WA-CDS: Cannot register USB driver: %d\n",
-		       result);
-		goto error_usb_register;
-	}
-	return 0;
-
-error_usb_register:
-	return result;
-
+	return usb_register(&hwahc_driver);
 }
 module_init(hwahc_driver_init);
 
diff --git a/drivers/usb/host/whci/Kbuild b/drivers/usb/host/whci/Kbuild
index 26a3871..11e5040 100644
--- a/drivers/usb/host/whci/Kbuild
+++ b/drivers/usb/host/whci/Kbuild
@@ -2,6 +2,7 @@
 
 whci-hcd-y := \
 	asl.o	\
+	debug.o \
 	hcd.o 	\
 	hw.o	\
 	init.o	\
diff --git a/drivers/usb/host/whci/asl.c b/drivers/usb/host/whci/asl.c
index 4d7078e5..577c0d2 100644
--- a/drivers/usb/host/whci/asl.c
+++ b/drivers/usb/host/whci/asl.c
@@ -19,32 +19,11 @@
 #include <linux/dma-mapping.h>
 #include <linux/uwb/umc.h>
 #include <linux/usb.h>
-#define D_LOCAL 0
-#include <linux/uwb/debug.h>
 
 #include "../../wusbcore/wusbhc.h"
 
 #include "whcd.h"
 
-#if D_LOCAL >= 4
-static void dump_asl(struct whc *whc, const char *tag)
-{
-	struct device *dev = &whc->umc->dev;
-	struct whc_qset *qset;
-
-	d_printf(4, dev, "ASL %s\n", tag);
-
-	list_for_each_entry(qset, &whc->async_list, list_node) {
-		dump_qset(qset, dev);
-	}
-}
-#else
-static inline void dump_asl(struct whc *whc, const char *tag)
-{
-}
-#endif
-
-
 static void qset_get_next_prev(struct whc *whc, struct whc_qset *qset,
 			       struct whc_qset **next, struct whc_qset **prev)
 {
@@ -179,11 +158,26 @@
 		      1000, "stop ASL");
 }
 
+/**
+ * asl_update - request an ASL update and wait for the hardware to be synced
+ * @whc: the WHCI HC
+ * @wusbcmd: WUSBCMD value to start the update.
+ *
+ * If the WUSB HC is inactive (i.e., the ASL is stopped) then the
+ * update must be skipped as the hardware may not respond to update
+ * requests.
+ */
 void asl_update(struct whc *whc, uint32_t wusbcmd)
 {
-	whc_write_wusbcmd(whc, wusbcmd, wusbcmd);
-	wait_event(whc->async_list_wq,
-		   (le_readl(whc->base + WUSBCMD) & WUSBCMD_ASYNC_UPDATED) == 0);
+	struct wusbhc *wusbhc = &whc->wusbhc;
+
+	mutex_lock(&wusbhc->mutex);
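+	/*
+	 * Only touch the hardware while the HC is active: an inactive HC
+	 * may never clear WUSBCMD_ASYNC_UPDATED and we would wait forever.
+	 */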
+	if (wusbhc->active) {
+		whc_write_wusbcmd(whc, wusbcmd, wusbcmd);
+		wait_event(whc->async_list_wq,
+			   (le_readl(whc->base + WUSBCMD) & WUSBCMD_ASYNC_UPDATED) == 0);
+	}
+	mutex_unlock(&wusbhc->mutex);
 }
 
 /**
@@ -202,8 +196,6 @@
 
 	spin_lock_irq(&whc->lock);
 
-	dump_asl(whc, "before processing");
-
 	/*
 	 * Traverse the software list backwards so new qsets can be
 	 * safely inserted into the ASL without making it non-circular.
@@ -217,8 +209,6 @@
 		update |= process_qset(whc, qset);
 	}
 
-	dump_asl(whc, "after processing");
-
 	spin_unlock_irq(&whc->lock);
 
 	if (update) {
diff --git a/drivers/usb/host/whci/debug.c b/drivers/usb/host/whci/debug.c
new file mode 100644
index 0000000..cf2d459
--- /dev/null
+++ b/drivers/usb/host/whci/debug.c
@@ -0,0 +1,189 @@
+/*
+ * Wireless Host Controller (WHC) debug.
+ *
+ * Copyright (C) 2008 Cambridge Silicon Radio Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <linux/kernel.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include "../../wusbcore/wusbhc.h"
+
+#include "whcd.h"
+
+struct whc_dbg {
+	struct dentry *di_f;
+	struct dentry *asl_f;
+	struct dentry *pzl_f;
+};
+
+void qset_print(struct seq_file *s, struct whc_qset *qset)
+{
+	struct whc_std *std;
+	struct urb *urb = NULL;
+	int i;
+
+	seq_printf(s, "qset %08x\n", (u32)qset->qset_dma);
+	seq_printf(s, "  -> %08x\n", (u32)qset->qh.link);
+	seq_printf(s, "  info: %08x %08x %08x\n",
+		qset->qh.info1, qset->qh.info2,  qset->qh.info3);
+	seq_printf(s, "  sts: %04x errs: %d\n", qset->qh.status, qset->qh.err_count);
+	seq_printf(s, "  TD: sts: %08x opts: %08x\n",
+		qset->qh.overlay.qtd.status, qset->qh.overlay.qtd.options);
+
+	for (i = 0; i < WHCI_QSET_TD_MAX; i++) {
+		seq_printf(s, "  %c%c TD[%d]: sts: %08x opts: %08x ptr: %08x\n",
+			i == qset->td_start ? 'S' : ' ',
+			i == qset->td_end ? 'E' : ' ',
+			i, qset->qtd[i].status, qset->qtd[i].options,
+			(u32)qset->qtd[i].page_list_ptr);
+	}
+	seq_printf(s, "  ntds: %d\n", qset->ntds);
+	list_for_each_entry(std, &qset->stds, list_node) {
+		if (urb != std->urb) {
+			urb = std->urb;
+			seq_printf(s, "  urb %p transferred: %d bytes\n", urb,
+				urb->actual_length);
+		}
+		if (std->qtd)
+			seq_printf(s, "    sTD[%td]: %zu bytes @ %08x\n",
+				std->qtd - &qset->qtd[0],
+				std->len, std->num_pointers ?
+				(u32)(std->pl_virt[0].buf_ptr) : (u32)std->dma_addr);
+		else
+			seq_printf(s, "    sTD[-]: %zd bytes @ %08x\n",
+				std->len, std->num_pointers ?
+				(u32)(std->pl_virt[0].buf_ptr) : (u32)std->dma_addr);
+	}
+}
+
+static int di_print(struct seq_file *s, void *p)
+{
+	struct whc *whc = s->private;
+	char buf[72];
+	int d;
+
+	for (d = 0; d < whc->n_devices; d++) {
+		struct di_buf_entry *di = &whc->di_buf[d];
+
+		bitmap_scnprintf(buf, sizeof(buf),
+				 (unsigned long *)di->availability_info, UWB_NUM_MAS);
+
+		seq_printf(s, "DI[%d]\n", d);
+		seq_printf(s, "  availability: %s\n", buf);
+		seq_printf(s, "  %c%c key idx: %d dev addr: %d\n",
+			   (di->addr_sec_info & WHC_DI_SECURE) ? 'S' : ' ',
+			   (di->addr_sec_info & WHC_DI_DISABLE) ? 'D' : ' ',
+			   (di->addr_sec_info & WHC_DI_KEY_IDX_MASK) >> 8,
+			   (di->addr_sec_info & WHC_DI_DEV_ADDR_MASK));
+	}
+	return 0;
+}
+
+static int asl_print(struct seq_file *s, void *p)
+{
+	struct whc *whc = s->private;
+	struct whc_qset *qset;
+
+	list_for_each_entry(qset, &whc->async_list, list_node) {
+		qset_print(s, qset);
+	}
+
+	return 0;
+}
+
+static int pzl_print(struct seq_file *s, void *p)
+{
+	struct whc *whc = s->private;
+	struct whc_qset *qset;
+	int period;
+
+	for (period = 0; period < 5; period++) {
+		seq_printf(s, "Period %d\n", period);
+		list_for_each_entry(qset, &whc->periodic_list[period], list_node) {
+			qset_print(s, qset);
+		}
+	}
+	return 0;
+}
+
+static int di_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, di_print, inode->i_private);
+}
+
+static int asl_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, asl_print, inode->i_private);
+}
+
+static int pzl_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, pzl_print, inode->i_private);
+}
+
+static struct file_operations di_fops = {
+	.open    = di_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = single_release,
+	.owner   = THIS_MODULE,
+};
+
+static struct file_operations asl_fops = {
+	.open    = asl_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = single_release,
+	.owner   = THIS_MODULE,
+};
+
+static struct file_operations pzl_fops = {
+	.open    = pzl_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = single_release,
+	.owner   = THIS_MODULE,
+};
+
+void whc_dbg_init(struct whc *whc)
+{
+	if (whc->wusbhc.pal.debugfs_dir == NULL)
+		return;
+
+	whc->dbg = kzalloc(sizeof(struct whc_dbg), GFP_KERNEL);
+	if (whc->dbg == NULL)
+		return;
+
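+	/* debugfs files are best effort: creation failures are not checked. */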
+	whc->dbg->di_f = debugfs_create_file("di", 0444,
+					      whc->wusbhc.pal.debugfs_dir, whc,
+					      &di_fops);
+	whc->dbg->asl_f = debugfs_create_file("asl", 0444,
+					      whc->wusbhc.pal.debugfs_dir, whc,
+					      &asl_fops);
+	whc->dbg->pzl_f = debugfs_create_file("pzl", 0444,
+					      whc->wusbhc.pal.debugfs_dir, whc,
+					      &pzl_fops);
+}
+
+void whc_dbg_clean_up(struct whc *whc)
+{
+	if (whc->dbg) {
+		debugfs_remove(whc->dbg->pzl_f);
+		debugfs_remove(whc->dbg->asl_f);
+		debugfs_remove(whc->dbg->di_f);
+		kfree(whc->dbg);
+	}
+}
diff --git a/drivers/usb/host/whci/hcd.c b/drivers/usb/host/whci/hcd.c
index ef3ad4d..1569afd 100644
--- a/drivers/usb/host/whci/hcd.c
+++ b/drivers/usb/host/whci/hcd.c
@@ -15,7 +15,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
-#include <linux/version.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/uwb/umc.h>
@@ -92,8 +91,6 @@
 
 	mutex_lock(&wusbhc->mutex);
 
-	wusbhc_stop(wusbhc);
-
 	/* stop HC */
 	le_writel(0, whc->base + WUSBINTR);
 	whc_write_wusbcmd(whc, WUSBCMD_RUN, 0);
@@ -276,6 +273,8 @@
 		goto error_wusbhc_b_create;
 	}
 
+	whc_dbg_init(whc);
+
 	return 0;
 
 error_wusbhc_b_create:
@@ -299,6 +298,7 @@
 	struct whc *whc = wusbhc_to_whc(wusbhc);
 
 	if (usb_hcd) {
+		whc_dbg_clean_up(whc);
 		wusbhc_b_destroy(wusbhc);
 		usb_remove_hcd(usb_hcd);
 		wusbhc_destroy(wusbhc);
diff --git a/drivers/usb/host/whci/hw.c b/drivers/usb/host/whci/hw.c
index ac86e59..d498e72 100644
--- a/drivers/usb/host/whci/hw.c
+++ b/drivers/usb/host/whci/hw.c
@@ -50,6 +50,7 @@
 	unsigned long flags;
 	dma_addr_t dma_addr;
 	int t;
+	int ret = 0;
 
 	mutex_lock(&whc->mutex);
 
@@ -61,7 +62,8 @@
 		dev_err(&whc->umc->dev, "generic command timeout (%04x/%04x)\n",
 			le_readl(whc->base + WUSBGENCMDSTS),
 			le_readl(whc->base + WUSBGENCMDPARAMS));
-		return -ETIMEDOUT;
+		ret = -ETIMEDOUT;
+		goto out;
 	}
 
 	if (addr) {
@@ -80,8 +82,8 @@
 		  whc->base + WUSBGENCMDSTS);
 
 	spin_unlock_irqrestore(&whc->lock, flags);
-
+out:
 	mutex_unlock(&whc->mutex);
 
-	return 0;
+	return ret;
 }
diff --git a/drivers/usb/host/whci/int.c b/drivers/usb/host/whci/int.c
index fce0117..6aae700 100644
--- a/drivers/usb/host/whci/int.c
+++ b/drivers/usb/host/whci/int.c
@@ -15,7 +15,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
-#include <linux/version.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/uwb/umc.h>
diff --git a/drivers/usb/host/whci/pzl.c b/drivers/usb/host/whci/pzl.c
index 8d62df0..2ae5abf 100644
--- a/drivers/usb/host/whci/pzl.c
+++ b/drivers/usb/host/whci/pzl.c
@@ -19,35 +19,11 @@
 #include <linux/dma-mapping.h>
 #include <linux/uwb/umc.h>
 #include <linux/usb.h>
-#define D_LOCAL 0
-#include <linux/uwb/debug.h>
 
 #include "../../wusbcore/wusbhc.h"
 
 #include "whcd.h"
 
-#if D_LOCAL >= 4
-static void dump_pzl(struct whc *whc, const char *tag)
-{
-	struct device *dev = &whc->umc->dev;
-	struct whc_qset *qset;
-	int period = 0;
-
-	d_printf(4, dev, "PZL %s\n", tag);
-
-	for (period = 0; period < 5; period++) {
-		d_printf(4, dev, "Period %d\n", period);
-		list_for_each_entry(qset, &whc->periodic_list[period], list_node) {
-			dump_qset(qset, dev);
-		}
-	}
-}
-#else
-static inline void dump_pzl(struct whc *whc, const char *tag)
-{
-}
-#endif
-
 static void update_pzl_pointers(struct whc *whc, int period, u64 addr)
 {
 	switch (period) {
@@ -195,11 +171,26 @@
 		      1000, "stop PZL");
 }
 
+/**
+ * pzl_update - request a PZL update and wait for the hardware to be synced
+ * @whc: the WHCI HC
+ * @wusbcmd: WUSBCMD value to start the update.
+ *
+ * If the WUSB HC is inactive (i.e., the PZL is stopped) then the
+ * update must be skipped as the hardware may not respond to update
+ * requests.
+ */
 void pzl_update(struct whc *whc, uint32_t wusbcmd)
 {
-	whc_write_wusbcmd(whc, wusbcmd, wusbcmd);
-	wait_event(whc->periodic_list_wq,
-		   (le_readl(whc->base + WUSBCMD) & WUSBCMD_PERIODIC_UPDATED) == 0);
+	struct wusbhc *wusbhc = &whc->wusbhc;
+
+	mutex_lock(&wusbhc->mutex);
+	if (wusbhc->active) {
+		whc_write_wusbcmd(whc, wusbcmd, wusbcmd);
+		wait_event(whc->periodic_list_wq,
+			   (le_readl(whc->base + WUSBCMD) & WUSBCMD_PERIODIC_UPDATED) == 0);
+	}
+	mutex_unlock(&wusbhc->mutex);
 }
 
 static void update_pzl_hw_view(struct whc *whc)
@@ -235,8 +226,6 @@
 
 	spin_lock_irq(&whc->lock);
 
-	dump_pzl(whc, "before processing");
-
 	for (period = 4; period >= 0; period--) {
 		list_for_each_entry_safe(qset, t, &whc->periodic_list[period], list_node) {
 			if (!qset->in_hw_list)
@@ -248,8 +237,6 @@
 	if (update & (WHC_UPDATE_ADDED | WHC_UPDATE_REMOVED))
 		update_pzl_hw_view(whc);
 
-	dump_pzl(whc, "after processing");
-
 	spin_unlock_irq(&whc->lock);
 
 	if (update) {
diff --git a/drivers/usb/host/whci/qset.c b/drivers/usb/host/whci/qset.c
index 0420037..7be7431 100644
--- a/drivers/usb/host/whci/qset.c
+++ b/drivers/usb/host/whci/qset.c
@@ -24,46 +24,6 @@
 
 #include "whcd.h"
 
-void dump_qset(struct whc_qset *qset, struct device *dev)
-{
-	struct whc_std *std;
-	struct urb *urb = NULL;
-	int i;
-
-	dev_dbg(dev, "qset %08x\n", (u32)qset->qset_dma);
-	dev_dbg(dev, "  -> %08x\n", (u32)qset->qh.link);
-	dev_dbg(dev, "  info: %08x %08x %08x\n",
-		qset->qh.info1, qset->qh.info2,  qset->qh.info3);
-	dev_dbg(dev, "  sts: %04x errs: %d\n", qset->qh.status, qset->qh.err_count);
-	dev_dbg(dev, "  TD: sts: %08x opts: %08x\n",
-		qset->qh.overlay.qtd.status, qset->qh.overlay.qtd.options);
-
-	for (i = 0; i < WHCI_QSET_TD_MAX; i++) {
-		dev_dbg(dev, "  %c%c TD[%d]: sts: %08x opts: %08x ptr: %08x\n",
-			i == qset->td_start ? 'S' : ' ',
-			i == qset->td_end ? 'E' : ' ',
-			i, qset->qtd[i].status, qset->qtd[i].options,
-			(u32)qset->qtd[i].page_list_ptr);
-	}
-	dev_dbg(dev, "  ntds: %d\n", qset->ntds);
-	list_for_each_entry(std, &qset->stds, list_node) {
-		if (urb != std->urb) {
-			urb = std->urb;
-			dev_dbg(dev, "  urb %p transferred: %d bytes\n", urb,
-				urb->actual_length);
-		}
-		if (std->qtd)
-			dev_dbg(dev, "    sTD[%td]: %zu bytes @ %08x\n",
-				std->qtd - &qset->qtd[0],
-				std->len, std->num_pointers ?
-				(u32)(std->pl_virt[0].buf_ptr) : (u32)std->dma_addr);
-		else
-			dev_dbg(dev, "    sTD[-]: %zd bytes @ %08x\n",
-				std->len, std->num_pointers ?
-				(u32)(std->pl_virt[0].buf_ptr) : (u32)std->dma_addr);
-	}
-}
-
 struct whc_qset *qset_alloc(struct whc *whc, gfp_t mem_flags)
 {
 	struct whc_qset *qset;
diff --git a/drivers/usb/host/whci/whcd.h b/drivers/usb/host/whci/whcd.h
index 1d2a53b..0f3540f 100644
--- a/drivers/usb/host/whci/whcd.h
+++ b/drivers/usb/host/whci/whcd.h
@@ -21,6 +21,7 @@
 #define __WHCD_H
 
 #include <linux/uwb/whci.h>
+#include <linux/uwb/umc.h>
 #include <linux/workqueue.h>
 
 #include "whci-hc.h"
@@ -28,6 +29,7 @@
 /* Generic command timeout. */
 #define WHC_GENCMD_TIMEOUT_MS 100
 
+struct whc_dbg;
 
 struct whc {
 	struct wusbhc wusbhc;
@@ -69,6 +71,8 @@
 	struct list_head periodic_removed_list;
 	wait_queue_head_t periodic_list_wq;
 	struct work_struct periodic_work;
+
+	struct whc_dbg *dbg;
 };
 
 #define wusbhc_to_whc(w) (container_of((w), struct whc, wusbhc))
@@ -136,7 +140,7 @@
 
 /* wusb.c */
 int whc_wusbhc_start(struct wusbhc *wusbhc);
-void whc_wusbhc_stop(struct wusbhc *wusbhc);
+void whc_wusbhc_stop(struct wusbhc *wusbhc, int delay);
 int whc_mmcie_add(struct wusbhc *wusbhc, u8 interval, u8 repeat_cnt,
 		  u8 handle, struct wuie_hdr *wuie);
 int whc_mmcie_rm(struct wusbhc *wusbhc, u8 handle);
@@ -190,8 +194,11 @@
 				 struct whc_qtd *qtd);
 enum whc_update qset_add_qtds(struct whc *whc, struct whc_qset *qset);
 void qset_remove_complete(struct whc *whc, struct whc_qset *qset);
-void dump_qset(struct whc_qset *qset, struct device *dev);
 void pzl_update(struct whc *whc, uint32_t wusbcmd);
 void asl_update(struct whc *whc, uint32_t wusbcmd);
 
+/* debug.c */
+void whc_dbg_init(struct whc *whc);
+void whc_dbg_clean_up(struct whc *whc);
+
 #endif /* #ifndef __WHCD_H */
diff --git a/drivers/usb/host/whci/whci-hc.h b/drivers/usb/host/whci/whci-hc.h
index bff1eb7..51df7e3 100644
--- a/drivers/usb/host/whci/whci-hc.h
+++ b/drivers/usb/host/whci/whci-hc.h
@@ -410,6 +410,8 @@
 #  define WUSBDNTSCTRL_SLOTS(s)    ((s) << 0)
 
 #define WUSBTIME             0x68
+#  define WUSBTIME_CHANNEL_TIME_MASK 0x00ffffff
+
 #define WUSBBPST             0x6c
 #define WUSBDIBUPDATED       0x70
 
diff --git a/drivers/usb/host/whci/wusb.c b/drivers/usb/host/whci/wusb.c
index 66e4ddc..f24efde 100644
--- a/drivers/usb/host/whci/wusb.c
+++ b/drivers/usb/host/whci/wusb.c
@@ -15,47 +15,19 @@
  * You should have received a copy of the GNU General Public License
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
-#include <linux/version.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/uwb/umc.h>
-#define D_LOCAL 1
-#include <linux/uwb/debug.h>
 
 #include "../../wusbcore/wusbhc.h"
 
 #include "whcd.h"
 
-#if D_LOCAL >= 1
-static void dump_di(struct whc *whc, int idx)
-{
-	struct di_buf_entry *di = &whc->di_buf[idx];
-	struct device *dev = &whc->umc->dev;
-	char buf[128];
-
-	bitmap_scnprintf(buf, sizeof(buf), (unsigned long *)di->availability_info, UWB_NUM_MAS);
-
-	d_printf(1, dev, "DI[%d]\n", idx);
-	d_printf(1, dev, "  availability: %s\n", buf);
-	d_printf(1, dev, "  %c%c key idx: %d dev addr: %d\n",
-		 (di->addr_sec_info & WHC_DI_SECURE) ? 'S' : ' ',
-		 (di->addr_sec_info & WHC_DI_DISABLE) ? 'D' : ' ',
-		 (di->addr_sec_info & WHC_DI_KEY_IDX_MASK) >> 8,
-		 (di->addr_sec_info & WHC_DI_DEV_ADDR_MASK));
-}
-#else
-static inline void dump_di(struct whc *whc, int idx)
-{
-}
-#endif
-
 static int whc_update_di(struct whc *whc, int idx)
 {
 	int offset = idx / 32;
 	u32 bit = 1 << (idx % 32);
 
-	dump_di(whc, idx);
-
 	le_writel(bit, whc->base + WUSBDIBUPDATED + offset);
 
 	return whci_wait_for(&whc->umc->dev,
@@ -64,8 +36,9 @@
 }
 
 /*
- * WHCI starts and stops MMCs based on there being a valid GTK so
- * these need only start/stop the asynchronous and periodic schedules.
+ * WHCI starts MMCs based on there being a valid GTK, so these functions
+ * need only start/stop the asynchronous and periodic schedules and, when
+ * stopping, send a channel stop command.
  */
 
 int whc_wusbhc_start(struct wusbhc *wusbhc)
@@ -78,12 +51,20 @@
 	return 0;
 }
 
-void whc_wusbhc_stop(struct wusbhc *wusbhc)
+void whc_wusbhc_stop(struct wusbhc *wusbhc, int delay)
 {
 	struct whc *whc = wusbhc_to_whc(wusbhc);
+	u32 stop_time, now_time;
+	int ret;
 
 	pzl_stop(whc);
 	asl_stop(whc);
+
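+	/*
+	 * Schedule the channel stop 'delay' ms ahead of the current channel
+	 * time: (delay * 8) << 7 == delay * 1024, roughly delay ms assuming
+	 * microsecond channel time ticks, wrapped to the 24-bit counter.
+	 */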
+	now_time = le_readl(whc->base + WUSBTIME) & WUSBTIME_CHANNEL_TIME_MASK;
+	stop_time = (now_time + ((delay * 8) << 7)) & 0x00ffffff;
+	ret = whc_do_gencmd(whc, WUSBGENCMDSTS_CHAN_STOP, stop_time, NULL, 0);
+	if (ret == 0)
+		msleep(delay);
 }
 
 int whc_mmcie_add(struct wusbhc *wusbhc, u8 interval, u8 repeat_cnt,
diff --git a/drivers/usb/serial/console.c b/drivers/usb/serial/console.c
index 5b95009..19e2404 100644
--- a/drivers/usb/serial/console.c
+++ b/drivers/usb/serial/console.c
@@ -241,12 +241,25 @@
 	}
 }
 
+static struct tty_driver *usb_console_device(struct console *co, int *index)
+{
+	struct tty_driver **p = (struct tty_driver **)co->data;
+
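+	/* co->data points at usb_serial_tty_driver; bail out if it has not
+	   been allocated yet. */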
+	if (!*p)
+		return NULL;
+
+	*index = co->index;
+	return *p;
+}
+
 static struct console usbcons = {
 	.name =		"ttyUSB",
 	.write =	usb_console_write,
+	.device =	usb_console_device,
 	.setup =	usb_console_setup,
 	.flags =	CON_PRINTBUFFER,
 	.index =	-1,
+	.data = 	&usb_serial_tty_driver,
 };
 
 void usb_serial_console_disconnect(struct usb_serial *serial)
diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index fb6f293..ef6cfa5 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -1054,6 +1054,8 @@
 
 	if (copy_from_user(&new_serial, newinfo, sizeof(new_serial)))
 		return -EFAULT;
+
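+	/* Hold the BKL while we check and update the port's private state. */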
+	lock_kernel();
 	old_priv = *priv;
 
 	/* Do error checking and permission checking */
@@ -1069,8 +1071,10 @@
 	}
 
 	if ((new_serial.baud_base != priv->baud_base) &&
-	    (new_serial.baud_base < 9600))
+	    (new_serial.baud_base < 9600)) {
+		unlock_kernel();
 		return -EINVAL;
+	}
 
 	/* Make the changes - these are privileged changes! */
 
@@ -1098,8 +1102,11 @@
 	     (priv->flags & ASYNC_SPD_MASK)) ||
 	    (((priv->flags & ASYNC_SPD_MASK) == ASYNC_SPD_CUST) &&
 	     (old_priv.custom_divisor != priv->custom_divisor))) {
+		unlock_kernel();
 		change_speed(tty, port);
-	}
+	} else
+		unlock_kernel();
 	return 0;
 
 } /* set_serial_info */
diff --git a/drivers/usb/serial/kl5kusb105.c b/drivers/usb/serial/kl5kusb105.c
index dc36a05..fcd9082 100644
--- a/drivers/usb/serial/kl5kusb105.c
+++ b/drivers/usb/serial/kl5kusb105.c
@@ -878,6 +878,7 @@
 
 	dbg("%sstate=%d", __func__, break_state);
 
+	/* LOCKING */
 	if (break_state)
 		lcr |= MCT_U232_SET_BREAK;
 
diff --git a/drivers/usb/serial/mct_u232.c b/drivers/usb/serial/mct_u232.c
index 07710cf..82930a7 100644
--- a/drivers/usb/serial/mct_u232.c
+++ b/drivers/usb/serial/mct_u232.c
@@ -721,10 +721,10 @@
 
 	spin_lock_irqsave(&priv->lock, flags);
 	lcr = priv->last_lcr;
-	spin_unlock_irqrestore(&priv->lock, flags);
 
 	if (break_state)
 		lcr |= MCT_U232_SET_BREAK;
+	spin_unlock_irqrestore(&priv->lock, flags);
 
 	mct_u232_set_line_ctrl(serial, lcr);
 } /* mct_u232_break_ctl */
diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c
index fda4a64..96a8c77 100644
--- a/drivers/usb/serial/mos7840.c
+++ b/drivers/usb/serial/mos7840.c
@@ -1343,6 +1343,7 @@
 	else
 		data = mos7840_port->shadowLCR & ~LCR_SET_BREAK;
 
+	/* FIXME: no locking on shadowLCR anywhere in driver */
 	mos7840_port->shadowLCR = data;
 	dbg("mcs7840_break mos7840_port->shadowLCR is %x\n",
 	    mos7840_port->shadowLCR);
@@ -2214,10 +2215,12 @@
 		break;
 	}
 
+	lock_kernel();
 	mos7840_port->shadowMCR = mcr;
 
 	Data = mos7840_port->shadowMCR;
 	status = mos7840_set_uart_reg(port, MODEM_CONTROL_REGISTER, Data);
+	unlock_kernel();
 	if (status < 0) {
 		dbg("setting MODEM_CONTROL_REGISTER Failed\n");
 		return -1;
diff --git a/drivers/usb/serial/sierra.c b/drivers/usb/serial/sierra.c
index 0f2b672..d9bf9a5 100644
--- a/drivers/usb/serial/sierra.c
+++ b/drivers/usb/serial/sierra.c
@@ -442,7 +442,7 @@
 		    " endpoint %02x.", __func__, status, endpoint);
 	} else {
 		if (urb->actual_length) {
-		tty = tty_port_tty_get(&port->port);
+			tty = tty_port_tty_get(&port->port);
 			tty_buffer_request_room(tty, urb->actual_length);
 			tty_insert_flip_string(tty, data, urb->actual_length);
 			tty_flip_buffer_push(tty);
diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c
index 794b5ff..080ade2 100644
--- a/drivers/usb/serial/usb-serial.c
+++ b/drivers/usb/serial/usb-serial.c
@@ -269,15 +269,19 @@
 		return;
 	}
 
-	--port->port.count;
-	if (port->port.count == 0)
+	if (port->port.count == 1)
 		/* only call the device specific close if this
-		 * port is being closed by the last owner */
+		 * port is being closed by the last owner. Ensure we do
+		 * this before we drop the port count. The call is protected
+		 * by the port mutex.
+		 */
 		port->serial->type->close(tty, port, filp);
 
-	if (port->port.count == (port->console? 1 : 0)) {
+	if (port->port.count == (port->console ? 2 : 1)) {
 		struct tty_struct *tty = tty_port_tty_get(&port->port);
 		if (tty) {
+			/* We must do this before we drop the port count to
+			   zero. */
 			if (tty->driver_data)
 				tty->driver_data = NULL;
 			tty_port_tty_set(&port->port, NULL);
@@ -285,13 +289,14 @@
 		}
 	}
 
-	if (port->port.count == 0) {
+	if (port->port.count == 1) {
 		mutex_lock(&port->serial->disc_mutex);
 		if (!port->serial->disconnected)
 			usb_autopm_put_interface(port->serial->interface);
 		mutex_unlock(&port->serial->disc_mutex);
 		module_put(port->serial->type->driver.owner);
 	}
+	--port->port.count;
 
 	mutex_unlock(&port->mutex);
 	usb_serial_put(port->serial);
@@ -334,6 +339,10 @@
 	dbg("%s = port %d", __func__, port->number);
 
 	WARN_ON(!port->port.count);
+	/* if the device was unplugged then any remaining characters
+	   fell out of the connector ;) */
+	if (port->serial->disconnected)
+		return 0;
 	/* pass on to the driver specific version of this function */
 	return port->serial->type->chars_in_buffer(tty);
 }
@@ -373,9 +382,7 @@
 	/* pass on to the driver specific version of this function
 	   if it is available */
 	if (port->serial->type->ioctl) {
-		lock_kernel();
 		retval = port->serial->type->ioctl(tty, file, cmd, arg);
-		unlock_kernel();
 	} else
 		retval = -ENOIOCTLCMD;
 	return retval;
@@ -404,11 +411,8 @@
 	WARN_ON(!port->port.count);
 	/* pass on to the driver specific version of this function
 	   if it is available */
-	if (port->serial->type->break_ctl) {
-		lock_kernel();
+	if (port->serial->type->break_ctl)
 		port->serial->type->break_ctl(tty, break_state);
-		unlock_kernel();
-	}
 	return 0;
 }
 
diff --git a/drivers/usb/wusbcore/cbaf.c b/drivers/usb/wusbcore/cbaf.c
index ab4788d..1335cbe 100644
--- a/drivers/usb/wusbcore/cbaf.c
+++ b/drivers/usb/wusbcore/cbaf.c
@@ -88,7 +88,6 @@
  */
 #include <linux/module.h>
 #include <linux/ctype.h>
-#include <linux/version.h>
 #include <linux/usb.h>
 #include <linux/interrupt.h>
 #include <linux/delay.h>
diff --git a/drivers/usb/wusbcore/crypto.c b/drivers/usb/wusbcore/crypto.c
index c36c438..9ec7fd5 100644
--- a/drivers/usb/wusbcore/crypto.c
+++ b/drivers/usb/wusbcore/crypto.c
@@ -51,9 +51,17 @@
 #include <linux/uwb.h>
 #include <linux/usb/wusb.h>
 #include <linux/scatterlist.h>
-#define D_LOCAL 0
-#include <linux/uwb/debug.h>
 
+static int debug_crypto_verify = 0;
+
+module_param(debug_crypto_verify, int, 0);
+MODULE_PARM_DESC(debug_crypto_verify, "verify the key generation algorithms");
+
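+/* Hex dump helper for the failure paths of the self-tests below. */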
+static void wusb_key_dump(const void *buf, size_t len)
+{
+	print_hex_dump(KERN_ERR, "  ", DUMP_PREFIX_OFFSET, 16, 1,
+		       buf, len, 0);
+}
 
 /*
  * Block of data, as understood by AES-CCM
@@ -203,9 +211,6 @@
 	const u8 bzero[16] = { 0 };
 	size_t zero_padding;
 
-	d_fnstart(3, NULL, "(tfm_cbc %p, tfm_aes %p, mic %p, "
-		  "n %p, a %p, b %p, blen %zu)\n",
-		  tfm_cbc, tfm_aes, mic, n, a, b, blen);
 	/*
 	 * These checks should be compile time optimized out
 	 * ensure @a fills b1's mac_header and following fields
@@ -247,16 +252,6 @@
 	b1.la = cpu_to_be16(blen + 14);
 	memcpy(&b1.mac_header, a, sizeof(*a));
 
-	d_printf(4, NULL, "I: B0 (%zu bytes)\n", sizeof(b0));
-	d_dump(4, NULL, &b0, sizeof(b0));
-	d_printf(4, NULL, "I: B1 (%zu bytes)\n", sizeof(b1));
-	d_dump(4, NULL, &b1, sizeof(b1));
-	d_printf(4, NULL, "I: B (%zu bytes)\n", blen);
-	d_dump(4, NULL, b, blen);
-	d_printf(4, NULL, "I: B 0-padding (%zu bytes)\n", zero_padding);
-	d_printf(4, NULL, "D: IV before crypto (%zu)\n", ivsize);
-	d_dump(4, NULL, iv, ivsize);
-
 	sg_init_table(sg, ARRAY_SIZE(sg));
 	sg_set_buf(&sg[0], &b0, sizeof(b0));
 	sg_set_buf(&sg[1], &b1, sizeof(b1));
@@ -273,8 +268,6 @@
 		       result);
 		goto error_cbc_crypt;
 	}
-	d_printf(4, NULL, "D: MIC tag\n");
-	d_dump(4, NULL, iv, ivsize);
 
 	/* Now we crypt the MIC Tag (*iv) with Ax -- values per WUSB1.0[6.5]
 	 * The procedure is to AES crypt the A0 block and XOR the MIC
@@ -289,17 +282,10 @@
 	ax.counter = 0;
 	crypto_cipher_encrypt_one(tfm_aes, (void *)&ax, (void *)&ax);
 	bytewise_xor(mic, &ax, iv, 8);
-	d_printf(4, NULL, "D: CTR[MIC]\n");
-	d_dump(4, NULL, &ax, 8);
-	d_printf(4, NULL, "D: CCM-MIC tag\n");
-	d_dump(4, NULL, mic, 8);
 	result = 8;
 error_cbc_crypt:
 	kfree(dst_buf);
 error_dst_buf:
-	d_fnend(3, NULL, "(tfm_cbc %p, tfm_aes %p, mic %p, "
-		"n %p, a %p, b %p, blen %zu)\n",
-		tfm_cbc, tfm_aes, mic, n, a, b, blen);
 	return result;
 }
 
@@ -321,10 +307,6 @@
 	u64 sfn = 0;
 	__le64 sfn_le;
 
-	d_fnstart(3, NULL, "(out %p, out_size %zu, key %p, _n %p, "
-		  "a %p, b %p, blen %zu, len %zu)\n", out, out_size,
-		  key, _n, a, b, blen, len);
-
 	tfm_cbc = crypto_alloc_blkcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC);
 	if (IS_ERR(tfm_cbc)) {
 		result = PTR_ERR(tfm_cbc);
@@ -366,9 +348,6 @@
 error_setkey_cbc:
 	crypto_free_blkcipher(tfm_cbc);
 error_alloc_cbc:
-	d_fnend(3, NULL, "(out %p, out_size %zu, key %p, _n %p, "
-		"a %p, b %p, blen %zu, len %zu) = %d\n", out, out_size,
-		key, _n, a, b, blen, len, (int)bytes);
 	return result;
 }
 
@@ -422,14 +401,14 @@
 		       "mismatch between MIC result and WUSB1.0[A2]\n");
 		hs_size = sizeof(stv_hsmic_hs) - sizeof(stv_hsmic_hs.MIC);
 		printk(KERN_ERR "E: Handshake2 in: (%zu bytes)\n", hs_size);
-		dump_bytes(NULL, &stv_hsmic_hs, hs_size);
+		wusb_key_dump(&stv_hsmic_hs, hs_size);
 		printk(KERN_ERR "E: CCM Nonce in: (%zu bytes)\n",
 		       sizeof(stv_hsmic_n));
-		dump_bytes(NULL, &stv_hsmic_n, sizeof(stv_hsmic_n));
+		wusb_key_dump(&stv_hsmic_n, sizeof(stv_hsmic_n));
 		printk(KERN_ERR "E: MIC out:\n");
-		dump_bytes(NULL, mic, sizeof(mic));
+		wusb_key_dump(mic, sizeof(mic));
 		printk(KERN_ERR "E: MIC out (from WUSB1.0[A.2]):\n");
-		dump_bytes(NULL, stv_hsmic_hs.MIC, sizeof(stv_hsmic_hs.MIC));
+		wusb_key_dump(stv_hsmic_hs.MIC, sizeof(stv_hsmic_hs.MIC));
 		result = -EINVAL;
 	} else
 		result = 0;
@@ -497,19 +476,16 @@
 		printk(KERN_ERR "E: WUSB key derivation test: "
 		       "mismatch between key derivation result "
 		       "and WUSB1.0[A1] Errata 2006/12\n");
-		printk(KERN_ERR "E: keydvt in: key (%zu bytes)\n",
-		       sizeof(stv_key_a1));
-		dump_bytes(NULL, stv_key_a1, sizeof(stv_key_a1));
-		printk(KERN_ERR "E: keydvt in: nonce (%zu bytes)\n",
-		       sizeof(stv_keydvt_n_a1));
-		dump_bytes(NULL, &stv_keydvt_n_a1, sizeof(stv_keydvt_n_a1));
-		printk(KERN_ERR "E: keydvt in: hnonce & dnonce (%zu bytes)\n",
-		       sizeof(stv_keydvt_in_a1));
-		dump_bytes(NULL, &stv_keydvt_in_a1, sizeof(stv_keydvt_in_a1));
+		printk(KERN_ERR "E: keydvt in: key\n");
+		wusb_key_dump(stv_key_a1, sizeof(stv_key_a1));
+		printk(KERN_ERR "E: keydvt in: nonce\n");
+		wusb_key_dump(&stv_keydvt_n_a1, sizeof(stv_keydvt_n_a1));
+		printk(KERN_ERR "E: keydvt in: hnonce & dnonce\n");
+		wusb_key_dump(&stv_keydvt_in_a1, sizeof(stv_keydvt_in_a1));
 		printk(KERN_ERR "E: keydvt out: KCK\n");
-		dump_bytes(NULL, &keydvt_out.kck, sizeof(keydvt_out.kck));
+		wusb_key_dump(&keydvt_out.kck, sizeof(keydvt_out.kck));
 		printk(KERN_ERR "E: keydvt out: PTK\n");
-		dump_bytes(NULL, &keydvt_out.ptk, sizeof(keydvt_out.ptk));
+		wusb_key_dump(&keydvt_out.ptk, sizeof(keydvt_out.ptk));
 		result = -EINVAL;
 	} else
 		result = 0;
@@ -526,10 +502,13 @@
 {
 	int result;
 
-	result = wusb_key_derive_verify();
-	if (result < 0)
-		return result;
-	return wusb_oob_mic_verify();
+	if (debug_crypto_verify) {
+		result = wusb_key_derive_verify();
+		if (result < 0)
+			return result;
+		return wusb_oob_mic_verify();
+	}
+	return 0;
 }
 
 void wusb_crypto_exit(void)
diff --git a/drivers/usb/wusbcore/dev-sysfs.c b/drivers/usb/wusbcore/dev-sysfs.c
index 7897a19..1018345 100644
--- a/drivers/usb/wusbcore/dev-sysfs.c
+++ b/drivers/usb/wusbcore/dev-sysfs.c
@@ -28,10 +28,6 @@
 #include <linux/workqueue.h>
 #include "wusbhc.h"
 
-#undef D_LOCAL
-#define D_LOCAL 4
-#include <linux/uwb/debug.h>
-
 static ssize_t wusb_disconnect_store(struct device *dev,
 				     struct device_attribute *attr,
 				     const char *buf, size_t size)
diff --git a/drivers/usb/wusbcore/devconnect.c b/drivers/usb/wusbcore/devconnect.c
index f45d777..e2e7e4b 100644
--- a/drivers/usb/wusbcore/devconnect.c
+++ b/drivers/usb/wusbcore/devconnect.c
@@ -57,9 +57,6 @@
  *                              Called by notif.c:wusb_handle_dn_connect()
  *                              when a DN_Connect is received.
  *
- *   wusbhc_devconnect_auth()   Called by rh.c:wusbhc_rh_port_reset() when
- *                              doing the device connect sequence.
- *
  *     wusb_devconnect_acked()  Ack done, release resources.
  *
  *   wusb_handle_dn_alive()     Called by notif.c:wusb_handle_dn()
@@ -69,9 +66,6 @@
  *                              process a disconenct request from a
  *                              device.
  *
- *   wusb_dev_reset()           Called by rh.c:wusbhc_rh_port_reset() when
- *                              resetting a device.
- *
  *   __wusb_dev_disable()       Called by rh.c:wusbhc_rh_clear_port_feat() when
  *                              disabling a port.
  *
@@ -97,10 +91,6 @@
 #include <linux/workqueue.h>
 #include "wusbhc.h"
 
-#undef D_LOCAL
-#define D_LOCAL 1
-#include <linux/uwb/debug.h>
-
 static void wusbhc_devconnect_acked_work(struct work_struct *work);
 
 static void wusb_dev_free(struct wusb_dev *wusb_dev)
@@ -240,6 +230,7 @@
 	list_add_tail(&wusb_dev->cack_node, &wusbhc->cack_list);
 	wusbhc->cack_count++;
 	wusbhc_fill_cack_ie(wusbhc);
+
 	return wusb_dev;
 }
 
@@ -250,12 +241,9 @@
  */
 static void wusbhc_cack_rm(struct wusbhc *wusbhc, struct wusb_dev *wusb_dev)
 {
-	struct device *dev = wusbhc->dev;
-	d_fnstart(3, dev, "(wusbhc %p wusb_dev %p)\n", wusbhc, wusb_dev);
 	list_del_init(&wusb_dev->cack_node);
 	wusbhc->cack_count--;
 	wusbhc_fill_cack_ie(wusbhc);
-	d_fnend(3, dev, "(wusbhc %p wusb_dev %p) = void\n", wusbhc, wusb_dev);
 }
 
 /*
@@ -263,14 +251,11 @@
 static
 void wusbhc_devconnect_acked(struct wusbhc *wusbhc, struct wusb_dev *wusb_dev)
 {
-	struct device *dev = wusbhc->dev;
-	d_fnstart(3, dev, "(wusbhc %p wusb_dev %p)\n", wusbhc, wusb_dev);
 	wusbhc_cack_rm(wusbhc, wusb_dev);
 	if (wusbhc->cack_count)
 		wusbhc_mmcie_set(wusbhc, 0, 0, &wusbhc->cack_ie.hdr);
 	else
 		wusbhc_mmcie_rm(wusbhc, &wusbhc->cack_ie.hdr);
-	d_fnend(3, dev, "(wusbhc %p wusb_dev %p) = void\n", wusbhc, wusb_dev);
 }
 
 static void wusbhc_devconnect_acked_work(struct work_struct *work)
@@ -320,7 +305,6 @@
 	struct wusb_port *port;
 	unsigned idx, devnum;
 
-	d_fnstart(3, dev, "(%p, %p, %s)\n", wusbhc, dnc, pr_cdid);
 	mutex_lock(&wusbhc->mutex);
 
 	/* Check we are not handling it already */
@@ -366,16 +350,13 @@
 	port->wusb_dev = wusb_dev;
 	port->status |= USB_PORT_STAT_CONNECTION;
 	port->change |= USB_PORT_STAT_C_CONNECTION;
-	port->reset_count = 0;
 	/* Now the port status changed to connected; khubd will
 	 * pick the change up and try to reset the port to bring it to
 	 * the enabled state--so this process returns up to the stack
-	 * and it calls back into wusbhc_rh_port_reset() who will call
-	 * devconnect_auth().
+	 * and it calls back into wusbhc_rh_port_reset().
 	 */
 error_unlock:
 	mutex_unlock(&wusbhc->mutex);
-	d_fnend(3, dev, "(%p, %p, %s) = void\n", wusbhc, dnc, pr_cdid);
 	return;
 
 }
@@ -398,10 +379,8 @@
 static void __wusbhc_dev_disconnect(struct wusbhc *wusbhc,
 				    struct wusb_port *port)
 {
-	struct device *dev = wusbhc->dev;
 	struct wusb_dev *wusb_dev = port->wusb_dev;
 
-	d_fnstart(3, dev, "(wusbhc %p, port %p)\n", wusbhc, port);
 	port->status &= ~(USB_PORT_STAT_CONNECTION | USB_PORT_STAT_ENABLE
 			  | USB_PORT_STAT_SUSPEND | USB_PORT_STAT_RESET
 			  | USB_PORT_STAT_LOW_SPEED | USB_PORT_STAT_HIGH_SPEED);
@@ -413,15 +392,11 @@
 		wusb_dev_put(wusb_dev);
 	}
 	port->wusb_dev = NULL;
-	/* don't reset the reset_count to zero or wusbhc_rh_port_reset will get
-	 * confused! We only reset to zero when we connect a new device.
-	 */
 
 	/* After a device disconnects, change the GTK (see [WUSB]
 	 * section 6.2.11.2). */
 	wusbhc_gtk_rekey(wusbhc);
 
-	d_fnend(3, dev, "(wusbhc %p, port %p) = void\n", wusbhc, port);
 	/* The Wireless USB part has forgotten about the device already; now
 	 * khubd's timer will pick up the disconnection and remove the USB
 	 * device from the system
@@ -429,39 +404,6 @@
 }
 
 /*
- * Authenticate a device into the WUSB Cluster
- *
- * Called from the Root Hub code (rh.c:wusbhc_rh_port_reset()) when
- * asking for a reset on a port that is not enabled (ie: first connect
- * on the port).
- *
- * Performs the 4way handshake to allow the device to comunicate w/ the
- * WUSB Cluster securely; once done, issue a request to the device for
- * it to change to address 0.
- *
- * This mimics the reset step of Wired USB that once resetting a
- * device, leaves the port in enabled state and the dev with the
- * default address (0).
- *
- * WUSB1.0[7.1.2]
- *
- * @port_idx: port where the change happened--This is the index into
- *            the wusbhc port array, not the USB port number.
- */
-int wusbhc_devconnect_auth(struct wusbhc *wusbhc, u8 port_idx)
-{
-	struct device *dev = wusbhc->dev;
-	struct wusb_port *port = wusb_port_by_idx(wusbhc, port_idx);
-
-	d_fnstart(3, dev, "(%p, %u)\n", wusbhc, port_idx);
-	port->status &= ~USB_PORT_STAT_RESET;
-	port->status |= USB_PORT_STAT_ENABLE;
-	port->change |= USB_PORT_STAT_C_RESET | USB_PORT_STAT_C_ENABLE;
-	d_fnend(3, dev, "(%p, %u) = 0\n", wusbhc, port_idx);
-	return 0;
-}
-
-/*
  * Refresh the list of keep alives to emit in the MMC
  *
  * Some devices don't respond to keep alives unless they've been
@@ -528,21 +470,15 @@
  */
 static void wusbhc_keep_alive_run(struct work_struct *ws)
 {
-	struct delayed_work *dw =
-		container_of(ws, struct delayed_work, work);
-	struct wusbhc *wusbhc =
-		container_of(dw, struct wusbhc, keep_alive_timer);
+	struct delayed_work *dw = container_of(ws, struct delayed_work, work);
+	struct wusbhc *wusbhc = container_of(dw, struct wusbhc, keep_alive_timer);
 
-	d_fnstart(5, wusbhc->dev, "(wusbhc %p)\n", wusbhc);
-	if (wusbhc->active) {
-		mutex_lock(&wusbhc->mutex);
-		__wusbhc_keep_alive(wusbhc);
-		mutex_unlock(&wusbhc->mutex);
-		queue_delayed_work(wusbd, &wusbhc->keep_alive_timer,
-				   (wusbhc->trust_timeout * CONFIG_HZ)/1000/2);
-	}
-	d_fnend(5, wusbhc->dev, "(wusbhc %p) = void\n", wusbhc);
-	return;
+	mutex_lock(&wusbhc->mutex);
+	__wusbhc_keep_alive(wusbhc);
+	mutex_unlock(&wusbhc->mutex);
+
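+	/* Re-arm ourselves at half the trust timeout (in ms). */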
+	queue_delayed_work(wusbd, &wusbhc->keep_alive_timer,
+			   msecs_to_jiffies(wusbhc->trust_timeout / 2));
 }
 
 /*
@@ -585,10 +521,6 @@
  */
 static void wusbhc_handle_dn_alive(struct wusbhc *wusbhc, struct wusb_dev *wusb_dev)
 {
-	struct device *dev = wusbhc->dev;
-
-	d_printf(2, dev, "DN ALIVE: device 0x%02x pong\n", wusb_dev->addr);
-
 	mutex_lock(&wusbhc->mutex);
 	wusb_dev->entry_ts = jiffies;
 	__wusbhc_keep_alive(wusbhc);
@@ -621,11 +553,10 @@
 		"no-beacon"
 	};
 
-	d_fnstart(3, dev, "(%p, %p, %zu)\n", wusbhc, dn_hdr, size);
 	if (size < sizeof(*dnc)) {
 		dev_err(dev, "DN CONNECT: short notification (%zu < %zu)\n",
 			size, sizeof(*dnc));
-		goto out;
+		return;
 	}
 
 	dnc = container_of(dn_hdr, struct wusb_dn_connect, hdr);
@@ -637,10 +568,6 @@
 		 wusb_dn_connect_new_connection(dnc) ? "connect" : "reconnect");
 	/* ACK the connect */
 	wusbhc_devconnect_ack(wusbhc, dnc, pr_cdid);
-out:
-	d_fnend(3, dev, "(%p, %p, %zu) = void\n",
-		wusbhc, dn_hdr, size);
-	return;
 }
 
 /*
@@ -662,60 +589,6 @@
 }
 
 /*
- * Reset a WUSB device on a HWA
- *
- * @wusbhc
- * @port_idx   Index of the port where the device is
- *
- * In Wireless USB, a reset is more or less equivalent to a full
- * disconnect; so we just do a full disconnect and send the device a
- * Device Reset IE (WUSB1.0[7.5.11]) giving it a few millisecs (6 MMCs).
- *
- * @wusbhc should be refcounted and unlocked
- */
-int wusbhc_dev_reset(struct wusbhc *wusbhc, u8 port_idx)
-{
-	int result;
-	struct device *dev = wusbhc->dev;
-	struct wusb_dev *wusb_dev;
-	struct wuie_reset *ie;
-
-	d_fnstart(3, dev, "(%p, %u)\n", wusbhc, port_idx);
-	mutex_lock(&wusbhc->mutex);
-	result = 0;
-	wusb_dev = wusb_port_by_idx(wusbhc, port_idx)->wusb_dev;
-	if (wusb_dev == NULL) {
-		/* reset no device? ignore */
-		dev_dbg(dev, "RESET: no device at port %u, ignoring\n",
-			port_idx);
-		goto error_unlock;
-	}
-	result = -ENOMEM;
-	ie = kzalloc(sizeof(*ie), GFP_KERNEL);
-	if (ie == NULL)
-		goto error_unlock;
-	ie->hdr.bLength = sizeof(ie->hdr) + sizeof(ie->CDID);
-	ie->hdr.bIEIdentifier = WUIE_ID_RESET_DEVICE;
-	ie->CDID = wusb_dev->cdid;
-	result = wusbhc_mmcie_set(wusbhc, 0xff, 6, &ie->hdr);
-	if (result < 0) {
-		dev_err(dev, "RESET: cant's set MMC: %d\n", result);
-		goto error_kfree;
-	}
-	__wusbhc_dev_disconnect(wusbhc, wusb_port_by_idx(wusbhc, port_idx));
-
-	/* 120ms, hopefully 6 MMCs (FIXME) */
-	msleep(120);
-	wusbhc_mmcie_rm(wusbhc, &ie->hdr);
-error_kfree:
-	kfree(ie);
-error_unlock:
-	mutex_unlock(&wusbhc->mutex);
-	d_fnend(3, dev, "(%p, %u) = %d\n", wusbhc, port_idx, result);
-	return result;
-}
-
-/*
  * Handle a Device Notification coming from a host
  *
  * The Device Notification comes from a host (HWA, DWA or WHCI)
@@ -735,19 +608,17 @@
 	struct device *dev = wusbhc->dev;
 	struct wusb_dev *wusb_dev;
 
-	d_fnstart(3, dev, "(%p, %p)\n", wusbhc, dn_hdr);
-
 	if (size < sizeof(struct wusb_dn_hdr)) {
 		dev_err(dev, "DN data shorter than DN header (%d < %d)\n",
 			(int)size, (int)sizeof(struct wusb_dn_hdr));
-		goto out;
+		return;
 	}
 
 	wusb_dev = wusbhc_find_dev_by_addr(wusbhc, srcaddr);
 	if (wusb_dev == NULL && dn_hdr->bType != WUSB_DN_CONNECT) {
 		dev_dbg(dev, "ignoring DN %d from unconnected device %02x\n",
 			dn_hdr->bType, srcaddr);
-		goto out;
+		return;
 	}
 
 	switch (dn_hdr->bType) {
@@ -772,9 +643,6 @@
 		dev_warn(dev, "unknown DN %u (%d octets) from %u\n",
 			 dn_hdr->bType, (int)size, srcaddr);
 	}
-out:
-	d_fnend(3, dev, "(%p, %p) = void\n", wusbhc, dn_hdr);
-	return;
 }
 EXPORT_SYMBOL_GPL(wusbhc_handle_dn);
 
@@ -804,59 +672,30 @@
 	struct wusb_dev *wusb_dev;
 	struct wuie_disconnect *ie;
 
-	d_fnstart(3, dev, "(%p, %u)\n", wusbhc, port_idx);
-	result = 0;
 	wusb_dev = wusb_port_by_idx(wusbhc, port_idx)->wusb_dev;
 	if (wusb_dev == NULL) {
 		/* reset no device? ignore */
 		dev_dbg(dev, "DISCONNECT: no device at port %u, ignoring\n",
 			port_idx);
-		goto error;
+		return;
 	}
 	__wusbhc_dev_disconnect(wusbhc, wusb_port_by_idx(wusbhc, port_idx));
 
-	result = -ENOMEM;
 	ie = kzalloc(sizeof(*ie), GFP_KERNEL);
 	if (ie == NULL)
-		goto error;
+		return;
 	ie->hdr.bLength = sizeof(*ie);
 	ie->hdr.bIEIdentifier = WUIE_ID_DEVICE_DISCONNECT;
 	ie->bDeviceAddress = wusb_dev->addr;
 	result = wusbhc_mmcie_set(wusbhc, 0, 0, &ie->hdr);
-	if (result < 0) {
+	if (result < 0)
 		dev_err(dev, "DISCONNECT: can't set MMC: %d\n", result);
-		goto error_kfree;
+	else {
+		/* At least 6 MMCs, assuming at least 1 MMC per zone. */
+		msleep(7*4);
+		wusbhc_mmcie_rm(wusbhc, &ie->hdr);
 	}
-
-	/* 120ms, hopefully 6 MMCs */
-	msleep(100);
-	wusbhc_mmcie_rm(wusbhc, &ie->hdr);
-error_kfree:
 	kfree(ie);
-error:
-	d_fnend(3, dev, "(%p, %u) = %d\n", wusbhc, port_idx, result);
-	return;
-}
-
-static void wusb_cap_descr_printf(const unsigned level, struct device *dev,
-				  const struct usb_wireless_cap_descriptor *wcd)
-{
-	d_printf(level, dev,
-		 "WUSB Capability Descriptor\n"
-		 "  bDevCapabilityType          0x%02x\n"
-		 "  bmAttributes                0x%02x\n"
-		 "  wPhyRates                   0x%04x\n"
-		 "  bmTFITXPowerInfo            0x%02x\n"
-		 "  bmFFITXPowerInfo            0x%02x\n"
-		 "  bmBandGroup                 0x%04x\n"
-		 "  bReserved                   0x%02x\n",
-		 wcd->bDevCapabilityType,
-		 wcd->bmAttributes,
-		 le16_to_cpu(wcd->wPHYRates),
-		 wcd->bmTFITXPowerInfo,
-		 wcd->bmFFITXPowerInfo,
-		 wcd->bmBandGroup,
-		 wcd->bReserved);
 }
 
 /*
@@ -899,8 +738,6 @@
 		}
 		cap_size = cap_hdr->bLength;
 		cap_type = cap_hdr->bDevCapabilityType;
-		d_printf(4, dev, "BOS Capability: 0x%02x (%zu bytes)\n",
-			 cap_type, cap_size);
 		if (cap_size == 0)
 			break;
 		if (cap_size > top - itr) {
@@ -912,7 +749,6 @@
 			result = -EBADF;
 			goto error_bad_cap;
 		}
-		d_dump(3, dev, itr, cap_size);
 		switch (cap_type) {
 		case USB_CAP_TYPE_WIRELESS_USB:
 			if (cap_size != sizeof(*wusb_dev->wusb_cap_descr))
@@ -920,10 +756,8 @@
 					"descriptor is %zu bytes vs %zu "
 					"needed\n", cap_size,
 					sizeof(*wusb_dev->wusb_cap_descr));
-			else {
+			else
 				wusb_dev->wusb_cap_descr = itr;
-				wusb_cap_descr_printf(3, dev, itr);
-			}
 			break;
 		default:
 			dev_err(dev, "BUG? Unknown BOS capability 0x%02x "
@@ -988,9 +822,7 @@
 			"%zu bytes): %zd\n", desc_size, result);
 		goto error_get_descriptor;
 	}
-	d_printf(2, dev, "Got BOS descriptor %zd bytes, %u capabilities\n",
-		 result, bos->bNumDeviceCaps);
-	d_dump(2, dev, bos, result);
+
 	result = wusb_dev_bos_grok(usb_dev, wusb_dev, bos, result);
 	if (result < 0)
 		goto error_bad_bos;
@@ -1056,8 +888,6 @@
 	if (usb_dev->wusb == 0 || usb_dev->devnum == 1)
 		return;		/* skip non wusb and wusb RHs */
 
-	d_fnstart(3, dev, "(usb_dev %p)\n", usb_dev);
-
 	wusbhc = wusbhc_get_by_usb_dev(usb_dev);
 	if (wusbhc == NULL)
 		goto error_nodev;
@@ -1087,7 +917,6 @@
 	wusb_dev_put(wusb_dev);
 	wusbhc_put(wusbhc);
 error_nodev:
-	d_fnend(3, dev, "(usb_dev %p) = void\n", usb_dev);
 	return;
 
 	wusb_dev_sysfs_rm(wusb_dev);
@@ -1174,11 +1003,10 @@
 
 void wusb_dev_destroy(struct kref *_wusb_dev)
 {
-	struct wusb_dev *wusb_dev
-		= container_of(_wusb_dev, struct wusb_dev, refcnt);
+	struct wusb_dev *wusb_dev = container_of(_wusb_dev, struct wusb_dev, refcnt);
+
 	list_del_init(&wusb_dev->cack_node);
 	wusb_dev_free(wusb_dev);
-	d_fnend(1, NULL, "%s (wusb_dev %p) = void\n", __func__, wusb_dev);
 }
 EXPORT_SYMBOL_GPL(wusb_dev_destroy);
 
@@ -1190,8 +1018,6 @@
  */
 int wusbhc_devconnect_create(struct wusbhc *wusbhc)
 {
-	d_fnstart(3, wusbhc->dev, "(wusbhc %p)\n", wusbhc);
-
 	wusbhc->keep_alive_ie.hdr.bIEIdentifier = WUIE_ID_KEEP_ALIVE;
 	wusbhc->keep_alive_ie.hdr.bLength = sizeof(wusbhc->keep_alive_ie.hdr);
 	INIT_DELAYED_WORK(&wusbhc->keep_alive_timer, wusbhc_keep_alive_run);
@@ -1200,7 +1026,6 @@
 	wusbhc->cack_ie.hdr.bLength = sizeof(wusbhc->cack_ie.hdr);
 	INIT_LIST_HEAD(&wusbhc->cack_list);
 
-	d_fnend(3, wusbhc->dev, "(wusbhc %p) = void\n", wusbhc);
 	return 0;
 }
 
@@ -1209,8 +1034,7 @@
  */
 void wusbhc_devconnect_destroy(struct wusbhc *wusbhc)
 {
-	d_fnstart(3, wusbhc->dev, "(wusbhc %p)\n", wusbhc);
-	d_fnend(3, wusbhc->dev, "(wusbhc %p) = void\n", wusbhc);
+	/* no op */
 }
 
 /*
@@ -1222,8 +1046,7 @@
  * FIXME: This also enables the keep alives but this is not necessary
  * until there are connected and authenticated devices.
  */
-int wusbhc_devconnect_start(struct wusbhc *wusbhc,
-			    const struct wusb_ckhdid *chid)
+int wusbhc_devconnect_start(struct wusbhc *wusbhc)
 {
 	struct device *dev = wusbhc->dev;
 	struct wuie_host_info *hi;
@@ -1236,7 +1059,7 @@
 	hi->hdr.bLength       = sizeof(*hi);
 	hi->hdr.bIEIdentifier = WUIE_ID_HOST_INFO;
 	hi->attributes        = cpu_to_le16((wusbhc->rsv->stream << 3) | WUIE_HI_CAP_ALL);
-	hi->CHID              = *chid;
+	hi->CHID              = wusbhc->chid;
 	result = wusbhc_mmcie_set(wusbhc, 0, 0, &hi->hdr);
 	if (result < 0) {
 		dev_err(dev, "Cannot add Host Info MMCIE: %d\n", result);
diff --git a/drivers/usb/wusbcore/mmc.c b/drivers/usb/wusbcore/mmc.c
index cfa77a0..3b52161 100644
--- a/drivers/usb/wusbcore/mmc.c
+++ b/drivers/usb/wusbcore/mmc.c
@@ -159,15 +159,35 @@
 }
 EXPORT_SYMBOL_GPL(wusbhc_mmcie_rm);
 
+static int wusbhc_mmc_start(struct wusbhc *wusbhc)
+{
+	int ret;
+
+	mutex_lock(&wusbhc->mutex);
+	ret = wusbhc->start(wusbhc);
+	if (ret >= 0)
+		wusbhc->active = 1;
+	mutex_unlock(&wusbhc->mutex);
+
+	return ret;
+}
+
+static void wusbhc_mmc_stop(struct wusbhc *wusbhc)
+{
+	mutex_lock(&wusbhc->mutex);
+	wusbhc->active = 0;
+	wusbhc->stop(wusbhc, WUSB_CHANNEL_STOP_DELAY_MS);
+	mutex_unlock(&wusbhc->mutex);
+}
+
 /*
  * wusbhc_start - start transmitting MMCs and accepting connections
  * @wusbhc: the HC to start
- * @chid: the CHID to use for this host
  *
  * Establishes a cluster reservation, enables device connections, and
  * starts MMCs with appropriate DNTS parameters.
  */
-int wusbhc_start(struct wusbhc *wusbhc, const struct wusb_ckhdid *chid)
+int wusbhc_start(struct wusbhc *wusbhc)
 {
 	int result;
 	struct device *dev = wusbhc->dev;
@@ -181,7 +201,7 @@
 		goto error_rsv_establish;
 	}
 
-	result = wusbhc_devconnect_start(wusbhc, chid);
+	result = wusbhc_devconnect_start(wusbhc);
 	if (result < 0) {
 		dev_err(dev, "error enabling device connections: %d\n", result);
 		goto error_devconnect_start;
@@ -199,12 +219,12 @@
 		dev_err(dev, "Cannot set DNTS parameters: %d\n", result);
 		goto error_set_num_dnts;
 	}
-	result = wusbhc->start(wusbhc);
+	result = wusbhc_mmc_start(wusbhc);
 	if (result < 0) {
 		dev_err(dev, "error starting wusbch: %d\n", result);
 		goto error_wusbhc_start;
 	}
-	wusbhc->active = 1;
+
 	return 0;
 
 error_wusbhc_start:
@@ -219,76 +239,17 @@
 }
 
 /*
- * Disconnect all from the WUSB Channel
- *
- * Send a Host Disconnect IE in the MMC, wait, don't send it any more
- */
-static int __wusbhc_host_disconnect_ie(struct wusbhc *wusbhc)
-{
-	int result = -ENOMEM;
-	struct wuie_host_disconnect *host_disconnect_ie;
-	might_sleep();
-	host_disconnect_ie = kmalloc(sizeof(*host_disconnect_ie), GFP_KERNEL);
-	if (host_disconnect_ie == NULL)
-		goto error_alloc;
-	host_disconnect_ie->hdr.bLength       = sizeof(*host_disconnect_ie);
-	host_disconnect_ie->hdr.bIEIdentifier = WUIE_ID_HOST_DISCONNECT;
-	result = wusbhc_mmcie_set(wusbhc, 0, 0, &host_disconnect_ie->hdr);
-	if (result < 0)
-		goto error_mmcie_set;
-
-	/* WUSB1.0[8.5.3.1 & 7.5.2] */
-	msleep(100);
-	wusbhc_mmcie_rm(wusbhc, &host_disconnect_ie->hdr);
-error_mmcie_set:
-	kfree(host_disconnect_ie);
-error_alloc:
-	return result;
-}
-
-/*
  * wusbhc_stop - stop transmitting MMCs
  * @wusbhc: the HC to stop
  *
- * Send a Host Disconnect IE, wait, remove all the MMCs (stop sending MMCs).
- *
- * If we can't allocate a Host Stop IE, screw it, we don't notify the
- * devices we are disconnecting...
+ * Stops the WUSB channel and removes the cluster reservation.
  */
 void wusbhc_stop(struct wusbhc *wusbhc)
 {
-	if (wusbhc->active) {
-		wusbhc->active = 0;
-		wusbhc->stop(wusbhc);
-		wusbhc_sec_stop(wusbhc);
-		__wusbhc_host_disconnect_ie(wusbhc);
-		wusbhc_devconnect_stop(wusbhc);
-		wusbhc_rsv_terminate(wusbhc);
-	}
-}
-EXPORT_SYMBOL_GPL(wusbhc_stop);
-
-/*
- * Change the CHID in a WUSB Channel
- *
- * If it is just a new CHID, send a Host Disconnect IE and then change
- * the CHID IE.
- */
-static int __wusbhc_chid_change(struct wusbhc *wusbhc,
-				const struct wusb_ckhdid *chid)
-{
-	int result = -ENOSYS;
-	struct device *dev = wusbhc->dev;
-	dev_err(dev, "%s() not implemented yet\n", __func__);
-	return result;
-
-	BUG_ON(wusbhc->wuie_host_info == NULL);
-	__wusbhc_host_disconnect_ie(wusbhc);
-	wusbhc->wuie_host_info->CHID = *chid;
-	result = wusbhc_mmcie_set(wusbhc, 0, 0, &wusbhc->wuie_host_info->hdr);
-	if (result < 0)
-		dev_err(dev, "Can't update Host Info WUSB IE: %d\n", result);
-	return result;
+	wusbhc_mmc_stop(wusbhc);
+	wusbhc_sec_stop(wusbhc);
+	wusbhc_devconnect_stop(wusbhc);
+	wusbhc_rsv_terminate(wusbhc);
 }
 
 /*
@@ -306,16 +267,19 @@
 		chid = NULL;
 
 	mutex_lock(&wusbhc->mutex);
-	if (wusbhc->active) {
-		if (chid)
-			result = __wusbhc_chid_change(wusbhc, chid);
-		else
-			wusbhc_stop(wusbhc);
-	} else {
-		if (chid)
-			wusbhc_start(wusbhc, chid);
+	if (chid) {
+		if (wusbhc->active) {
+			mutex_unlock(&wusbhc->mutex);
+			return -EBUSY;
+		}
+		wusbhc->chid = *chid;
 	}
 	mutex_unlock(&wusbhc->mutex);
+
+	if (chid)
+		result = uwb_radio_start(&wusbhc->pal);
+	else
+		uwb_radio_stop(&wusbhc->pal);
 	return result;
 }
 EXPORT_SYMBOL_GPL(wusbhc_chid_set);
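
With the changes above, setting or clearing the CHID no longer starts or stops
the channel directly; a condensed view of the resulting control flow (the sysfs
path that ends up calling wusbhc_chid_set() is assumed, not shown in this hunk):

/*
 * Setting a CHID:
 *   wusbhc_chid_set()
 *     -> store wusbhc->chid under wusbhc->mutex (-EBUSY if already active)
 *     -> uwb_radio_start(&wusbhc->pal)
 *          -> pal->channel_changed(pal, channel >= 0)    [see pal.c below]
 *               -> wusbhc_start()
 *                    -> wusbhc_mmc_start(): wusbhc->start(), then active = 1
 *
 * Clearing the CHID takes the mirror path through uwb_radio_stop(),
 * channel_changed() reporting a negative channel, wusbhc_stop() and
 * wusbhc_mmc_stop() (active = 0, then
 * wusbhc->stop(wusbhc, WUSB_CHANNEL_STOP_DELAY_MS)).
 */
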
diff --git a/drivers/usb/wusbcore/pal.c b/drivers/usb/wusbcore/pal.c
index 7cc51e9..d0b172c 100644
--- a/drivers/usb/wusbcore/pal.c
+++ b/drivers/usb/wusbcore/pal.c
@@ -18,6 +18,16 @@
  */
 #include "wusbhc.h"
 
+static void wusbhc_channel_changed(struct uwb_pal *pal, int channel)
+{
+	struct wusbhc *wusbhc = container_of(pal, struct wusbhc, pal);
+
+	if (channel < 0)
+		wusbhc_stop(wusbhc);
+	else
+		wusbhc_start(wusbhc);
+}
+
 /**
  * wusbhc_pal_register - register the WUSB HC as a UWB PAL
  * @wusbhc: the WUSB HC
@@ -28,8 +38,10 @@
 
 	wusbhc->pal.name   = "wusbhc";
 	wusbhc->pal.device = wusbhc->usb_hcd.self.controller;
+	wusbhc->pal.rc     = wusbhc->uwb_rc;
+	wusbhc->pal.channel_changed = wusbhc_channel_changed;
 
-	return uwb_pal_register(wusbhc->uwb_rc, &wusbhc->pal);
+	return uwb_pal_register(&wusbhc->pal);
 }
 
 /**
@@ -38,5 +50,5 @@
  */
 void wusbhc_pal_unregister(struct wusbhc *wusbhc)
 {
-	uwb_pal_unregister(wusbhc->uwb_rc, &wusbhc->pal);
+	uwb_pal_unregister(&wusbhc->pal);
 }
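
A stripped-down sketch of the PAL registration pattern used above, for a
hypothetical PAL driver; everything other than the uwb_pal fields,
uwb_pal_register() and the channel_changed() callback shown in the hunks is
illustrative.

#include <linux/kernel.h>
#include <linux/device.h>
#include <linux/uwb.h>

struct example_pal {
	struct uwb_pal pal;
	struct uwb_rc *rc;	/* radio controller this PAL sits on */
	struct device *dev;
};

static void example_channel_changed(struct uwb_pal *pal, int channel)
{
	struct example_pal *ep = container_of(pal, struct example_pal, pal);

	if (channel < 0)
		dev_dbg(ep->dev, "radio stopped, quiescing PAL\n");
	else
		dev_dbg(ep->dev, "radio up on channel %d, starting PAL\n",
			channel);
}

static int example_pal_register(struct example_pal *ep)
{
	ep->pal.name            = "example";
	ep->pal.device          = ep->dev;
	ep->pal.rc              = ep->rc;
	ep->pal.channel_changed = example_channel_changed;

	return uwb_pal_register(&ep->pal);
}
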
diff --git a/drivers/usb/wusbcore/reservation.c b/drivers/usb/wusbcore/reservation.c
index fc63e77..4ed9736 100644
--- a/drivers/usb/wusbcore/reservation.c
+++ b/drivers/usb/wusbcore/reservation.c
@@ -48,18 +48,19 @@
 {
 	struct wusbhc *wusbhc = rsv->pal_priv;
 	struct device *dev = wusbhc->dev;
+	struct uwb_mas_bm mas;
 	char buf[72];
 
 	switch (rsv->state) {
 	case UWB_RSV_STATE_O_ESTABLISHED:
-		bitmap_scnprintf(buf, sizeof(buf), rsv->mas.bm, UWB_NUM_MAS);
+		uwb_rsv_get_usable_mas(rsv, &mas);
+		bitmap_scnprintf(buf, sizeof(buf), mas.bm, UWB_NUM_MAS);
 		dev_dbg(dev, "established reservation: %s\n", buf);
-		wusbhc_bwa_set(wusbhc, rsv->stream, &rsv->mas);
+		wusbhc_bwa_set(wusbhc, rsv->stream, &mas);
 		break;
 	case UWB_RSV_STATE_NONE:
 		dev_dbg(dev, "removed reservation\n");
 		wusbhc_bwa_set(wusbhc, 0, NULL);
-		wusbhc->rsv = NULL;
 		break;
 	default:
 		dev_dbg(dev, "unexpected reservation state: %d\n", rsv->state);
@@ -86,13 +87,12 @@
 	bcid.data[0] = wusbhc->cluster_id;
 	bcid.data[1] = 0;
 
-	rsv->owner = &rc->uwb_dev;
 	rsv->target.type = UWB_RSV_TARGET_DEVADDR;
 	rsv->target.devaddr = bcid;
 	rsv->type = UWB_DRP_TYPE_PRIVATE;
-	rsv->max_mas = 256;
-	rsv->min_mas = 16;  /* one MAS per zone? */
-	rsv->sparsity = 16; /* at least one MAS in each zone? */
+	rsv->max_mas = 256; /* try to get as much as possible */
+	rsv->min_mas = 15;  /* one MAS per zone */
+	rsv->max_interval = 1; /* max latency is one zone */
 	rsv->is_multicast = true;
 
 	ret = uwb_rsv_establish(rsv);
@@ -105,11 +105,14 @@
 
 
 /**
- * wusbhc_rsv_terminate - terminate any cluster reservation
+ * wusbhc_rsv_terminate - terminate the cluster reservation
  * @wusbhc: the WUSB host whose reservation is to be terminated
  */
 void wusbhc_rsv_terminate(struct wusbhc *wusbhc)
 {
-	if (wusbhc->rsv)
+	if (wusbhc->rsv) {
 		uwb_rsv_terminate(wusbhc->rsv);
+		uwb_rsv_destroy(wusbhc->rsv);
+		wusbhc->rsv = NULL;
+	}
 }
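
With the callback change above, the usable MAS set of an established
reservation is obtained with uwb_rsv_get_usable_mas() rather than read straight
from rsv->mas; a minimal sketch of such a state callback (the step that
programs the controller's bandwidth allocation is elided):

#include <linux/uwb.h>

static void example_rsv_cb(struct uwb_rsv *rsv)
{
	struct uwb_mas_bm mas;

	switch (rsv->state) {
	case UWB_RSV_STATE_O_ESTABLISHED:
		/* ask for the currently usable MAS set, not rsv->mas */
		uwb_rsv_get_usable_mas(rsv, &mas);
		/* ... program the bandwidth allocation from 'mas' ... */
		break;
	case UWB_RSV_STATE_NONE:
		/* ... reservation gone: clear the allocation ... */
		break;
	default:
		/* other states: nothing to do in this sketch */
		break;
	}
}
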
diff --git a/drivers/usb/wusbcore/rh.c b/drivers/usb/wusbcore/rh.c
index 267a643..95c6fa3 100644
--- a/drivers/usb/wusbcore/rh.c
+++ b/drivers/usb/wusbcore/rh.c
@@ -71,19 +71,20 @@
  */
 #include "wusbhc.h"
 
-#define D_LOCAL 0
-#include <linux/uwb/debug.h>
-
 /*
  * Reset a fake port
  *
- * This can be called to reset a port from any other state or to reset
- * it when connecting. In Wireless USB they are different; when doing
- * a new connect that involves going over the authentication. When
- * just reseting, its a different story.
+ * Using a Reset Device IE is too heavyweight as it causes the device
+ * to enter the UnConnected state and leave the cluster; this can mean
+ * that, when the device reconnects, it is connected to a different
+ * fake port.
  *
- * The Linux USB stack resets a port twice before it considers it
- * enabled, so we have to detect and ignore that.
+ * Instead, reset authenticated devices with a SetAddress(0), followed
+ * by a SetAddress(AuthAddr).
+ *
+ * For unauthenticated devices, just pretend to reset but do nothing.
+ * If the device initialization continues to fail, it will eventually
+ * time out after TrustTimeout and enter the UnConnected state.
  *
  * @wusbhc is assumed referenced and @wusbhc->mutex unlocked.
  *
@@ -97,20 +98,20 @@
 {
 	int result = 0;
 	struct wusb_port *port = wusb_port_by_idx(wusbhc, port_idx);
+	struct wusb_dev *wusb_dev = port->wusb_dev;
 
-	d_fnstart(3, wusbhc->dev, "(wusbhc %p port_idx %u)\n",
-		  wusbhc, port_idx);
-	if (port->reset_count == 0) {
-		wusbhc_devconnect_auth(wusbhc, port_idx);
-		port->reset_count++;
-	} else if (port->reset_count == 1)
-		/* see header */
-		d_printf(2, wusbhc->dev, "Ignoring second reset on port_idx "
-			"%u\n", port_idx);
+	port->status |= USB_PORT_STAT_RESET;
+	port->change |= USB_PORT_STAT_C_RESET;
+
+	if (wusb_dev->addr & WUSB_DEV_ADDR_UNAUTH)
+		result = 0;
 	else
-		result = wusbhc_dev_reset(wusbhc, port_idx);
-	d_fnend(3, wusbhc->dev, "(wusbhc %p port_idx %u) = %d\n",
-		wusbhc, port_idx, result);
+		result = wusb_dev_update_address(wusbhc, wusb_dev);
+
+	port->status &= ~USB_PORT_STAT_RESET;
+	port->status |= USB_PORT_STAT_ENABLE;
+	port->change |= USB_PORT_STAT_C_RESET | USB_PORT_STAT_C_ENABLE;
+
 	return result;
 }
 
@@ -138,7 +139,6 @@
 	size_t cnt, size;
 	unsigned long *buf = (unsigned long *) _buf;
 
-	d_fnstart(1, wusbhc->dev, "(wusbhc %p)\n", wusbhc);
 	/* WE DON'T LOCK, see comment */
 	size = wusbhc->ports_max + 1 /* hub bit */;
 	size = (size + 8 - 1) / 8;	/* round to bytes */
@@ -147,8 +147,6 @@
 			set_bit(cnt + 1, buf);
 		else
 			clear_bit(cnt + 1, buf);
-	d_fnend(1, wusbhc->dev, "(wusbhc %p) %u, buffer:\n", wusbhc, (int)size);
-	d_dump(1, wusbhc->dev, _buf, size);
 	return size;
 }
 EXPORT_SYMBOL_GPL(wusbhc_rh_status_data);
@@ -197,9 +195,7 @@
 static int wusbhc_rh_clear_hub_feat(struct wusbhc *wusbhc, u16 feature)
 {
 	int result;
-	struct device *dev = wusbhc->dev;
 
-	d_fnstart(4, dev, "(%p, feature 0x%04u)\n", wusbhc, feature);
 	switch (feature) {
 	case C_HUB_LOCAL_POWER:
 		/* FIXME: maybe plug bit 0 to the power input status,
@@ -211,7 +207,6 @@
 	default:
 		result = -EPIPE;
 	}
-	d_fnend(4, dev, "(%p, feature 0x%04u), %d\n", wusbhc, feature, result);
 	return result;
 }
 
@@ -238,14 +233,10 @@
 static int wusbhc_rh_set_port_feat(struct wusbhc *wusbhc, u16 feature,
 				   u8 selector, u8 port_idx)
 {
-	int result = -EINVAL;
 	struct device *dev = wusbhc->dev;
 
-	d_fnstart(4, dev, "(feat 0x%04u, selector 0x%u, port_idx %d)\n",
-		  feature, selector, port_idx);
-
 	if (port_idx > wusbhc->ports_max)
-		goto error;
+		return -EINVAL;
 
 	switch (feature) {
 		/* According to USB2.0[11.24.2.13]p2, these features
@@ -255,35 +246,27 @@
 	case USB_PORT_FEAT_C_SUSPEND:
 	case USB_PORT_FEAT_C_CONNECTION:
 	case USB_PORT_FEAT_C_RESET:
-		result = 0;
-		break;
-
+		return 0;
 	case USB_PORT_FEAT_POWER:
 		/* No such thing, but we fake it works */
 		mutex_lock(&wusbhc->mutex);
 		wusb_port_by_idx(wusbhc, port_idx)->status |= USB_PORT_STAT_POWER;
 		mutex_unlock(&wusbhc->mutex);
-		result = 0;
-		break;
+		return 0;
 	case USB_PORT_FEAT_RESET:
-		result = wusbhc_rh_port_reset(wusbhc, port_idx);
-		break;
+		return wusbhc_rh_port_reset(wusbhc, port_idx);
 	case USB_PORT_FEAT_ENABLE:
 	case USB_PORT_FEAT_SUSPEND:
 		dev_err(dev, "(port_idx %d) set feat %d/%d UNIMPLEMENTED\n",
 			port_idx, feature, selector);
-		result = -ENOSYS;
-		break;
+		return -ENOSYS;
 	default:
 		dev_err(dev, "(port_idx %d) set feat %d/%d UNKNOWN\n",
 			port_idx, feature, selector);
-		result = -EPIPE;
-		break;
+		return -EPIPE;
 	}
-error:
-	d_fnend(4, dev, "(feat 0x%04u, selector 0x%u, port_idx %d) = %d\n",
-		feature, selector, port_idx, result);
-	return result;
+
+	return 0;
 }
 
 /*
@@ -294,17 +277,13 @@
 static int wusbhc_rh_clear_port_feat(struct wusbhc *wusbhc, u16 feature,
 				     u8 selector, u8 port_idx)
 {
-	int result = -EINVAL;
+	int result = 0;
 	struct device *dev = wusbhc->dev;
 
-	d_fnstart(4, dev, "(wusbhc %p feat 0x%04x selector %d port_idx %d)\n",
-		  wusbhc, feature, selector, port_idx);
-
 	if (port_idx > wusbhc->ports_max)
-		goto error;
+		return -EINVAL;
 
 	mutex_lock(&wusbhc->mutex);
-	result = 0;
 	switch (feature) {
 	case USB_PORT_FEAT_POWER:	/* fake port always on */
 		/* According to USB2.0[11.24.2.7.1.4], no need to implement? */
@@ -324,10 +303,8 @@
 		break;
 	case USB_PORT_FEAT_SUSPEND:
 	case USB_PORT_FEAT_C_SUSPEND:
-	case 0xffff:		/* ??? FIXME */
 		dev_err(dev, "(port_idx %d) Clear feat %d/%d UNIMPLEMENTED\n",
 			port_idx, feature, selector);
-		/* dump_stack(); */
 		result = -ENOSYS;
 		break;
 	default:
@@ -337,9 +314,7 @@
 		break;
 	}
 	mutex_unlock(&wusbhc->mutex);
-error:
-	d_fnend(4, dev, "(wusbhc %p feat 0x%04x selector %d port_idx %d) = "
-		"%d\n", wusbhc, feature, selector, port_idx, result);
+
 	return result;
 }
 
@@ -351,22 +326,17 @@
 static int wusbhc_rh_get_port_status(struct wusbhc *wusbhc, u16 port_idx,
 				     u32 *_buf, u16 wLength)
 {
-	int result = -EINVAL;
 	u16 *buf = (u16 *) _buf;
 
-	d_fnstart(1, wusbhc->dev, "(wusbhc %p port_idx %u wLength %u)\n",
-		  wusbhc, port_idx, wLength);
 	if (port_idx > wusbhc->ports_max)
-		goto error;
+		return -EINVAL;
+
 	mutex_lock(&wusbhc->mutex);
 	buf[0] = cpu_to_le16(wusb_port_by_idx(wusbhc, port_idx)->status);
 	buf[1] = cpu_to_le16(wusb_port_by_idx(wusbhc, port_idx)->change);
-	result = 0;
 	mutex_unlock(&wusbhc->mutex);
-error:
-	d_fnend(1, wusbhc->dev, "(wusbhc %p) = %d, buffer:\n", wusbhc, result);
-	d_dump(1, wusbhc->dev, _buf, wLength);
-	return result;
+
+	return 0;
 }
 
 /*
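
The port-reset rework above reports everything through the fake port's
status/change words.  The bit bookkeeping in isolation looks like the sketch
below; struct fake_port is illustrative, and the USB_PORT_STAT_* /
USB_PORT_STAT_C_* constants are the standard hub-class port status bits used
in the hunk.

#include <linux/types.h>

struct fake_port {
	u16 status;	/* wPortStatus as later returned by GetPortStatus */
	u16 change;	/* wPortChange, accumulated until cleared */
};

/* A "reset" that completes immediately: flag the reset, then report the
 * port back as enabled, with both change bits set for the hub thread. */
static void fake_port_reset_done(struct fake_port *port)
{
	port->status |= USB_PORT_STAT_RESET;
	port->change |= USB_PORT_STAT_C_RESET;

	port->status &= ~USB_PORT_STAT_RESET;
	port->status |= USB_PORT_STAT_ENABLE;
	port->change |= USB_PORT_STAT_C_RESET | USB_PORT_STAT_C_ENABLE;
}
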
diff --git a/drivers/usb/wusbcore/security.c b/drivers/usb/wusbcore/security.c
index a101cad..f4aa28e 100644
--- a/drivers/usb/wusbcore/security.c
+++ b/drivers/usb/wusbcore/security.c
@@ -27,19 +27,6 @@
 #include <linux/random.h>
 #include "wusbhc.h"
 
-/*
- * DEBUG & SECURITY WARNING!!!!
- *
- * If you enable this past 1, the debug code will weaken the
- * cryptographic safety of the system (on purpose, for debugging).
- *
- * Weaken means:
- *   we print secret keys and intermediate values all the way,
- */
-#undef D_LOCAL
-#define D_LOCAL 2
-#include <linux/uwb/debug.h>
-
 static void wusbhc_set_gtk_callback(struct urb *urb);
 static void wusbhc_gtk_rekey_done_work(struct work_struct *work);
 
@@ -219,7 +206,6 @@
 	const void *itr, *top;
 	char buf[64];
 
-	d_fnstart(3, dev, "(usb_dev %p, wusb_dev %p)\n", usb_dev, wusb_dev);
 	result = usb_get_descriptor(usb_dev, USB_DT_SECURITY,
 				    0, &secd, sizeof(secd));
 	if (result < sizeof(secd)) {
@@ -228,8 +214,6 @@
 		goto error_secd;
 	}
 	secd_size = le16_to_cpu(secd.wTotalLength);
-	d_printf(5, dev, "got %d bytes of sec descriptor, total is %d\n",
-		 result, secd_size);
 	secd_buf = kmalloc(secd_size, GFP_KERNEL);
 	if (secd_buf == NULL) {
 		dev_err(dev, "Can't allocate space for security descriptors\n");
@@ -242,7 +226,6 @@
 			"not enough data: %d\n", result);
 		goto error_secd_all;
 	}
-	d_printf(5, dev, "got %d bytes of sec descriptors\n", result);
 	bytes = 0;
 	itr = secd_buf + sizeof(secd);
 	top = secd_buf + result;
@@ -279,14 +262,12 @@
 		goto error_no_ccm1;
 	}
 	wusb_dev->ccm1_etd = *ccm1_etd;
-	dev_info(dev, "supported encryption: %s; using %s (0x%02x/%02x)\n",
-		 buf, wusb_et_name(ccm1_etd->bEncryptionType),
-		 ccm1_etd->bEncryptionValue, ccm1_etd->bAuthKeyIndex);
+	dev_dbg(dev, "supported encryption: %s; using %s (0x%02x/%02x)\n",
+		buf, wusb_et_name(ccm1_etd->bEncryptionType),
+		ccm1_etd->bEncryptionValue, ccm1_etd->bAuthKeyIndex);
 	result = 0;
 	kfree(secd_buf);
 out:
-	d_fnend(3, dev, "(usb_dev %p, wusb_dev %p) = %d\n",
-		usb_dev, wusb_dev, result);
 	return result;
 
 
@@ -303,32 +284,6 @@
 	/* Nothing so far */
 }
 
-static void hs_printk(unsigned level, struct device *dev,
-		      struct usb_handshake *hs)
-{
-	d_printf(level, dev,
-		 "  bMessageNumber: %u\n"
-		 "  bStatus:        %u\n"
-		 "  tTKID:          %02x %02x %02x\n"
-		 "  CDID:           %02x %02x %02x %02x %02x %02x %02x %02x\n"
-		 "                  %02x %02x %02x %02x %02x %02x %02x %02x\n"
-		 "  nonce:          %02x %02x %02x %02x %02x %02x %02x %02x\n"
-		 "                  %02x %02x %02x %02x %02x %02x %02x %02x\n"
-		 "  MIC:            %02x %02x %02x %02x %02x %02x %02x %02x\n",
-		 hs->bMessageNumber, hs->bStatus,
-		 hs->tTKID[2], hs->tTKID[1], hs->tTKID[0],
-		 hs->CDID[0], hs->CDID[1], hs->CDID[2], hs->CDID[3],
-		 hs->CDID[4], hs->CDID[5], hs->CDID[6], hs->CDID[7],
-		 hs->CDID[8], hs->CDID[9], hs->CDID[10], hs->CDID[11],
-		 hs->CDID[12], hs->CDID[13], hs->CDID[14], hs->CDID[15],
-		 hs->nonce[0], hs->nonce[1], hs->nonce[2], hs->nonce[3],
-		 hs->nonce[4], hs->nonce[5], hs->nonce[6], hs->nonce[7],
-		 hs->nonce[8], hs->nonce[9], hs->nonce[10], hs->nonce[11],
-		 hs->nonce[12], hs->nonce[13], hs->nonce[14], hs->nonce[15],
-		 hs->MIC[0], hs->MIC[1], hs->MIC[2], hs->MIC[3],
-		 hs->MIC[4], hs->MIC[5], hs->MIC[6], hs->MIC[7]);
-}
-
 /**
  * Update the address of an unauthenticated WUSB device
  *
@@ -338,8 +293,7 @@
  * Before the device's address (as known by it) was usb_dev->devnum |
  * 0x80 (unauthenticated address). With this we update it to usb_dev->devnum.
  */
-static int wusb_dev_update_address(struct wusbhc *wusbhc,
-				   struct wusb_dev *wusb_dev)
+int wusb_dev_update_address(struct wusbhc *wusbhc, struct wusb_dev *wusb_dev)
 {
 	int result = -ENOMEM;
 	struct usb_device *usb_dev = wusb_dev->usb_dev;
@@ -422,9 +376,6 @@
 	get_random_bytes(&hs[0].nonce, sizeof(hs[0].nonce));
 	memset(hs[0].MIC, 0, sizeof(hs[0].MIC));	/* Per WUSB1.0[T7-22] */
 
-	d_printf(1, dev, "I: sending hs1:\n");
-	hs_printk(2, dev, &hs[0]);
-
 	result = usb_control_msg(
 		usb_dev, usb_sndctrlpipe(usb_dev, 0),
 		USB_REQ_SET_HANDSHAKE,
@@ -445,8 +396,6 @@
 		dev_err(dev, "Handshake2: request failed: %d\n", result);
 		goto error_hs2;
 	}
-	d_printf(1, dev, "got HS2:\n");
-	hs_printk(2, dev, &hs[1]);
 
 	result = -EINVAL;
 	if (hs[1].bMessageNumber != 2) {
@@ -487,10 +436,6 @@
 			result);
 		goto error_hs2;
 	}
-	d_printf(2, dev, "KCK:\n");
-	d_dump(2, dev, keydvt_out.kck, sizeof(keydvt_out.kck));
-	d_printf(2, dev, "PTK:\n");
-	d_dump(2, dev, keydvt_out.ptk, sizeof(keydvt_out.ptk));
 
 	/* Compute MIC and verify it */
 	result = wusb_oob_mic(mic, keydvt_out.kck, &ccm_n, &hs[1]);
@@ -500,8 +445,6 @@
 		goto error_hs2;
 	}
 
-	d_printf(2, dev, "MIC:\n");
-	d_dump(2, dev, mic, sizeof(mic));
 	if (memcmp(hs[1].MIC, mic, sizeof(hs[1].MIC))) {
 		dev_err(dev, "Handshake2 failed: MIC mismatch\n");
 		goto error_hs2;
@@ -521,9 +464,6 @@
 		goto error_hs2;
 	}
 
-	d_printf(1, dev, "I: sending hs3:\n");
-	hs_printk(2, dev, &hs[2]);
-
 	result = usb_control_msg(
 		usb_dev, usb_sndctrlpipe(usb_dev, 0),
 		USB_REQ_SET_HANDSHAKE,
@@ -534,14 +474,11 @@
 		goto error_hs3;
 	}
 
-	d_printf(1, dev, "I: turning on encryption on host for device\n");
-	d_dump(2, dev, keydvt_out.ptk, sizeof(keydvt_out.ptk));
 	result = wusbhc->set_ptk(wusbhc, wusb_dev->port_idx, tkid,
 				 keydvt_out.ptk, sizeof(keydvt_out.ptk));
 	if (result < 0)
 		goto error_wusbhc_set_ptk;
 
-	d_printf(1, dev, "I: setting a GTK\n");
 	result = wusb_dev_set_gtk(wusbhc, wusb_dev);
 	if (result < 0) {
 		dev_err(dev, "Set GTK for device: request failed: %d\n",
@@ -551,13 +488,12 @@
 
 	/* Update the device's address from unauth to auth */
 	if (usb_dev->authenticated == 0) {
-		d_printf(1, dev, "I: updating addres to auth from non-auth\n");
 		result = wusb_dev_update_address(wusbhc, wusb_dev);
 		if (result < 0)
 			goto error_dev_update_address;
 	}
 	result = 0;
-	d_printf(1, dev, "I: 4way handshke done, device authenticated\n");
+	dev_info(dev, "device authenticated\n");
 
 error_dev_update_address:
 error_wusbhc_set_gtk:
@@ -570,10 +506,8 @@
 	memset(&keydvt_in, 0, sizeof(keydvt_in));
 	memset(&ccm_n, 0, sizeof(ccm_n));
 	memset(mic, 0, sizeof(mic));
-	if (result < 0) {
-		/* error path */
+	if (result < 0)
 		wusb_dev_set_encryption(usb_dev, 0);
-	}
 error_dev_set_encryption:
 	kfree(hs);
 error_kzalloc:
diff --git a/drivers/usb/wusbcore/wa-nep.c b/drivers/usb/wusbcore/wa-nep.c
index 3f54299..17d2626 100644
--- a/drivers/usb/wusbcore/wa-nep.c
+++ b/drivers/usb/wusbcore/wa-nep.c
@@ -51,7 +51,7 @@
  */
 #include <linux/workqueue.h>
 #include <linux/ctype.h>
-#include <linux/uwb/debug.h>
+
 #include "wa-hc.h"
 #include "wusbhc.h"
 
@@ -139,13 +139,10 @@
 			/* FIXME: unimplemented WA NOTIFs */
 			/* fallthru */
 		default:
-			if (printk_ratelimit()) {
-				dev_err(dev, "HWA: unknown notification 0x%x, "
-					"%zu bytes; discarding\n",
-					notif_hdr->bNotifyType,
-					(size_t)notif_hdr->bLength);
-				dump_bytes(dev, notif_hdr, 16);
-			}
+			dev_err(dev, "HWA: unknown notification 0x%x, "
+				"%zu bytes; discarding\n",
+				notif_hdr->bNotifyType,
+				(size_t)notif_hdr->bLength);
 			break;
 		}
 	}
@@ -160,12 +157,9 @@
 	 * discard the data, as this should not happen.
 	 */
 exhausted_buffer:
-	if (!printk_ratelimit())
-		goto out;
 	dev_warn(dev, "HWA: device sent short notification, "
 		 "%d bytes missing; discarding %d bytes.\n",
 		 missing, (int)size);
-	dump_bytes(dev, itr, size);
 	goto out;
 }
 
diff --git a/drivers/usb/wusbcore/wa-rpipe.c b/drivers/usb/wusbcore/wa-rpipe.c
index f18e4aa..7369655 100644
--- a/drivers/usb/wusbcore/wa-rpipe.c
+++ b/drivers/usb/wusbcore/wa-rpipe.c
@@ -60,13 +60,10 @@
 #include <linux/init.h>
 #include <asm/atomic.h>
 #include <linux/bitmap.h>
+
 #include "wusbhc.h"
 #include "wa-hc.h"
 
-#define D_LOCAL 0
-#include <linux/uwb/debug.h>
-
-
 static int __rpipe_get_descr(struct wahc *wa,
 			     struct usb_rpipe_descriptor *descr, u16 index)
 {
@@ -76,7 +73,6 @@
 	/* Get the RPIPE descriptor -- we cannot use the usb_get_descriptor()
 	 * function because the arguments are different.
 	 */
-	d_printf(1, dev, "rpipe %u: get descr\n", index);
 	result = usb_control_msg(
 		wa->usb_dev, usb_rcvctrlpipe(wa->usb_dev, 0),
 		USB_REQ_GET_DESCRIPTOR,
@@ -115,7 +111,6 @@
 	/* we cannot use the usb_get_descriptor() function because the
 	 * arguments are different.
 	 */
-	d_printf(1, dev, "rpipe %u: set descr\n", index);
 	result = usb_control_msg(
 		wa->usb_dev, usb_sndctrlpipe(wa->usb_dev, 0),
 		USB_REQ_SET_DESCRIPTOR,
@@ -174,13 +169,12 @@
 {
 	struct wa_rpipe *rpipe = container_of(_rpipe, struct wa_rpipe, refcnt);
 	u8 index = le16_to_cpu(rpipe->descr.wRPipeIndex);
-	d_fnstart(1, NULL, "(rpipe %p %u)\n", rpipe, index);
+
 	if (rpipe->ep)
 		rpipe->ep->hcpriv = NULL;
 	rpipe_put_idx(rpipe->wa, index);
 	wa_put(rpipe->wa);
 	kfree(rpipe);
-	d_fnend(1, NULL, "(rpipe %p %u)\n", rpipe, index);
 }
 EXPORT_SYMBOL_GPL(rpipe_destroy);
 
@@ -202,7 +196,6 @@
 	struct wa_rpipe *rpipe;
 	struct device *dev = &wa->usb_iface->dev;
 
-	d_fnstart(3, dev, "(wa %p crs 0x%02x)\n", wa, crs);
 	rpipe = kzalloc(sizeof(*rpipe), gfp);
 	if (rpipe == NULL)
 		return -ENOMEM;
@@ -223,14 +216,12 @@
 	}
 	*prpipe = NULL;
 	kfree(rpipe);
-	d_fnend(3, dev, "(wa %p crs 0x%02x) = -ENXIO\n", wa, crs);
 	return -ENXIO;
 
 found:
 	set_bit(rpipe_idx, wa->rpipe_bm);
 	rpipe->wa = wa_get(wa);
 	*prpipe = rpipe;
-	d_fnstart(3, dev, "(wa %p crs 0x%02x) = 0\n", wa, crs);
 	return 0;
 }
 
@@ -239,7 +230,6 @@
 	int result;
 	struct device *dev = &wa->usb_iface->dev;
 
-	d_printf(1, dev, "rpipe %u: reset\n", index);
 	result = usb_control_msg(
 		wa->usb_dev, usb_sndctrlpipe(wa->usb_dev, 0),
 		USB_REQ_RPIPE_RESET,
@@ -276,7 +266,6 @@
 	struct usb_descriptor_header *hdr;
 	struct usb_wireless_ep_comp_descriptor *epcd;
 
-	d_fnstart(3, dev, "(ep %p)\n", ep);
 	if (ep->desc.bEndpointAddress == 0) {
 		epcd = &epc0;
 		goto out;
@@ -310,7 +299,6 @@
 		itr_size -= hdr->bDescriptorType;
 	}
 out:
-	d_fnend(3, dev, "(ep %p) = %p\n", ep, epcd);
 	return epcd;
 }
 
@@ -329,8 +317,6 @@
 	struct usb_wireless_ep_comp_descriptor *epcd;
 	u8 unauth;
 
-	d_fnstart(3, dev, "(rpipe %p wa %p ep %p, urb %p)\n",
-		    rpipe, wa, ep, urb);
 	epcd = rpipe_epc_find(dev, ep);
 	if (epcd == NULL) {
 		dev_err(dev, "ep 0x%02x: can't find companion descriptor\n",
@@ -350,10 +336,12 @@
 	/* FIXME: use maximum speed as supported or recommended by device */
 	rpipe->descr.bSpeed = usb_pipeendpoint(urb->pipe) == 0 ?
 		UWB_PHY_RATE_53 : UWB_PHY_RATE_200;
-	d_printf(2, dev, "addr %u (0x%02x) rpipe #%u ep# %u speed %d\n",
-		 urb->dev->devnum, urb->dev->devnum | unauth,
-		 le16_to_cpu(rpipe->descr.wRPipeIndex),
-		 usb_pipeendpoint(urb->pipe), rpipe->descr.bSpeed);
+
+	dev_dbg(dev, "addr %u (0x%02x) rpipe #%u ep# %u speed %d\n",
+		urb->dev->devnum, urb->dev->devnum | unauth,
+		le16_to_cpu(rpipe->descr.wRPipeIndex),
+		usb_pipeendpoint(urb->pipe), rpipe->descr.bSpeed);
+
 	/* see security.c:wusb_update_address() */
 	if (unlikely(urb->dev->devnum == 0x80))
 		rpipe->descr.bDeviceAddress = 0;
@@ -384,8 +372,6 @@
 	}
 	result = 0;
 error:
-	d_fnend(3, dev, "(rpipe %p wa %p ep %p urb %p) = %d\n",
-		  rpipe, wa, ep, urb, result);
 	return result;
 }
 
@@ -405,8 +391,6 @@
 	u8 unauth = (usb_dev->wusb && !usb_dev->authenticated) ? 0x80 : 0;
 	u8 portnum = wusb_port_no_to_idx(urb->dev->portnum);
 
-	d_fnstart(3, dev, "(rpipe %p wa %p ep %p, urb %p)\n",
-		    rpipe, wa, ep, urb);
 #define AIM_CHECK(rdf, val, text)					\
 	do {								\
 		if (rpipe->descr.rdf != (val)) {			\
@@ -451,8 +435,6 @@
 	struct wa_rpipe *rpipe;
 	u8 eptype;
 
-	d_fnstart(3, dev, "(wa %p ep %p urb %p gfp 0x%08x)\n", wa, ep, urb,
-		  gfp);
 	mutex_lock(&wa->rpipe_mutex);
 	rpipe = ep->hcpriv;
 	if (rpipe != NULL) {
@@ -462,9 +444,9 @@
 				goto error;
 		}
 		__rpipe_get(rpipe);
-		d_printf(2, dev, "ep 0x%02x: reusing rpipe %u\n",
-			 ep->desc.bEndpointAddress,
-			 le16_to_cpu(rpipe->descr.wRPipeIndex));
+		dev_dbg(dev, "ep 0x%02x: reusing rpipe %u\n",
+			ep->desc.bEndpointAddress,
+			le16_to_cpu(rpipe->descr.wRPipeIndex));
 	} else {
 		/* hmm, assign idle rpipe, aim it */
 		result = -ENOBUFS;
@@ -480,14 +462,12 @@
 		ep->hcpriv = rpipe;
 		rpipe->ep = ep;
 		__rpipe_get(rpipe);	/* for caching into ep->hcpriv */
-		d_printf(2, dev, "ep 0x%02x: using rpipe %u\n",
-			 ep->desc.bEndpointAddress,
-			 le16_to_cpu(rpipe->descr.wRPipeIndex));
+		dev_dbg(dev, "ep 0x%02x: using rpipe %u\n",
+			ep->desc.bEndpointAddress,
+			le16_to_cpu(rpipe->descr.wRPipeIndex));
 	}
-	d_dump(4, dev, &rpipe->descr, sizeof(rpipe->descr));
 error:
 	mutex_unlock(&wa->rpipe_mutex);
-	d_fnend(3, dev, "(wa %p ep %p urb %p gfp 0x%08x)\n", wa, ep, urb, gfp);
 	return result;
 }
 
@@ -507,7 +487,7 @@
 void wa_rpipes_destroy(struct wahc *wa)
 {
 	struct device *dev = &wa->usb_iface->dev;
-	d_fnstart(3, dev, "(wa %p)\n", wa);
+
 	if (!bitmap_empty(wa->rpipe_bm, wa->rpipes)) {
 		char buf[256];
 		WARN_ON(1);
@@ -515,7 +495,6 @@
 		dev_err(dev, "BUG: pipes not released on exit: %s\n", buf);
 	}
 	kfree(wa->rpipe_bm);
-	d_fnend(3, dev, "(wa %p)\n", wa);
 }
 
 /*
@@ -530,33 +509,20 @@
  */
 void rpipe_ep_disable(struct wahc *wa, struct usb_host_endpoint *ep)
 {
-	struct device *dev = &wa->usb_iface->dev;
 	struct wa_rpipe *rpipe;
-	d_fnstart(2, dev, "(wa %p ep %p)\n", wa, ep);
+
 	mutex_lock(&wa->rpipe_mutex);
 	rpipe = ep->hcpriv;
 	if (rpipe != NULL) {
-		unsigned rc = atomic_read(&rpipe->refcnt.refcount);
-		int result;
 		u16 index = le16_to_cpu(rpipe->descr.wRPipeIndex);
 
-		if (rc != 1)
-			d_printf(1, dev, "(wa %p ep %p) rpipe %p refcnt %u\n",
-				 wa, ep, rpipe, rc);
-
-		d_printf(1, dev, "rpipe %u: abort\n", index);
-		result = usb_control_msg(
+		usb_control_msg(
 			wa->usb_dev, usb_rcvctrlpipe(wa->usb_dev, 0),
 			USB_REQ_RPIPE_ABORT,
 			USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_RPIPE,
 			0, index, NULL, 0, 1000 /* FIXME: arbitrary */);
-		if (result < 0 && result != -ENODEV /* dev is gone */)
-			d_printf(1, dev, "(wa %p rpipe %u): abort failed: %d\n",
-				 wa, index, result);
 		rpipe_put(rpipe);
 	}
 	mutex_unlock(&wa->rpipe_mutex);
-	d_fnend(2, dev, "(wa %p ep %p)\n", wa, ep);
-	return;
 }
 EXPORT_SYMBOL_GPL(rpipe_ep_disable);
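
rpipe_get_by_ep() above caches the refcounted rpipe in ep->hcpriv so later URBs
to the same endpoint reuse it.  The caching pattern, condensed; the example_*
names are illustrative and the idle-rpipe allocation is elided.

#include <linux/kref.h>
#include <linux/mutex.h>
#include <linux/usb.h>

struct example_rpipe {
	struct kref refcnt;
	/* ... remote pipe descriptor, owning wire adapter, ... */
};

static void example_rpipe_cache(struct mutex *rpipe_mutex,
				struct usb_host_endpoint *ep,
				struct example_rpipe *idle_rpipe)
{
	struct example_rpipe *rpipe;

	mutex_lock(rpipe_mutex);
	rpipe = ep->hcpriv;
	if (rpipe != NULL) {
		/* endpoint already has an aimed rpipe: just take a ref */
		kref_get(&rpipe->refcnt);
	} else {
		/* aim an idle rpipe and cache it; the ep->hcpriv cache
		 * holds its own reference, dropped in rpipe_ep_disable() */
		rpipe = idle_rpipe;
		ep->hcpriv = rpipe;
		kref_get(&rpipe->refcnt);
	}
	mutex_unlock(rpipe_mutex);
}
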
diff --git a/drivers/usb/wusbcore/wa-xfer.c b/drivers/usb/wusbcore/wa-xfer.c
index c038635..238a96a 100644
--- a/drivers/usb/wusbcore/wa-xfer.c
+++ b/drivers/usb/wusbcore/wa-xfer.c
@@ -82,13 +82,10 @@
 #include <linux/init.h>
 #include <linux/spinlock.h>
 #include <linux/hash.h>
+
 #include "wa-hc.h"
 #include "wusbhc.h"
 
-#undef D_LOCAL
-#define D_LOCAL 0 /* 0 disabled, > 0 different levels... */
-#include <linux/uwb/debug.h>
-
 enum {
 	WA_SEGS_MAX = 255,
 };
@@ -180,7 +177,6 @@
 		}
 	}
 	kfree(xfer);
-	d_printf(2, NULL, "xfer %p destroyed\n", xfer);
 }
 
 static void wa_xfer_get(struct wa_xfer *xfer)
@@ -190,10 +186,7 @@
 
 static void wa_xfer_put(struct wa_xfer *xfer)
 {
-	d_fnstart(3, NULL, "(xfer %p) -- ref count bef put %d\n",
-		    xfer, atomic_read(&xfer->refcnt.refcount));
 	kref_put(&xfer->refcnt, wa_xfer_destroy);
-	d_fnend(3, NULL, "(xfer %p) = void\n", xfer);
 }
 
 /*
@@ -209,7 +202,7 @@
 static void wa_xfer_giveback(struct wa_xfer *xfer)
 {
 	unsigned long flags;
-	d_fnstart(3, NULL, "(xfer %p)\n", xfer);
+
 	spin_lock_irqsave(&xfer->wa->xfer_list_lock, flags);
 	list_del_init(&xfer->list_node);
 	spin_unlock_irqrestore(&xfer->wa->xfer_list_lock, flags);
@@ -217,7 +210,6 @@
 	wusbhc_giveback_urb(xfer->wa->wusb, xfer->urb, xfer->result);
 	wa_put(xfer->wa);
 	wa_xfer_put(xfer);
-	d_fnend(3, NULL, "(xfer %p) = void\n", xfer);
 }
 
 /*
@@ -227,13 +219,10 @@
  */
 static void wa_xfer_completion(struct wa_xfer *xfer)
 {
-	d_fnstart(3, NULL, "(xfer %p)\n", xfer);
 	if (xfer->wusb_dev)
 		wusb_dev_put(xfer->wusb_dev);
 	rpipe_put(xfer->ep->hcpriv);
 	wa_xfer_giveback(xfer);
-	d_fnend(3, NULL, "(xfer %p) = void\n", xfer);
-	return;
 }
 
 /*
@@ -243,12 +232,12 @@
  */
 static unsigned __wa_xfer_is_done(struct wa_xfer *xfer)
 {
+	struct device *dev = &xfer->wa->usb_iface->dev;
 	unsigned result, cnt;
 	struct wa_seg *seg;
 	struct urb *urb = xfer->urb;
 	unsigned found_short = 0;
 
-	d_fnstart(3, NULL, "(xfer %p)\n", xfer);
 	result = xfer->segs_done == xfer->segs_submitted;
 	if (result == 0)
 		goto out;
@@ -258,10 +247,8 @@
 		switch (seg->status) {
 		case WA_SEG_DONE:
 			if (found_short && seg->result > 0) {
-				if (printk_ratelimit())
-					printk(KERN_ERR "xfer %p#%u: bad short "
-					       "segments (%zu)\n", xfer, cnt,
-					       seg->result);
+				dev_dbg(dev, "xfer %p#%u: bad short segments (%zu)\n",
+					xfer, cnt, seg->result);
 				urb->status = -EINVAL;
 				goto out;
 			}
@@ -269,36 +256,30 @@
 			if (seg->result < xfer->seg_size
 			    && cnt != xfer->segs-1)
 				found_short = 1;
-			d_printf(2, NULL, "xfer %p#%u: DONE short %d "
-				 "result %zu urb->actual_length %d\n",
-				 xfer, seg->index, found_short, seg->result,
-				 urb->actual_length);
+			dev_dbg(dev, "xfer %p#%u: DONE short %d "
+				"result %zu urb->actual_length %d\n",
+				xfer, seg->index, found_short, seg->result,
+				urb->actual_length);
 			break;
 		case WA_SEG_ERROR:
 			xfer->result = seg->result;
-			d_printf(2, NULL, "xfer %p#%u: ERROR result %zu\n",
-				 xfer, seg->index, seg->result);
+			dev_dbg(dev, "xfer %p#%u: ERROR result %zu\n",
+				xfer, seg->index, seg->result);
 			goto out;
 		case WA_SEG_ABORTED:
-			WARN_ON(urb->status != -ECONNRESET
-				&& urb->status != -ENOENT);
-			d_printf(2, NULL, "xfer %p#%u ABORTED: result %d\n",
-				 xfer, seg->index, urb->status);
+			dev_dbg(dev, "xfer %p#%u ABORTED: result %d\n",
+				xfer, seg->index, urb->status);
 			xfer->result = urb->status;
 			goto out;
 		default:
-			/* if (printk_ratelimit()) */
-				printk(KERN_ERR "xfer %p#%u: "
-				       "is_done bad state %d\n",
-				       xfer, cnt, seg->status);
+			dev_warn(dev, "xfer %p#%u: is_done bad state %d\n",
+				 xfer, cnt, seg->status);
 			xfer->result = -EINVAL;
-			WARN_ON(1);
 			goto out;
 		}
 	}
 	xfer->result = 0;
 out:
-	d_fnend(3, NULL, "(xfer %p) = void\n", xfer);
 	return result;
 }
 
@@ -424,8 +405,6 @@
 	struct urb *urb = xfer->urb;
 	struct wa_rpipe *rpipe = xfer->ep->hcpriv;
 
-	d_fnstart(3, dev, "(xfer %p [rpipe %p] urb %p)\n",
-		  xfer, rpipe, urb);
 	switch (rpipe->descr.bmAttribute & 0x3) {
 	case USB_ENDPOINT_XFER_CONTROL:
 		*pxfer_type = WA_XFER_TYPE_CTL;
@@ -472,12 +451,10 @@
 	if (xfer->segs == 0 && *pxfer_type == WA_XFER_TYPE_CTL)
 		xfer->segs = 1;
 error:
-	d_fnend(3, dev, "(xfer %p [rpipe %p] urb %p) = %d\n",
-		xfer, rpipe, urb, (int)result);
 	return result;
 }
 
-/** Fill in the common request header and xfer-type specific data. */
+/* Fill in the common request header and xfer-type specific data. */
 static void __wa_xfer_setup_hdr0(struct wa_xfer *xfer,
 				 struct wa_xfer_hdr *xfer_hdr0,
 				 enum wa_xfer_type xfer_type,
@@ -534,14 +511,13 @@
 	unsigned rpipe_ready = 0;
 	u8 done = 0;
 
-	d_fnstart(3, NULL, "(urb %p [%d])\n", urb, urb->status);
 	switch (urb->status) {
 	case 0:
 		spin_lock_irqsave(&xfer->lock, flags);
 		wa = xfer->wa;
 		dev = &wa->usb_iface->dev;
-		d_printf(2, dev, "xfer %p#%u: data out done (%d bytes)\n",
-			   xfer, seg->index, urb->actual_length);
+		dev_dbg(dev, "xfer %p#%u: data out done (%d bytes)\n",
+			xfer, seg->index, urb->actual_length);
 		if (seg->status < WA_SEG_PENDING)
 			seg->status = WA_SEG_PENDING;
 		seg->result = urb->actual_length;
@@ -555,9 +531,8 @@
 		wa = xfer->wa;
 		dev = &wa->usb_iface->dev;
 		rpipe = xfer->ep->hcpriv;
-		if (printk_ratelimit())
-			dev_err(dev, "xfer %p#%u: data out error %d\n",
-				xfer, seg->index, urb->status);
+		dev_dbg(dev, "xfer %p#%u: data out error %d\n",
+			xfer, seg->index, urb->status);
 		if (edc_inc(&wa->nep_edc, EDC_MAX_ERRORS,
 			    EDC_ERROR_TIMEFRAME)){
 			dev_err(dev, "DTO: URB max acceptable errors "
@@ -578,7 +553,6 @@
 		if (rpipe_ready)
 			wa_xfer_delayed_run(rpipe);
 	}
-	d_fnend(3, NULL, "(urb %p [%d]) = void\n", urb, urb->status);
 }
 
 /*
@@ -610,14 +584,12 @@
 	unsigned rpipe_ready;
 	u8 done = 0;
 
-	d_fnstart(3, NULL, "(urb %p [%d])\n", urb, urb->status);
 	switch (urb->status) {
 	case 0:
 		spin_lock_irqsave(&xfer->lock, flags);
 		wa = xfer->wa;
 		dev = &wa->usb_iface->dev;
-		d_printf(2, dev, "xfer %p#%u: request done\n",
-			   xfer, seg->index);
+		dev_dbg(dev, "xfer %p#%u: request done\n", xfer, seg->index);
 		if (xfer->is_inbound && seg->status < WA_SEG_PENDING)
 			seg->status = WA_SEG_PENDING;
 		spin_unlock_irqrestore(&xfer->lock, flags);
@@ -652,7 +624,6 @@
 		if (rpipe_ready)
 			wa_xfer_delayed_run(rpipe);
 	}
-	d_fnend(3, NULL, "(urb %p [%d]) = void\n", urb, urb->status);
 }
 
 /*
@@ -750,9 +721,6 @@
 	size_t xfer_hdr_size, cnt, transfer_size;
 	struct wa_xfer_hdr *xfer_hdr0, *xfer_hdr;
 
-	d_fnstart(3, dev, "(xfer %p [rpipe %p] urb %p)\n",
-		  xfer, xfer->ep->hcpriv, urb);
-
 	result = __wa_xfer_setup_sizes(xfer, &xfer_type);
 	if (result < 0)
 		goto error_setup_sizes;
@@ -788,8 +756,6 @@
 	result = 0;
 error_setup_segs:
 error_setup_sizes:
-	d_fnend(3, dev, "(xfer %p [rpipe %p] urb %p) = %d\n",
-		xfer, xfer->ep->hcpriv, urb, result);
 	return result;
 }
 
@@ -843,9 +809,6 @@
 	struct wa_xfer *xfer;
 	unsigned long flags;
 
-	d_fnstart(1, dev, "(rpipe #%d) %d segments available\n",
-		  le16_to_cpu(rpipe->descr.wRPipeIndex),
-		  atomic_read(&rpipe->segs_available));
 	spin_lock_irqsave(&rpipe->seg_lock, flags);
 	while (atomic_read(&rpipe->segs_available) > 0
 	      && !list_empty(&rpipe->seg_list)) {
@@ -854,10 +817,8 @@
 		list_del(&seg->list_node);
 		xfer = seg->xfer;
 		result = __wa_seg_submit(rpipe, xfer, seg);
-		d_printf(1, dev, "xfer %p#%u submitted from delayed "
-			 "[%d segments available] %d\n",
-			 xfer, seg->index,
-			 atomic_read(&rpipe->segs_available), result);
+		dev_dbg(dev, "xfer %p#%u submitted from delayed [%d segments available] %d\n",
+			xfer, seg->index, atomic_read(&rpipe->segs_available), result);
 		if (unlikely(result < 0)) {
 			spin_unlock_irqrestore(&rpipe->seg_lock, flags);
 			spin_lock_irqsave(&xfer->lock, flags);
@@ -868,10 +829,6 @@
 		}
 	}
 	spin_unlock_irqrestore(&rpipe->seg_lock, flags);
-	d_fnend(1, dev, "(rpipe #%d) = void, %d segments available\n",
-		le16_to_cpu(rpipe->descr.wRPipeIndex),
-		atomic_read(&rpipe->segs_available));
-
 }
 
 /*
@@ -894,9 +851,6 @@
 	u8 available;
 	u8 empty;
 
-	d_fnstart(3, dev, "(xfer %p [rpipe %p])\n",
-		  xfer, xfer->ep->hcpriv);
-
 	spin_lock_irqsave(&wa->xfer_list_lock, flags);
 	list_add_tail(&xfer->list_node, &wa->xfer_list);
 	spin_unlock_irqrestore(&wa->xfer_list_lock, flags);
@@ -908,30 +862,24 @@
 		available = atomic_read(&rpipe->segs_available);
 		empty = list_empty(&rpipe->seg_list);
 		seg = xfer->seg[cnt];
-		d_printf(2, dev, "xfer %p#%u: available %u empty %u (%s)\n",
-			 xfer, cnt, available, empty,
-			 available == 0 || !empty ? "delayed" : "submitted");
+		dev_dbg(dev, "xfer %p#%u: available %u empty %u (%s)\n",
+			xfer, cnt, available, empty,
+			available == 0 || !empty ? "delayed" : "submitted");
 		if (available == 0 || !empty) {
-			d_printf(1, dev, "xfer %p#%u: delayed\n", xfer, cnt);
+			dev_dbg(dev, "xfer %p#%u: delayed\n", xfer, cnt);
 			seg->status = WA_SEG_DELAYED;
 			list_add_tail(&seg->list_node, &rpipe->seg_list);
 		} else {
 			result = __wa_seg_submit(rpipe, xfer, seg);
-			if (result < 0)
+			if (result < 0) {
+				__wa_xfer_abort(xfer);
 				goto error_seg_submit;
+			}
 		}
 		xfer->segs_submitted++;
 	}
-	spin_unlock_irqrestore(&rpipe->seg_lock, flags);
-	d_fnend(3, dev, "(xfer %p [rpipe %p]) = void\n", xfer,
-		xfer->ep->hcpriv);
-	return result;
-
 error_seg_submit:
-	__wa_xfer_abort(xfer);
 	spin_unlock_irqrestore(&rpipe->seg_lock, flags);
-	d_fnend(3, dev, "(xfer %p [rpipe %p]) = void\n", xfer,
-		xfer->ep->hcpriv);
 	return result;
 }
 
@@ -964,11 +912,9 @@
 	struct urb *urb = xfer->urb;
 	struct wahc *wa = xfer->wa;
 	struct wusbhc *wusbhc = wa->wusb;
-	struct device *dev = &wa->usb_iface->dev;
 	struct wusb_dev *wusb_dev;
 	unsigned done;
 
-	d_fnstart(3, dev, "(wa %p urb %p)\n", wa, urb);
 	result = rpipe_get_by_ep(wa, xfer->ep, urb, xfer->gfp);
 	if (result < 0)
 		goto error_rpipe_get;
@@ -997,7 +943,6 @@
 	if (result < 0)
 		goto error_xfer_submit;
 	spin_unlock_irqrestore(&xfer->lock, flags);
-	d_fnend(3, dev, "(wa %p urb %p) = void\n", wa, urb);
 	return;
 
 	/* this is basically wa_xfer_completion() broken up wa_xfer_giveback()
@@ -1015,7 +960,6 @@
 error_rpipe_get:
 	xfer->result = result;
 	wa_xfer_giveback(xfer);
-	d_fnend(3, dev, "(wa %p urb %p) = (void) %d\n", wa, urb, result);
 	return;
 
 error_xfer_submit:
@@ -1024,8 +968,6 @@
 	spin_unlock_irqrestore(&xfer->lock, flags);
 	if (done)
 		wa_xfer_completion(xfer);
-	d_fnend(3, dev, "(wa %p urb %p) = (void) %d\n", wa, urb, result);
-	return;
 }
 
 /*
@@ -1041,11 +983,9 @@
 void wa_urb_enqueue_run(struct work_struct *ws)
 {
 	struct wahc *wa = container_of(ws, struct wahc, xfer_work);
-	struct device *dev = &wa->usb_iface->dev;
 	struct wa_xfer *xfer, *next;
 	struct urb *urb;
 
-	d_fnstart(3, dev, "(wa %p)\n", wa);
 	spin_lock_irq(&wa->xfer_list_lock);
 	list_for_each_entry_safe(xfer, next, &wa->xfer_delayed_list,
 				 list_node) {
@@ -1059,7 +999,6 @@
 		spin_lock_irq(&wa->xfer_list_lock);
 	}
 	spin_unlock_irq(&wa->xfer_list_lock);
-	d_fnend(3, dev, "(wa %p) = void\n", wa);
 }
 EXPORT_SYMBOL_GPL(wa_urb_enqueue_run);
 
@@ -1084,9 +1023,6 @@
 	unsigned long my_flags;
 	unsigned cant_sleep = irqs_disabled() | in_atomic();
 
-	d_fnstart(3, dev, "(wa %p ep %p urb %p [%d] gfp 0x%x)\n",
-		  wa, ep, urb, urb->transfer_buffer_length, gfp);
-
 	if (urb->transfer_buffer == NULL
 	    && !(urb->transfer_flags & URB_NO_TRANSFER_DMA_MAP)
 	    && urb->transfer_buffer_length != 0) {
@@ -1108,11 +1044,13 @@
 	xfer->gfp = gfp;
 	xfer->ep = ep;
 	urb->hcpriv = xfer;
-	d_printf(2, dev, "xfer %p urb %p pipe 0x%02x [%d bytes] %s %s %s\n",
-		 xfer, urb, urb->pipe, urb->transfer_buffer_length,
-		 urb->transfer_flags & URB_NO_TRANSFER_DMA_MAP ? "dma" : "nodma",
-		 urb->pipe & USB_DIR_IN ? "inbound" : "outbound",
-		 cant_sleep ? "deferred" : "inline");
+
+	dev_dbg(dev, "xfer %p urb %p pipe 0x%02x [%d bytes] %s %s %s\n",
+		xfer, urb, urb->pipe, urb->transfer_buffer_length,
+		urb->transfer_flags & URB_NO_TRANSFER_DMA_MAP ? "dma" : "nodma",
+		urb->pipe & USB_DIR_IN ? "inbound" : "outbound",
+		cant_sleep ? "deferred" : "inline");
+
 	if (cant_sleep) {
 		usb_get_urb(urb);
 		spin_lock_irqsave(&wa->xfer_list_lock, my_flags);
@@ -1122,15 +1060,11 @@
 	} else {
 		wa_urb_enqueue_b(xfer);
 	}
-	d_fnend(3, dev, "(wa %p ep %p urb %p [%d] gfp 0x%x) = 0\n",
-		wa, ep, urb, urb->transfer_buffer_length, gfp);
 	return 0;
 
 error_dequeued:
 	kfree(xfer);
 error_kmalloc:
-	d_fnend(3, dev, "(wa %p ep %p urb %p [%d] gfp 0x%x) = %d\n",
-		wa, ep, urb, urb->transfer_buffer_length, gfp, result);
 	return result;
 }
 EXPORT_SYMBOL_GPL(wa_urb_enqueue);
@@ -1155,7 +1089,6 @@
  */
 int wa_urb_dequeue(struct wahc *wa, struct urb *urb)
 {
-	struct device *dev = &wa->usb_iface->dev;
 	unsigned long flags, flags2;
 	struct wa_xfer *xfer;
 	struct wa_seg *seg;
@@ -1163,9 +1096,6 @@
 	unsigned cnt;
 	unsigned rpipe_ready = 0;
 
-	d_fnstart(3, dev, "(wa %p, urb %p)\n", wa, urb);
-
-	d_printf(1, dev, "xfer %p urb %p: aborting\n", urb->hcpriv, urb);
 	xfer = urb->hcpriv;
 	if (xfer == NULL) {
 		/* NOthing setup yet enqueue will see urb->status !=
@@ -1234,13 +1164,11 @@
 	wa_xfer_completion(xfer);
 	if (rpipe_ready)
 		wa_xfer_delayed_run(rpipe);
-	d_fnend(3, dev, "(wa %p, urb %p) = 0\n", wa, urb);
 	return 0;
 
 out_unlock:
 	spin_unlock_irqrestore(&xfer->lock, flags);
 out:
-	d_fnend(3, dev, "(wa %p, urb %p) = 0\n", wa, urb);
 	return 0;
 
 dequeue_delayed:
@@ -1250,7 +1178,6 @@
 	spin_unlock_irqrestore(&xfer->lock, flags);
 	wa_xfer_giveback(xfer);
 	usb_put_urb(urb);		/* we got a ref in enqueue() */
-	d_fnend(3, dev, "(wa %p, urb %p) = 0\n", wa, urb);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(wa_urb_dequeue);
@@ -1326,7 +1253,6 @@
 	u8 usb_status;
 	unsigned rpipe_ready = 0;
 
-	d_fnstart(3, dev, "(wa %p xfer %p)\n", wa, xfer);
 	spin_lock_irqsave(&xfer->lock, flags);
 	seg_idx = xfer_result->bTransferSegment & 0x7f;
 	if (unlikely(seg_idx >= xfer->segs))
@@ -1334,8 +1260,8 @@
 	seg = xfer->seg[seg_idx];
 	rpipe = xfer->ep->hcpriv;
 	usb_status = xfer_result->bTransferStatus;
-	d_printf(2, dev, "xfer %p#%u: bTransferStatus 0x%02x (seg %u)\n",
-		 xfer, seg_idx, usb_status, seg->status);
+	dev_dbg(dev, "xfer %p#%u: bTransferStatus 0x%02x (seg %u)\n",
+		xfer, seg_idx, usb_status, seg->status);
 	if (seg->status == WA_SEG_ABORTED
 	    || seg->status == WA_SEG_ERROR)	/* already handled */
 		goto segment_aborted;
@@ -1391,10 +1317,8 @@
 		wa_xfer_completion(xfer);
 	if (rpipe_ready)
 		wa_xfer_delayed_run(rpipe);
-	d_fnend(3, dev, "(wa %p xfer %p) = void\n", wa, xfer);
 	return;
 
-
 error_submit_buf_in:
 	if (edc_inc(&wa->dti_edc, EDC_MAX_ERRORS, EDC_ERROR_TIMEFRAME)) {
 		dev_err(dev, "DTI: URB max acceptable errors "
@@ -1416,11 +1340,8 @@
 		wa_xfer_completion(xfer);
 	if (rpipe_ready)
 		wa_xfer_delayed_run(rpipe);
-	d_fnend(3, dev, "(wa %p xfer %p) = void [segment/DTI-submit error]\n",
-		wa, xfer);
 	return;
 
-
 error_bad_seg:
 	spin_unlock_irqrestore(&xfer->lock, flags);
 	wa_urb_dequeue(wa, xfer->urb);
@@ -1431,17 +1352,11 @@
 			"exceeded, resetting device\n");
 		wa_reset_all(wa);
 	}
-	d_fnend(3, dev, "(wa %p xfer %p) = void [bad seg]\n", wa, xfer);
 	return;
 
-
 segment_aborted:
 	/* nothing to do, as the aborter did the completion */
 	spin_unlock_irqrestore(&xfer->lock, flags);
-	d_fnend(3, dev, "(wa %p xfer %p) = void [segment aborted]\n",
-		wa, xfer);
-	return;
-
 }
 
 /*
@@ -1465,15 +1380,14 @@
 	unsigned long flags;
 	u8 done = 0;
 
-	d_fnstart(3, NULL, "(urb %p [%d])\n", urb, urb->status);
 	switch (urb->status) {
 	case 0:
 		spin_lock_irqsave(&xfer->lock, flags);
 		wa = xfer->wa;
 		dev = &wa->usb_iface->dev;
 		rpipe = xfer->ep->hcpriv;
-		d_printf(2, dev, "xfer %p#%u: data in done (%zu bytes)\n",
-			   xfer, seg->index, (size_t)urb->actual_length);
+		dev_dbg(dev, "xfer %p#%u: data in done (%zu bytes)\n",
+			xfer, seg->index, (size_t)urb->actual_length);
 		seg->status = WA_SEG_DONE;
 		seg->result = urb->actual_length;
 		xfer->segs_done++;
@@ -1514,7 +1428,6 @@
 		if (rpipe_ready)
 			wa_xfer_delayed_run(rpipe);
 	}
-	d_fnend(3, NULL, "(urb %p [%d]) = void\n", urb, urb->status);
 }
 
 /*
@@ -1553,14 +1466,12 @@
 	struct wa_xfer *xfer;
 	u8 usb_status;
 
-	d_fnstart(3, dev, "(%p)\n", wa);
 	BUG_ON(wa->dti_urb != urb);
 	switch (wa->dti_urb->status) {
 	case 0:
 		/* We have a xfer result buffer; check it */
-		d_printf(2, dev, "DTI: xfer result %d bytes at %p\n",
-			   urb->actual_length, urb->transfer_buffer);
-		d_dump(3, dev, urb->transfer_buffer, urb->actual_length);
+		dev_dbg(dev, "DTI: xfer result %d bytes at %p\n",
+			urb->actual_length, urb->transfer_buffer);
 		if (wa->dti_urb->actual_length != sizeof(*xfer_result)) {
 			dev_err(dev, "DTI Error: xfer result--bad size "
 				"xfer result (%d bytes vs %zu needed)\n",
@@ -1622,7 +1533,6 @@
 		wa_reset_all(wa);
 	}
 out:
-	d_fnend(3, dev, "(%p) = void\n", wa);
 	return;
 }
 
@@ -1653,7 +1563,6 @@
 	struct wa_notif_xfer *notif_xfer;
 	const struct usb_endpoint_descriptor *dti_epd = wa->dti_epd;
 
-	d_fnstart(4, dev, "(%p, %p)\n", wa, notif_hdr);
 	notif_xfer = container_of(notif_hdr, struct wa_notif_xfer, hdr);
 	BUG_ON(notif_hdr->bNotifyType != WA_NOTIF_TRANSFER);
 
@@ -1693,7 +1602,6 @@
 		goto error_dti_urb_submit;
 	}
 out:
-	d_fnend(4, dev, "(%p, %p) = void\n", wa, notif_hdr);
 	return;
 
 error_dti_urb_submit:
@@ -1704,6 +1612,4 @@
 error_dti_urb_alloc:
 error:
 	wa_reset_all(wa);
-	d_fnend(4, dev, "(%p, %p) = void\n", wa, notif_hdr);
-	return;
 }
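
The enqueue path above either submits a segment immediately or defers it, and
wa_xfer_delayed_run() later drains the deferred list; the two halves of that
pattern, condensed into a flow summary:

/*
 * __wa_xfer_submit()
 *   rpipe out of segment slots, or rpipe->seg_list already non-empty
 *   (preserve FIFO order)?
 *     yes -> seg->status = WA_SEG_DELAYED;
 *            list_add_tail(&seg->list_node, &rpipe->seg_list);
 *     no  -> __wa_seg_submit(); on error __wa_xfer_abort(xfer)
 *
 * wa_xfer_delayed_run()   called from the completion paths above whenever
 *                         rpipe_ready indicates a slot was freed
 *   while segs_available > 0 and seg_list is non-empty:
 *     pop the head of rpipe->seg_list and __wa_seg_submit() it
 */
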
diff --git a/drivers/usb/wusbcore/wusbhc.h b/drivers/usb/wusbcore/wusbhc.h
index d0c1324..797c245 100644
--- a/drivers/usb/wusbcore/wusbhc.h
+++ b/drivers/usb/wusbcore/wusbhc.h
@@ -64,6 +64,13 @@
 #include <linux/uwb.h>
 #include <linux/usb/wusb.h>
 
+/*
+ * Time from a WUSB channel stop request to the last transmitted MMC.
+ *
+ * This needs to be > 4.096 ms in case no MMCs can be transmitted in
+ * zone 0.
+ */
+#define WUSB_CHANNEL_STOP_DELAY_MS 8
 
 /**
  * Wireless USB device
@@ -147,7 +154,6 @@
 	u16 status;
 	u16 change;
 	struct wusb_dev *wusb_dev;	/* connected device's info */
-	unsigned reset_count;
 	u32 ptk_tkid;
 };
 
@@ -198,21 +204,18 @@
  * @mmcies_max	   Max number of Information Elements this HC can send
  *                 in its MMC. Read-only.
  *
+ * @start          Start the WUSB channel.
+ *
+ * @stop           Stop the WUSB channel after the specified number of
+ *                 milliseconds.  Channel Stop IEs should be transmitted
+ *                 as required by [WUSB] 4.16.2.1.
+ *
  * @mmcie_add	   HC specific operation (WHCI or HWA) for adding an
  *                 MMCIE.
  *
  * @mmcie_rm	   HC specific operation (WHCI or HWA) for removing an
  *                 MMCIE.
  *
- * @enc_types	   Array which describes the encryptions methods
- *                 supported by the host as described in WUSB1.0 --
- *                 one entry per supported method. As of WUSB1.0 there
- *                 is only four methods, we make space for eight just in
- *                 case they decide to add some more (and pray they do
- *                 it in sequential order). if 'enc_types[enc_method]
- *                 != 0', then it is supported by the host. enc_method
- *                 is USB_ENC_TYPE*.
- *
  * @set_ptk:       Set the PTK and enable encryption for a device. Or, if
  *                 the supplied key is NULL, disable encryption for that
  *                 device.
@@ -249,7 +252,8 @@
 	struct uwb_pal pal;
 
 	unsigned trust_timeout;			/* in jiffies */
-	struct wuie_host_info *wuie_host_info;	/* Includes CHID */
+	struct wusb_ckhdid chid;
+	struct wuie_host_info *wuie_host_info;
 
 	struct mutex mutex;			/* locks everything else */
 	u16 cluster_id;				/* Wireless USB Cluster ID */
@@ -269,7 +273,7 @@
 	u8 mmcies_max;
 	/* FIXME: make wusbhc_ops? */
 	int (*start)(struct wusbhc *wusbhc);
-	void (*stop)(struct wusbhc *wusbhc);
+	void (*stop)(struct wusbhc *wusbhc, int delay);
 	int (*mmcie_add)(struct wusbhc *wusbhc, u8 interval, u8 repeat_cnt,
 			 u8 handle, struct wuie_hdr *wuie);
 	int (*mmcie_rm)(struct wusbhc *wusbhc, u8 handle);
@@ -373,20 +377,17 @@
 	usb_put_hcd(&wusbhc->usb_hcd);
 }
 
-int wusbhc_start(struct wusbhc *wusbhc, const struct wusb_ckhdid *chid);
+int wusbhc_start(struct wusbhc *wusbhc);
 void wusbhc_stop(struct wusbhc *wusbhc);
 extern int wusbhc_chid_set(struct wusbhc *, const struct wusb_ckhdid *);
 
 /* Device connect handling */
 extern int wusbhc_devconnect_create(struct wusbhc *);
 extern void wusbhc_devconnect_destroy(struct wusbhc *);
-extern int wusbhc_devconnect_start(struct wusbhc *wusbhc,
-				   const struct wusb_ckhdid *chid);
+extern int wusbhc_devconnect_start(struct wusbhc *wusbhc);
 extern void wusbhc_devconnect_stop(struct wusbhc *wusbhc);
-extern int wusbhc_devconnect_auth(struct wusbhc *, u8);
 extern void wusbhc_handle_dn(struct wusbhc *, u8 srcaddr,
 			     struct wusb_dn_hdr *dn_hdr, size_t size);
-extern int wusbhc_dev_reset(struct wusbhc *wusbhc, u8 port);
 extern void __wusbhc_dev_disable(struct wusbhc *wusbhc, u8 port);
 extern int wusb_usb_ncb(struct notifier_block *nb, unsigned long val,
 			void *priv);
@@ -432,6 +433,7 @@
 extern int wusb_dev_4way_handshake(struct wusbhc *, struct wusb_dev *,
 				   struct wusb_ckhdid *ck);
 void wusbhc_gtk_rekey(struct wusbhc *wusbhc);
+int wusb_dev_update_address(struct wusbhc *wusbhc, struct wusb_dev *wusb_dev);
 
 
 /* WUSB Cluster ID handling */
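
As a worked check on WUSB_CHANNEL_STOP_DELAY_MS above (assuming the standard
ECMA-368 timing of 256 us per MAS and 16 MAS per zone): one zone lasts
16 x 256 us = 4.096 ms, so an 8 ms delay still reaches a zone other than
zone 0 even when the stop request lands right at the start of zone 0 and no
MMC can be sent there.  wusbhc_mmc_stop() passes exactly this constant to the
HC's stop op, wusbhc->stop(wusbhc, WUSB_CHANNEL_STOP_DELAY_MS).
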
diff --git a/drivers/uwb/Makefile b/drivers/uwb/Makefile
index 257e690..2f98d08 100644
--- a/drivers/uwb/Makefile
+++ b/drivers/uwb/Makefile
@@ -6,6 +6,7 @@
 
 uwb-objs :=		\
 	address.o	\
+	allocator.o	\
 	beacon.o	\
 	driver.o	\
 	drp.o		\
@@ -13,10 +14,12 @@
 	drp-ie.o	\
 	est.o		\
 	ie.o		\
+	ie-rcv.o	\
 	lc-dev.o	\
 	lc-rc.o		\
 	neh.o		\
 	pal.o		\
+	radio.o		\
 	reset.o		\
 	rsv.o		\
 	scan.o		\
diff --git a/drivers/uwb/address.c b/drivers/uwb/address.c
index 1664ae5..ad21b1d7 100644
--- a/drivers/uwb/address.c
+++ b/drivers/uwb/address.c
@@ -28,7 +28,7 @@
 #include <linux/device.h>
 #include <linux/random.h>
 #include <linux/etherdevice.h>
-#include <linux/uwb/debug.h>
+
 #include "uwb-internal.h"
 
 
diff --git a/drivers/uwb/allocator.c b/drivers/uwb/allocator.c
new file mode 100644
index 0000000..c8185e6
--- /dev/null
+++ b/drivers/uwb/allocator.c
@@ -0,0 +1,386 @@
+/*
+ * UWB reservation management.
+ *
+ * Copyright (C) 2008 Cambridge Silicon Radio Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <linux/version.h>
+#include <linux/kernel.h>
+#include <linux/uwb.h>
+
+#include "uwb-internal.h"
+
+static void uwb_rsv_fill_column_alloc(struct uwb_rsv_alloc_info *ai)
+{
+	int col, mas, safe_mas, unsafe_mas;
+	unsigned char *bm = ai->bm;
+	struct uwb_rsv_col_info *ci = ai->ci;
+	unsigned char c;
+
+	for (col = ci->csi.start_col; col < UWB_NUM_ZONES; col += ci->csi.interval) {
+    
+		safe_mas   = ci->csi.safe_mas_per_col;
+		unsafe_mas = ci->csi.unsafe_mas_per_col;
+    
+		for (mas = 0; mas < UWB_MAS_PER_ZONE; mas++ ) {
+			if (bm[col * UWB_MAS_PER_ZONE + mas] == 0) {
+	
+				if (safe_mas > 0) {
+					safe_mas--;
+					c = UWB_RSV_MAS_SAFE;
+				} else if (unsafe_mas > 0) {
+					unsafe_mas--;
+					c = UWB_RSV_MAS_UNSAFE;
+				} else {
+					break;
+				}
+				bm[col * UWB_MAS_PER_ZONE + mas] = c;
+			}
+		}
+	}
+}
+
+static void uwb_rsv_fill_row_alloc(struct uwb_rsv_alloc_info *ai)
+{
+	int mas, col, rows;
+	unsigned char *bm = ai->bm;
+	struct uwb_rsv_row_info *ri = &ai->ri;
+	unsigned char c;
+
+	rows = 1;
+	c = UWB_RSV_MAS_SAFE;
+	for (mas = UWB_MAS_PER_ZONE - 1; mas >= 0; mas--) {
+		if (ri->avail[mas] == 1) {
+      
+			if (rows > ri->used_rows) {
+				break;
+			} else if (rows > 7) {
+				c = UWB_RSV_MAS_UNSAFE;
+			}
+
+			for (col = 0; col < UWB_NUM_ZONES; col++) {
+				if (bm[col * UWB_NUM_ZONES + mas] != UWB_RSV_MAS_NOT_AVAIL) {
+					bm[col * UWB_NUM_ZONES + mas] = c;
+					if(c == UWB_RSV_MAS_SAFE)
+						ai->safe_allocated_mases++;
+					else
+						ai->unsafe_allocated_mases++;
+				}
+			}
+			rows++;
+		}
+	}
+	ai->total_allocated_mases = ai->safe_allocated_mases + ai->unsafe_allocated_mases;
+}
+
+/*
+ * Find the best column set for a given availability, interval, num safe mas and
+ * num unsafe mas.
+ *
+ * The different sets are tried in order as shown below, depending on the interval.
+ *
+ * interval = 16
+ *	deep = 0
+ *		set 1 ->  {  8 }
+ *	deep = 1
+ *		set 1 ->  {  4 }
+ *		set 2 ->  { 12 }
+ *	deep = 2
+ *		set 1 ->  {  2 }
+ *		set 2 ->  {  6 }
+ *		set 3 ->  { 10 }
+ *		set 4 ->  { 14 }
+ *	deep = 3
+ *		set 1 ->  {  1 }
+ *		set 2 ->  {  3 }
+ *		set 3 ->  {  5 }
+ *		set 4 ->  {  7 }
+ *		set 5 ->  {  9 }
+ *		set 6 ->  { 11 }
+ *		set 7 ->  { 13 }
+ *		set 8 ->  { 15 }
+ *
+ * interval = 8
+ *	deep = 0
+ *		set 1 ->  {  4  12 }
+ *	deep = 1
+ *		set 1 ->  {  2  10 }
+ *		set 2 ->  {  6  14 }
+ *	deep = 2
+ *		set 1 ->  {  1   9 }
+ *		set 2 ->  {  3  11 }
+ *		set 3 ->  {  5  13 }
+ *		set 4 ->  {  7  15 }
+ *
+ * interval = 4
+ *	deep = 0
+ *		set 1 ->  {  2   6  10  14 }
+ *	deep = 1
+ *		set 1 ->  {  1   5   9  13 }
+ *		set 2 ->  {  3   7  11  15 }
+ *
+ * interval = 2
+ *	deep = 0
+ *		set 1 ->  {  1   3   5   7   9  11  13  15 }
+ */
+static int uwb_rsv_find_best_column_set(struct uwb_rsv_alloc_info *ai, int interval, 
+					int num_safe_mas, int num_unsafe_mas)
+{
+	struct uwb_rsv_col_info *ci = ai->ci;
+	struct uwb_rsv_col_set_info *csi = &ci->csi;
+	struct uwb_rsv_col_set_info tmp_csi;
+	int deep, set, col, start_col_deep, col_start_set;
+	int start_col, max_mas_in_set, lowest_max_mas_in_deep;
+	int n_mas;
+	int found = UWB_RSV_ALLOC_NOT_FOUND; 
+
+	tmp_csi.start_col = 0;
+	start_col_deep = interval;
+	n_mas = num_unsafe_mas + num_safe_mas;
+
+	for (deep = 0; ((interval >> deep) & 0x1) == 0; deep++) {
+		start_col_deep /= 2;
+		col_start_set = 0;
+		lowest_max_mas_in_deep = UWB_MAS_PER_ZONE;
+
+		for (set = 1; set <= (1 << deep); set++) {
+			max_mas_in_set = 0;
+			start_col = start_col_deep + col_start_set;
+			for (col = start_col; col < UWB_NUM_ZONES; col += interval) {
+                
+				if (ci[col].max_avail_safe >= num_safe_mas &&
+				    ci[col].max_avail_unsafe >= n_mas) {
+					if (ci[col].highest_mas[n_mas] > max_mas_in_set)
+						max_mas_in_set = ci[col].highest_mas[n_mas];
+				} else {
+					max_mas_in_set = 0;
+					break;
+				}
+			}
+			if ((lowest_max_mas_in_deep > max_mas_in_set) && max_mas_in_set) {
+				lowest_max_mas_in_deep = max_mas_in_set;
+
+				tmp_csi.start_col = start_col;
+			}
+			col_start_set += (interval >> deep);
+		}
+
+		if (lowest_max_mas_in_deep < 8) {
+			csi->start_col = tmp_csi.start_col;
+			found = UWB_RSV_ALLOC_FOUND;
+			break;
+		} else if ((lowest_max_mas_in_deep > 8) && 
+			   (lowest_max_mas_in_deep != UWB_MAS_PER_ZONE) &&
+			   (found == UWB_RSV_ALLOC_NOT_FOUND)) {
+			csi->start_col = tmp_csi.start_col;
+			found = UWB_RSV_ALLOC_FOUND;
+		}
+	}
+
+	if (found == UWB_RSV_ALLOC_FOUND) {
+		csi->interval = interval;
+		csi->safe_mas_per_col = num_safe_mas;
+		csi->unsafe_mas_per_col = num_unsafe_mas;
+
+		ai->safe_allocated_mases = (UWB_NUM_ZONES / interval) * num_safe_mas;
+		ai->unsafe_allocated_mases = (UWB_NUM_ZONES / interval) * num_unsafe_mas;
+		ai->total_allocated_mases = ai->safe_allocated_mases + ai->unsafe_allocated_mases;
+		ai->interval = interval;		
+	}
+	return found;
+}
+
+static void get_row_descriptors(struct uwb_rsv_alloc_info *ai)
+{
+	unsigned char *bm = ai->bm;
+	struct uwb_rsv_row_info *ri = &ai->ri;
+	int col, mas;
+  
+	ri->free_rows = 16;
+	for (mas = 0; mas < UWB_MAS_PER_ZONE; mas ++) {
+		ri->avail[mas] = 1;
+		for (col = 1; col < UWB_NUM_ZONES; col++) {
+			if (bm[col * UWB_NUM_ZONES + mas] == UWB_RSV_MAS_NOT_AVAIL) {
+				ri->free_rows--;
+				ri->avail[mas]=0;
+				break;
+			}
+		}
+	}
+}
+
+static void uwb_rsv_fill_column_info(unsigned char *bm, int column, struct uwb_rsv_col_info *rci)
+{
+	int mas;
+	int block_count = 0, start_block = 0; 
+	int previous_avail = 0;
+	int available = 0;
+	int safe_mas_in_row[UWB_MAS_PER_ZONE] = {
+		8, 7, 6, 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 2, 1,
+	};
+
+	rci->max_avail_safe = 0;
+
+	for (mas = 0; mas < UWB_MAS_PER_ZONE; mas ++) {
+		if (!bm[column * UWB_NUM_ZONES + mas]) {
+			available++;
+			rci->max_avail_unsafe = available;
+
+			rci->highest_mas[available] = mas;
+
+			if (previous_avail) {
+				block_count++;
+				if ((block_count > safe_mas_in_row[start_block]) &&
+				    (!rci->max_avail_safe))
+					rci->max_avail_safe = available - 1;
+			} else {
+				previous_avail = 1;
+				start_block = mas;
+				block_count = 1;
+			}
+		} else {
+			previous_avail = 0;
+		}
+	}
+	if (!rci->max_avail_safe)
+		rci->max_avail_safe = rci->max_avail_unsafe;
+}
+
+static void get_column_descriptors(struct uwb_rsv_alloc_info *ai)
+{
+	unsigned char *bm = ai->bm;
+	struct uwb_rsv_col_info *ci = ai->ci;
+	int col;
+
+	for (col = 1; col < UWB_NUM_ZONES; col++) {
+		uwb_rsv_fill_column_info(bm, col, &ci[col]);
+	}
+}
+
+static int uwb_rsv_find_best_row_alloc(struct uwb_rsv_alloc_info *ai)
+{
+	int n_rows;
+	int max_rows = ai->max_mas / UWB_USABLE_MAS_PER_ROW;
+	int min_rows = ai->min_mas / UWB_USABLE_MAS_PER_ROW;
+	if (ai->min_mas % UWB_USABLE_MAS_PER_ROW)
+		min_rows++;
+	for (n_rows = max_rows; n_rows >= min_rows; n_rows--) {
+		if (n_rows <= ai->ri.free_rows) {
+			ai->ri.used_rows = n_rows;
+			ai->interval = 1; /* row reservation */
+			uwb_rsv_fill_row_alloc(ai);
+			return UWB_RSV_ALLOC_FOUND;
+		}
+	}  
+	return UWB_RSV_ALLOC_NOT_FOUND;
+}
+
+static int uwb_rsv_find_best_col_alloc(struct uwb_rsv_alloc_info *ai, int interval)
+{
+	int n_safe, n_unsafe, n_mas;  
+	int n_column = UWB_NUM_ZONES / interval;
+	int max_per_zone = ai->max_mas / n_column;
+	int min_per_zone = ai->min_mas / n_column;
+
+	if (ai->min_mas % n_column)
+		min_per_zone++;
+
+	if (min_per_zone > UWB_MAS_PER_ZONE) {
+		return UWB_RSV_ALLOC_NOT_FOUND;
+	}
+    
+	if (max_per_zone > UWB_MAS_PER_ZONE) {
+		max_per_zone = UWB_MAS_PER_ZONE;
+	}
+    
+	for (n_mas = max_per_zone; n_mas >= min_per_zone; n_mas--) {
+		if (uwb_rsv_find_best_column_set(ai, interval, 0, n_mas) == UWB_RSV_ALLOC_NOT_FOUND)
+			continue;
+		for (n_safe = n_mas; n_safe >= 0; n_safe--) {
+			n_unsafe = n_mas - n_safe;
+			if (uwb_rsv_find_best_column_set(ai, interval, n_safe, n_unsafe) == UWB_RSV_ALLOC_FOUND) {
+				uwb_rsv_fill_column_alloc(ai);
+				return UWB_RSV_ALLOC_FOUND;
+			}
+		}
+	}
+	return UWB_RSV_ALLOC_NOT_FOUND;
+}
+
+int uwb_rsv_find_best_allocation(struct uwb_rsv *rsv, struct uwb_mas_bm *available, 
+				 struct uwb_mas_bm *result)
+{
+	struct uwb_rsv_alloc_info *ai;
+	int interval;
+	int bit_index;
+
+	ai = kzalloc(sizeof(struct uwb_rsv_alloc_info), GFP_KERNEL);
+	if (!ai)
+		return UWB_RSV_ALLOC_NOT_FOUND;
+
+	ai->min_mas = rsv->min_mas;
+	ai->max_mas = rsv->max_mas;
+	ai->max_interval = rsv->max_interval;
+
+	/* mark the MAS that are not available, based on the available bitmap */
+	for (bit_index = 0; bit_index < UWB_NUM_MAS; bit_index++) {
+		if (!test_bit(bit_index, available->bm))
+			ai->bm[bit_index] = UWB_RSV_MAS_NOT_AVAIL;
+	}
+
+	if (ai->max_interval == 1) {
+		get_row_descriptors(ai);
+		if (uwb_rsv_find_best_row_alloc(ai) == UWB_RSV_ALLOC_FOUND)
+			goto alloc_found;
+		else
+			goto alloc_not_found;
+	}
+
+	get_column_descriptors(ai);
+        
+	for (interval = 16; interval >= 2; interval>>=1) {
+		if (interval > ai->max_interval)
+			continue;
+		if (uwb_rsv_find_best_col_alloc(ai, interval) == UWB_RSV_ALLOC_FOUND)
+			goto alloc_found;
+	}
+
+	/* try row reservation if no column is found */
+	get_row_descriptors(ai);
+	if (uwb_rsv_find_best_row_alloc(ai) == UWB_RSV_ALLOC_FOUND)
+		goto alloc_found;
+	else
+		goto alloc_not_found;
+
+  alloc_found:
+	bitmap_zero(result->bm, UWB_NUM_MAS);
+	bitmap_zero(result->unsafe_bm, UWB_NUM_MAS);
+	/* fill the safe and unsafe bitmaps */
+	for (bit_index = 0; bit_index < UWB_NUM_MAS; bit_index++) {
+		if (ai->bm[bit_index] == UWB_RSV_MAS_SAFE)
+			set_bit(bit_index, result->bm);
+		else if (ai->bm[bit_index] == UWB_RSV_MAS_UNSAFE)
+			set_bit(bit_index, result->unsafe_bm);
+	}
+	bitmap_or(result->bm, result->bm, result->unsafe_bm, UWB_NUM_MAS);
+
+	result->safe   = ai->safe_allocated_mases;
+	result->unsafe = ai->unsafe_allocated_mases;
+	
+	kfree(ai);		
+	return UWB_RSV_ALLOC_FOUND;
+  
+  alloc_not_found:
+	kfree(ai);
+	return UWB_RSV_ALLOC_NOT_FOUND;
+}
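
The column-set ordering documented in the comment above uwb_rsv_find_best_column_set() can be reproduced with a small standalone sketch (not part of the patch): at each "deep" level the first candidate column is interval / 2^(deep + 1), successive sets step by interval >> deep, and the remaining columns of a set follow at multiples of the interval.  A minimal user-space illustration, assuming nothing beyond the loop structure shown above:

/*
 * Standalone sketch: print the first column of each candidate set tried
 * by uwb_rsv_find_best_column_set() for a given interval, in the same
 * deep/set order as the table in allocator.c.  Illustration only.
 */
#include <stdio.h>

int main(void)
{
	int interval = 8;	/* try 16, 8, 4 or 2 */
	int deep, set, start_col_deep, col_start_set;

	start_col_deep = interval;
	for (deep = 0; ((interval >> deep) & 0x1) == 0; deep++) {
		start_col_deep /= 2;
		col_start_set = 0;
		for (set = 1; set <= (1 << deep); set++) {
			printf("deep=%d set=%d -> first column %d\n",
			       deep, set, start_col_deep + col_start_set);
			col_start_set += interval >> deep;
		}
	}
	return 0;
}

For interval = 8 this prints column 4, then 2 and 6, then 1, 3, 5 and 7, matching the sets {4 12}, {2 10}, {6 14}, {1 9}, {3 11}, {5 13} and {7 15} listed above.
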
diff --git a/drivers/uwb/beacon.c b/drivers/uwb/beacon.c
index 46b18ee..36bc315 100644
--- a/drivers/uwb/beacon.c
+++ b/drivers/uwb/beacon.c
@@ -22,19 +22,16 @@
  *
  * FIXME: docs
  */
-
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/device.h>
 #include <linux/err.h>
 #include <linux/kdev_t.h>
+
 #include "uwb-internal.h"
 
-#define D_LOCAL 0
-#include <linux/uwb/debug.h>
-
-/** Start Beaconing command structure */
+/* Start Beaconing command structure */
 struct uwb_rc_cmd_start_beacon {
 	struct uwb_rccb rccb;
 	__le16 wBPSTOffset;
@@ -119,7 +116,6 @@
 	int result;
 	struct device *dev = &rc->uwb_dev.dev;
 
-	mutex_lock(&rc->uwb_dev.mutex);
 	if (channel < 0)
 		channel = -1;
 	if (channel == -1)
@@ -128,7 +124,7 @@
 		/* channel >= 0...dah */
 		result = uwb_rc_start_beacon(rc, bpst_offset, channel);
 		if (result < 0)
-			goto out_up;
+			return result;
 		if (le16_to_cpu(rc->ies->wIELength) > 0) {
 			result = uwb_rc_set_ie(rc, rc->ies);
 			if (result < 0) {
@@ -137,19 +133,12 @@
 				result = uwb_rc_stop_beacon(rc);
 				channel = -1;
 				bpst_offset = 0;
-			} else
-				result = 0;
+			}
 		}
 	}
 
-	if (result < 0)
-		goto out_up;
-	rc->beaconing = channel;
-
-	uwb_notify(rc, NULL, uwb_bg_joined(rc) ? UWB_NOTIF_BG_JOIN : UWB_NOTIF_BG_LEAVE);
-
-out_up:
-	mutex_unlock(&rc->uwb_dev.mutex);
+	if (result >= 0)
+		rc->beaconing = channel;
 	return result;
 }
 
@@ -168,12 +157,6 @@
  * FIXME: use something faster for search than a list
  */
 
-struct uwb_beca uwb_beca = {
-	.list = LIST_HEAD_INIT(uwb_beca.list),
-	.mutex = __MUTEX_INITIALIZER(uwb_beca.mutex)
-};
-
-
 void uwb_bce_kfree(struct kref *_bce)
 {
 	struct uwb_beca_e *bce = container_of(_bce, struct uwb_beca_e, refcnt);
@@ -185,13 +168,11 @@
 
 /* Find a beacon by dev addr in the cache */
 static
-struct uwb_beca_e *__uwb_beca_find_bydev(const struct uwb_dev_addr *dev_addr)
+struct uwb_beca_e *__uwb_beca_find_bydev(struct uwb_rc *rc,
+					 const struct uwb_dev_addr *dev_addr)
 {
 	struct uwb_beca_e *bce, *next;
-	list_for_each_entry_safe(bce, next, &uwb_beca.list, node) {
-		d_printf(6, NULL, "looking for addr %02x:%02x in %02x:%02x\n",
-			 dev_addr->data[0], dev_addr->data[1],
-			 bce->dev_addr.data[0], bce->dev_addr.data[1]);
+	list_for_each_entry_safe(bce, next, &rc->uwb_beca.list, node) {
 		if (!memcmp(&bce->dev_addr, dev_addr, sizeof(bce->dev_addr)))
 			goto out;
 	}
@@ -202,10 +183,11 @@
 
 /* Find a beacon by dev addr in the cache */
 static
-struct uwb_beca_e *__uwb_beca_find_bymac(const struct uwb_mac_addr *mac_addr)
+struct uwb_beca_e *__uwb_beca_find_bymac(struct uwb_rc *rc, 
+					 const struct uwb_mac_addr *mac_addr)
 {
 	struct uwb_beca_e *bce, *next;
-	list_for_each_entry_safe(bce, next, &uwb_beca.list, node) {
+	list_for_each_entry_safe(bce, next, &rc->uwb_beca.list, node) {
 		if (!memcmp(bce->mac_addr, mac_addr->data,
 			    sizeof(struct uwb_mac_addr)))
 			goto out;
@@ -229,11 +211,11 @@
 	struct uwb_dev *found = NULL;
 	struct uwb_beca_e *bce;
 
-	mutex_lock(&uwb_beca.mutex);
-	bce = __uwb_beca_find_bydev(devaddr);
+	mutex_lock(&rc->uwb_beca.mutex);
+	bce = __uwb_beca_find_bydev(rc, devaddr);
 	if (bce)
 		found = uwb_dev_try_get(rc, bce->uwb_dev);
-	mutex_unlock(&uwb_beca.mutex);
+	mutex_unlock(&rc->uwb_beca.mutex);
 
 	return found;
 }
@@ -249,11 +231,11 @@
 	struct uwb_dev *found = NULL;
 	struct uwb_beca_e *bce;
 
-	mutex_lock(&uwb_beca.mutex);
-	bce = __uwb_beca_find_bymac(macaddr);
+	mutex_lock(&rc->uwb_beca.mutex);
+	bce = __uwb_beca_find_bymac(rc, macaddr);
 	if (bce)
 		found = uwb_dev_try_get(rc, bce->uwb_dev);
-	mutex_unlock(&uwb_beca.mutex);
+	mutex_unlock(&rc->uwb_beca.mutex);
 
 	return found;
 }
@@ -274,7 +256,9 @@
  * @bf:         Beacon frame (part of b, really)
  * @ts_jiffies: Timestamp (in jiffies) when the beacon was received
  */
-struct uwb_beca_e *__uwb_beca_add(struct uwb_rc_evt_beacon *be,
+static
+struct uwb_beca_e *__uwb_beca_add(struct uwb_rc *rc,
+				  struct uwb_rc_evt_beacon *be,
 				  struct uwb_beacon_frame *bf,
 				  unsigned long ts_jiffies)
 {
@@ -286,7 +270,7 @@
 	uwb_beca_e_init(bce);
 	bce->ts_jiffies = ts_jiffies;
 	bce->uwb_dev = NULL;
-	list_add(&bce->node, &uwb_beca.list);
+	list_add(&bce->node, &rc->uwb_beca.list);
 	return bce;
 }
 
@@ -295,33 +279,32 @@
  *
  * Remove associated devices too.
  */
-void uwb_beca_purge(void)
+void uwb_beca_purge(struct uwb_rc *rc)
 {
 	struct uwb_beca_e *bce, *next;
 	unsigned long expires;
 
-	mutex_lock(&uwb_beca.mutex);
-	list_for_each_entry_safe(bce, next, &uwb_beca.list, node) {
+	mutex_lock(&rc->uwb_beca.mutex);
+	list_for_each_entry_safe(bce, next, &rc->uwb_beca.list, node) {
 		expires = bce->ts_jiffies + msecs_to_jiffies(beacon_timeout_ms);
 		if (time_after(jiffies, expires)) {
 			uwbd_dev_offair(bce);
-			list_del(&bce->node);
-			uwb_bce_put(bce);
 		}
 	}
-	mutex_unlock(&uwb_beca.mutex);
+	mutex_unlock(&rc->uwb_beca.mutex);
 }
 
 /* Clean up the whole beacon cache. Called on shutdown */
-void uwb_beca_release(void)
+void uwb_beca_release(struct uwb_rc *rc)
 {
 	struct uwb_beca_e *bce, *next;
-	mutex_lock(&uwb_beca.mutex);
-	list_for_each_entry_safe(bce, next, &uwb_beca.list, node) {
+
+	mutex_lock(&rc->uwb_beca.mutex);
+	list_for_each_entry_safe(bce, next, &rc->uwb_beca.list, node) {
 		list_del(&bce->node);
 		uwb_bce_put(bce);
 	}
-	mutex_unlock(&uwb_beca.mutex);
+	mutex_unlock(&rc->uwb_beca.mutex);
 }
 
 static void uwb_beacon_print(struct uwb_rc *rc, struct uwb_rc_evt_beacon *be,
@@ -349,22 +332,22 @@
 	ssize_t result = 0;
 	struct uwb_rc_evt_beacon *be;
 	struct uwb_beacon_frame *bf;
-	struct uwb_buf_ctx ctx = {
-		.buf = buf,
-		.bytes = 0,
-		.size = size
-	};
+	int ies_len;
+	struct uwb_ie_hdr *ies;
 
 	mutex_lock(&bce->mutex);
+
 	be = bce->be;
-	if (be == NULL)
-		goto out;
-	bf = (void *) be->BeaconInfo;
-	uwb_ie_for_each(uwb_dev, uwb_ie_dump_hex, &ctx,
-			bf->IEData, be->wBeaconInfoLength - sizeof(*bf));
-	result = ctx.bytes;
-out:
+	if (be) {
+		bf = (struct uwb_beacon_frame *)bce->be->BeaconInfo;
+		ies_len = be->wBeaconInfoLength - sizeof(struct uwb_beacon_frame);
+		ies = (struct uwb_ie_hdr *)bf->IEData;
+
+		result = uwb_ie_dump_hex(ies, ies_len, buf, size);
+	}
+
 	mutex_unlock(&bce->mutex);
+
 	return result;
 }
 
@@ -437,18 +420,18 @@
 	if (uwb_mac_addr_bcast(&bf->Device_Identifier))
 		return 0;
 
-	mutex_lock(&uwb_beca.mutex);
-	bce = __uwb_beca_find_bymac(&bf->Device_Identifier);
+	mutex_lock(&rc->uwb_beca.mutex);
+	bce = __uwb_beca_find_bymac(rc, &bf->Device_Identifier);
 	if (bce == NULL) {
 		/* Not in there, a new device is pinging */
 		uwb_beacon_print(evt->rc, be, bf);
-		bce = __uwb_beca_add(be, bf, evt->ts_jiffies);
+		bce = __uwb_beca_add(rc, be, bf, evt->ts_jiffies);
 		if (bce == NULL) {
-			mutex_unlock(&uwb_beca.mutex);
+			mutex_unlock(&rc->uwb_beca.mutex);
 			return -ENOMEM;
 		}
 	}
-	mutex_unlock(&uwb_beca.mutex);
+	mutex_unlock(&rc->uwb_beca.mutex);
 
 	mutex_lock(&bce->mutex);
 	/* purge old beacon data */
@@ -588,19 +571,6 @@
 	return result;
 }
 
-/**
- * uwb_bg_joined - is the RC in a beacon group?
- * @rc: the radio controller
- *
- * Returns true if the radio controller is in a beacon group (even if
- * it's the sole member).
- */
-int uwb_bg_joined(struct uwb_rc *rc)
-{
-	return rc->beaconing != -1;
-}
-EXPORT_SYMBOL_GPL(uwb_bg_joined);
-
 /*
  * Print beaconing state.
  */
@@ -619,9 +589,6 @@
 
 /*
  * Start beaconing on the specified channel, or stop beaconing.
- *
- * The BPST offset of when to start searching for a beacon group to
- * join may be specified.
  */
 static ssize_t uwb_rc_beacon_store(struct device *dev,
 				   struct device_attribute *attr,
@@ -630,12 +597,11 @@
 	struct uwb_dev *uwb_dev = to_uwb_dev(dev);
 	struct uwb_rc *rc = uwb_dev->rc;
 	int channel;
-	unsigned bpst_offset = 0;
 	ssize_t result = -EINVAL;
 
-	result = sscanf(buf, "%d %u\n", &channel, &bpst_offset);
+	result = sscanf(buf, "%d", &channel);
 	if (result >= 1)
-		result = uwb_rc_beacon(rc, channel, bpst_offset);
+		result = uwb_radio_force_channel(rc, channel);
 
 	return result < 0 ? result : size;
 }
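
With the bpst offset argument gone, the beacon attribute store above parses a single channel number and hands it to uwb_radio_force_channel(); judging from uwb_rc_beacon() earlier in this file, a negative value stops beaconing.  A minimal user-space sketch of driving the attribute; the uwb0 node name and channel value are illustrative placeholders:

/*
 * User-space sketch: select a beaconing channel by writing to the
 * beacon sysfs attribute.  Path and channel are placeholders.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *attr = "/sys/class/uwb_rc/uwb0/beacon";
	const char *chan = "9\n";	/* "-1\n" would stop beaconing */
	int fd = open(attr, O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (write(fd, chan, strlen(chan)) < 0)
		perror("write");
	close(fd);
	return 0;
}
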
diff --git a/drivers/uwb/driver.c b/drivers/uwb/driver.c
index 521cdeb..da77e41 100644
--- a/drivers/uwb/driver.c
+++ b/drivers/uwb/driver.c
@@ -53,7 +53,7 @@
 #include <linux/err.h>
 #include <linux/kdev_t.h>
 #include <linux/random.h>
-#include <linux/uwb/debug.h>
+
 #include "uwb-internal.h"
 
 
@@ -118,7 +118,6 @@
 	result = class_register(&uwb_rc_class);
 	if (result < 0)
 		goto error_uwb_rc_class_register;
-	uwbd_start();
 	uwb_dbg_init();
 	return 0;
 
@@ -132,7 +131,6 @@
 static void __exit uwb_subsys_exit(void)
 {
 	uwb_dbg_exit();
-	uwbd_stop();
 	class_unregister(&uwb_rc_class);
 	uwb_est_destroy();
 	return;
diff --git a/drivers/uwb/drp-avail.c b/drivers/uwb/drp-avail.c
index 3febd855..40a540a 100644
--- a/drivers/uwb/drp-avail.c
+++ b/drivers/uwb/drp-avail.c
@@ -58,7 +58,7 @@
  *
  * avail = global & local & pending
  */
-static void uwb_drp_available(struct uwb_rc *rc, struct uwb_mas_bm *avail)
+void uwb_drp_available(struct uwb_rc *rc, struct uwb_mas_bm *avail)
 {
 	bitmap_and(avail->bm, rc->drp_avail.global, rc->drp_avail.local, UWB_NUM_MAS);
 	bitmap_and(avail->bm, avail->bm, rc->drp_avail.pending, UWB_NUM_MAS);
@@ -105,6 +105,7 @@
 	bitmap_or(rc->drp_avail.local, rc->drp_avail.local, mas->bm, UWB_NUM_MAS);
 	bitmap_or(rc->drp_avail.pending, rc->drp_avail.pending, mas->bm, UWB_NUM_MAS);
 	rc->drp_avail.ie_valid = false;
+	uwb_rsv_handle_drp_avail_change(rc);
 }
 
 /**
@@ -280,6 +281,7 @@
 	mutex_lock(&rc->rsvs_mutex);
 	bitmap_copy(rc->drp_avail.global, bmp, UWB_NUM_MAS);
 	rc->drp_avail.ie_valid = false;
+	uwb_rsv_handle_drp_avail_change(rc);
 	mutex_unlock(&rc->rsvs_mutex);
 
 	uwb_rsv_sched_update(rc);
diff --git a/drivers/uwb/drp-ie.c b/drivers/uwb/drp-ie.c
index 882724c..2840d7b 100644
--- a/drivers/uwb/drp-ie.c
+++ b/drivers/uwb/drp-ie.c
@@ -16,13 +16,102 @@
  * You should have received a copy of the GNU General Public License
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
-#include <linux/version.h>
 #include <linux/kernel.h>
 #include <linux/random.h>
 #include <linux/uwb.h>
 
 #include "uwb-internal.h"
 
+
+/*
+ * Return the reason code for a reservation's DRP IE.
+ */
+int uwb_rsv_reason_code(struct uwb_rsv *rsv)
+{
+	static const int reason_codes[] = {
+		[UWB_RSV_STATE_O_INITIATED]          = UWB_DRP_REASON_ACCEPTED,
+		[UWB_RSV_STATE_O_PENDING]            = UWB_DRP_REASON_ACCEPTED,
+		[UWB_RSV_STATE_O_MODIFIED]           = UWB_DRP_REASON_MODIFIED,
+		[UWB_RSV_STATE_O_ESTABLISHED]        = UWB_DRP_REASON_ACCEPTED,
+		[UWB_RSV_STATE_O_TO_BE_MOVED]        = UWB_DRP_REASON_ACCEPTED,
+		[UWB_RSV_STATE_O_MOVE_COMBINING]     = UWB_DRP_REASON_MODIFIED,
+		[UWB_RSV_STATE_O_MOVE_REDUCING]      = UWB_DRP_REASON_MODIFIED,
+		[UWB_RSV_STATE_O_MOVE_EXPANDING]     = UWB_DRP_REASON_ACCEPTED,
+		[UWB_RSV_STATE_T_ACCEPTED]           = UWB_DRP_REASON_ACCEPTED,
+		[UWB_RSV_STATE_T_CONFLICT]           = UWB_DRP_REASON_CONFLICT,
+		[UWB_RSV_STATE_T_PENDING]            = UWB_DRP_REASON_PENDING,
+		[UWB_RSV_STATE_T_DENIED]             = UWB_DRP_REASON_DENIED,
+		[UWB_RSV_STATE_T_RESIZED]            = UWB_DRP_REASON_ACCEPTED,
+		[UWB_RSV_STATE_T_EXPANDING_ACCEPTED] = UWB_DRP_REASON_ACCEPTED,
+		[UWB_RSV_STATE_T_EXPANDING_CONFLICT] = UWB_DRP_REASON_CONFLICT,
+		[UWB_RSV_STATE_T_EXPANDING_PENDING]  = UWB_DRP_REASON_PENDING,
+		[UWB_RSV_STATE_T_EXPANDING_DENIED]   = UWB_DRP_REASON_DENIED,
+	};
+
+	return reason_codes[rsv->state];
+}
+
+/*
+ * Return the reason code for a reservation's companion DRP IE.
+ */
+int uwb_rsv_companion_reason_code(struct uwb_rsv *rsv)
+{
+	static const int companion_reason_codes[] = {
+		[UWB_RSV_STATE_O_MOVE_EXPANDING]     = UWB_DRP_REASON_ACCEPTED,
+		[UWB_RSV_STATE_T_EXPANDING_ACCEPTED] = UWB_DRP_REASON_ACCEPTED,
+		[UWB_RSV_STATE_T_EXPANDING_CONFLICT] = UWB_DRP_REASON_CONFLICT,
+		[UWB_RSV_STATE_T_EXPANDING_PENDING]  = UWB_DRP_REASON_PENDING,
+		[UWB_RSV_STATE_T_EXPANDING_DENIED]   = UWB_DRP_REASON_DENIED,
+	};
+
+	return companion_reason_codes[rsv->state];
+}
+
+/*
+ * Return the status bit for a reservation's DRP IE.
+ */
+int uwb_rsv_status(struct uwb_rsv *rsv)
+{
+	static const int statuses[] = {
+		[UWB_RSV_STATE_O_INITIATED]          = 0,
+		[UWB_RSV_STATE_O_PENDING]            = 0,
+		[UWB_RSV_STATE_O_MODIFIED]           = 1,
+		[UWB_RSV_STATE_O_ESTABLISHED]        = 1,
+		[UWB_RSV_STATE_O_TO_BE_MOVED]        = 0,
+		[UWB_RSV_STATE_O_MOVE_COMBINING]     = 1,
+		[UWB_RSV_STATE_O_MOVE_REDUCING]      = 1,
+		[UWB_RSV_STATE_O_MOVE_EXPANDING]     = 1,
+		[UWB_RSV_STATE_T_ACCEPTED]           = 1,
+		[UWB_RSV_STATE_T_CONFLICT]           = 0,
+		[UWB_RSV_STATE_T_PENDING]            = 0,
+		[UWB_RSV_STATE_T_DENIED]             = 0,
+		[UWB_RSV_STATE_T_RESIZED]            = 1,
+		[UWB_RSV_STATE_T_EXPANDING_ACCEPTED] = 1,
+		[UWB_RSV_STATE_T_EXPANDING_CONFLICT] = 1,
+		[UWB_RSV_STATE_T_EXPANDING_PENDING]  = 1,
+		[UWB_RSV_STATE_T_EXPANDING_DENIED]   = 1,
+
+	};
+
+	return statuses[rsv->state];
+}
+
+/*
+ * Return the status bit for a reservation's companion DRP IE.
+ */
+int uwb_rsv_companion_status(struct uwb_rsv *rsv)
+{
+	static const int companion_statuses[] = {
+		[UWB_RSV_STATE_O_MOVE_EXPANDING]     = 0,
+		[UWB_RSV_STATE_T_EXPANDING_ACCEPTED] = 1,
+		[UWB_RSV_STATE_T_EXPANDING_CONFLICT] = 0,
+		[UWB_RSV_STATE_T_EXPANDING_PENDING]  = 0,
+		[UWB_RSV_STATE_T_EXPANDING_DENIED]   = 0,
+	};
+
+	return companion_statuses[rsv->state];
+}
+
 /*
  * Allocate a DRP IE.
  *
@@ -34,16 +123,12 @@
 static struct uwb_ie_drp *uwb_drp_ie_alloc(void)
 {
 	struct uwb_ie_drp *drp_ie;
-	unsigned tiebreaker;
 
 	drp_ie = kzalloc(sizeof(struct uwb_ie_drp) +
 			UWB_NUM_ZONES * sizeof(struct uwb_drp_alloc),
 			GFP_KERNEL);
 	if (drp_ie) {
 		drp_ie->hdr.element_id = UWB_IE_DRP;
-
-		get_random_bytes(&tiebreaker, sizeof(unsigned));
-		uwb_ie_drp_set_tiebreaker(drp_ie, tiebreaker & 1);
 	}
 	return drp_ie;
 }
@@ -104,43 +189,17 @@
  */
 int uwb_drp_ie_update(struct uwb_rsv *rsv)
 {
-	struct device *dev = &rsv->rc->uwb_dev.dev;
 	struct uwb_ie_drp *drp_ie;
-	int reason_code, status;
+	struct uwb_rsv_move *mv;
+	int unsafe;
 
-	switch (rsv->state) {
-	case UWB_RSV_STATE_NONE:
+	if (rsv->state == UWB_RSV_STATE_NONE) {
 		kfree(rsv->drp_ie);
 		rsv->drp_ie = NULL;
 		return 0;
-	case UWB_RSV_STATE_O_INITIATED:
-		reason_code = UWB_DRP_REASON_ACCEPTED;
-		status = 0;
-		break;
-	case UWB_RSV_STATE_O_PENDING:
-		reason_code = UWB_DRP_REASON_ACCEPTED;
-		status = 0;
-		break;
-	case UWB_RSV_STATE_O_MODIFIED:
-		reason_code = UWB_DRP_REASON_MODIFIED;
-		status = 1;
-		break;
-	case UWB_RSV_STATE_O_ESTABLISHED:
-		reason_code = UWB_DRP_REASON_ACCEPTED;
-		status = 1;
-		break;
-	case UWB_RSV_STATE_T_ACCEPTED:
-		reason_code = UWB_DRP_REASON_ACCEPTED;
-		status = 1;
-		break;
-	case UWB_RSV_STATE_T_DENIED:
-		reason_code = UWB_DRP_REASON_DENIED;
-		status = 0;
-		break;
-	default:
-		dev_dbg(dev, "rsv with unhandled state (%d)\n", rsv->state);
-		return -EINVAL;
 	}
+	
+	unsafe = rsv->mas.unsafe ? 1 : 0;
 
 	if (rsv->drp_ie == NULL) {
 		rsv->drp_ie = uwb_drp_ie_alloc();
@@ -149,9 +208,11 @@
 	}
 	drp_ie = rsv->drp_ie;
 
+	uwb_ie_drp_set_unsafe(drp_ie,       unsafe);
+	uwb_ie_drp_set_tiebreaker(drp_ie,   rsv->tiebreaker);
 	uwb_ie_drp_set_owner(drp_ie,        uwb_rsv_is_owner(rsv));
-	uwb_ie_drp_set_status(drp_ie,       status);
-	uwb_ie_drp_set_reason_code(drp_ie,  reason_code);
+	uwb_ie_drp_set_status(drp_ie,       uwb_rsv_status(rsv));
+	uwb_ie_drp_set_reason_code(drp_ie,  uwb_rsv_reason_code(rsv));
 	uwb_ie_drp_set_stream_index(drp_ie, rsv->stream);
 	uwb_ie_drp_set_type(drp_ie,         rsv->type);
 
@@ -169,6 +230,27 @@
 
 	uwb_drp_ie_from_bm(drp_ie, &rsv->mas);
 
+	if (uwb_rsv_has_two_drp_ies(rsv)) {
+		mv = &rsv->mv; 
+		if (mv->companion_drp_ie == NULL) {
+			mv->companion_drp_ie = uwb_drp_ie_alloc();
+			if (mv->companion_drp_ie == NULL)
+				return -ENOMEM;
+		}
+		drp_ie = mv->companion_drp_ie;
+		
+		/* keep the same configuration as the main drp_ie */
+		memcpy(drp_ie, rsv->drp_ie, sizeof(struct uwb_ie_drp));
+
+		/* FIXME: handle properly the unsafe bit */
+		uwb_ie_drp_set_unsafe(drp_ie,       1);
+		uwb_ie_drp_set_status(drp_ie,       uwb_rsv_companion_status(rsv));
+		uwb_ie_drp_set_reason_code(drp_ie,  uwb_rsv_companion_reason_code(rsv));
+	
+		uwb_drp_ie_from_bm(drp_ie, &mv->companion_mas);
+	}
+
 	rsv->ie_valid = true;
 	return 0;
 }
@@ -219,6 +301,8 @@
 	u8 zone;
 	u16 zone_mask;
 
+	bitmap_zero(bm->bm, UWB_NUM_MAS);
+
 	for (cnt = 0; cnt < numallocs; cnt++) {
 		alloc = &drp_ie->allocs[cnt];
 		zone_bm = le16_to_cpu(alloc->zone_bm);
@@ -230,3 +314,4 @@
 		}
 	}
 }
+
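
The switch statements previously used to derive the DRP IE status and reason code are replaced above by lookup tables indexed with rsv->state.  Worth noting: with C designated initializers any state that is not listed reads back as zero, so the companion tables are presumably only consulted for the move/expanding states they enumerate (i.e. when uwb_rsv_has_two_drp_ies() holds).  A self-contained sketch of the pattern; the demo_* names are invented for illustration:

/*
 * Sketch of the designated-initializer lookup used above: entries are
 * indexed by state and any state not listed is implicitly zero.
 */
#include <stdio.h>

enum demo_state { ST_ACCEPTED, ST_PENDING, ST_DENIED, ST_MAX };

static const int demo_reason[ST_MAX] = {
	[ST_ACCEPTED] = 1,
	[ST_DENIED]   = 3,
	/* ST_PENDING not listed: reads back as 0 */
};

int main(void)
{
	printf("accepted=%d pending=%d denied=%d\n",
	       demo_reason[ST_ACCEPTED], demo_reason[ST_PENDING],
	       demo_reason[ST_DENIED]);
	return 0;
}
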
diff --git a/drivers/uwb/drp.c b/drivers/uwb/drp.c
index c0b1e5e..2b4f940 100644
--- a/drivers/uwb/drp.c
+++ b/drivers/uwb/drp.c
@@ -23,6 +23,59 @@
 #include <linux/delay.h>
 #include "uwb-internal.h"
 
+
+/* DRP Conflict Actions ([ECMA-368 2nd Edition] 17.4.6) */
+enum uwb_drp_conflict_action {
+	/* Reservation is mantained, no action needed */
+	/* Reservation is maintained, no action needed */
+	
+	/* the device shall not transmit frames in conflicting MASs in
+	 * the following superframe. If the device is the reservation
+	 * target, it shall also set the Reason Code in its DRP IE to
+	 * Conflict in its beacon in the following superframe.
+	 */
+	UWB_DRP_CONFLICT_ACT1,
+	
+	/* the device shall not set the Reservation Status bit to ONE
+	 * and shall not transmit frames in conflicting MASs. If the
+	 * device is the reservation target, it shall also set the
+	 * Reason Code in its DRP IE to Conflict.
+	 */	
+	UWB_DRP_CONFLICT_ACT2,
+
+	/* the device shall not transmit frames in conflicting MASs in
+	 * the following superframe. It shall remove the conflicting
+	 * MASs from the reservation or set the Reservation Status to
+	 * ZERO in its beacon in the following superframe. If the
+	 * device is the reservation target, it shall also set the
+	 * Reason Code in its DRP IE to Conflict.
+	 */
+	UWB_DRP_CONFLICT_ACT3,
+};
+
+
+static void uwb_rc_set_drp_cmd_done(struct uwb_rc *rc, void *arg,
+				    struct uwb_rceb *reply, ssize_t reply_size)
+{
+	struct uwb_rc_evt_set_drp_ie *r = (struct uwb_rc_evt_set_drp_ie *)reply;
+
+	if (r != NULL) {
+		if (r->bResultCode != UWB_RC_RES_SUCCESS)
+			dev_err(&rc->uwb_dev.dev, "SET-DRP-IE failed: %s (%d)\n",
+				uwb_rc_strerror(r->bResultCode), r->bResultCode);
+	} else
+		dev_err(&rc->uwb_dev.dev, "SET-DRP-IE: timeout\n");
+
+	spin_lock(&rc->rsvs_lock);
+	if (rc->set_drp_ie_pending > 1) {
+		rc->set_drp_ie_pending = 0;
+		uwb_rsv_queue_update(rc);	
+	} else {
+		rc->set_drp_ie_pending = 0;	
+	}
+	spin_unlock(&rc->rsvs_lock);
+}
+
 /**
  * Construct and send the SET DRP IE
  *
@@ -37,28 +90,32 @@
  *
  * A DRP Availability IE is appended.
  *
- * rc->uwb_dev.mutex is held
+ * rc->rsvs_mutex is held
  *
  * FIXME We currently ignore the returned value indicating the remaining space
  * in beacon. This could be used to deny reservation requests earlier if
  * determined that they would cause the beacon space to be exceeded.
  */
-static
-int uwb_rc_gen_send_drp_ie(struct uwb_rc *rc)
+int uwb_rc_send_all_drp_ie(struct uwb_rc *rc)
 {
 	int result;
-	struct device *dev = &rc->uwb_dev.dev;
 	struct uwb_rc_cmd_set_drp_ie *cmd;
-	struct uwb_rc_evt_set_drp_ie reply;
 	struct uwb_rsv *rsv;
+	struct uwb_rsv_move *mv;
 	int num_bytes = 0;
 	u8 *IEDataptr;
 
 	result = -ENOMEM;
 	/* First traverse all reservations to determine memory needed. */
 	list_for_each_entry(rsv, &rc->reservations, rc_node) {
-		if (rsv->drp_ie != NULL)
+		if (rsv->drp_ie != NULL) {
 			num_bytes += rsv->drp_ie->hdr.length + 2;
+			if (uwb_rsv_has_two_drp_ies(rsv) &&
+				(rsv->mv.companion_drp_ie != NULL)) {
+				mv = &rsv->mv;
+				num_bytes += mv->companion_drp_ie->hdr.length + 2;	
+			}
+		}
 	}
 	num_bytes += sizeof(rc->drp_avail.ie);
 	cmd = kzalloc(sizeof(*cmd) + num_bytes, GFP_KERNEL);
@@ -69,128 +126,322 @@
 	cmd->wIELength = num_bytes;
 	IEDataptr = (u8 *)&cmd->IEData[0];
 
+	/* FIXME: DRP Availability IE is not always needed */
+	/* put DRP avail IE first */
+	memcpy(IEDataptr, &rc->drp_avail.ie, sizeof(rc->drp_avail.ie));
+	IEDataptr += sizeof(struct uwb_ie_drp_avail);
+
 	/* Next traverse all reservations to place IEs in allocated memory. */
 	list_for_each_entry(rsv, &rc->reservations, rc_node) {
 		if (rsv->drp_ie != NULL) {
 			memcpy(IEDataptr, rsv->drp_ie,
 			       rsv->drp_ie->hdr.length + 2);
 			IEDataptr += rsv->drp_ie->hdr.length + 2;
+			
+			if (uwb_rsv_has_two_drp_ies(rsv) &&
+				(rsv->mv.companion_drp_ie != NULL)) {
+				mv = &rsv->mv;
+				memcpy(IEDataptr, mv->companion_drp_ie,
+				       mv->companion_drp_ie->hdr.length + 2);
+				IEDataptr += mv->companion_drp_ie->hdr.length + 2;	
+			}
 		}
 	}
-	memcpy(IEDataptr, &rc->drp_avail.ie, sizeof(rc->drp_avail.ie));
 
-	reply.rceb.bEventType = UWB_RC_CET_GENERAL;
-	reply.rceb.wEvent = UWB_RC_CMD_SET_DRP_IE;
-	result = uwb_rc_cmd(rc, "SET-DRP-IE", &cmd->rccb,
-			sizeof(*cmd) + num_bytes, &reply.rceb,
-			sizeof(reply));
-	if (result < 0)
-		goto error_cmd;
-	result = le16_to_cpu(reply.wRemainingSpace);
-	if (reply.bResultCode != UWB_RC_RES_SUCCESS) {
-		dev_err(&rc->uwb_dev.dev, "SET-DRP-IE: command execution "
-				"failed: %s (%d). RemainingSpace in beacon "
-				"= %d\n", uwb_rc_strerror(reply.bResultCode),
-				reply.bResultCode, result);
-		result = -EIO;
-	} else {
-		dev_dbg(dev, "SET-DRP-IE sent. RemainingSpace in beacon "
-			     "= %d.\n", result);
-		result = 0;
-	}
-error_cmd:
+	result = uwb_rc_cmd_async(rc, "SET-DRP-IE", &cmd->rccb, sizeof(*cmd) + num_bytes,
+				  UWB_RC_CET_GENERAL, UWB_RC_CMD_SET_DRP_IE,
+				  uwb_rc_set_drp_cmd_done, NULL);
+	
+	rc->set_drp_ie_pending = 1;
+
 	kfree(cmd);
 error:
 	return result;
-
 }
-/**
- * Send all DRP IEs associated with this host
+
+/*
+ * Evaluate the action to perform using conflict resolution rules
  *
- * @returns:    >= 0 number of bytes still available in the beacon
- *              < 0 errno code on error.
- *
- * As per the protocol we obtain the host controller device lock to access
- * bandwidth structures.
+ * Return a uwb_drp_conflict_action.
  */
-int uwb_rc_send_all_drp_ie(struct uwb_rc *rc)
+static int evaluate_conflict_action(struct uwb_ie_drp *ext_drp_ie, int ext_beacon_slot,
+				    struct uwb_rsv *rsv, int our_status)
 {
-	int result;
+	int our_tie_breaker = rsv->tiebreaker;
+	int our_type        = rsv->type;
+	int our_beacon_slot = rsv->rc->uwb_dev.beacon_slot;
 
-	mutex_lock(&rc->uwb_dev.mutex);
-	result = uwb_rc_gen_send_drp_ie(rc);
-	mutex_unlock(&rc->uwb_dev.mutex);
-	return result;
-}
-
-void uwb_drp_handle_timeout(struct uwb_rsv *rsv)
-{
-	struct device *dev = &rsv->rc->uwb_dev.dev;
-
-	dev_dbg(dev, "reservation timeout in state %s (%d)\n",
-		uwb_rsv_state_str(rsv->state), rsv->state);
-
-	switch (rsv->state) {
-	case UWB_RSV_STATE_O_INITIATED:
-		if (rsv->is_multicast) {
-			uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_ESTABLISHED);
-			return;
-		}
-		break;
-	case UWB_RSV_STATE_O_ESTABLISHED:
-		if (rsv->is_multicast)
-			return;
-		break;
-	default:
-		break;
+	int ext_tie_breaker = uwb_ie_drp_tiebreaker(ext_drp_ie);
+	int ext_status      = uwb_ie_drp_status(ext_drp_ie);
+	int ext_type        = uwb_ie_drp_type(ext_drp_ie);
+	
+	
+	/* [ECMA-368 2nd Edition] 17.4.6 */
+	if (ext_type == UWB_DRP_TYPE_PCA && our_type == UWB_DRP_TYPE_PCA) {
+		return UWB_DRP_CONFLICT_MANTAIN;
 	}
-	uwb_rsv_remove(rsv);
+
+	/* [ECMA-368 2nd Edition] 17.4.6-1 */
+	if (our_type == UWB_DRP_TYPE_ALIEN_BP) {
+		return UWB_DRP_CONFLICT_MANTAIN;
+	}
+	
+	/* [ECMA-368 2nd Edition] 17.4.6-2 */
+	if (ext_type == UWB_DRP_TYPE_ALIEN_BP) {
+		/* here we know our_type != UWB_DRP_TYPE_ALIEN_BP */
+		return UWB_DRP_CONFLICT_ACT1;
+	}
+
+	/* [ECMA-368 2nd Edition] 17.4.6-3 */
+	if (our_status == 0 && ext_status == 1) {
+		return UWB_DRP_CONFLICT_ACT2;
+	}
+
+	/* [ECMA-368 2nd Edition] 17.4.6-4 */
+	if (our_status == 1 && ext_status == 0) {
+		return UWB_DRP_CONFLICT_MANTAIN;
+	}
+
+	/* [ECMA-368 2nd Edition] 17.4.6-5a */
+	if (our_tie_breaker == ext_tie_breaker &&
+	    our_beacon_slot <  ext_beacon_slot) {
+		return UWB_DRP_CONFLICT_MANTAIN;
+	}
+
+	/* [ECMA-368 2nd Edition] 17.4.6-5b */
+	if (our_tie_breaker != ext_tie_breaker &&
+	    our_beacon_slot >  ext_beacon_slot) {
+		return UWB_DRP_CONFLICT_MANTAIN;
+	}
+	
+	if (our_status == 0) {
+		if (our_tie_breaker == ext_tie_breaker) {
+			/* [ECMA-368 2nd Edition] 17.4.6-6a */
+			if (our_beacon_slot > ext_beacon_slot) {
+				return UWB_DRP_CONFLICT_ACT2;
+			}
+		} else  {
+			/* [ECMA-368 2nd Edition] 17.4.6-6b */
+			if (our_beacon_slot < ext_beacon_slot) {
+				return UWB_DRP_CONFLICT_ACT2;
+			}
+		}
+	} else {
+		if (our_tie_breaker == ext_tie_breaker) {
+			/* [ECMA-368 2nd Edition] 17.4.6-7a */
+			if (our_beacon_slot > ext_beacon_slot) {
+				return UWB_DRP_CONFLICT_ACT3;
+			}
+		} else {
+			/* [ECMA-368 2nd Edition] 17.4.6-7b */
+			if (our_beacon_slot < ext_beacon_slot) {
+				return UWB_DRP_CONFLICT_ACT3;
+			}
+		}
+	}
+	return UWB_DRP_CONFLICT_MANTAIN;
 }
 
+static void handle_conflict_normal(struct uwb_ie_drp *drp_ie, 
+				   int ext_beacon_slot, 
+				   struct uwb_rsv *rsv, 
+				   struct uwb_mas_bm *conflicting_mas)
+{
+	struct uwb_rc *rc = rsv->rc;
+	struct uwb_rsv_move *mv = &rsv->mv;
+	struct uwb_drp_backoff_win *bow = &rc->bow;
+	int action;
+
+	action = evaluate_conflict_action(drp_ie, ext_beacon_slot, rsv, uwb_rsv_status(rsv));
+
+	if (uwb_rsv_is_owner(rsv)) {
+		switch(action) {
+		case UWB_DRP_CONFLICT_ACT2:
+			/* try move */
+			uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_TO_BE_MOVED);
+			if (bow->can_reserve_extra_mases == false)
+				uwb_rsv_backoff_win_increment(rc);
+			
+			break;
+		case UWB_DRP_CONFLICT_ACT3:
+			uwb_rsv_backoff_win_increment(rc);
+			/* drop some mases with reason modified */
+			/* put in the companion the mases to be dropped */
+			bitmap_and(mv->companion_mas.bm, rsv->mas.bm, conflicting_mas->bm, UWB_NUM_MAS);
+			uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MODIFIED);
+		default:
+			break;
+		}
+	} else {
+		switch(action) {
+		case UWB_DRP_CONFLICT_ACT2:
+		case UWB_DRP_CONFLICT_ACT3:
+			uwb_rsv_set_state(rsv, UWB_RSV_STATE_T_CONFLICT);	
+		default:
+			break;
+		}
+
+	}
+	
+}
+
+static void handle_conflict_expanding(struct uwb_ie_drp *drp_ie, int ext_beacon_slot,
+				      struct uwb_rsv *rsv, bool companion_only,
+				      struct uwb_mas_bm *conflicting_mas)
+{
+	struct uwb_rc *rc = rsv->rc;
+	struct uwb_drp_backoff_win *bow = &rc->bow;
+	struct uwb_rsv_move *mv = &rsv->mv;
+	int action;
+	
+	if (companion_only) {
+		/* status of companion is 0 at this point */
+		action = evaluate_conflict_action(drp_ie, ext_beacon_slot, rsv, 0);
+		if (uwb_rsv_is_owner(rsv)) {
+			switch(action) {
+			case UWB_DRP_CONFLICT_ACT2:
+			case UWB_DRP_CONFLICT_ACT3:
+				uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_ESTABLISHED);
+				rsv->needs_release_companion_mas = false;
+				if (bow->can_reserve_extra_mases == false)
+					uwb_rsv_backoff_win_increment(rc);
+				uwb_drp_avail_release(rsv->rc, &rsv->mv.companion_mas);
+			}
+		} else { /* rsv is target */			
+			switch(action) {
+			case UWB_DRP_CONFLICT_ACT2:
+			case UWB_DRP_CONFLICT_ACT3:
+				uwb_rsv_set_state(rsv, UWB_RSV_STATE_T_EXPANDING_CONFLICT);
+                                /* send_drp_avail_ie = true; */
+			}
+		}
+	} else { /* also base part of the reservation is conflicting */		
+		if (uwb_rsv_is_owner(rsv)) {
+			uwb_rsv_backoff_win_increment(rc);
+			/* remove companion part */
+			uwb_drp_avail_release(rsv->rc, &rsv->mv.companion_mas);
+
+			/* drop some mases with reason modified */
+
+			/* put in the companion the mases to be dropped */
+			bitmap_andnot(mv->companion_mas.bm, rsv->mas.bm, conflicting_mas->bm, UWB_NUM_MAS);
+			uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MODIFIED);
+		} else { /* it is a target rsv */
+			uwb_rsv_set_state(rsv, UWB_RSV_STATE_T_CONFLICT);
+                        /* send_drp_avail_ie = true; */
+		}
+	}
+}
+
+static void uwb_drp_handle_conflict_rsv(struct uwb_rc *rc, struct uwb_rsv *rsv,
+					struct uwb_rc_evt_drp *drp_evt, 
+					struct uwb_ie_drp *drp_ie,
+					struct uwb_mas_bm *conflicting_mas)
+{
+	struct uwb_rsv_move *mv;
+
+	/* check if the conflicting reservation has two drp_ies */
+	if (uwb_rsv_has_two_drp_ies(rsv)) {
+		mv = &rsv->mv;
+		if (bitmap_intersects(rsv->mas.bm, conflicting_mas->bm, UWB_NUM_MAS)) {
+			handle_conflict_expanding(drp_ie, drp_evt->beacon_slot_number,
+						  rsv, false, conflicting_mas);
+		} else {
+			if (bitmap_intersects(mv->companion_mas.bm, conflicting_mas->bm, UWB_NUM_MAS)) {
+				handle_conflict_expanding(drp_ie, drp_evt->beacon_slot_number,
+							  rsv, true, conflicting_mas);	
+			}
+		}
+	} else if (bitmap_intersects(rsv->mas.bm, conflicting_mas->bm, UWB_NUM_MAS)) {
+		handle_conflict_normal(drp_ie, drp_evt->beacon_slot_number, rsv, conflicting_mas);
+	}
+}
+
+static void uwb_drp_handle_all_conflict_rsv(struct uwb_rc *rc,
+					    struct uwb_rc_evt_drp *drp_evt, 
+					    struct uwb_ie_drp *drp_ie,
+					    struct uwb_mas_bm *conflicting_mas)
+{
+	struct uwb_rsv *rsv;
+	
+	list_for_each_entry(rsv, &rc->reservations, rc_node) {
+		uwb_drp_handle_conflict_rsv(rc, rsv, drp_evt, drp_ie, conflicting_mas);	
+	}
+}
+	
 /*
  * Based on the DRP IE, transition a target reservation to a new
  * state.
  */
 static void uwb_drp_process_target(struct uwb_rc *rc, struct uwb_rsv *rsv,
-				   struct uwb_ie_drp *drp_ie)
+				   struct uwb_ie_drp *drp_ie, struct uwb_rc_evt_drp *drp_evt)
 {
 	struct device *dev = &rc->uwb_dev.dev;
+	struct uwb_rsv_move *mv = &rsv->mv;
 	int status;
 	enum uwb_drp_reason reason_code;
-
+	struct uwb_mas_bm mas;
+	
 	status = uwb_ie_drp_status(drp_ie);
 	reason_code = uwb_ie_drp_reason_code(drp_ie);
+	uwb_drp_ie_to_bm(&mas, drp_ie);
 
-	if (status) {
-		switch (reason_code) {
-		case UWB_DRP_REASON_ACCEPTED:
+	switch (reason_code) {
+	case UWB_DRP_REASON_ACCEPTED:
+
+		if (rsv->state == UWB_RSV_STATE_T_CONFLICT) {
+			uwb_rsv_set_state(rsv, UWB_RSV_STATE_T_CONFLICT);
+			break;
+		}
+
+		if (rsv->state == UWB_RSV_STATE_T_EXPANDING_ACCEPTED) {
+			/* drp_ie is companion */
+			if (!bitmap_equal(rsv->mas.bm, mas.bm, UWB_NUM_MAS))
+				/* stroke companion */
+				uwb_rsv_set_state(rsv, UWB_RSV_STATE_T_EXPANDING_ACCEPTED);	
+		} else {
+			if (!bitmap_equal(rsv->mas.bm, mas.bm, UWB_NUM_MAS)) {
+				if (uwb_drp_avail_reserve_pending(rc, &mas) == -EBUSY) {
+					/* FIXME: there is a conflict, find
+					 * the conflicting reservations and
+					 * take a sensible action. Consider
+					 * that in drp_ie there is the
+					 * "neighbour" */
+					uwb_drp_handle_all_conflict_rsv(rc, drp_evt, drp_ie, &mas);
+				} else {
+					/* accept the extra reservation */
+					bitmap_copy(mv->companion_mas.bm, mas.bm, UWB_NUM_MAS);
+					uwb_rsv_set_state(rsv, UWB_RSV_STATE_T_EXPANDING_ACCEPTED);
+				}
+			} else {
+				if (status) {
+					uwb_rsv_set_state(rsv, UWB_RSV_STATE_T_ACCEPTED);
+				}
+			}
+			
+		}
+		break;
+
+	case UWB_DRP_REASON_MODIFIED:
+		/* check to see if we have already modified the reservation */
+		if (bitmap_equal(rsv->mas.bm, mas.bm, UWB_NUM_MAS)) {
 			uwb_rsv_set_state(rsv, UWB_RSV_STATE_T_ACCEPTED);
 			break;
-		case UWB_DRP_REASON_MODIFIED:
-			dev_err(dev, "FIXME: unhandled reason code (%d/%d)\n",
-				reason_code, status);
-			break;
-		default:
-			dev_warn(dev, "ignoring invalid DRP IE state (%d/%d)\n",
-				 reason_code, status);
 		}
-	} else {
-		switch (reason_code) {
-		case UWB_DRP_REASON_ACCEPTED:
-			/* New reservations are handled in uwb_rsv_find(). */
-			break;
-		case UWB_DRP_REASON_DENIED:
-			uwb_rsv_set_state(rsv, UWB_RSV_STATE_NONE);
-			break;
-		case UWB_DRP_REASON_CONFLICT:
-		case UWB_DRP_REASON_MODIFIED:
-			dev_err(dev, "FIXME: unhandled reason code (%d/%d)\n",
-				reason_code, status);
-			break;
-		default:
-			dev_warn(dev, "ignoring invalid DRP IE state (%d/%d)\n",
-				 reason_code, status);
+
+		/* find if the owner wants to expand or reduce */
+		if (bitmap_subset(mas.bm, rsv->mas.bm, UWB_NUM_MAS)) {
+			/* owner is reducing */
+			bitmap_andnot(mv->companion_mas.bm, rsv->mas.bm, mas.bm, UWB_NUM_MAS);
+			uwb_drp_avail_release(rsv->rc, &mv->companion_mas);
 		}
+
+		bitmap_copy(rsv->mas.bm, mas.bm, UWB_NUM_MAS);
+		uwb_rsv_set_state(rsv, UWB_RSV_STATE_T_RESIZED);
+		break;
+	default:
+		dev_warn(dev, "ignoring invalid DRP IE state (%d/%d)\n",
+			 reason_code, status);
 	}
 }
 
@@ -199,23 +450,60 @@
  * state.
  */
 static void uwb_drp_process_owner(struct uwb_rc *rc, struct uwb_rsv *rsv,
-				  struct uwb_ie_drp *drp_ie)
+				  struct uwb_dev *src, struct uwb_ie_drp *drp_ie,
+				  struct uwb_rc_evt_drp *drp_evt)
 {
 	struct device *dev = &rc->uwb_dev.dev;
+	struct uwb_rsv_move *mv = &rsv->mv;
 	int status;
 	enum uwb_drp_reason reason_code;
+	struct uwb_mas_bm mas;
 
 	status = uwb_ie_drp_status(drp_ie);
 	reason_code = uwb_ie_drp_reason_code(drp_ie);
+	uwb_drp_ie_to_bm(&mas, drp_ie);
 
 	if (status) {
 		switch (reason_code) {
 		case UWB_DRP_REASON_ACCEPTED:
-			uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_ESTABLISHED);
-			break;
-		case UWB_DRP_REASON_MODIFIED:
-			dev_err(dev, "FIXME: unhandled reason code (%d/%d)\n",
-				reason_code, status);
+			switch (rsv->state) {
+			case UWB_RSV_STATE_O_PENDING:
+			case UWB_RSV_STATE_O_INITIATED:
+			case UWB_RSV_STATE_O_ESTABLISHED:
+				uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_ESTABLISHED);
+				break;
+			case UWB_RSV_STATE_O_MODIFIED:
+				if (bitmap_equal(mas.bm, rsv->mas.bm, UWB_NUM_MAS)) {
+					uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_ESTABLISHED);
+				} else {
+					uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MODIFIED);	
+				}
+				break;
+				
+			case UWB_RSV_STATE_O_MOVE_REDUCING: /* shouldn't be a problem */
+				if (bitmap_equal(mas.bm, rsv->mas.bm, UWB_NUM_MAS)) {
+					uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_ESTABLISHED);
+				} else {
+					uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MOVE_REDUCING);	
+				}
+				break;
+			case UWB_RSV_STATE_O_MOVE_EXPANDING:
+				if (bitmap_equal(mas.bm, mv->companion_mas.bm, UWB_NUM_MAS)) {
+					/* Companion reservation accepted */
+					uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MOVE_COMBINING);
+				} else {
+					uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MOVE_EXPANDING);
+				}
+				break;
+			case UWB_RSV_STATE_O_MOVE_COMBINING:
+				if (bitmap_equal(mas.bm, rsv->mas.bm, UWB_NUM_MAS))
+					uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MOVE_REDUCING);
+				else
+					uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MOVE_COMBINING);
+				break;
+			default:
+				break;	
+			}
 			break;
 		default:
 			dev_warn(dev, "ignoring invalid DRP IE state (%d/%d)\n",
@@ -230,9 +518,10 @@
 			uwb_rsv_set_state(rsv, UWB_RSV_STATE_NONE);
 			break;
 		case UWB_DRP_REASON_CONFLICT:
-		case UWB_DRP_REASON_MODIFIED:
-			dev_err(dev, "FIXME: unhandled reason code (%d/%d)\n",
-				reason_code, status);
+			/* resolve the conflict */
+			bitmap_complement(mas.bm, src->last_availability_bm,
+					  UWB_NUM_MAS);
+			uwb_drp_handle_conflict_rsv(rc, rsv, drp_evt, drp_ie, &mas);
 			break;
 		default:
 			dev_warn(dev, "ignoring invalid DRP IE state (%d/%d)\n",
@@ -241,12 +530,110 @@
 	}
 }
 
+static void uwb_cnflt_alien_stroke_timer(struct uwb_cnflt_alien *cnflt)
+{
+	unsigned timeout_us = UWB_MAX_LOST_BEACONS * UWB_SUPERFRAME_LENGTH_US;
+	mod_timer(&cnflt->timer, jiffies + usecs_to_jiffies(timeout_us));
+}
+
+static void uwb_cnflt_update_work(struct work_struct *work)
+{
+	struct uwb_cnflt_alien *cnflt = container_of(work,
+						     struct uwb_cnflt_alien,
+						     cnflt_update_work);
+	struct uwb_cnflt_alien *c;
+	struct uwb_rc *rc = cnflt->rc;
+	
+	unsigned long delay_us = UWB_MAS_LENGTH_US * UWB_MAS_PER_ZONE;
+	
+	mutex_lock(&rc->rsvs_mutex);
+
+	list_del(&cnflt->rc_node);
+
+	/* update rc global conflicting alien bitmap */
+	bitmap_zero(rc->cnflt_alien_bitmap.bm, UWB_NUM_MAS);
+
+	list_for_each_entry(c, &rc->cnflt_alien_list, rc_node) {
+		bitmap_or(rc->cnflt_alien_bitmap.bm, rc->cnflt_alien_bitmap.bm, c->mas.bm, UWB_NUM_MAS);			
+	}
+	
+	queue_delayed_work(rc->rsv_workq, &rc->rsv_alien_bp_work, usecs_to_jiffies(delay_us));
+
+	kfree(cnflt);
+	mutex_unlock(&rc->rsvs_mutex);
+}
+
+static void uwb_cnflt_timer(unsigned long arg)
+{
+	struct uwb_cnflt_alien *cnflt = (struct uwb_cnflt_alien *)arg;
+
+	queue_work(cnflt->rc->rsv_workq, &cnflt->cnflt_update_work);
+}
+
 /*
- * Process a received DRP IE, it's either for a reservation owned by
- * the RC or targeted at it (or it's for a WUSB cluster reservation).
+ * We have received a DRP IE of type Alien BP and we need to make
+ * sure we do not transmit in conflicting MASs.
  */
-static void uwb_drp_process(struct uwb_rc *rc, struct uwb_dev *src,
-		     struct uwb_ie_drp *drp_ie)
+static void uwb_drp_handle_alien_drp(struct uwb_rc *rc, struct uwb_ie_drp *drp_ie)
+{
+	struct device *dev = &rc->uwb_dev.dev;
+	struct uwb_mas_bm mas;
+	struct uwb_cnflt_alien *cnflt;
+	char buf[72];
+	unsigned long delay_us = UWB_MAS_LENGTH_US * UWB_MAS_PER_ZONE;
+	
+	uwb_drp_ie_to_bm(&mas, drp_ie);
+	bitmap_scnprintf(buf, sizeof(buf), mas.bm, UWB_NUM_MAS);
+	
+	list_for_each_entry(cnflt, &rc->cnflt_alien_list, rc_node) {
+		if (bitmap_equal(cnflt->mas.bm, mas.bm, UWB_NUM_MAS)) {
+			/* Existing alien BP reservation conflicting
+			 * bitmap, just reset the timer */
+			uwb_cnflt_alien_stroke_timer(cnflt);
+			return;
+		}
+	}
+
+	/* New alien BP reservation conflicting bitmap */
+
+	/* alloc and initialize new uwb_cnflt_alien */
+	cnflt = kzalloc(sizeof(struct uwb_cnflt_alien), GFP_KERNEL);
+	if (!cnflt) {
+		dev_err(dev, "failed to alloc uwb_cnflt_alien struct\n");
+		return;
+	}
+	INIT_LIST_HEAD(&cnflt->rc_node);
+	init_timer(&cnflt->timer);
+	cnflt->timer.function = uwb_cnflt_timer;
+	cnflt->timer.data     = (unsigned long)cnflt;
+
+	cnflt->rc = rc;
+	INIT_WORK(&cnflt->cnflt_update_work, uwb_cnflt_update_work);
+	
+	bitmap_copy(cnflt->mas.bm, mas.bm, UWB_NUM_MAS);
+
+	list_add_tail(&cnflt->rc_node, &rc->cnflt_alien_list);
+
+	/* update rc global conflicting alien bitmap */
+	bitmap_or(rc->cnflt_alien_bitmap.bm, rc->cnflt_alien_bitmap.bm, mas.bm, UWB_NUM_MAS);
+
+	queue_delayed_work(rc->rsv_workq, &rc->rsv_alien_bp_work, usecs_to_jiffies(delay_us));
+	
+	/* start the timer */
+	uwb_cnflt_alien_stroke_timer(cnflt);
+}
+
+static void uwb_drp_process_not_involved(struct uwb_rc *rc,
+					 struct uwb_rc_evt_drp *drp_evt, 
+					 struct uwb_ie_drp *drp_ie)
+{
+	struct uwb_mas_bm mas;
+	
+	uwb_drp_ie_to_bm(&mas, drp_ie);
+	uwb_drp_handle_all_conflict_rsv(rc, drp_evt, drp_ie, &mas);
+}
+
+static void uwb_drp_process_involved(struct uwb_rc *rc, struct uwb_dev *src,
+				     struct uwb_rc_evt_drp *drp_evt,
+				     struct uwb_ie_drp *drp_ie)
 {
 	struct uwb_rsv *rsv;
 
@@ -259,7 +646,7 @@
 		 */
 		return;
 	}
-
+	
 	/*
 	 * Do nothing with DRP IEs for reservations that have been
 	 * terminated.
@@ -268,14 +655,44 @@
 		uwb_rsv_set_state(rsv, UWB_RSV_STATE_NONE);
 		return;
 	}
-
+			
 	if (uwb_ie_drp_owner(drp_ie))
-		uwb_drp_process_target(rc, rsv, drp_ie);
+		uwb_drp_process_target(rc, rsv, drp_ie, drp_evt);
 	else
-		uwb_drp_process_owner(rc, rsv, drp_ie);
+		uwb_drp_process_owner(rc, rsv, src, drp_ie, drp_evt);
+	
 }
 
 
+static bool uwb_drp_involves_us(struct uwb_rc *rc, struct uwb_ie_drp *drp_ie)
+{
+	return uwb_dev_addr_cmp(&rc->uwb_dev.dev_addr, &drp_ie->dev_addr) == 0;
+}
+
+/*
+ * Process a received DRP IE.
+ */
+static void uwb_drp_process(struct uwb_rc *rc, struct uwb_rc_evt_drp *drp_evt,
+			    struct uwb_dev *src, struct uwb_ie_drp *drp_ie)
+{
+	if (uwb_ie_drp_type(drp_ie) == UWB_DRP_TYPE_ALIEN_BP)
+		uwb_drp_handle_alien_drp(rc, drp_ie);
+	else if (uwb_drp_involves_us(rc, drp_ie))
+		uwb_drp_process_involved(rc, src, drp_evt, drp_ie);
+	else
+		uwb_drp_process_not_involved(rc, drp_evt, drp_ie);
+}
+
+/*
+ * Process a received DRP Availability IE
+ */
+static void uwb_drp_availability_process(struct uwb_rc *rc, struct uwb_dev *src,
+					 struct uwb_ie_drp_avail *drp_availability_ie)
+{
+	bitmap_copy(src->last_availability_bm,
+		    drp_availability_ie->bmp, UWB_NUM_MAS);
+}
+
 /*
  * Process all the DRP IEs (both DRP IEs and the DRP Availability IE)
  * from a device.
@@ -296,10 +713,10 @@
 
 		switch (ie_hdr->element_id) {
 		case UWB_IE_DRP_AVAILABILITY:
-			/* FIXME: does something need to be done with this? */
+			uwb_drp_availability_process(rc, src_dev, (struct uwb_ie_drp_avail *)ie_hdr);
 			break;
 		case UWB_IE_DRP:
-			uwb_drp_process(rc, src_dev, (struct uwb_ie_drp *)ie_hdr);
+			uwb_drp_process(rc, drp_evt, src_dev, (struct uwb_ie_drp *)ie_hdr);
 			break;
 		default:
 			dev_warn(dev, "unexpected IE in DRP notification\n");
@@ -312,55 +729,6 @@
 			 (int)ielen);
 }
 
-
-/*
- * Go through all the DRP IEs and find the ones that conflict with our
- * reservations.
- *
- * FIXME: must resolve the conflict according the the rules in
- * [ECMA-368].
- */
-static
-void uwb_drp_process_conflict_all(struct uwb_rc *rc, struct uwb_rc_evt_drp *drp_evt,
-				  size_t ielen, struct uwb_dev *src_dev)
-{
-	struct device *dev = &rc->uwb_dev.dev;
-	struct uwb_ie_hdr *ie_hdr;
-	struct uwb_ie_drp *drp_ie;
-	void *ptr;
-
-	ptr = drp_evt->ie_data;
-	for (;;) {
-		ie_hdr = uwb_ie_next(&ptr, &ielen);
-		if (!ie_hdr)
-			break;
-
-		drp_ie = container_of(ie_hdr, struct uwb_ie_drp, hdr);
-
-		/* FIXME: check if this DRP IE conflicts. */
-	}
-
-	if (ielen > 0)
-		dev_warn(dev, "%d octets remaining in DRP notification\n",
-			 (int)ielen);
-}
-
-
-/*
- * Terminate all reservations owned by, or targeted at, 'uwb_dev'.
- */
-static void uwb_drp_terminate_all(struct uwb_rc *rc, struct uwb_dev *uwb_dev)
-{
-	struct uwb_rsv *rsv;
-
-	list_for_each_entry(rsv, &rc->reservations, rc_node) {
-		if (rsv->owner == uwb_dev
-		    || (rsv->target.type == UWB_RSV_TARGET_DEV && rsv->target.dev == uwb_dev))
-			uwb_rsv_remove(rsv);
-	}
-}
-
-
 /**
  * uwbd_evt_handle_rc_drp - handle a DRP_IE event
  * @evt: the DRP_IE event from the radio controller
@@ -401,7 +769,6 @@
 	size_t ielength, bytes_left;
 	struct uwb_dev_addr src_addr;
 	struct uwb_dev *src_dev;
-	int reason;
 
 	/* Is there enough data to decode the event (and any IEs in
 	   its payload)? */
@@ -437,22 +804,8 @@
 
 	mutex_lock(&rc->rsvs_mutex);
 
-	reason = uwb_rc_evt_drp_reason(drp_evt);
-
-	switch (reason) {
-	case UWB_DRP_NOTIF_DRP_IE_RCVD:
-		uwb_drp_process_all(rc, drp_evt, ielength, src_dev);
-		break;
-	case UWB_DRP_NOTIF_CONFLICT:
-		uwb_drp_process_conflict_all(rc, drp_evt, ielength, src_dev);
-		break;
-	case UWB_DRP_NOTIF_TERMINATE:
-		uwb_drp_terminate_all(rc, src_dev);
-		break;
-	default:
-		dev_warn(dev, "ignored DRP event with reason code: %d\n", reason);
-		break;
-	}
+	/* We do not distinguish based on the reason code */
+	uwb_drp_process_all(rc, drp_evt, ielength, src_dev);
 
 	mutex_unlock(&rc->rsvs_mutex);
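
Alien BP DRP IEs are handled above with one uwb_cnflt_alien entry per conflicting MAS bitmap: a repeated sighting of the same bitmap merely "strokes" (re-arms) the entry's timer, and only when the timer finally fires does the work item drop the entry and recompute the radio controller's global conflict bitmap.  A user-space analogue of that keep-alive pattern; QUIET_PERIOD_SEC is an invented stand-in for the UWB_MAX_LOST_BEACONS * UWB_SUPERFRAME_LENGTH_US timeout used by the driver:

/*
 * User-space analogue of the "stroke the timer" pattern: every sighting
 * pushes the expiry forward, and the entry is only discarded after a
 * full quiet period with no sightings.  Illustration only.
 */
#include <stdio.h>
#include <time.h>

#define QUIET_PERIOD_SEC 2

struct cnflt_entry {
	time_t expires;
};

static void stroke(struct cnflt_entry *e)
{
	e->expires = time(NULL) + QUIET_PERIOD_SEC;	/* like mod_timer() */
}

static int expired(const struct cnflt_entry *e)
{
	return time(NULL) > e->expires;			/* like the timer firing */
}

int main(void)
{
	struct cnflt_entry e;

	stroke(&e);	/* first sighting of the alien BP bitmap */
	stroke(&e);	/* duplicate sighting: only refresh the expiry */
	printf("expired now? %s\n", expired(&e) ? "yes" : "no");
	return 0;
}
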
 
diff --git a/drivers/uwb/est.c b/drivers/uwb/est.c
index 5fe566b..328fcc2 100644
--- a/drivers/uwb/est.c
+++ b/drivers/uwb/est.c
@@ -40,10 +40,8 @@
  *   uwb_est_get_size()
  */
 #include <linux/spinlock.h>
-#define D_LOCAL 0
-#include <linux/uwb/debug.h>
-#include "uwb-internal.h"
 
+#include "uwb-internal.h"
 
 struct uwb_est {
 	u16 type_event_high;
@@ -52,7 +50,6 @@
 	const struct uwb_est_entry *entry;
 };
 
-
 static struct uwb_est *uwb_est;
 static u8 uwb_est_size;
 static u8 uwb_est_used;
@@ -440,21 +437,12 @@
 	u8 *ptr = (u8 *) rceb;
 
 	read_lock_irqsave(&uwb_est_lock, flags);
-	d_printf(2, dev, "Size query for event 0x%02x/%04x/%02x,"
-		 " buffer size %ld\n",
-		 (unsigned) rceb->bEventType,
-		 (unsigned) le16_to_cpu(rceb->wEvent),
-		 (unsigned) rceb->bEventContext,
-		 (long) rceb_size);
 	size = -ENOSPC;
 	if (rceb_size < sizeof(*rceb))
 		goto out;
 	event = le16_to_cpu(rceb->wEvent);
 	type_event_high = rceb->bEventType << 8 | (event & 0xff00) >> 8;
 	for (itr = 0; itr < uwb_est_used; itr++) {
-		d_printf(3, dev, "Checking EST 0x%04x/%04x/%04x\n",
-			uwb_est[itr].type_event_high, uwb_est[itr].vendor,
-			uwb_est[itr].product);
 		if (uwb_est[itr].type_event_high != type_event_high)
 			continue;
 		size = uwb_est_get_size(rc, &uwb_est[itr],
diff --git a/drivers/uwb/hwa-rc.c b/drivers/uwb/hwa-rc.c
index 3d26fa0..559f878 100644
--- a/drivers/uwb/hwa-rc.c
+++ b/drivers/uwb/hwa-rc.c
@@ -51,16 +51,14 @@
  *
  *
  */
-#include <linux/version.h>
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/usb.h>
 #include <linux/usb/wusb.h>
 #include <linux/usb/wusb-wa.h>
 #include <linux/uwb.h>
+
 #include "uwb-internal.h"
-#define D_LOCAL 1
-#include <linux/uwb/debug.h>
 
 /* The device uses commands and events from the WHCI specification, although
  * reporting itself as WUSB compliant. */
@@ -631,17 +629,13 @@
 
 	switch (result = urb->status) {
 	case 0:
-		d_printf(3, dev, "NEEP: receive stat %d, %zu bytes\n",
-			 urb->status, (size_t)urb->actual_length);
 		uwb_rc_neh_grok(hwarc->uwb_rc, urb->transfer_buffer,
 				urb->actual_length);
 		break;
 	case -ECONNRESET:	/* Not an error, but a controlled situation; */
 	case -ENOENT:		/* (we killed the URB)...so, no broadcast */
-		d_printf(2, dev, "NEEP: URB reset/noent %d\n", urb->status);
 		goto out;
 	case -ESHUTDOWN:	/* going away! */
-		d_printf(2, dev, "NEEP: URB down %d\n", urb->status);
 		goto out;
 	default:		/* On general errors, retry unless it gets ugly */
 		if (edc_inc(&hwarc->neep_edc, EDC_MAX_ERRORS,
@@ -650,7 +644,6 @@
 		dev_err(dev, "NEEP: URB error %d\n", urb->status);
 	}
 	result = usb_submit_urb(urb, GFP_ATOMIC);
-	d_printf(3, dev, "NEEP: submit %d\n", result);
 	if (result < 0) {
 		dev_err(dev, "NEEP: Can't resubmit URB (%d) resetting device\n",
 			result);
@@ -759,11 +752,11 @@
 	itr_size = le16_to_cpu(usb_dev->actconfig->desc.wTotalLength);
 	while (itr_size >= sizeof(*hdr)) {
 		hdr = (struct usb_descriptor_header *) itr;
-		d_printf(3, dev, "Extra device descriptor: "
-			 "type %02x/%u bytes @ %zu (%zu left)\n",
-			 hdr->bDescriptorType, hdr->bLength,
-			 (itr - usb_dev->rawdescriptors[actconfig_idx]),
-			 itr_size);
+		dev_dbg(dev, "Extra device descriptor: "
+			"type %02x/%u bytes @ %zu (%zu left)\n",
+			hdr->bDescriptorType, hdr->bLength,
+			(itr - usb_dev->rawdescriptors[actconfig_idx]),
+			itr_size);
 		if (hdr->bDescriptorType == USB_DT_CS_RADIO_CONTROL)
 			goto found;
 		itr += hdr->bLength;
@@ -795,8 +788,7 @@
 		goto error;
 	}
 	rc->version = version;
-	d_printf(3, dev, "Device supports WUSB protocol version 0x%04x \n",
-		 rc->version);
+	dev_dbg(dev, "Device supports WUSB protocol version 0x%04x\n", rc->version);
 	result = 0;
 error:
 	return result;
@@ -877,11 +869,28 @@
 	uwb_rc_rm(uwb_rc);
 	usb_put_intf(hwarc->usb_iface);
 	usb_put_dev(hwarc->usb_dev);
-	d_printf(1, &hwarc->usb_iface->dev, "freed hwarc %p\n", hwarc);
 	kfree(hwarc);
 	uwb_rc_put(uwb_rc);	/* when creating the device, refcount = 1 */
 }
 
+static int hwarc_pre_reset(struct usb_interface *iface)
+{
+	struct hwarc *hwarc = usb_get_intfdata(iface);
+	struct uwb_rc *uwb_rc = hwarc->uwb_rc;
+
+	uwb_rc_pre_reset(uwb_rc);
+	return 0;
+}
+
+static int hwarc_post_reset(struct usb_interface *iface)
+{
+	struct hwarc *hwarc = usb_get_intfdata(iface);
+	struct uwb_rc *uwb_rc = hwarc->uwb_rc;
+
+	uwb_rc_post_reset(uwb_rc);
+	return 0;
+}
+
 /** USB device ID's that we handle */
 static struct usb_device_id hwarc_id_table[] = {
 	/* D-Link DUB-1210 */
@@ -898,20 +907,16 @@
 
 static struct usb_driver hwarc_driver = {
 	.name =		"hwa-rc",
+	.id_table =	hwarc_id_table,
 	.probe =	hwarc_probe,
 	.disconnect =	hwarc_disconnect,
-	.id_table =	hwarc_id_table,
+	.pre_reset =    hwarc_pre_reset,
+	.post_reset =   hwarc_post_reset,
 };
 
 static int __init hwarc_driver_init(void)
 {
-	int result;
-	result = usb_register(&hwarc_driver);
-	if (result < 0)
-		printk(KERN_ERR "HWA-RC: Cannot register USB driver: %d\n",
-		       result);
-	return result;
-
+	return usb_register(&hwarc_driver);
 }
 module_init(hwarc_driver_init);
 
diff --git a/drivers/uwb/i1480/dfu/dfu.c b/drivers/uwb/i1480/dfu/dfu.c
index 9097b3b..da7b1d0 100644
--- a/drivers/uwb/i1480/dfu/dfu.c
+++ b/drivers/uwb/i1480/dfu/dfu.c
@@ -34,10 +34,7 @@
 #include <linux/uwb.h>
 #include <linux/random.h>
 
-#define D_LOCAL 0
-#include <linux/uwb/debug.h>
-
-/**
+/*
  * i1480_rceb_check - Check RCEB for expected field values
  * @i1480: pointer to device for which RCEB is being checked
  * @rceb: RCEB being checked
@@ -83,7 +80,7 @@
 EXPORT_SYMBOL_GPL(i1480_rceb_check);
 
 
-/**
+/*
  * Execute a Radio Control Command
  *
  * Command data has to be in i1480->cmd_buf.
@@ -101,7 +98,6 @@
 	u8 expected_type = reply->bEventType;
 	u8 context;
 
-	d_fnstart(3, i1480->dev, "(%p, %s, %zu)\n", i1480, cmd_name, cmd_size);
 	init_completion(&i1480->evt_complete);
 	i1480->evt_result = -EINPROGRESS;
 	do {
@@ -150,8 +146,6 @@
 	result = i1480_rceb_check(i1480, i1480->evt_buf, cmd_name, context,
 				  expected_type, expected_event);
 error:
-	d_fnend(3, i1480->dev, "(%p, %s, %zu) = %zd\n",
-		i1480, cmd_name, cmd_size, result);
 	return result;
 }
 EXPORT_SYMBOL_GPL(i1480_cmd);
diff --git a/drivers/uwb/i1480/dfu/mac.c b/drivers/uwb/i1480/dfu/mac.c
index 2e4d8f0..694d0da 100644
--- a/drivers/uwb/i1480/dfu/mac.c
+++ b/drivers/uwb/i1480/dfu/mac.c
@@ -31,9 +31,6 @@
 #include <linux/uwb.h>
 #include "i1480-dfu.h"
 
-#define D_LOCAL 0
-#include <linux/uwb/debug.h>
-
 /*
  * Descriptor for a continuous segment of MAC fw data
  */
@@ -184,10 +181,6 @@
 		}
 		if (memcmp(i1480->cmd_buf, bin + src_itr, result)) {
 			u8 *buf = i1480->cmd_buf;
-			d_printf(2, i1480->dev,
-				 "original data @ %p + %u, %zu bytes\n",
-				 bin, src_itr, result);
-			d_dump(4, i1480->dev, bin + src_itr, result);
 			for (cnt = 0; cnt < result; cnt++)
 				if (bin[src_itr + cnt] != buf[cnt]) {
 					dev_err(i1480->dev, "byte failed at "
@@ -224,7 +217,6 @@
 	struct fw_hdr *hdr_itr;
 	int verif_retry_count;
 
-	d_fnstart(3, dev, "(%p, %p)\n", i1480, hdr);
 	/* Now, header by header, push them to the hw */
 	for (hdr_itr = hdr; hdr_itr != NULL; hdr_itr = hdr_itr->next) {
 		verif_retry_count = 0;
@@ -264,7 +256,6 @@
 			break;
 		}
 	}
-	d_fnend(3, dev, "(%zd)\n", result);
 	return result;
 }
 
@@ -337,11 +328,9 @@
 	const struct firmware *fw;
 	struct fw_hdr *fw_hdrs;
 
-	d_fnstart(3, i1480->dev, "(%p, %s, %s)\n", i1480, fw_name, fw_tag);
 	result = request_firmware(&fw, fw_name, i1480->dev);
 	if (result < 0)	/* Up to caller to complain on -ENOENT */
 		goto out;
-	d_printf(3, i1480->dev, "%s fw '%s': uploading\n", fw_tag, fw_name);
 	result = fw_hdrs_load(i1480, &fw_hdrs, fw->data, fw->size);
 	if (result < 0) {
 		dev_err(i1480->dev, "%s fw '%s': failed to parse firmware "
@@ -363,8 +352,6 @@
 out_release:
 	release_firmware(fw);
 out:
-	d_fnend(3, i1480->dev, "(%p, %s, %s) = %d\n", i1480, fw_name, fw_tag,
-		result);
 	return result;
 }
 
@@ -433,7 +420,6 @@
 	int result;
 	u32 *val = (u32 *) i1480->cmd_buf;
 
-	d_fnstart(3, i1480->dev, "(i1480 %p)\n", i1480);
 	for (cnt = 0; cnt < 10; cnt++) {
 		msleep(100);
 		result = i1480->read(i1480, 0x80080000, 4);
@@ -447,7 +433,6 @@
 	dev_err(i1480->dev, "Timed out waiting for fw to start\n");
 	result = -ETIMEDOUT;
 out:
-	d_fnend(3, i1480->dev, "(i1480 %p) = %d\n", i1480, result);
 	return result;
 
 }
@@ -467,7 +452,6 @@
 	int result = 0, deprecated_name = 0;
 	struct i1480_rceb *rcebe = (void *) i1480->evt_buf;
 
-	d_fnstart(3, i1480->dev, "(%p)\n", i1480);
 	result = __mac_fw_upload(i1480, i1480->mac_fw_name, "MAC");
 	if (result == -ENOENT) {
 		result = __mac_fw_upload(i1480, i1480->mac_fw_name_deprecate,
@@ -501,7 +485,6 @@
 		dev_err(i1480->dev, "MAC fw '%s': initialization event returns "
 			"wrong size (%zu bytes vs %zu needed)\n",
 			i1480->mac_fw_name, i1480->evt_result, sizeof(*rcebe));
-		dump_bytes(i1480->dev, rcebe, min(i1480->evt_result, (ssize_t)32));
 		goto error_size;
 	}
 	result = -EIO;
@@ -522,6 +505,5 @@
 error_init_timeout:
 error_size:
 error_setup:
-	d_fnend(3, i1480->dev, "(i1480 %p) = %d\n", i1480, result);
 	return result;
 }
diff --git a/drivers/uwb/i1480/dfu/usb.c b/drivers/uwb/i1480/dfu/usb.c
index 98eeeff..686795e 100644
--- a/drivers/uwb/i1480/dfu/usb.c
+++ b/drivers/uwb/i1480/dfu/usb.c
@@ -35,7 +35,6 @@
  * the functions are i1480_usb_NAME().
  */
 #include <linux/module.h>
-#include <linux/version.h>
 #include <linux/usb.h>
 #include <linux/interrupt.h>
 #include <linux/delay.h>
@@ -44,10 +43,6 @@
 #include <linux/usb/wusb-wa.h>
 #include "i1480-dfu.h"
 
-#define D_LOCAL 0
-#include <linux/uwb/debug.h>
-
-
 struct i1480_usb {
 	struct i1480 i1480;
 	struct usb_device *usb_dev;
@@ -118,8 +113,6 @@
 	struct i1480_usb *i1480_usb = container_of(i1480, struct i1480_usb, i1480);
 	size_t buffer_size, itr = 0;
 
-	d_fnstart(3, i1480->dev, "(%p, 0x%08x, %p, %zu)\n",
-		  i1480, memory_address, buffer, size);
 	BUG_ON(size & 0x3); /* Needs to be a multiple of 4 */
 	while (size > 0) {
 		buffer_size = size < i1480->buf_size ? size : i1480->buf_size;
@@ -132,16 +125,10 @@
 			i1480->cmd_buf, buffer_size, 100 /* FIXME: arbitrary */);
 		if (result < 0)
 			break;
-		d_printf(3, i1480->dev,
-			 "wrote @ 0x%08x %u bytes (of %zu bytes requested)\n",
-			 memory_address, result, buffer_size);
-		d_dump(4, i1480->dev, i1480->cmd_buf, result);
 		itr += result;
 		memory_address += result;
 		size -= result;
 	}
-	d_fnend(3, i1480->dev, "(%p, 0x%08x, %p, %zu) = %d\n",
-		i1480, memory_address, buffer, size, result);
 	return result;
 }
 
@@ -166,8 +153,6 @@
 	size_t itr, read_size = i1480->buf_size;
 	struct i1480_usb *i1480_usb = container_of(i1480, struct i1480_usb, i1480);
 
-	d_fnstart(3, i1480->dev, "(%p, 0x%08x, %zu)\n",
-		  i1480, addr, size);
 	BUG_ON(size > i1480->buf_size);
 	BUG_ON(size & 0x3); /* Needs to be a multiple of 4 */
 	BUG_ON(read_size > 512);
@@ -201,10 +186,6 @@
 	}
 	result = bytes;
 out:
-	d_fnend(3, i1480->dev, "(%p, 0x%08x, %zu) = %zd\n",
-		i1480, addr, size, result);
-	if (result > 0)
-		d_dump(4, i1480->dev, i1480->cmd_buf, result);
 	return result;
 }
 
@@ -260,7 +241,6 @@
 	struct i1480_usb *i1480_usb = container_of(i1480, struct i1480_usb, i1480);
 	struct usb_endpoint_descriptor *epd;
 
-	d_fnstart(3, dev, "(%p)\n", i1480);
 	init_completion(&i1480->evt_complete);
 	i1480->evt_result = -EINPROGRESS;
 	epd = &i1480_usb->usb_iface->cur_altsetting->endpoint[0].desc;
@@ -282,14 +262,12 @@
 		goto error_wait;
 	}
 	usb_kill_urb(i1480_usb->neep_urb);
-	d_fnend(3, dev, "(%p) = 0\n", i1480);
 	return 0;
 
 error_wait:
 	usb_kill_urb(i1480_usb->neep_urb);
 error_submit:
 	i1480->evt_result = result;
-	d_fnend(3, dev, "(%p) = %d\n", i1480, result);
 	return result;
 }
 
@@ -320,7 +298,6 @@
 	struct uwb_rccb *cmd = i1480->cmd_buf;
 	u8 iface_no;
 
-	d_fnstart(3, dev, "(%p, %s, %zu)\n", i1480, cmd_name, cmd_size);
 	/* Post a read on the notification & event endpoint */
 	iface_no = i1480_usb->usb_iface->cur_altsetting->desc.bInterfaceNumber;
 	epd = &i1480_usb->usb_iface->cur_altsetting->endpoint[0].desc;
@@ -348,15 +325,11 @@
 			cmd_name, result);
 		goto error_submit_ep0;
 	}
-	d_fnend(3, dev, "(%p, %s, %zu) = %d\n",
-		i1480, cmd_name, cmd_size, result);
 	return result;
 
 error_submit_ep0:
 	usb_kill_urb(i1480_usb->neep_urb);
 error_submit_ep1:
-	d_fnend(3, dev, "(%p, %s, %zu) = %d\n",
-		i1480, cmd_name, cmd_size, result);
 	return result;
 }
 
diff --git a/drivers/uwb/i1480/i1480u-wlp/lc.c b/drivers/uwb/i1480/i1480u-wlp/lc.c
index 737d60c..049c05d 100644
--- a/drivers/uwb/i1480/i1480u-wlp/lc.c
+++ b/drivers/uwb/i1480/i1480u-wlp/lc.c
@@ -55,10 +55,9 @@
  *                          is being removed.
  *         i1480u_rm()
  */
-#include <linux/version.h>
 #include <linux/if_arp.h>
 #include <linux/etherdevice.h>
-#include <linux/uwb/debug.h>
+
 #include "i1480u-wlp.h"
 
 
@@ -207,7 +206,7 @@
 	wlp->fill_device_info = i1480u_fill_device_info;
 	wlp->stop_queue = i1480u_stop_queue;
 	wlp->start_queue = i1480u_start_queue;
-	result = wlp_setup(wlp, rc);
+	result = wlp_setup(wlp, rc, net_dev);
 	if (result < 0) {
 		dev_err(&iface->dev, "Cannot setup WLP\n");
 		goto error_wlp_setup;
diff --git a/drivers/uwb/i1480/i1480u-wlp/netdev.c b/drivers/uwb/i1480/i1480u-wlp/netdev.c
index 8802ac4..e3873ff 100644
--- a/drivers/uwb/i1480/i1480u-wlp/netdev.c
+++ b/drivers/uwb/i1480/i1480u-wlp/netdev.c
@@ -41,7 +41,7 @@
 
 #include <linux/if_arp.h>
 #include <linux/etherdevice.h>
-#include <linux/uwb/debug.h>
+
 #include "i1480u-wlp.h"
 
 struct i1480u_cmd_set_ip_mas {
@@ -207,6 +207,11 @@
 	result = i1480u_rx_setup(i1480u);		/* Alloc RX stuff */
 	if (result < 0)
 		goto error_rx_setup;
+
+	result = uwb_radio_start(&wlp->pal);
+	if (result < 0)
+		goto error_radio_start;
+
 	netif_wake_queue(net_dev);
 #ifdef i1480u_FLOW_CONTROL
 	result = usb_submit_urb(i1480u->notif_urb, GFP_KERNEL);;
@@ -215,25 +220,20 @@
 		goto error_notif_urb_submit;
 	}
 #endif
-	i1480u->uwb_notifs_handler.cb = i1480u_uwb_notifs_cb;
-	i1480u->uwb_notifs_handler.data = i1480u;
-	if (uwb_bg_joined(rc))
-		netif_carrier_on(net_dev);
-	else
-		netif_carrier_off(net_dev);
-	uwb_notifs_register(rc, &i1480u->uwb_notifs_handler);
 	/* Interface is up with an address, now we can create WSS */
 	result = wlp_wss_setup(net_dev, &wlp->wss);
 	if (result < 0) {
 		dev_err(dev, "Can't create WSS: %d. \n", result);
-		goto error_notif_deregister;
+		goto error_wss_setup;
 	}
 	return 0;
-error_notif_deregister:
-	uwb_notifs_deregister(rc, &i1480u->uwb_notifs_handler);
+error_wss_setup:
 #ifdef i1480u_FLOW_CONTROL
+	usb_kill_urb(i1480u->notif_urb);
 error_notif_urb_submit:
 #endif
+	uwb_radio_stop(&wlp->pal);
+error_radio_start:
 	netif_stop_queue(net_dev);
 	i1480u_rx_release(i1480u);
 error_rx_setup:
@@ -248,16 +248,15 @@
 {
 	struct i1480u *i1480u = netdev_priv(net_dev);
 	struct wlp *wlp = &i1480u->wlp;
-	struct uwb_rc *rc = wlp->rc;
 
 	BUG_ON(wlp->rc == NULL);
 	wlp_wss_remove(&wlp->wss);
-	uwb_notifs_deregister(rc, &i1480u->uwb_notifs_handler);
 	netif_carrier_off(net_dev);
 #ifdef i1480u_FLOW_CONTROL
 	usb_kill_urb(i1480u->notif_urb);
 #endif
 	netif_stop_queue(net_dev);
+	uwb_radio_stop(&wlp->pal);
 	i1480u_rx_release(i1480u);
 	i1480u_tx_release(i1480u);
 	return 0;
@@ -303,34 +302,6 @@
 	return 0;
 }
 
-
-/**
- * Callback function to handle events from UWB
- * When we see other devices we know the carrier is ok,
- * if we are the only device in the beacon group we set the carrier
- * state to off.
- * */
-void i1480u_uwb_notifs_cb(void *data, struct uwb_dev *uwb_dev,
-			  enum uwb_notifs event)
-{
-	struct i1480u *i1480u = data;
-	struct net_device *net_dev = i1480u->net_dev;
-	struct device *dev = &i1480u->usb_iface->dev;
-	switch (event) {
-	case UWB_NOTIF_BG_JOIN:
-		netif_carrier_on(net_dev);
-		dev_info(dev, "Link is up\n");
-		break;
-	case UWB_NOTIF_BG_LEAVE:
-		netif_carrier_off(net_dev);
-		dev_info(dev, "Link is down\n");
-		break;
-	default:
-		dev_err(dev, "don't know how to handle event %d from uwb\n",
-				event);
-	}
-}
-
 /**
  * Stop the network queue
  *
diff --git a/drivers/uwb/i1480/i1480u-wlp/rx.c b/drivers/uwb/i1480/i1480u-wlp/rx.c
index 9fc0353..34f4cf9 100644
--- a/drivers/uwb/i1480/i1480u-wlp/rx.c
+++ b/drivers/uwb/i1480/i1480u-wlp/rx.c
@@ -68,11 +68,7 @@
 #include <linux/etherdevice.h>
 #include "i1480u-wlp.h"
 
-#define D_LOCAL 0
-#include <linux/uwb/debug.h>
-
-
-/**
+/*
  * Setup the RX context
  *
  * Each URB is provided with a transfer_buffer that is the data field
@@ -129,7 +125,7 @@
 }
 
 
-/** Release resources associated to the rx context */
+/* Release resources associated with the rx context */
 void i1480u_rx_release(struct i1480u *i1480u)
 {
 	int cnt;
@@ -155,7 +151,7 @@
 	}
 }
 
-/** Fix an out-of-sequence packet */
+/* Fix an out-of-sequence packet */
 #define i1480u_fix(i1480u, msg...)			\
 do {							\
 	if (printk_ratelimit())				\
@@ -166,7 +162,7 @@
 } while (0)
 
 
-/** Drop an out-of-sequence packet */
+/* Drop an out-of-sequence packet */
 #define i1480u_drop(i1480u, msg...)			\
 do {							\
 	if (printk_ratelimit())				\
@@ -177,7 +173,7 @@
 
 
 
-/** Finalizes setting up the SKB and delivers it
+/* Finalizes setting up the SKB and delivers it
  *
  * We first pass the incoming frame to WLP substack for verification. It
  * may also be a WLP association frame in which case WLP will take over the
@@ -192,18 +188,11 @@
 	struct net_device *net_dev = i1480u->net_dev;
 	struct device *dev = &i1480u->usb_iface->dev;
 
-	d_printf(6, dev, "RX delivered pre skb(%p), %u bytes\n",
-		 i1480u->rx_skb, i1480u->rx_skb->len);
-	d_dump(7, dev, i1480u->rx_skb->data, i1480u->rx_skb->len);
 	should_parse = wlp_receive_frame(dev, &i1480u->wlp, i1480u->rx_skb,
 					 &i1480u->rx_srcaddr);
 	if (!should_parse)
 		goto out;
 	i1480u->rx_skb->protocol = eth_type_trans(i1480u->rx_skb, net_dev);
-	d_printf(5, dev, "RX delivered skb(%p), %u bytes\n",
-		 i1480u->rx_skb, i1480u->rx_skb->len);
-	d_dump(7, dev, i1480u->rx_skb->data,
-	       i1480u->rx_skb->len > 72 ? 72 : i1480u->rx_skb->len);
 	i1480u->stats.rx_packets++;
 	i1480u->stats.rx_bytes += i1480u->rx_untd_pkt_size;
 	net_dev->last_rx = jiffies;
@@ -216,7 +205,7 @@
 }
 
 
-/**
+/*
  * Process a buffer of data received from the USB RX endpoint
  *
  * First fragment arrives with next or last fragment. All other fragments
@@ -404,7 +393,7 @@
 }
 
 
-/**
+/*
  * Called when an RX URB has finished receiving or has found some kind
  * of error condition.
  *
diff --git a/drivers/uwb/i1480/i1480u-wlp/sysfs.c b/drivers/uwb/i1480/i1480u-wlp/sysfs.c
index a1d8ca6..4ffaf54 100644
--- a/drivers/uwb/i1480/i1480u-wlp/sysfs.c
+++ b/drivers/uwb/i1480/i1480u-wlp/sysfs.c
@@ -25,8 +25,8 @@
 
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
-#include <linux/uwb/debug.h>
 #include <linux/device.h>
+
 #include "i1480u-wlp.h"
 
 
@@ -226,7 +226,6 @@
  * (CLASS_DEVICE_ATTR or DEVICE_ATTR) and i1480u_ATTR_NAME produces a
  * class_device_attr_NAME or device_attr_NAME (for group registration).
  */
-#include <linux/version.h>
 
 #define i1480u_SHOW(name, fn, param)				\
 static ssize_t i1480u_show_##name(struct device *dev,		\
diff --git a/drivers/uwb/i1480/i1480u-wlp/tx.c b/drivers/uwb/i1480/i1480u-wlp/tx.c
index 3426bfb..39032cc 100644
--- a/drivers/uwb/i1480/i1480u-wlp/tx.c
+++ b/drivers/uwb/i1480/i1480u-wlp/tx.c
@@ -55,8 +55,6 @@
  */
 
 #include "i1480u-wlp.h"
-#define D_LOCAL 5
-#include <linux/uwb/debug.h>
 
 enum {
 	/* This is only for Next and Last TX packets */
@@ -64,7 +62,7 @@
 		- sizeof(struct untd_hdr_rst),
 };
 
-/** Free resources allocated to a i1480u tx context. */
+/* Free resources allocated to a i1480u tx context. */
 static
 void i1480u_tx_free(struct i1480u_tx *wtx)
 {
@@ -99,7 +97,7 @@
 }
 
 
-/**
+/*
  * Callback for a completed tx USB URB.
  *
  * TODO:
@@ -149,8 +147,6 @@
 	    <= i1480u->tx_inflight.threshold
 	    && netif_queue_stopped(net_dev)
 	    && i1480u->tx_inflight.threshold != 0) {
-		if (d_test(2) && printk_ratelimit())
-			d_printf(2, dev, "Restart queue. \n");
 		netif_start_queue(net_dev);
 		atomic_inc(&i1480u->tx_inflight.restart_count);
 	}
@@ -158,7 +154,7 @@
 }
 
 
-/**
+/*
  * Given a buffer that doesn't fit in a single fragment, create an
  * scatter/gather structure for delivery to the USB pipe.
  *
@@ -253,15 +249,11 @@
 	/* Now do each remaining fragment */
 	result = -EINVAL;
 	while (pl_size_left > 0) {
-		d_printf(5, NULL, "ITR HDR: pl_size_left %zu buf_itr %zu\n",
-			 pl_size_left, buf_itr - wtx->buf);
 		if (buf_itr + sizeof(*untd_hdr_rst) - wtx->buf
 		    > wtx->buf_size) {
 			printk(KERN_ERR "BUG: no space for header\n");
 			goto error_bug;
 		}
-		d_printf(5, NULL, "ITR HDR 2: pl_size_left %zu buf_itr %zu\n",
-			 pl_size_left, buf_itr - wtx->buf);
 		untd_hdr_rst = buf_itr;
 		buf_itr += sizeof(*untd_hdr_rst);
 		if (pl_size_left > i1480u_MAX_PL_SIZE) {
@@ -271,9 +263,6 @@
 			frg_pl_size = pl_size_left;
 			untd_hdr_set_type(&untd_hdr_rst->hdr, i1480u_PKT_FRAG_LST);
 		}
-		d_printf(5, NULL,
-			 "ITR PL: pl_size_left %zu buf_itr %zu frg_pl_size %zu\n",
-			 pl_size_left, buf_itr - wtx->buf, frg_pl_size);
 		untd_hdr_set_rx_tx(&untd_hdr_rst->hdr, 0);
 		untd_hdr_rst->hdr.len = cpu_to_le16(frg_pl_size);
 		untd_hdr_rst->padding = 0;
@@ -286,9 +275,6 @@
 		buf_itr += frg_pl_size;
 		pl_itr += frg_pl_size;
 		pl_size_left -= frg_pl_size;
-		d_printf(5, NULL,
-			 "ITR PL 2: pl_size_left %zu buf_itr %zu frg_pl_size %zu\n",
-			 pl_size_left, buf_itr - wtx->buf, frg_pl_size);
 	}
 	dev_kfree_skb_irq(skb);
 	return 0;
@@ -308,7 +294,7 @@
 }
 
 
-/**
+/*
  * Given a buffer that fits in a single fragment, fill out a @wtx
  * struct for transmitting it down the USB pipe.
  *
@@ -346,7 +332,7 @@
 }
 
 
-/**
+/*
  * Given a skb to transmit, massage it to become palatable for the TX pipe
  *
  * This will break the buffer in chunks smaller than
@@ -425,7 +411,7 @@
 	return NULL;
 }
 
-/**
+/*
  * Actual fragmentation and transmission of frame
  *
  * @wlp:  WLP substack data structure
@@ -447,20 +433,12 @@
 	struct i1480u_tx *wtx;
 	struct wlp_tx_hdr *wlp_tx_hdr;
 	static unsigned char dev_bcast[2] = { 0xff, 0xff };
-#if 0
-	int lockup = 50;
-#endif
 
-	d_fnstart(6, dev, "(skb %p (%u), net_dev %p)\n", skb, skb->len,
-		  net_dev);
 	BUG_ON(i1480u->wlp.rc == NULL);
 	if ((net_dev->flags & IFF_UP) == 0)
 		goto out;
 	result = -EBUSY;
 	if (atomic_read(&i1480u->tx_inflight.count) >= i1480u->tx_inflight.max) {
-		if (d_test(2) && printk_ratelimit())
-			d_printf(2, dev, "Max frames in flight "
-				 "stopping queue.\n");
 		netif_stop_queue(net_dev);
 		goto error_max_inflight;
 	}
@@ -489,21 +467,6 @@
 		wlp_tx_hdr_set_delivery_id_type(wlp_tx_hdr, i1480u->options.pca_base_priority);
 	}
 
-#if 0
-	dev_info(dev, "TX delivering skb -> USB, %zu bytes\n", skb->len);
-	dump_bytes(dev, skb->data, skb->len > 72 ? 72 : skb->len);
-#endif
-#if 0
-	/* simulates a device lockup after every lockup# packets */
-	if (lockup && ((i1480u->stats.tx_packets + 1) % lockup) == 0) {
-		/* Simulate a dropped transmit interrupt */
-		net_dev->trans_start = jiffies;
-		netif_stop_queue(net_dev);
-		dev_err(dev, "Simulate lockup at %ld\n", jiffies);
-		return result;
-	}
-#endif
-
 	result = usb_submit_urb(wtx->urb, GFP_ATOMIC);		/* Go baby */
 	if (result < 0) {
 		dev_err(dev, "TX: cannot submit URB: %d\n", result);
@@ -513,8 +476,6 @@
 	}
 	atomic_inc(&i1480u->tx_inflight.count);
 	net_dev->trans_start = jiffies;
-	d_fnend(6, dev, "(skb %p (%u), net_dev %p) = %d\n", skb, skb->len,
-		net_dev, result);
 	return result;
 
 error_tx_urb_submit:
@@ -522,13 +483,11 @@
 error_wtx_alloc:
 error_max_inflight:
 out:
-	d_fnend(6, dev, "(skb %p (%u), net_dev %p) = %d\n", skb, skb->len,
-		net_dev, result);
 	return result;
 }
 
 
-/**
+/*
  * Transmit an skb  Called when an skbuf has to be transmitted
  *
  * The skb is first passed to WLP substack to ensure this is a valid
@@ -551,9 +510,6 @@
 	struct device *dev = &i1480u->usb_iface->dev;
 	struct uwb_dev_addr dst;
 
-	d_fnstart(6, dev, "(skb %p (%u), net_dev %p)\n", skb, skb->len,
-		  net_dev);
-	BUG_ON(i1480u->wlp.rc == NULL);
 	if ((net_dev->flags & IFF_UP) == 0)
 		goto error;
 	result = wlp_prepare_tx_frame(dev, &i1480u->wlp, skb, &dst);
@@ -562,31 +518,25 @@
 			"Dropping packet.\n", result);
 		goto error;
 	} else if (result == 1) {
-		d_printf(6, dev, "WLP will transmit frame. \n");
 		/* trans_start time will be set when WLP actually transmits
 		 * the frame */
 		goto out;
 	}
-	d_printf(6, dev, "Transmitting frame. \n");
 	result = i1480u_xmit_frame(&i1480u->wlp, skb, &dst);
 	if (result < 0) {
 		dev_err(dev, "Frame TX failed (%d).\n", result);
 		goto error;
 	}
-	d_fnend(6, dev, "(skb %p (%u), net_dev %p) = %d\n", skb, skb->len,
-		net_dev, result);
 	return NETDEV_TX_OK;
 error:
 	dev_kfree_skb_any(skb);
 	i1480u->stats.tx_dropped++;
 out:
-	d_fnend(6, dev, "(skb %p (%u), net_dev %p) = %d\n", skb, skb->len,
-		net_dev, result);
 	return NETDEV_TX_OK;
 }
 
 
-/**
+/*
  * Called when a pkt transmission doesn't complete in a reasonable period
  * Device reset may sleep - do it outside of interrupt context (delayed)
  */
diff --git a/drivers/uwb/ie-rcv.c b/drivers/uwb/ie-rcv.c
new file mode 100644
index 0000000..917e6d7
--- /dev/null
+++ b/drivers/uwb/ie-rcv.c
@@ -0,0 +1,55 @@
+/*
+ * Ultra Wide Band
+ * IE Received notification handling.
+ *
+ * Copyright (C) 2008 Cambridge Silicon Radio Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/bitmap.h>
+#include "uwb-internal.h"
+
+/*
+ * Process an incoming IE Received notification.
+ */
+int uwbd_evt_handle_rc_ie_rcv(struct uwb_event *evt)
+{
+	int result = -EINVAL;
+	struct device *dev = &evt->rc->uwb_dev.dev;
+	struct uwb_rc_evt_ie_rcv *iercv;
+	size_t iesize;
+
+	/* Is there enough data to decode it? */
+	if (evt->notif.size < sizeof(*iercv)) {
+		dev_err(dev, "IE Received notification: Not enough data to "
+			"decode (%zu vs %zu bytes needed)\n",
+			evt->notif.size, sizeof(*iercv));
+		goto error;
+	}
+	iercv = container_of(evt->notif.rceb, struct uwb_rc_evt_ie_rcv, rceb);
+	iesize = le16_to_cpu(iercv->wIELength);
+
+	dev_dbg(dev, "IE received, element ID=%d\n", iercv->IEData[0]);
+
+	if (iercv->IEData[0] == UWB_RELINQUISH_REQUEST_IE) {
+		dev_warn(dev, "unhandled Relinquish Request IE\n");
+	}
+
+	return 0;
+error:
+	return result;
+}
diff --git a/drivers/uwb/ie.c b/drivers/uwb/ie.c
index cf6f3d1..ab97668 100644
--- a/drivers/uwb/ie.c
+++ b/drivers/uwb/ie.c
@@ -25,8 +25,6 @@
  */
 
 #include "uwb-internal.h"
-#define D_LOCAL 0
-#include <linux/uwb/debug.h>
 
 /**
  * uwb_ie_next - get the next IE in a buffer
@@ -61,6 +59,42 @@
 EXPORT_SYMBOL_GPL(uwb_ie_next);
 
 /**
+ * uwb_ie_dump_hex - print IEs to a character buffer
+ * @ies: the IEs to print.
+ * @len: length of all the IEs.
+ * @buf: the destination buffer.
+ * @size: size of @buf.
+ *
+ * Returns the number of characters written.
+ */
+int uwb_ie_dump_hex(const struct uwb_ie_hdr *ies, size_t len,
+		    char *buf, size_t size)
+{
+	void *ptr;
+	const struct uwb_ie_hdr *ie;
+	int r = 0;
+	u8 *d;
+
+	ptr = (void *)ies;
+	for (;;) {
+		ie = uwb_ie_next(&ptr, &len);
+		if (!ie)
+			break;
+
+		r += scnprintf(buf + r, size - r, "%02x %02x",
+			       (unsigned)ie->element_id,
+			       (unsigned)ie->length);
+		d = (uint8_t *)ie + sizeof(struct uwb_ie_hdr);
+		while (d != ptr && r < size)
+			r += scnprintf(buf + r, size - r, " %02x", (unsigned)*d++);
+		if (r < size)
+			buf[r++] = '\n';
+	}
+
+	return r;
+}
+
+/**
  * Get the IEs that a radio controller is sending in its beacon
  *
  * @uwb_rc:  UWB Radio Controller
@@ -70,6 +104,7 @@
  * anything. Once done with the iedata buffer, call
  * uwb_rc_ie_release(iedata). Don't call kfree on it.
  */
+static
 ssize_t uwb_rc_get_ie(struct uwb_rc *uwb_rc, struct uwb_rc_evt_get_ie **pget_ie)
 {
 	ssize_t result;
@@ -78,148 +113,35 @@
 	struct uwb_rceb *reply = NULL;
 	struct uwb_rc_evt_get_ie *get_ie;
 
-	d_fnstart(3, dev, "(%p, %p)\n", uwb_rc, pget_ie);
-	result = -ENOMEM;
 	cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
 	if (cmd == NULL)
-		goto error_kzalloc;
+		return -ENOMEM;
+
 	cmd->bCommandType = UWB_RC_CET_GENERAL;
 	cmd->wCommand = cpu_to_le16(UWB_RC_CMD_GET_IE);
 	result = uwb_rc_vcmd(uwb_rc, "GET_IE", cmd, sizeof(*cmd),
 			     UWB_RC_CET_GENERAL, UWB_RC_CMD_GET_IE,
 			     &reply);
+	kfree(cmd);
 	if (result < 0)
-		goto error_cmd;
+		return result;
+
 	get_ie = container_of(reply, struct uwb_rc_evt_get_ie, rceb);
 	if (result < sizeof(*get_ie)) {
 		dev_err(dev, "not enough data returned for decoding GET IE "
 			"(%zu bytes received vs %zu needed)\n",
 			result, sizeof(*get_ie));
-		result = -EINVAL;
+		return -EINVAL;
 	} else if (result < sizeof(*get_ie) + le16_to_cpu(get_ie->wIELength)) {
 		dev_err(dev, "not enough data returned for decoding GET IE "
 			"payload (%zu bytes received vs %zu needed)\n", result,
 			sizeof(*get_ie) + le16_to_cpu(get_ie->wIELength));
-		result = -EINVAL;
-	} else
-		*pget_ie = get_ie;
-error_cmd:
-	kfree(cmd);
-error_kzalloc:
-	d_fnend(3, dev, "(%p, %p) = %d\n", uwb_rc, pget_ie, (int)result);
-	return result;
-}
-EXPORT_SYMBOL_GPL(uwb_rc_get_ie);
-
-
-/*
- * Given a pointer to an IE, print it in ASCII/hex followed by a new line
- *
- * @ie_hdr: pointer to the IE header. Length is in there, and it is
- *          guaranteed that the ie_hdr->length bytes following it are
- *          safely accesible.
- *
- * @_data: context data passed from uwb_ie_for_each(), an struct output_ctx
- */
-int uwb_ie_dump_hex(struct uwb_dev *uwb_dev, const struct uwb_ie_hdr *ie_hdr,
-		    size_t offset, void *_ctx)
-{
-	struct uwb_buf_ctx *ctx = _ctx;
-	const u8 *pl = (void *)(ie_hdr + 1);
-	u8 pl_itr;
-
-	ctx->bytes += scnprintf(ctx->buf + ctx->bytes, ctx->size - ctx->bytes,
-				"%02x %02x ", (unsigned) ie_hdr->element_id,
-				(unsigned) ie_hdr->length);
-	pl_itr = 0;
-	while (pl_itr < ie_hdr->length && ctx->bytes < ctx->size)
-		ctx->bytes += scnprintf(ctx->buf + ctx->bytes,
-					ctx->size - ctx->bytes,
-					"%02x ", (unsigned) pl[pl_itr++]);
-	if (ctx->bytes < ctx->size)
-		ctx->buf[ctx->bytes++] = '\n';
-	return 0;
-}
-EXPORT_SYMBOL_GPL(uwb_ie_dump_hex);
-
-
-/**
- * Verify that a pointer in a buffer points to valid IE
- *
- * @start: pointer to start of buffer in which IE appears
- * @itr:   pointer to IE inside buffer that will be verified
- * @top:   pointer to end of buffer
- *
- * @returns: 0 if IE is valid, <0 otherwise
- *
- * Verification involves checking that the buffer can contain a
- * header and the amount of data reported in the IE header can be found in
- * the buffer.
- */
-static
-int uwb_rc_ie_verify(struct uwb_dev *uwb_dev, const void *start,
-		     const void *itr, const void *top)
-{
-	struct device *dev = &uwb_dev->dev;
-	const struct uwb_ie_hdr *ie_hdr;
-
-	if (top - itr < sizeof(*ie_hdr)) {
-		dev_err(dev, "Bad IE: no data to decode header "
-			"(%zu bytes left vs %zu needed) at offset %zu\n",
-			top - itr, sizeof(*ie_hdr), itr - start);
 		return -EINVAL;
 	}
-	ie_hdr = itr;
-	itr += sizeof(*ie_hdr);
-	if (top - itr < ie_hdr->length) {
-		dev_err(dev, "Bad IE: not enough data for payload "
-			"(%zu bytes left vs %zu needed) at offset %zu\n",
-			top - itr, (size_t)ie_hdr->length,
-			(void *)ie_hdr - start);
-		return -EINVAL;
-	}
-	return 0;
-}
 
-
-/**
- * Walk a buffer filled with consecutive IE's a buffer
- *
- * @uwb_dev: UWB device this IEs belong to (for err messages mainly)
- *
- * @fn: function to call with each IE; if it returns 0, we keep
- *      traversing the buffer. If it returns !0, we'll stop and return
- *      that value.
- *
- * @data: pointer passed to @fn
- *
- * @buf: buffer where the consecutive IEs are located
- *
- * @size: size of @buf
- *
- * Each IE is checked for basic correctness (there is space left for
- * the header and the payload). If that test is failed, we stop
- * processing. For every good IE, @fn is called.
- */
-ssize_t uwb_ie_for_each(struct uwb_dev *uwb_dev, uwb_ie_f fn, void *data,
-			const void *buf, size_t size)
-{
-	ssize_t result = 0;
-	const struct uwb_ie_hdr *ie_hdr;
-	const void *itr = buf, *top = itr + size;
-
-	while (itr < top) {
-		if (uwb_rc_ie_verify(uwb_dev, buf, itr, top) != 0)
-			break;
-		ie_hdr = itr;
-		itr += sizeof(*ie_hdr) + ie_hdr->length;
-		result = fn(uwb_dev, ie_hdr, itr - buf, data);
-		if (result != 0)
-			break;
-	}
+	*pget_ie = get_ie;
 	return result;
 }
-EXPORT_SYMBOL_GPL(uwb_ie_for_each);
 
 
 /**
@@ -256,70 +178,6 @@
 	return result;
 }
 
-/**
- * Determine by IE id if IE is host settable
- * WUSB 1.0 [8.6.2.8 Table 8.85]
- *
- * EXCEPTION:
- * All but UWB_IE_WLP appears in Table 8.85 from WUSB 1.0. Setting this IE
- * is required for the WLP substack to perform association with its WSS so
- * we hope that the WUSB spec will be changed to reflect this.
- */
-static
-int uwb_rc_ie_is_host_settable(enum uwb_ie element_id)
-{
-	if (element_id == UWB_PCA_AVAILABILITY ||
-	    element_id == UWB_BP_SWITCH_IE ||
-	    element_id == UWB_MAC_CAPABILITIES_IE ||
-	    element_id == UWB_PHY_CAPABILITIES_IE ||
-	    element_id == UWB_APP_SPEC_PROBE_IE ||
-	    element_id == UWB_IDENTIFICATION_IE ||
-	    element_id == UWB_MASTER_KEY_ID_IE ||
-	    element_id == UWB_IE_WLP ||
-	    element_id == UWB_APP_SPEC_IE)
-		return 1;
-	return 0;
-}
-
-
-/**
- * Extract Host Settable IEs from IE
- *
- * @ie_data: pointer to buffer containing all IEs
- * @size:    size of buffer
- *
- * @returns: length of buffer that only includes host settable IEs
- *
- * Given a buffer of IEs we move all Host Settable IEs to front of buffer
- * by overwriting the IEs that are not Host Settable.
- * Buffer length is adjusted accordingly.
- */
-static
-ssize_t uwb_rc_parse_host_settable_ie(struct uwb_dev *uwb_dev,
-				      void *ie_data, size_t size)
-{
-	size_t new_len = size;
-	struct uwb_ie_hdr *ie_hdr;
-	size_t ie_length;
-	void *itr = ie_data, *top = itr + size;
-
-	while (itr < top) {
-		if (uwb_rc_ie_verify(uwb_dev, ie_data, itr, top) != 0)
-			break;
-		ie_hdr = itr;
-		ie_length = sizeof(*ie_hdr) + ie_hdr->length;
-		if (uwb_rc_ie_is_host_settable(ie_hdr->element_id)) {
-			itr += ie_length;
-		} else {
-			memmove(itr, itr + ie_length, top - (itr + ie_length));
-			new_len -= ie_length;
-			top -= ie_length;
-		}
-	}
-	return new_len;
-}
-
-
 /* Cleanup the whole IE management subsystem */
 void uwb_rc_ie_init(struct uwb_rc *uwb_rc)
 {
@@ -328,49 +186,34 @@
 
 
 /**
- * Set up cache for host settable IEs currently being transmitted
+ * uwb_rc_ie_setup - setup a radio controller's IE manager
+ * @uwb_rc: the radio controller.
  *
- * First we just call GET-IE to get the current IEs being transmitted
- * (or we workaround and pretend we did) and (because the format is
- * the same) reuse that as the IE cache (with the command prefix, as
- * explained in 'struct uwb_rc').
+ * The current set of IEs is obtained from the hardware with a GET-IE
+ * command (since the radio controller is not yet beaconing, this will
+ * be just the hardware's MAC and PHY Capability IEs).
  *
- * @returns: size of cache created
+ * Returns 0 on success; -ve on an error.
  */
-ssize_t uwb_rc_ie_setup(struct uwb_rc *uwb_rc)
+int uwb_rc_ie_setup(struct uwb_rc *uwb_rc)
 {
-	struct device *dev = &uwb_rc->uwb_dev.dev;
-	ssize_t result;
-	size_t capacity;
-	struct uwb_rc_evt_get_ie *ie_info;
+	struct uwb_rc_evt_get_ie *ie_info = NULL;
+	int capacity;
 
-	d_fnstart(3, dev, "(%p)\n", uwb_rc);
+	capacity = uwb_rc_get_ie(uwb_rc, &ie_info);
+	if (capacity < 0)
+		return capacity;
+
 	mutex_lock(&uwb_rc->ies_mutex);
-	result = uwb_rc_get_ie(uwb_rc, &ie_info);
-	if (result < 0)
-		goto error_get_ie;
-	capacity = result;
-	d_printf(5, dev, "Got IEs %zu bytes (%zu long at %p)\n", result,
-		 (size_t)le16_to_cpu(ie_info->wIELength), ie_info);
 
-	/* Remove IEs that host should not set. */
-	result = uwb_rc_parse_host_settable_ie(&uwb_rc->uwb_dev,
-			ie_info->IEData, le16_to_cpu(ie_info->wIELength));
-	if (result < 0)
-		goto error_parse;
-	d_printf(5, dev, "purged non-settable IEs to %zu bytes\n", result);
-	uwb_rc->ies = (void *) ie_info;
+	uwb_rc->ies = (struct uwb_rc_cmd_set_ie *)ie_info;
 	uwb_rc->ies->rccb.bCommandType = UWB_RC_CET_GENERAL;
 	uwb_rc->ies->rccb.wCommand = cpu_to_le16(UWB_RC_CMD_SET_IE);
 	uwb_rc->ies_capacity = capacity;
-	d_printf(5, dev, "IE cache at %p %zu bytes, %zu capacity\n",
-		 ie_info, result, capacity);
-	result = 0;
-error_parse:
-error_get_ie:
+
 	mutex_unlock(&uwb_rc->ies_mutex);
-	d_fnend(3, dev, "(%p) = %zu\n", uwb_rc, result);
-	return result;
+
+	return 0;
 }
 
 
@@ -383,26 +226,47 @@
 }
 
 
-static
-int __acc_size(struct uwb_dev *uwb_dev, const struct uwb_ie_hdr *ie_hdr,
-	       size_t offset, void *_ctx)
+static int uwb_rc_ie_add_one(struct uwb_rc *rc, const struct uwb_ie_hdr *new_ie)
 {
-	size_t *acc_size = _ctx;
-	*acc_size += sizeof(*ie_hdr) + ie_hdr->length;
-	d_printf(6, &uwb_dev->dev, "new acc size %zu\n", *acc_size);
+	struct uwb_rc_cmd_set_ie *new_ies;
+	void *ptr, *prev_ie;
+	struct uwb_ie_hdr *ie;
+	size_t length, new_ie_len, new_capacity, size, prev_size;
+
+	length = le16_to_cpu(rc->ies->wIELength);
+	new_ie_len = sizeof(struct uwb_ie_hdr) + new_ie->length;
+	new_capacity = sizeof(struct uwb_rc_cmd_set_ie) + length + new_ie_len;
+
+	if (new_capacity > rc->ies_capacity) {
+		new_ies = krealloc(rc->ies, new_capacity, GFP_KERNEL);
+		if (!new_ies)
+			return -ENOMEM;
+		rc->ies = new_ies;
+	}
+
+	ptr = rc->ies->IEData;
+	size = length;
+	for (;;) {
+		prev_ie = ptr;
+		prev_size = size;
+		ie = uwb_ie_next(&ptr, &size);
+		if (!ie || ie->element_id > new_ie->element_id)
+			break;
+	}
+
+	memmove(prev_ie + new_ie_len, prev_ie, prev_size);
+	memcpy(prev_ie, new_ie, new_ie_len);
+	rc->ies->wIELength = cpu_to_le16(length + new_ie_len);
+
 	return 0;
 }
 
-
 /**
- * Add a new IE to IEs currently being transmitted by device
- *
+ * uwb_rc_ie_add - add new IEs to the radio controller's beacon
+ * @uwb_rc: the radio controller.
  * @ies: the buffer containing the new IE or IEs to be added to
- *       the device's beacon. The buffer will be verified for
- *       consistence (meaning the headers should be right) and
- *       consistent with the buffer size.
- * @size: size of @ies (in bytes, total buffer size)
- * @returns: 0 if ok, <0 errno code on error
+ *       the device's beacon.
+ * @size: length of all the IEs.
  *
  * According to WHCI 0.95 [4.13.6] the driver will only receive the RCEB
  * after the device sent the first beacon that includes the IEs specified
@@ -411,66 +275,40 @@
  * we start beaconing.
  *
  * Setting an IE on the device will overwrite all current IEs in device. So
- * we take the current IEs being transmitted by the device, append the
+ * we take the current IEs being transmitted by the device, insert the
  * new one, and call SET IE with all the IEs needed.
  *
- * The local IE cache will only be updated with the new IE if SET IE
- * completed successfully.
+ * Returns 0 on success; or -ENOMEM.
  */
 int uwb_rc_ie_add(struct uwb_rc *uwb_rc,
 		  const struct uwb_ie_hdr *ies, size_t size)
 {
 	int result = 0;
-	struct device *dev = &uwb_rc->uwb_dev.dev;
-	struct uwb_rc_cmd_set_ie *new_ies;
-	size_t ies_size, total_size, acc_size = 0;
+	void *ptr;
+	const struct uwb_ie_hdr *ie;
 
-	if (uwb_rc->ies == NULL)
-		return -ESHUTDOWN;
-	uwb_ie_for_each(&uwb_rc->uwb_dev, __acc_size, &acc_size, ies, size);
-	if (acc_size != size) {
-		dev_err(dev, "BUG: bad IEs, misconstructed headers "
-			"[%zu bytes reported vs %zu calculated]\n",
-			size, acc_size);
-		WARN_ON(1);
-		return -EINVAL;
-	}
 	mutex_lock(&uwb_rc->ies_mutex);
-	ies_size = le16_to_cpu(uwb_rc->ies->wIELength);
-	total_size = sizeof(*uwb_rc->ies) + ies_size;
-	if (total_size + size > uwb_rc->ies_capacity) {
-		d_printf(4, dev, "Reallocating IE cache from %p capacity %zu "
-			 "to capacity %zu\n", uwb_rc->ies, uwb_rc->ies_capacity,
-			 total_size + size);
-		new_ies = kzalloc(total_size + size, GFP_KERNEL);
-		if (new_ies == NULL) {
-			dev_err(dev, "No memory for adding new IE\n");
-			result = -ENOMEM;
-			goto error_alloc;
-		}
-		memcpy(new_ies, uwb_rc->ies, total_size);
-		uwb_rc->ies_capacity = total_size + size;
-		kfree(uwb_rc->ies);
-		uwb_rc->ies = new_ies;
-		d_printf(4, dev, "New IE cache at %p capacity %zu\n",
-			 uwb_rc->ies, uwb_rc->ies_capacity);
+
+	ptr = (void *)ies;
+	for (;;) {
+		ie = uwb_ie_next(&ptr, &size);
+		if (!ie)
+			break;
+
+		result = uwb_rc_ie_add_one(uwb_rc, ie);
+		if (result < 0)
+			break;
 	}
-	memcpy((void *)uwb_rc->ies + total_size, ies, size);
-	uwb_rc->ies->wIELength = cpu_to_le16(ies_size + size);
-	if (uwb_rc->beaconing != -1) {
-		result = uwb_rc_set_ie(uwb_rc, uwb_rc->ies);
-		if (result < 0) {
-			dev_err(dev, "Cannot set new IE on device: %d\n",
-				result);
-			uwb_rc->ies->wIELength = cpu_to_le16(ies_size);
+	if (result >= 0) {
+		if (size == 0) {
+			if (uwb_rc->beaconing != -1)
+				result = uwb_rc_set_ie(uwb_rc, uwb_rc->ies);
 		} else
-			result = 0;
+			result = -EINVAL;
 	}
-	d_printf(4, dev, "IEs now occupy %hu bytes of %zu capacity at %p\n",
-		 le16_to_cpu(uwb_rc->ies->wIELength), uwb_rc->ies_capacity,
-		 uwb_rc->ies);
-error_alloc:
+
 	mutex_unlock(&uwb_rc->ies_mutex);
+
 	return result;
 }
 EXPORT_SYMBOL_GPL(uwb_rc_ie_add);
@@ -489,53 +327,52 @@
  * beacon. We don't reallocate, we just mark the size smaller.
  */
 static
-int uwb_rc_ie_cache_rm(struct uwb_rc *uwb_rc, enum uwb_ie to_remove)
+void uwb_rc_ie_cache_rm(struct uwb_rc *uwb_rc, enum uwb_ie to_remove)
 {
-	struct uwb_ie_hdr *ie_hdr;
-	size_t new_len = le16_to_cpu(uwb_rc->ies->wIELength);
-	void *itr = uwb_rc->ies->IEData;
-	void *top = itr + new_len;
+	struct uwb_ie_hdr *ie;
+	size_t len = le16_to_cpu(uwb_rc->ies->wIELength);
+	void *ptr;
+	size_t size;
 
-	while (itr < top) {
-		ie_hdr = itr;
-		if (ie_hdr->element_id != to_remove) {
-			itr += sizeof(*ie_hdr) + ie_hdr->length;
-		} else {
-			int ie_length;
-			ie_length = sizeof(*ie_hdr) + ie_hdr->length;
-			if (top - itr != ie_length)
-				memmove(itr, itr + ie_length, top - itr + ie_length);
-			top -= ie_length;
-			new_len -= ie_length;
+	ptr = uwb_rc->ies->IEData;
+	size = len;
+	for (;;) {
+		ie = uwb_ie_next(&ptr, &size);
+		if (!ie)
+			break;
+		if (ie->element_id == to_remove) {
+			len -= sizeof(struct uwb_ie_hdr) + ie->length;
+			memmove(ie, ptr, size);
+			ptr = ie;
 		}
 	}
-	uwb_rc->ies->wIELength = cpu_to_le16(new_len);
-	return 0;
+	uwb_rc->ies->wIELength = cpu_to_le16(len);
 }
 
 
 /**
- * Remove an IE currently being transmitted by device
+ * uwb_rc_ie_rm - remove an IE from the radio controller's beacon
+ * @uwb_rc: the radio controller.
+ * @element_id: the element ID of the IE to remove.
  *
- * @element_id: id of IE to be removed from device's beacon
+ * Only IEs previously added with uwb_rc_ie_add() may be removed.
+ *
+ * Returns 0 on success; or -ve if the SET-IE command to the radio
+ * controller failed.
  */
 int uwb_rc_ie_rm(struct uwb_rc *uwb_rc, enum uwb_ie element_id)
 {
-	struct device *dev = &uwb_rc->uwb_dev.dev;
-	int result;
+	int result = 0;
 
-	if (uwb_rc->ies == NULL)
-		return -ESHUTDOWN;
 	mutex_lock(&uwb_rc->ies_mutex);
-	result = uwb_rc_ie_cache_rm(uwb_rc, element_id);
-	if (result < 0)
-		dev_err(dev, "Cannot remove IE from cache.\n");
-	if (uwb_rc->beaconing != -1) {
+
+	uwb_rc_ie_cache_rm(uwb_rc, element_id);
+
+	if (uwb_rc->beaconing != -1)
 		result = uwb_rc_set_ie(uwb_rc, uwb_rc->ies);
-		if (result < 0)
-			dev_err(dev, "Cannot set new IE on device.\n");
-	}
+
 	mutex_unlock(&uwb_rc->ies_mutex);
+
 	return result;
 }
 EXPORT_SYMBOL_GPL(uwb_rc_ie_rm);
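
[Editorial note, not part of the patch: a minimal sketch of how the reworked IE
management API above is meant to be called. A caller builds an IE (header plus
payload) in one contiguous buffer and hands it to uwb_rc_ie_add(); it is later
removed by element ID with uwb_rc_ie_rm(). UWB_APP_SPEC_IE is used here only as
a placeholder element ID; the payload bytes are arbitrary.]

	/* Illustrative only: assumes <linux/uwb.h> for struct uwb_ie_hdr
	 * and enum uwb_ie. */
	static int example_add_app_ie(struct uwb_rc *rc)
	{
		struct {
			struct uwb_ie_hdr hdr;
			u8 data[4];
		} ie = {
			.hdr.element_id = UWB_APP_SPEC_IE,
			.hdr.length     = 4,
			.data           = { 0xde, 0xad, 0xbe, 0xef },
		};

		/* Inserted into the cache in element ID order and pushed to the
		 * hardware with SET-IE if the radio controller is beaconing. */
		return uwb_rc_ie_add(rc, &ie.hdr, sizeof(ie));
	}

	static int example_rm_app_ie(struct uwb_rc *rc)
	{
		return uwb_rc_ie_rm(rc, UWB_APP_SPEC_IE);
	}
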
diff --git a/drivers/uwb/lc-dev.c b/drivers/uwb/lc-dev.c
index 15f856c..e9fe1bb 100644
--- a/drivers/uwb/lc-dev.c
+++ b/drivers/uwb/lc-dev.c
@@ -22,7 +22,6 @@
  *
  * FIXME: docs
  */
-
 #include <linux/kernel.h>
 #include <linux/device.h>
 #include <linux/err.h>
@@ -30,10 +29,6 @@
 #include <linux/random.h>
 #include "uwb-internal.h"
 
-#define D_LOCAL 1
-#include <linux/uwb/debug.h>
-
-
 /* We initialize addresses to 0xff (invalid, as it is bcast) */
 static inline void uwb_dev_addr_init(struct uwb_dev_addr *addr)
 {
@@ -104,12 +99,9 @@
 {
 	struct uwb_dev *uwb_dev = to_uwb_dev(dev);
 
-	d_fnstart(4, NULL, "(dev %p uwb_dev %p)\n", dev, uwb_dev);
 	uwb_bce_put(uwb_dev->bce);
-	d_printf(0, &uwb_dev->dev, "uwb_dev %p freed\n", uwb_dev);
 	memset(uwb_dev, 0x69, sizeof(*uwb_dev));
 	kfree(uwb_dev);
-	d_fnend(4, NULL, "(dev %p uwb_dev %p) = void\n", dev, uwb_dev);
 }
 
 /*
@@ -275,12 +267,8 @@
  */
 static int __uwb_dev_sys_add(struct uwb_dev *uwb_dev, struct device *parent_dev)
 {
-	int result;
 	struct device *dev;
 
-	d_fnstart(4, NULL, "(uwb_dev %p parent_dev %p)\n", uwb_dev, parent_dev);
-	BUG_ON(parent_dev == NULL);
-
 	dev = &uwb_dev->dev;
 	/* Device sysfs files are only useful for neighbor devices not
 	   local radio controllers. */
@@ -289,18 +277,14 @@
 	dev->parent = parent_dev;
 	dev_set_drvdata(dev, uwb_dev);
 
-	result = device_add(dev);
-	d_fnend(4, NULL, "(uwb_dev %p parent_dev %p) = %d\n", uwb_dev, parent_dev, result);
-	return result;
+	return device_add(dev);
 }
 
 
 static void __uwb_dev_sys_rm(struct uwb_dev *uwb_dev)
 {
-	d_fnstart(4, NULL, "(uwb_dev %p)\n", uwb_dev);
 	dev_set_drvdata(&uwb_dev->dev, NULL);
 	device_del(&uwb_dev->dev);
-	d_fnend(4, NULL, "(uwb_dev %p) = void\n", uwb_dev);
 }
 
 
@@ -384,7 +368,6 @@
 	struct device *dev = &uwb_dev->dev;
 	char macbuf[UWB_ADDR_STRSIZE], devbuf[UWB_ADDR_STRSIZE];
 
-	d_fnstart(3, NULL, "(dev %p [uwb_dev %p], uwb_rc %p)\n", dev, uwb_dev, rc);
 	uwb_mac_addr_print(macbuf, sizeof(macbuf), &uwb_dev->mac_addr);
 	uwb_dev_addr_print(devbuf, sizeof(devbuf), &uwb_dev->dev_addr);
 	dev_info(dev, "uwb device (mac %s dev %s) disconnected from %s %s\n",
@@ -392,8 +375,10 @@
 		 rc ? rc->uwb_dev.dev.parent->bus->name : "n/a",
 		 rc ? dev_name(rc->uwb_dev.dev.parent) : "");
 	uwb_dev_rm(uwb_dev);
+	list_del(&uwb_dev->bce->node);
+	uwb_bce_put(uwb_dev->bce);
 	uwb_dev_put(uwb_dev);	/* for the creation in _onair() */
-	d_fnend(3, NULL, "(dev %p [uwb_dev %p], uwb_rc %p) = 0\n", dev, uwb_dev, rc);
+
 	return 0;
 }
 
diff --git a/drivers/uwb/lc-rc.c b/drivers/uwb/lc-rc.c
index ee5772f..9cf21e6 100644
--- a/drivers/uwb/lc-rc.c
+++ b/drivers/uwb/lc-rc.c
@@ -36,8 +36,6 @@
 #include <linux/etherdevice.h>
 #include <linux/usb.h>
 
-#define D_LOCAL 1
-#include <linux/uwb/debug.h>
 #include "uwb-internal.h"
 
 static int uwb_rc_index_match(struct device *dev, void *data)
@@ -81,9 +79,7 @@
 	struct uwb_dev *uwb_dev = container_of(dev, struct uwb_dev, dev);
 	struct uwb_rc *rc = container_of(uwb_dev, struct uwb_rc, uwb_dev);
 
-	uwb_rc_neh_destroy(rc);
 	uwb_rc_ie_release(rc);
-	d_printf(1, dev, "freed uwb_rc %p\n", rc);
 	kfree(rc);
 }
 
@@ -100,6 +96,8 @@
 	rc->scan_type = UWB_SCAN_DISABLED;
 	INIT_LIST_HEAD(&rc->notifs_chain.list);
 	mutex_init(&rc->notifs_chain.mutex);
+	INIT_LIST_HEAD(&rc->uwb_beca.list);
+	mutex_init(&rc->uwb_beca.mutex);
 	uwb_drp_avail_init(rc);
 	uwb_rc_ie_init(rc);
 	uwb_rsv_init(rc);
@@ -191,9 +189,9 @@
 	int result;
 	struct device *dev = &rc->uwb_dev.dev;
 
-	result = uwb_rc_reset(rc);
+	result = uwb_radio_setup(rc);
 	if (result < 0) {
-		dev_err(dev, "cannot reset UWB radio: %d\n", result);
+		dev_err(dev, "cannot setup UWB radio: %d\n", result);
 		goto error;
 	}
 	result = uwb_rc_mac_addr_setup(rc);
@@ -250,6 +248,12 @@
 
 	rc->priv = priv;
 
+	init_waitqueue_head(&rc->uwbd.wq);
+	INIT_LIST_HEAD(&rc->uwbd.event_list);
+	spin_lock_init(&rc->uwbd.event_list_lock);
+
+	uwbd_start(rc);
+
 	result = rc->start(rc);
 	if (result < 0)
 		goto error_rc_start;
@@ -284,7 +288,7 @@
 error_dev_add:
 error_rc_setup:
 	rc->stop(rc);
-	uwbd_flush(rc);
+	uwbd_stop(rc);
 error_rc_start:
 	return result;
 }
@@ -306,25 +310,24 @@
 	rc->ready = 0;
 
 	uwb_dbg_del_rc(rc);
-	uwb_rsv_cleanup(rc);
-	uwb_rc_ie_rm(rc, UWB_IDENTIFICATION_IE);
-	if (rc->beaconing >= 0)
-		uwb_rc_beacon(rc, -1, 0);
-	if (rc->scan_type != UWB_SCAN_DISABLED)
-		uwb_rc_scan(rc, rc->scanning, UWB_SCAN_DISABLED, 0);
-	uwb_rc_reset(rc);
+	uwb_rsv_remove_all(rc);
+	uwb_radio_shutdown(rc);
 
 	rc->stop(rc);
-	uwbd_flush(rc);
+
+	uwbd_stop(rc);
+	uwb_rc_neh_destroy(rc);
 
 	uwb_dev_lock(&rc->uwb_dev);
 	rc->priv = NULL;
 	rc->cmd = NULL;
 	uwb_dev_unlock(&rc->uwb_dev);
-	mutex_lock(&uwb_beca.mutex);
+	mutex_lock(&rc->uwb_beca.mutex);
 	uwb_dev_for_each(rc, uwb_dev_offair_helper, NULL);
 	__uwb_rc_sys_rm(rc);
-	mutex_unlock(&uwb_beca.mutex);
+	mutex_unlock(&rc->uwb_beca.mutex);
+	uwb_rsv_cleanup(rc);
+	uwb_beca_release(rc);
 	uwb_dev_rm(&rc->uwb_dev);
 }
 EXPORT_SYMBOL_GPL(uwb_rc_rm);
@@ -468,28 +471,3 @@
 	__uwb_rc_put(rc);
 }
 EXPORT_SYMBOL_GPL(uwb_rc_put);
-
-/*
- *
- *
- */
-ssize_t uwb_rc_print_IEs(struct uwb_rc *uwb_rc, char *buf, size_t size)
-{
-	ssize_t result;
-	struct uwb_rc_evt_get_ie *ie_info;
-	struct uwb_buf_ctx ctx;
-
-	result = uwb_rc_get_ie(uwb_rc, &ie_info);
-	if (result < 0)
-		goto error_get_ie;
-	ctx.buf = buf;
-	ctx.size = size;
-	ctx.bytes = 0;
-	uwb_ie_for_each(&uwb_rc->uwb_dev, uwb_ie_dump_hex, &ctx,
-			ie_info->IEData, result - sizeof(*ie_info));
-	result = ctx.bytes;
-	kfree(ie_info);
-error_get_ie:
-	return result;
-}
-
diff --git a/drivers/uwb/neh.c b/drivers/uwb/neh.c
index 9b4eb64..0af8916 100644
--- a/drivers/uwb/neh.c
+++ b/drivers/uwb/neh.c
@@ -86,8 +86,6 @@
 #include <linux/err.h>
 
 #include "uwb-internal.h"
-#define D_LOCAL 0
-#include <linux/uwb/debug.h>
 
 /*
  * UWB Radio Controller Notification/Event Handle
@@ -254,7 +252,6 @@
 
 static void __uwb_rc_neh_rm(struct uwb_rc *rc, struct uwb_rc_neh *neh)
 {
-	del_timer(&neh->timer);
 	__uwb_rc_ctx_put(rc, neh);
 	list_del(&neh->list_node);
 }
@@ -275,6 +272,7 @@
 	__uwb_rc_neh_rm(rc, neh);
 	spin_unlock_irqrestore(&rc->neh_lock, flags);
 
+	del_timer_sync(&neh->timer);
 	uwb_rc_neh_put(neh);
 }
 
@@ -349,7 +347,7 @@
 }
 
 
-/**
+/*
  * Process notifications coming from the radio control interface
  *
  * @rc:    UWB Radio Control Interface descriptor
@@ -401,23 +399,6 @@
 	uwb_evt->notif.size = size;
 	uwb_evt->notif.rceb = rceb;
 
-	switch (le16_to_cpu(rceb->wEvent)) {
-		/* Trap some vendor specific events
-		 *
-		 * FIXME: move this to handling in ptc-est, where we
-		 * register a NULL event handler for these two guys
-		 * using the Intel IDs.
-		 */
-	case 0x0103:
-		dev_info(dev, "FIXME: DEVICE ADD\n");
-		return;
-	case 0x0104:
-		dev_info(dev, "FIXME: DEVICE RM\n");
-		return;
-	default:
-		break;
-	}
-
 	uwbd_event_queue(uwb_evt);
 }
 
@@ -438,9 +419,10 @@
 				rceb->bEventContext, size);
 	} else {
 		neh = uwb_rc_neh_lookup(rc, rceb);
-		if (neh)
+		if (neh) {
+			del_timer_sync(&neh->timer);
 			uwb_rc_neh_cb(neh, rceb, size);
-		else
+		} else
 			dev_warn(dev, "event 0x%02x/%04x/%02x (%zu bytes): nobody cared\n",
 				 rceb->bEventType, le16_to_cpu(rceb->wEvent),
 				 rceb->bEventContext, size);
@@ -495,8 +477,6 @@
 	size_t size, real_size, event_size;
 	int needtofree;
 
-	d_fnstart(3, dev, "(rc %p buf %p %zu buf_size)\n", rc, buf, buf_size);
-	d_printf(2, dev, "groking event block: %zu bytes\n", buf_size);
 	itr = buf;
 	size = buf_size;
 	while (size > 0) {
@@ -544,10 +524,7 @@
 
 		itr += real_size;
 		size -= real_size;
-		d_printf(2, dev, "consumed %zd bytes, %zu left\n",
-			 event_size, size);
 	}
-	d_fnend(3, dev, "(rc %p buf %p %zu buf_size) = void\n", rc, buf, buf_size);
 }
 EXPORT_SYMBOL_GPL(uwb_rc_neh_grok);
 
@@ -562,16 +539,22 @@
  */
 void uwb_rc_neh_error(struct uwb_rc *rc, int error)
 {
-	struct uwb_rc_neh *neh, *next;
+	struct uwb_rc_neh *neh;
 	unsigned long flags;
 
-	BUG_ON(error >= 0);
-	spin_lock_irqsave(&rc->neh_lock, flags);
-	list_for_each_entry_safe(neh, next, &rc->neh_list, list_node) {
+	for (;;) {
+		spin_lock_irqsave(&rc->neh_lock, flags);
+		if (list_empty(&rc->neh_list)) {
+			spin_unlock_irqrestore(&rc->neh_lock, flags);
+			break;
+		}
+		neh = list_first_entry(&rc->neh_list, struct uwb_rc_neh, list_node);
 		__uwb_rc_neh_rm(rc, neh);
+		spin_unlock_irqrestore(&rc->neh_lock, flags);
+
+		del_timer_sync(&neh->timer);
 		uwb_rc_neh_cb(neh, NULL, error);
 	}
-	spin_unlock_irqrestore(&rc->neh_lock, flags);
 }
 EXPORT_SYMBOL_GPL(uwb_rc_neh_error);
 
@@ -583,10 +566,14 @@
 	unsigned long flags;
 
 	spin_lock_irqsave(&rc->neh_lock, flags);
-	__uwb_rc_neh_rm(rc, neh);
+	if (neh->context)
+		__uwb_rc_neh_rm(rc, neh);
+	else
+		neh = NULL;
 	spin_unlock_irqrestore(&rc->neh_lock, flags);
 
-	uwb_rc_neh_cb(neh, NULL, -ETIMEDOUT);
+	if (neh)
+		uwb_rc_neh_cb(neh, NULL, -ETIMEDOUT);
 }
 
 /** Initializes the @rc's neh subsystem
@@ -605,12 +592,19 @@
 void uwb_rc_neh_destroy(struct uwb_rc *rc)
 {
 	unsigned long flags;
-	struct uwb_rc_neh *neh, *next;
+	struct uwb_rc_neh *neh;
 
-	spin_lock_irqsave(&rc->neh_lock, flags);
-	list_for_each_entry_safe(neh, next, &rc->neh_list, list_node) {
+	for (;;) {
+		spin_lock_irqsave(&rc->neh_lock, flags);
+		if (list_empty(&rc->neh_list)) {
+			spin_unlock_irqrestore(&rc->neh_lock, flags);
+			break;
+		}
+		neh = list_first_entry(&rc->neh_list, struct uwb_rc_neh, list_node);
 		__uwb_rc_neh_rm(rc, neh);
+		spin_unlock_irqrestore(&rc->neh_lock, flags);
+
+		del_timer_sync(&neh->timer);
 		uwb_rc_neh_put(neh);
 	}
-	spin_unlock_irqrestore(&rc->neh_lock, flags);
 }
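
[Editorial note, not part of the patch: the neh.c hunks above repeat one locking
pattern; this sketch restates it in isolation. The handle is unlinked while
holding rc->neh_lock, but del_timer_sync() is only called after the lock is
dropped, because the timeout handler itself takes rc->neh_lock. The function
name is made up for illustration.]

	static void example_neh_teardown(struct uwb_rc *rc,
					 struct uwb_rc_neh *neh, int error)
	{
		unsigned long flags;

		spin_lock_irqsave(&rc->neh_lock, flags);
		__uwb_rc_neh_rm(rc, neh);        /* no new lookups will find it */
		spin_unlock_irqrestore(&rc->neh_lock, flags);

		del_timer_sync(&neh->timer);     /* wait for a running timeout handler */
		uwb_rc_neh_cb(neh, NULL, error); /* now safe to complete the handle */
	}
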
diff --git a/drivers/uwb/pal.c b/drivers/uwb/pal.c
index 1afb38e..99a19c1 100644
--- a/drivers/uwb/pal.c
+++ b/drivers/uwb/pal.c
@@ -16,6 +16,7 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 #include <linux/kernel.h>
+#include <linux/debugfs.h>
 #include <linux/uwb.h>
 
 #include "uwb-internal.h"
@@ -32,13 +33,13 @@
 
 /**
  * uwb_pal_register - register a UWB PAL
- * @rc: the radio controller the PAL will be using
  * @pal: the PAL
  *
  * The PAL must be initialized with uwb_pal_init().
  */
-int uwb_pal_register(struct uwb_rc *rc, struct uwb_pal *pal)
+int uwb_pal_register(struct uwb_pal *pal)
 {
+	struct uwb_rc *rc = pal->rc;
 	int ret;
 
 	if (pal->device) {
@@ -54,9 +55,11 @@
 		}
 	}
 
-	spin_lock(&rc->pal_lock);
+	pal->debugfs_dir = uwb_dbg_create_pal_dir(pal);
+
+	mutex_lock(&rc->uwb_dev.mutex);
 	list_add(&pal->node, &rc->pals);
-	spin_unlock(&rc->pal_lock);
+	mutex_unlock(&rc->uwb_dev.mutex);
 
 	return 0;
 }
@@ -64,14 +67,19 @@
 
 /**
  * uwb_pal_register - unregister a UWB PAL
- * @rc: the radio controller the PAL was using
  * @pal: the PAL
  */
-void uwb_pal_unregister(struct uwb_rc *rc, struct uwb_pal *pal)
+void uwb_pal_unregister(struct uwb_pal *pal)
 {
-	spin_lock(&rc->pal_lock);
+	struct uwb_rc *rc = pal->rc;
+
+	uwb_radio_stop(pal);
+
+	mutex_lock(&rc->uwb_dev.mutex);
 	list_del(&pal->node);
-	spin_unlock(&rc->pal_lock);
+	mutex_unlock(&rc->uwb_dev.mutex);
+
+	debugfs_remove(pal->debugfs_dir);
 
 	if (pal->device) {
 		sysfs_remove_link(&rc->uwb_dev.dev.kobj, pal->name);
@@ -86,6 +94,5 @@
  */
 void uwb_rc_pal_init(struct uwb_rc *rc)
 {
-	spin_lock_init(&rc->pal_lock);
 	INIT_LIST_HEAD(&rc->pals);
 }
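
[Editorial note, not part of the patch: a minimal sketch of how a PAL would use
the reworked registration API above. The radio controller is now recorded in
pal->rc before calling uwb_pal_register(pal); the separate rc argument is gone,
and uwb_pal_unregister() also stops the radio for that PAL. The PAL name and
callback below are hypothetical.]

	/* Illustrative only; assumes <linux/uwb.h>. */
	static struct uwb_pal example_pal;

	static void example_channel_changed(struct uwb_pal *pal, int channel)
	{
		/* channel == -1 means the radio stopped; otherwise it is the
		 * new beaconing channel. */
	}

	static int example_pal_attach(struct uwb_rc *rc)
	{
		uwb_pal_init(&example_pal);
		example_pal.name = "example";
		example_pal.rc = rc;                   /* set before registering */
		example_pal.channel_changed = example_channel_changed;
		return uwb_pal_register(&example_pal); /* no rc argument any more */
	}

	static void example_pal_detach(void)
	{
		uwb_pal_unregister(&example_pal);
	}
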
diff --git a/drivers/uwb/radio.c b/drivers/uwb/radio.c
new file mode 100644
index 0000000..f0d5549
--- /dev/null
+++ b/drivers/uwb/radio.c
@@ -0,0 +1,202 @@
+/*
+ * UWB radio (channel) management.
+ *
+ * Copyright (C) 2008 Cambridge Silicon Radio Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <linux/kernel.h>
+#include <linux/uwb.h>
+
+#include "uwb-internal.h"
+
+
+static int uwb_radio_select_channel(struct uwb_rc *rc)
+{
+	/*
+	 * Default to channel 9 (BG1, TFC1) unless the user has
+	 * selected a specific channel or there are no active PALs.
+	 */
+	if (rc->active_pals == 0)
+		return -1;
+	if (rc->beaconing_forced)
+		return rc->beaconing_forced;
+	return 9;
+}
+
+
+/*
+ * Notify all active PALs that the channel has changed.
+ */
+static void uwb_radio_channel_changed(struct uwb_rc *rc, int channel)
+{
+	struct uwb_pal *pal;
+
+	list_for_each_entry(pal, &rc->pals, node) {
+		if (pal->channel && channel != pal->channel) {
+			pal->channel = channel;
+			if (pal->channel_changed)
+				pal->channel_changed(pal, pal->channel);
+		}
+	}
+}
+
+/*
+ * Change to a new channel and notify any active PALs of the new
+ * channel.
+ *
+ * When stopping the radio, PALs need to be notified first so they can
+ * terminate any active reservations.
+ */
+static int uwb_radio_change_channel(struct uwb_rc *rc, int channel)
+{
+	int ret = 0;
+
+	if (channel == -1)
+		uwb_radio_channel_changed(rc, channel);
+
+	if (channel != rc->beaconing) {
+		if (rc->beaconing != -1 && channel != -1) {
+			/*
+			 * FIXME: should signal the channel change
+			 * with a Channel Change IE.
+			 */
+			ret = uwb_radio_change_channel(rc, -1);
+			if (ret < 0)
+				return ret;
+		}
+		ret = uwb_rc_beacon(rc, channel, 0);
+	}
+
+	if (channel != -1)
+		uwb_radio_channel_changed(rc, rc->beaconing);
+
+	return ret;
+}
+
+/**
+ * uwb_radio_start - request that the radio be started
+ * @pal: the PAL making the request.
+ *
+ * If the radio is not already active, a suitable channel is selected
+ * and beacons are started.
+ */
+int uwb_radio_start(struct uwb_pal *pal)
+{
+	struct uwb_rc *rc = pal->rc;
+	int ret = 0;
+
+	mutex_lock(&rc->uwb_dev.mutex);
+
+	if (!pal->channel) {
+		pal->channel = -1;
+		rc->active_pals++;
+		ret = uwb_radio_change_channel(rc, uwb_radio_select_channel(rc));
+	}
+
+	mutex_unlock(&rc->uwb_dev.mutex);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(uwb_radio_start);
+
+/**
+ * uwb_radio_stop - request that the radio be stopped.
+ * @pal: the PAL making the request.
+ *
+ * Stops the radio if no other PAL is making use of it.
+ */
+void uwb_radio_stop(struct uwb_pal *pal)
+{
+	struct uwb_rc *rc = pal->rc;
+
+	mutex_lock(&rc->uwb_dev.mutex);
+
+	if (pal->channel) {
+		rc->active_pals--;
+		uwb_radio_change_channel(rc, uwb_radio_select_channel(rc));
+		pal->channel = 0;
+	}
+
+	mutex_unlock(&rc->uwb_dev.mutex);
+}
+EXPORT_SYMBOL_GPL(uwb_radio_stop);
+
+/*
+ * uwb_radio_force_channel - force a specific channel to be used
+ * @rc: the radio controller.
+ * @channel: the channel to use; -1 to force the radio to stop; 0 to
+ *   use the default channel selection algorithm.
+ */
+int uwb_radio_force_channel(struct uwb_rc *rc, int channel)
+{
+	int ret = 0;
+
+	mutex_lock(&rc->uwb_dev.mutex);
+
+	rc->beaconing_forced = channel;
+	ret = uwb_radio_change_channel(rc, uwb_radio_select_channel(rc));
+
+	mutex_unlock(&rc->uwb_dev.mutex);
+	return ret;
+}
+
+/*
+ * uwb_radio_setup - setup the radio manager
+ * @rc: the radio controller.
+ *
+ * The radio controller is reset to ensure it's in a known state
+ * before it's used.
+ */
+int uwb_radio_setup(struct uwb_rc *rc)
+{
+	return uwb_rc_reset(rc);
+}
+
+/*
+ * uwb_radio_reset_state - reset any radio manager state
+ * @rc: the radio controller.
+ *
+ * All internal radio manager state is reset to values corresponding
+ * to a reset radio controller.
+ */
+void uwb_radio_reset_state(struct uwb_rc *rc)
+{
+	struct uwb_pal *pal;
+
+	mutex_lock(&rc->uwb_dev.mutex);
+
+	list_for_each_entry(pal, &rc->pals, node) {
+		if (pal->channel) {
+			pal->channel = -1;
+			if (pal->channel_changed)
+				pal->channel_changed(pal, -1);
+		}
+	}
+
+	rc->beaconing = -1;
+	rc->scanning = -1;
+
+	mutex_unlock(&rc->uwb_dev.mutex);
+}
+
+/*
+ * uwb_radio_shutdown - shutdown the radio manager
+ * @rc: the radio controller.
+ *
+ * The radio controller is reset.
+ */
+void uwb_radio_shutdown(struct uwb_rc *rc)
+{
+	uwb_radio_reset_state(rc);
+	uwb_rc_reset(rc);
+}
diff --git a/drivers/uwb/reset.c b/drivers/uwb/reset.c
index 8de856f..70f8050 100644
--- a/drivers/uwb/reset.c
+++ b/drivers/uwb/reset.c
@@ -32,8 +32,6 @@
 #include <linux/err.h>
 
 #include "uwb-internal.h"
-#define D_LOCAL 0
-#include <linux/uwb/debug.h>
 
 /**
  * Command result codes (WUSB1.0[T8-69])
@@ -323,17 +321,16 @@
 	struct uwb_rc *rc = evt->rc;
 	int ret;
 
-	/* Need to prevent the RC hardware module going away while in
-	   the rc->reset() call. */
-	if (!try_module_get(rc->owner))
-		return 0;
-
 	dev_info(&rc->uwb_dev.dev, "resetting radio controller\n");
 	ret = rc->reset(rc);
-	if (ret)
+	if (ret) {
 		dev_err(&rc->uwb_dev.dev, "failed to reset hardware: %d\n", ret);
-
-	module_put(rc->owner);
+		goto error;
+	}
+	return 0;
+error:
+	/* Nothing can be done except try the reset again. */
+	uwb_rc_reset_all(rc);
 	return ret;
 }
 
@@ -360,3 +357,33 @@
 	uwbd_event_queue(evt);
 }
 EXPORT_SYMBOL_GPL(uwb_rc_reset_all);
+
+void uwb_rc_pre_reset(struct uwb_rc *rc)
+{
+	rc->stop(rc);
+	uwbd_flush(rc);
+
+	uwb_radio_reset_state(rc);
+	uwb_rsv_remove_all(rc);
+}
+EXPORT_SYMBOL_GPL(uwb_rc_pre_reset);
+
+void uwb_rc_post_reset(struct uwb_rc *rc)
+{
+	int ret;
+
+	ret = rc->start(rc);
+	if (ret)
+		goto error;
+	ret = uwb_rc_mac_addr_set(rc, &rc->uwb_dev.mac_addr);
+	if (ret)
+		goto error;
+	ret = uwb_rc_dev_addr_set(rc, &rc->uwb_dev.dev_addr);
+	if (ret)
+		goto error;
+	return;
+error:
+	/* Nothing can be done except try the reset again. */
+	uwb_rc_reset_all(rc);
+}
+EXPORT_SYMBOL_GPL(uwb_rc_post_reset);
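+
+/*
+ * Sketch of how a radio controller driver might bracket a bus-level
+ * reset with the two helpers above.  The example_hc structure and the
+ * example_* names are hypothetical; only uwb_rc_pre_reset() and
+ * uwb_rc_post_reset() are taken from this patch.
+ */
+struct example_hc {
+	struct uwb_rc *uwb_rc;
+};
+
+static int example_rc_pre_reset(struct example_hc *hc)
+{
+	uwb_rc_pre_reset(hc->uwb_rc);	/* stop the radio, flush events, drop reservations */
+	return 0;
+}
+
+static int example_rc_post_reset(struct example_hc *hc)
+{
+	uwb_rc_post_reset(hc->uwb_rc);	/* restart and restore the MAC and device addresses */
+	return 0;
+}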
diff --git a/drivers/uwb/rsv.c b/drivers/uwb/rsv.c
index bae16204..ec6eecb 100644
--- a/drivers/uwb/rsv.c
+++ b/drivers/uwb/rsv.c
@@ -15,23 +15,33 @@
  * You should have received a copy of the GNU General Public License
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
-#include <linux/version.h>
 #include <linux/kernel.h>
 #include <linux/uwb.h>
+#include <linux/random.h>
 
 #include "uwb-internal.h"
 
 static void uwb_rsv_timer(unsigned long arg);
 
 static const char *rsv_states[] = {
-	[UWB_RSV_STATE_NONE]          = "none",
-	[UWB_RSV_STATE_O_INITIATED]   = "initiated",
-	[UWB_RSV_STATE_O_PENDING]     = "pending",
-	[UWB_RSV_STATE_O_MODIFIED]    = "modified",
-	[UWB_RSV_STATE_O_ESTABLISHED] = "established",
-	[UWB_RSV_STATE_T_ACCEPTED]    = "accepted",
-	[UWB_RSV_STATE_T_DENIED]      = "denied",
-	[UWB_RSV_STATE_T_PENDING]     = "pending",
+	[UWB_RSV_STATE_NONE]                 = "none            ",
+	[UWB_RSV_STATE_O_INITIATED]          = "o initiated     ",
+	[UWB_RSV_STATE_O_PENDING]            = "o pending       ",
+	[UWB_RSV_STATE_O_MODIFIED]           = "o modified      ",
+	[UWB_RSV_STATE_O_ESTABLISHED]        = "o established   ",
+	[UWB_RSV_STATE_O_TO_BE_MOVED]        = "o to be moved   ",
+	[UWB_RSV_STATE_O_MOVE_EXPANDING]     = "o move expanding",
+	[UWB_RSV_STATE_O_MOVE_COMBINING]     = "o move combining",
+	[UWB_RSV_STATE_O_MOVE_REDUCING]      = "o move reducing ",
+	[UWB_RSV_STATE_T_ACCEPTED]           = "t accepted      ",
+	[UWB_RSV_STATE_T_CONFLICT]           = "t conflict      ",
+	[UWB_RSV_STATE_T_PENDING]            = "t pending       ",
+	[UWB_RSV_STATE_T_DENIED]             = "t denied        ",
+	[UWB_RSV_STATE_T_RESIZED]            = "t resized       ",
+	[UWB_RSV_STATE_T_EXPANDING_ACCEPTED] = "t expanding acc ",
+	[UWB_RSV_STATE_T_EXPANDING_CONFLICT] = "t expanding conf",
+	[UWB_RSV_STATE_T_EXPANDING_PENDING]  = "t expanding pend",
+	[UWB_RSV_STATE_T_EXPANDING_DENIED]   = "t expanding den ",
 };
 
 static const char *rsv_types[] = {
@@ -42,6 +52,31 @@
 	[UWB_DRP_TYPE_PCA]      = "pca",
 };
 
+bool uwb_rsv_has_two_drp_ies(struct uwb_rsv *rsv)
+{
+	static const bool has_two_drp_ies[] = {
+		[UWB_RSV_STATE_O_INITIATED]               = false,
+		[UWB_RSV_STATE_O_PENDING]                 = false,
+		[UWB_RSV_STATE_O_MODIFIED]                = false,
+		[UWB_RSV_STATE_O_ESTABLISHED]             = false,
+		[UWB_RSV_STATE_O_TO_BE_MOVED]             = false,
+		[UWB_RSV_STATE_O_MOVE_COMBINING]          = false,
+		[UWB_RSV_STATE_O_MOVE_REDUCING]           = false,
+		[UWB_RSV_STATE_O_MOVE_EXPANDING]          = true,
+		[UWB_RSV_STATE_T_ACCEPTED]                = false,
+		[UWB_RSV_STATE_T_CONFLICT]                = false,
+		[UWB_RSV_STATE_T_PENDING]                 = false,
+		[UWB_RSV_STATE_T_DENIED]                  = false,
+		[UWB_RSV_STATE_T_RESIZED]                 = false,
+		[UWB_RSV_STATE_T_EXPANDING_ACCEPTED]      = true,
+		[UWB_RSV_STATE_T_EXPANDING_CONFLICT]      = true,
+		[UWB_RSV_STATE_T_EXPANDING_PENDING]       = true,
+		[UWB_RSV_STATE_T_EXPANDING_DENIED]        = true,
+	};
+
+	return has_two_drp_ies[rsv->state];
+}
+
 /**
  * uwb_rsv_state_str - return a string for a reservation state
  * @state: the reservation state.
@@ -66,7 +101,7 @@
 }
 EXPORT_SYMBOL_GPL(uwb_rsv_type_str);
 
-static void uwb_rsv_dump(struct uwb_rsv *rsv)
+void uwb_rsv_dump(char *text, struct uwb_rsv *rsv)
 {
 	struct device *dev = &rsv->rc->uwb_dev.dev;
 	struct uwb_dev_addr devaddr;
@@ -82,6 +117,23 @@
 	dev_dbg(dev, "rsv %s -> %s: %s\n", owner, target, uwb_rsv_state_str(rsv->state));
 }
 
+static void uwb_rsv_release(struct kref *kref)
+{
+	struct uwb_rsv *rsv = container_of(kref, struct uwb_rsv, kref);
+
+	kfree(rsv);
+}
+
+void uwb_rsv_get(struct uwb_rsv *rsv)
+{
+	kref_get(&rsv->kref);
+}
+
+void uwb_rsv_put(struct uwb_rsv *rsv)
+{
+	kref_put(&rsv->kref, uwb_rsv_release);
+}
+
 /*
  * Get a free stream index for a reservation.
  *
@@ -92,6 +144,7 @@
 static int uwb_rsv_get_stream(struct uwb_rsv *rsv)
 {
 	struct uwb_rc *rc = rsv->rc;
+	struct device *dev = &rc->uwb_dev.dev;
 	unsigned long *streams_bm;
 	int stream;
 
@@ -113,12 +166,15 @@
 	rsv->stream = stream;
 	set_bit(stream, streams_bm);
 
+	dev_dbg(dev, "get stream %d\n", rsv->stream);
+
 	return 0;
 }
 
 static void uwb_rsv_put_stream(struct uwb_rsv *rsv)
 {
 	struct uwb_rc *rc = rsv->rc;
+	struct device *dev = &rc->uwb_dev.dev;
 	unsigned long *streams_bm;
 
 	switch (rsv->target.type) {
@@ -133,86 +189,52 @@
 	}
 
 	clear_bit(rsv->stream, streams_bm);
+
+	dev_dbg(dev, "put stream %d\n", rsv->stream);
 }
 
-/*
- * Generate a MAS allocation with a single row component.
- */
-static void uwb_rsv_gen_alloc_row(struct uwb_mas_bm *mas,
-				  int first_mas, int mas_per_zone,
-				  int zs, int ze)
+void uwb_rsv_backoff_win_timer(unsigned long arg)
 {
-	struct uwb_mas_bm col;
-	int z;
+	struct uwb_drp_backoff_win *bow = (struct uwb_drp_backoff_win *)arg;
+	struct uwb_rc *rc = container_of(bow, struct uwb_rc, bow);
+	struct device *dev = &rc->uwb_dev.dev;
 
-	bitmap_zero(mas->bm, UWB_NUM_MAS);
-	bitmap_zero(col.bm, UWB_NUM_MAS);
-	bitmap_fill(col.bm, mas_per_zone);
-	bitmap_shift_left(col.bm, col.bm, first_mas + zs * UWB_MAS_PER_ZONE, UWB_NUM_MAS);
-
-	for (z = zs; z <= ze; z++) {
-		bitmap_or(mas->bm, mas->bm, col.bm, UWB_NUM_MAS);
-		bitmap_shift_left(col.bm, col.bm, UWB_MAS_PER_ZONE, UWB_NUM_MAS);
+	bow->can_reserve_extra_mases = true;
+	if (bow->total_expired <= 4) {
+		bow->total_expired++;
+	} else {
+		/* after 4 backoff windows have expired we can exit from
+		 * the backoff procedure */
+		bow->total_expired = 0;
+		bow->window = UWB_DRP_BACKOFF_WIN_MIN >> 1;
 	}
+	dev_dbg(dev, "backoff_win_timer total_expired=%d, n=%d\n", bow->total_expired, bow->n);
+
+	/* try to relocate all the "to be moved" relocations */
+	uwb_rsv_handle_drp_avail_change(rc);
 }
 
-/*
- * Allocate some MAS for this reservation based on current local
- * availability, the reservation parameters (max_mas, min_mas,
- * sparsity), and the WiMedia rules for MAS allocations.
- *
- * Returns -EBUSY is insufficient free MAS are available.
- *
- * FIXME: to simplify this, only safe reservations with a single row
- * component in zones 1 to 15 are tried (zone 0 is skipped to avoid
- * problems with the MAS reserved for the BP).
- *
- * [ECMA-368] section B.2.
- */
-static int uwb_rsv_alloc_mas(struct uwb_rsv *rsv)
+void uwb_rsv_backoff_win_increment(struct uwb_rc *rc)
 {
-	static const int safe_mas_in_row[UWB_NUM_ZONES] = {
-		8, 7, 6, 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 2, 1,
-	};
-	int n, r;
-	struct uwb_mas_bm mas;
-	bool found = false;
+	struct uwb_drp_backoff_win *bow = &rc->bow;
+	struct device *dev = &rc->uwb_dev.dev;
+	unsigned timeout_us;
 
-	/*
-	 * Search all valid safe allocations until either: too few MAS
-	 * are available; or the smallest allocation with sufficient
-	 * MAS is found.
-	 *
-	 * The top of the zones are preferred, so space for larger
-	 * allocations is available in the bottom of the zone (e.g., a
-	 * 15 MAS allocation should start in row 14 leaving space for
-	 * a 120 MAS allocation at row 0).
-	 */
-	for (n = safe_mas_in_row[0]; n >= 1; n--) {
-		int num_mas;
+	dev_dbg(dev, "backoff_win_increment: window=%d\n", bow->window);
 
-		num_mas = n * (UWB_NUM_ZONES - 1);
-		if (num_mas < rsv->min_mas)
-			break;
-		if (found && num_mas < rsv->max_mas)
-			break;
+	bow->can_reserve_extra_mases = false;
 
-		for (r = UWB_MAS_PER_ZONE-1;  r >= 0; r--) {
-			if (safe_mas_in_row[r] < n)
-				continue;
-			uwb_rsv_gen_alloc_row(&mas, r, n, 1, UWB_NUM_ZONES);
-			if (uwb_drp_avail_reserve_pending(rsv->rc, &mas) == 0) {
-				found = true;
-				break;
-			}
-		}
-	}
+	if ((bow->window << 1) == UWB_DRP_BACKOFF_WIN_MAX)
+		return;
 
-	if (!found)
-		return -EBUSY;
+	bow->window <<= 1;
+	bow->n = random32() & (bow->window - 1);
+	dev_dbg(dev, "new_window=%d, n=%d\n", bow->window, bow->n);
 
-	bitmap_copy(rsv->mas.bm, mas.bm, UWB_NUM_MAS);
-	return 0;
+	/* reset the timer associated variables */
+	timeout_us = bow->n * UWB_SUPERFRAME_LENGTH_US;
+	bow->total_expired = 0;
+	mod_timer(&bow->timer, jiffies + usecs_to_jiffies(timeout_us));
 }
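
/*
 * Stand-alone illustration of the DRP backoff above: the window is
 * doubled up to UWB_DRP_BACKOFF_WIN_MAX and a random slot n in
 * [0, window) gives the number of superframes to wait before retrying.
 * The constant values below are placeholders, not the real ECMA-368
 * definitions from the UWB headers.
 */
#include <stdio.h>
#include <stdlib.h>

#define WIN_MIN		 16	/* placeholder for UWB_DRP_BACKOFF_WIN_MIN */
#define WIN_MAX		256	/* placeholder for UWB_DRP_BACKOFF_WIN_MAX */
#define SF_LEN_US	65536	/* placeholder for UWB_SUPERFRAME_LENGTH_US */

int main(void)
{
	int window = WIN_MIN >> 1;	/* initial value, as in uwb_rsv_init() */
	int n;

	while ((window << 1) != WIN_MAX) {
		window <<= 1;			/* double the window */
		n = rand() & (window - 1);	/* pick a random slot */
		printf("window=%d n=%d wait=%d us\n",
		       window, n, n * SF_LEN_US);
	}
	return 0;
}
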
 
 static void uwb_rsv_stroke_timer(struct uwb_rsv *rsv)
@@ -225,13 +247,16 @@
 	 * received.
 	 */
 	if (rsv->is_multicast) {
-		if (rsv->state == UWB_RSV_STATE_O_INITIATED)
+		if (rsv->state == UWB_RSV_STATE_O_INITIATED
+		    || rsv->state == UWB_RSV_STATE_O_MOVE_EXPANDING
+		    || rsv->state == UWB_RSV_STATE_O_MOVE_COMBINING
+		    || rsv->state == UWB_RSV_STATE_O_MOVE_REDUCING)
 			sframes = 1;
 		if (rsv->state == UWB_RSV_STATE_O_ESTABLISHED)
 			sframes = 0;
+		
 	}
 
-	rsv->expired = false;
 	if (sframes > 0) {
 		/*
 		 * Add an additional 2 superframes to account for the
@@ -253,7 +278,7 @@
 	rsv->state = new_state;
 	rsv->ie_valid = false;
 
-	uwb_rsv_dump(rsv);
+	uwb_rsv_dump("SU", rsv);
 
 	uwb_rsv_stroke_timer(rsv);
 	uwb_rsv_sched_update(rsv->rc);
@@ -267,10 +292,17 @@
 
 void uwb_rsv_set_state(struct uwb_rsv *rsv, enum uwb_rsv_state new_state)
 {
+	struct uwb_rsv_move *mv = &rsv->mv;
+
 	if (rsv->state == new_state) {
 		switch (rsv->state) {
 		case UWB_RSV_STATE_O_ESTABLISHED:
+		case UWB_RSV_STATE_O_MOVE_EXPANDING:
+		case UWB_RSV_STATE_O_MOVE_COMBINING:
+		case UWB_RSV_STATE_O_MOVE_REDUCING:
 		case UWB_RSV_STATE_T_ACCEPTED:
+		case UWB_RSV_STATE_T_EXPANDING_ACCEPTED:
+		case UWB_RSV_STATE_T_RESIZED:
 		case UWB_RSV_STATE_NONE:
 			uwb_rsv_stroke_timer(rsv);
 			break;
@@ -282,10 +314,10 @@
 		return;
 	}
 
+	uwb_rsv_dump("SC", rsv);
+
 	switch (new_state) {
 	case UWB_RSV_STATE_NONE:
-		uwb_drp_avail_release(rsv->rc, &rsv->mas);
-		uwb_rsv_put_stream(rsv);
 		uwb_rsv_state_update(rsv, UWB_RSV_STATE_NONE);
 		uwb_rsv_callback(rsv);
 		break;
@@ -295,12 +327,45 @@
 	case UWB_RSV_STATE_O_PENDING:
 		uwb_rsv_state_update(rsv, UWB_RSV_STATE_O_PENDING);
 		break;
+	case UWB_RSV_STATE_O_MODIFIED:
+		/* in the companion there are the MASes to drop */
+		bitmap_andnot(rsv->mas.bm, rsv->mas.bm, mv->companion_mas.bm, UWB_NUM_MAS);
+		uwb_rsv_state_update(rsv, UWB_RSV_STATE_O_MODIFIED);
+		break;
 	case UWB_RSV_STATE_O_ESTABLISHED:
+		if (rsv->state == UWB_RSV_STATE_O_MODIFIED
+		    || rsv->state == UWB_RSV_STATE_O_MOVE_REDUCING) {
+			uwb_drp_avail_release(rsv->rc, &mv->companion_mas);
+			rsv->needs_release_companion_mas = false;
+		}
 		uwb_drp_avail_reserve(rsv->rc, &rsv->mas);
 		uwb_rsv_state_update(rsv, UWB_RSV_STATE_O_ESTABLISHED);
 		uwb_rsv_callback(rsv);
 		break;
+	case UWB_RSV_STATE_O_MOVE_EXPANDING:
+		rsv->needs_release_companion_mas = true;
+		uwb_rsv_state_update(rsv, UWB_RSV_STATE_O_MOVE_EXPANDING);
+		break;
+	case UWB_RSV_STATE_O_MOVE_COMBINING:
+		rsv->needs_release_companion_mas = false;
+		uwb_drp_avail_reserve(rsv->rc, &mv->companion_mas);
+		bitmap_or(rsv->mas.bm, rsv->mas.bm, mv->companion_mas.bm, UWB_NUM_MAS);
+		rsv->mas.safe   += mv->companion_mas.safe;
+		rsv->mas.unsafe += mv->companion_mas.unsafe;
+		uwb_rsv_state_update(rsv, UWB_RSV_STATE_O_MOVE_COMBINING);
+		break;
+	case UWB_RSV_STATE_O_MOVE_REDUCING:
+		bitmap_andnot(mv->companion_mas.bm, rsv->mas.bm, mv->final_mas.bm, UWB_NUM_MAS);
+		rsv->needs_release_companion_mas = true;
+		rsv->mas.safe   = mv->final_mas.safe;
+		rsv->mas.unsafe = mv->final_mas.unsafe;
+		bitmap_copy(rsv->mas.bm, mv->final_mas.bm, UWB_NUM_MAS);
+		bitmap_copy(rsv->mas.unsafe_bm, mv->final_mas.unsafe_bm, UWB_NUM_MAS);
+		uwb_rsv_state_update(rsv, UWB_RSV_STATE_O_MOVE_REDUCING);
+		break;
 	case UWB_RSV_STATE_T_ACCEPTED:
+	case UWB_RSV_STATE_T_RESIZED:
+		rsv->needs_release_companion_mas = false;
 		uwb_drp_avail_reserve(rsv->rc, &rsv->mas);
 		uwb_rsv_state_update(rsv, UWB_RSV_STATE_T_ACCEPTED);
 		uwb_rsv_callback(rsv);
@@ -308,12 +373,82 @@
 	case UWB_RSV_STATE_T_DENIED:
 		uwb_rsv_state_update(rsv, UWB_RSV_STATE_T_DENIED);
 		break;
+	case UWB_RSV_STATE_T_CONFLICT:
+		uwb_rsv_state_update(rsv, UWB_RSV_STATE_T_CONFLICT);
+		break;
+	case UWB_RSV_STATE_T_PENDING:
+		uwb_rsv_state_update(rsv, UWB_RSV_STATE_T_PENDING);
+		break;
+	case UWB_RSV_STATE_T_EXPANDING_ACCEPTED:
+		rsv->needs_release_companion_mas = true;
+		uwb_drp_avail_reserve(rsv->rc, &mv->companion_mas);
+		uwb_rsv_state_update(rsv, UWB_RSV_STATE_T_EXPANDING_ACCEPTED);
+		break;
 	default:
 		dev_err(&rsv->rc->uwb_dev.dev, "unhandled state: %s (%d)\n",
 			uwb_rsv_state_str(new_state), new_state);
 	}
 }
 
+static void uwb_rsv_handle_timeout_work(struct work_struct *work)
+{
+	struct uwb_rsv *rsv = container_of(work, struct uwb_rsv,
+					   handle_timeout_work);
+	struct uwb_rc *rc = rsv->rc;
+
+	mutex_lock(&rc->rsvs_mutex);
+
+	uwb_rsv_dump("TO", rsv);
+
+	switch (rsv->state) {
+	case UWB_RSV_STATE_O_INITIATED:
+		if (rsv->is_multicast) {
+			uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_ESTABLISHED);
+			goto unlock;
+		}
+		break;
+	case UWB_RSV_STATE_O_MOVE_EXPANDING:
+		if (rsv->is_multicast) {
+			uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MOVE_COMBINING);
+			goto unlock;
+		}
+		break;
+	case UWB_RSV_STATE_O_MOVE_COMBINING:
+		if (rsv->is_multicast) {
+			uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MOVE_REDUCING);
+			goto unlock;
+		}
+		break;
+	case UWB_RSV_STATE_O_MOVE_REDUCING:
+		if (rsv->is_multicast) {
+			uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_ESTABLISHED);
+			goto unlock;
+		}
+		break;
+	case UWB_RSV_STATE_O_ESTABLISHED:
+		if (rsv->is_multicast)
+			goto unlock;
+		break;
+	case UWB_RSV_STATE_T_EXPANDING_ACCEPTED:
+		/*
+		 * The timeout could be for either the main or the
+		 * companion DRP; assume it's for the companion and
+		 * drop that first.  A further timeout is required to
+		 * drop the main.
+		 */
+		uwb_rsv_set_state(rsv, UWB_RSV_STATE_T_ACCEPTED);
+		uwb_drp_avail_release(rsv->rc, &rsv->mv.companion_mas);
+		goto unlock;
+	default:
+		break;
+	}
+
+	uwb_rsv_remove(rsv);
+
+unlock:
+	mutex_unlock(&rc->rsvs_mutex);
+}
+
 static struct uwb_rsv *uwb_rsv_alloc(struct uwb_rc *rc)
 {
 	struct uwb_rsv *rsv;
@@ -324,23 +459,17 @@
 
 	INIT_LIST_HEAD(&rsv->rc_node);
 	INIT_LIST_HEAD(&rsv->pal_node);
+	kref_init(&rsv->kref);
 	init_timer(&rsv->timer);
 	rsv->timer.function = uwb_rsv_timer;
 	rsv->timer.data     = (unsigned long)rsv;
 
 	rsv->rc = rc;
+	INIT_WORK(&rsv->handle_timeout_work, uwb_rsv_handle_timeout_work);
 
 	return rsv;
 }
 
-static void uwb_rsv_free(struct uwb_rsv *rsv)
-{
-	uwb_dev_put(rsv->owner);
-	if (rsv->target.type == UWB_RSV_TARGET_DEV)
-		uwb_dev_put(rsv->target.dev);
-	kfree(rsv);
-}
-
 /**
  * uwb_rsv_create - allocate and initialize a UWB reservation structure
  * @rc: the radio controller
@@ -371,26 +500,36 @@
 
 void uwb_rsv_remove(struct uwb_rsv *rsv)
 {
+	uwb_rsv_dump("RM", rsv);
+
 	if (rsv->state != UWB_RSV_STATE_NONE)
 		uwb_rsv_set_state(rsv, UWB_RSV_STATE_NONE);
+
+	if (rsv->needs_release_companion_mas)
+		uwb_drp_avail_release(rsv->rc, &rsv->mv.companion_mas);
+	uwb_drp_avail_release(rsv->rc, &rsv->mas);
+
+	if (uwb_rsv_is_owner(rsv))
+		uwb_rsv_put_stream(rsv);
+	
 	del_timer_sync(&rsv->timer);
-	list_del(&rsv->rc_node);
-	uwb_rsv_free(rsv);
+	uwb_dev_put(rsv->owner);
+	if (rsv->target.type == UWB_RSV_TARGET_DEV)
+		uwb_dev_put(rsv->target.dev);
+
+	list_del_init(&rsv->rc_node);
+	uwb_rsv_put(rsv);
 }
 
 /**
  * uwb_rsv_destroy - free a UWB reservation structure
  * @rsv: the reservation to free
  *
- * The reservation will be terminated if it is pending or established.
+ * The reservation must already be terminated.
  */
 void uwb_rsv_destroy(struct uwb_rsv *rsv)
 {
-	struct uwb_rc *rc = rsv->rc;
-
-	mutex_lock(&rc->rsvs_mutex);
-	uwb_rsv_remove(rsv);
-	mutex_unlock(&rc->rsvs_mutex);
+	uwb_rsv_put(rsv);
 }
 EXPORT_SYMBOL_GPL(uwb_rsv_destroy);
 
@@ -399,7 +538,7 @@
  * @rsv: the reservation
  *
  * The PAL should fill in @rsv's owner, target, type, max_mas,
- * min_mas, sparsity and is_multicast fields.  If the target is a
+ * min_mas, max_interval and is_multicast fields.  If the target is a
  * uwb_dev it must be referenced.
  *
  * The reservation's callback will be called when the reservation is
@@ -408,20 +547,32 @@
 int uwb_rsv_establish(struct uwb_rsv *rsv)
 {
 	struct uwb_rc *rc = rsv->rc;
+	struct uwb_mas_bm available;
 	int ret;
 
 	mutex_lock(&rc->rsvs_mutex);
-
 	ret = uwb_rsv_get_stream(rsv);
 	if (ret)
 		goto out;
 
-	ret = uwb_rsv_alloc_mas(rsv);
-	if (ret) {
+	rsv->tiebreaker = random32() & 1;
+	/* get available mas bitmap */
+	uwb_drp_available(rc, &available);
+
+	ret = uwb_rsv_find_best_allocation(rsv, &available, &rsv->mas);
+	if (ret == UWB_RSV_ALLOC_NOT_FOUND) {
+		ret = -EBUSY;
 		uwb_rsv_put_stream(rsv);
 		goto out;
 	}
 
+	ret = uwb_drp_avail_reserve_pending(rc, &rsv->mas);
+	if (ret != 0) {
+		uwb_rsv_put_stream(rsv);
+		goto out;
+	}
+
+	uwb_rsv_get(rsv);
 	list_add_tail(&rsv->rc_node, &rc->reservations);
 	rsv->owner = &rc->uwb_dev;
 	uwb_dev_get(rsv->owner);
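
/*
 * Minimal sketch of a PAL establishing a reservation, following the
 * field list in the uwb_rsv_establish() comment above.  The reservation
 * type and the MAS numbers are arbitrary example values, and the target
 * device is assumed to be already referenced by the caller.
 */
static int example_pal_reserve(struct uwb_rc *rc, struct uwb_dev *target,
			       uwb_rsv_cb_f cb, void *pal_priv)
{
	struct uwb_rsv *rsv;
	int ret;

	rsv = uwb_rsv_create(rc, cb, pal_priv);
	if (rsv == NULL)
		return -ENOMEM;

	rsv->target.type  = UWB_RSV_TARGET_DEV;
	rsv->target.dev   = target;
	rsv->type         = UWB_DRP_TYPE_HARD;	/* example type */
	rsv->max_mas      = 256;
	rsv->min_mas      = 16;
	rsv->max_interval = 2;
	rsv->is_multicast = false;

	ret = uwb_rsv_establish(rsv);
	if (ret < 0)
		uwb_rsv_destroy(rsv);
	return ret;
}
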
@@ -437,16 +588,71 @@
  * @rsv: the reservation to modify
  * @max_mas: new maximum MAS to reserve
  * @min_mas: new minimum MAS to reserve
- * @sparsity: new sparsity to use
+ * @max_interval: new max_interval to use
  *
  * FIXME: implement this once there are PALs that use it.
  */
-int uwb_rsv_modify(struct uwb_rsv *rsv, int max_mas, int min_mas, int sparsity)
+int uwb_rsv_modify(struct uwb_rsv *rsv, int max_mas, int min_mas, int max_interval)
 {
 	return -ENOSYS;
 }
 EXPORT_SYMBOL_GPL(uwb_rsv_modify);
 
+/*
+ * Move an already established reservation (rc->rsvs_mutex must be
+ * held when this function is called).
+ */
+int uwb_rsv_try_move(struct uwb_rsv *rsv, struct uwb_mas_bm *available)
+{
+	struct uwb_rc *rc = rsv->rc;
+	struct uwb_drp_backoff_win *bow = &rc->bow;
+	struct device *dev = &rc->uwb_dev.dev;
+	struct uwb_rsv_move *mv;
+	int ret = 0;
+ 
+	if (bow->can_reserve_extra_mases == false)
+		return -EBUSY;
+
+	mv = &rsv->mv;
+
+	if (uwb_rsv_find_best_allocation(rsv, available, &mv->final_mas) == UWB_RSV_ALLOC_FOUND) {
+
+		if (!bitmap_equal(rsv->mas.bm, mv->final_mas.bm, UWB_NUM_MAS)) {
+			/* We want to move the reservation */
+			bitmap_andnot(mv->companion_mas.bm, mv->final_mas.bm, rsv->mas.bm, UWB_NUM_MAS);
+			uwb_drp_avail_reserve_pending(rc, &mv->companion_mas);
+			uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MOVE_EXPANDING);
+		}
+	} else {
+		dev_dbg(dev, "new allocation not found\n");
+	}
+	
+	return ret;
+}
+
+/* Try to move every reservation in the O_ESTABLISHED state, giving the
+ * MAS allocator algorithm an availability that is the real availability
+ * plus the MAS already allocated to the reservation. */
+void uwb_rsv_handle_drp_avail_change(struct uwb_rc *rc)
+{
+	struct uwb_drp_backoff_win *bow = &rc->bow;
+	struct uwb_rsv *rsv;
+	struct uwb_mas_bm mas;
+	
+	if (bow->can_reserve_extra_mases == false)
+		return;
+
+	list_for_each_entry(rsv, &rc->reservations, rc_node) {
+		if (rsv->state == UWB_RSV_STATE_O_ESTABLISHED ||
+		    rsv->state == UWB_RSV_STATE_O_TO_BE_MOVED) {
+			uwb_drp_available(rc, &mas);
+			bitmap_or(mas.bm, mas.bm, rsv->mas.bm, UWB_NUM_MAS);
+			uwb_rsv_try_move(rsv, &mas);
+		}
+	}
+	
+}
+
 /**
  * uwb_rsv_terminate - terminate an established reservation
  * @rsv: the reservation to terminate
@@ -463,7 +669,8 @@
 
 	mutex_lock(&rc->rsvs_mutex);
 
-	uwb_rsv_set_state(rsv, UWB_RSV_STATE_NONE);
+	if (rsv->state != UWB_RSV_STATE_NONE)
+		uwb_rsv_set_state(rsv, UWB_RSV_STATE_NONE);
 
 	mutex_unlock(&rc->rsvs_mutex);
 }
@@ -477,9 +684,14 @@
  *
  * Reservation requests from peers are denied unless a PAL accepts it
  * by calling this function.
+ *
+ * The PAL must call uwb_rsv_destroy() for all accepted reservations before
+ * calling uwb_pal_unregister().
  */
 void uwb_rsv_accept(struct uwb_rsv *rsv, uwb_rsv_cb_f cb, void *pal_priv)
 {
+	uwb_rsv_get(rsv);
+
 	rsv->callback = cb;
 	rsv->pal_priv = pal_priv;
 	rsv->state    = UWB_RSV_STATE_T_ACCEPTED;
@@ -530,9 +742,9 @@
 	uwb_dev_get(rsv->owner);
 	rsv->target.type = UWB_RSV_TARGET_DEV;
 	rsv->target.dev  = &rc->uwb_dev;
+	uwb_dev_get(&rc->uwb_dev);
 	rsv->type        = uwb_ie_drp_type(drp_ie);
 	rsv->stream      = uwb_ie_drp_stream_index(drp_ie);
-	set_bit(rsv->stream, rsv->owner->streams);
 	uwb_drp_ie_to_bm(&rsv->mas, drp_ie);
 
 	/*
@@ -540,24 +752,46 @@
 	 * deny the request.
 	 */
 	rsv->state = UWB_RSV_STATE_T_DENIED;
-	spin_lock(&rc->pal_lock);
+	mutex_lock(&rc->uwb_dev.mutex);
 	list_for_each_entry(pal, &rc->pals, node) {
 		if (pal->new_rsv)
-			pal->new_rsv(rsv);
+			pal->new_rsv(pal, rsv);
 		if (rsv->state == UWB_RSV_STATE_T_ACCEPTED)
 			break;
 	}
-	spin_unlock(&rc->pal_lock);
+	mutex_unlock(&rc->uwb_dev.mutex);
 
 	list_add_tail(&rsv->rc_node, &rc->reservations);
 	state = rsv->state;
 	rsv->state = UWB_RSV_STATE_NONE;
-	uwb_rsv_set_state(rsv, state);
+
+	/* FIXME: do something sensible here */
+	if (state == UWB_RSV_STATE_T_ACCEPTED
+	    && uwb_drp_avail_reserve_pending(rc, &rsv->mas) == -EBUSY) {
+		/* FIXME: do something sensible here */
+	} else {
+		uwb_rsv_set_state(rsv, state);
+	}
 
 	return rsv;
 }
 
 /**
+ * uwb_rsv_get_usable_mas - get the bitmap of the usable MAS of a reservation
+ * @rsv: the reservation.
+ * @mas: returns the usable MAS.
+ *
+ * The usable MAS of a reservation may be less than the negotiated MAS
+ * if alien BPs are present.
+ */
+void uwb_rsv_get_usable_mas(struct uwb_rsv *rsv, struct uwb_mas_bm *mas)
+{
+	bitmap_zero(mas->bm, UWB_NUM_MAS);
+	bitmap_andnot(mas->bm, rsv->mas.bm, rsv->rc->cnflt_alien_bitmap.bm, UWB_NUM_MAS);
+}
+EXPORT_SYMBOL_GPL(uwb_rsv_get_usable_mas);
+
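+/*
+ * Sketch of a PAL asking how much of its reservation is currently
+ * usable; bitmap_weight() is the standard kernel helper for counting
+ * set bits, and the example_* name is illustrative.
+ */
+static int example_pal_usable_mas(struct uwb_rsv *rsv)
+{
+	struct uwb_mas_bm mas;
+
+	uwb_rsv_get_usable_mas(rsv, &mas);
+	return bitmap_weight(mas.bm, UWB_NUM_MAS);
+}
+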
+/**
  * uwb_rsv_find - find a reservation for a received DRP IE.
  * @rc: the radio controller
  * @src: source of the DRP IE
@@ -596,8 +830,6 @@
 	bool ie_updated = false;
 
 	list_for_each_entry_safe(rsv, t, &rc->reservations, rc_node) {
-		if (rsv->expired)
-			uwb_drp_handle_timeout(rsv);
 		if (!rsv->ie_valid) {
 			uwb_drp_ie_update(rsv);
 			ie_updated = true;
@@ -607,9 +839,47 @@
 	return ie_updated;
 }
 
+void uwb_rsv_queue_update(struct uwb_rc *rc)
+{
+	unsigned long delay_us = UWB_MAS_LENGTH_US * UWB_MAS_PER_ZONE;
+
+	queue_delayed_work(rc->rsv_workq, &rc->rsv_update_work, usecs_to_jiffies(delay_us));
+}
+
+/**
+ * uwb_rsv_sched_update - schedule an update of the DRP IEs
+ * @rc: the radio controller.
+ *
+ * To improve performance and ensure correctness with [ECMA-368], the
+ * number of SET-DRP-IE commands that are issued is limited.
+ *
+ * DRP IE updates come from two sources: DRP events from the hardware,
+ * which all occur at the beginning of the superframe ('synchronous'
+ * events) and reservation establishment/termination requests from
+ * PALs or timers ('asynchronous' events).
+ *
+ * A delayed work ensures that all the synchronous events result in
+ * one SET-DRP-IE command.
+ *
+ * Additional logic (the set_drp_ie_pending and rsv_updated_postponed
+ * flags) will prevent an asynchronous event from starting a SET-DRP-IE
+ * command if one is currently awaiting a response.
+ *
+ * FIXME: this does leave a window where an asynchronous event can delay
+ * the SET-DRP-IE for a synchronous event by one superframe.
+ */
 void uwb_rsv_sched_update(struct uwb_rc *rc)
 {
-	queue_work(rc->rsv_workq, &rc->rsv_update_work);
+	spin_lock(&rc->rsvs_lock);
+	if (!delayed_work_pending(&rc->rsv_update_work)) {
+		if (rc->set_drp_ie_pending > 0) {
+			rc->set_drp_ie_pending++;
+			goto unlock;
+		}
+		uwb_rsv_queue_update(rc);
+	}
+unlock:
+	spin_unlock(&rc->rsvs_lock);
 }
 
 /*
@@ -618,7 +888,8 @@
  */
 static void uwb_rsv_update_work(struct work_struct *work)
 {
-	struct uwb_rc *rc = container_of(work, struct uwb_rc, rsv_update_work);
+	struct uwb_rc *rc = container_of(work, struct uwb_rc,
+					 rsv_update_work.work);
 	bool ie_updated;
 
 	mutex_lock(&rc->rsvs_mutex);
@@ -630,25 +901,71 @@
 		ie_updated = true;
 	}
 
-	if (ie_updated)
+	if (ie_updated && (rc->set_drp_ie_pending == 0))
 		uwb_rc_send_all_drp_ie(rc);
 
 	mutex_unlock(&rc->rsvs_mutex);
 }
 
+static void uwb_rsv_alien_bp_work(struct work_struct *work)
+{
+	struct uwb_rc *rc = container_of(work, struct uwb_rc,
+					 rsv_alien_bp_work.work);
+	struct uwb_rsv *rsv;
+
+	mutex_lock(&rc->rsvs_mutex);
+	
+	list_for_each_entry(rsv, &rc->reservations, rc_node) {
+		if (rsv->type != UWB_DRP_TYPE_ALIEN_BP) {
+			rsv->callback(rsv);
+		}
+	}
+
+	mutex_unlock(&rc->rsvs_mutex);
+}
+
 static void uwb_rsv_timer(unsigned long arg)
 {
 	struct uwb_rsv *rsv = (struct uwb_rsv *)arg;
 
-	rsv->expired = true;
-	uwb_rsv_sched_update(rsv->rc);
+	queue_work(rsv->rc->rsv_workq, &rsv->handle_timeout_work);
+}
+
+/**
+ * uwb_rsv_remove_all - remove all reservations
+ * @rc: the radio controller
+ *
+ * A DRP IE update is not done.
+ */
+void uwb_rsv_remove_all(struct uwb_rc *rc)
+{
+	struct uwb_rsv *rsv, *t;
+
+	mutex_lock(&rc->rsvs_mutex);
+	list_for_each_entry_safe(rsv, t, &rc->reservations, rc_node) {
+		uwb_rsv_remove(rsv);
+	}
+	/* Cancel any postponed update. */
+	rc->set_drp_ie_pending = 0;
+	mutex_unlock(&rc->rsvs_mutex);
+
+	cancel_delayed_work_sync(&rc->rsv_update_work);
 }
 
 void uwb_rsv_init(struct uwb_rc *rc)
 {
 	INIT_LIST_HEAD(&rc->reservations);
+	INIT_LIST_HEAD(&rc->cnflt_alien_list);
 	mutex_init(&rc->rsvs_mutex);
-	INIT_WORK(&rc->rsv_update_work, uwb_rsv_update_work);
+	spin_lock_init(&rc->rsvs_lock);
+	INIT_DELAYED_WORK(&rc->rsv_update_work, uwb_rsv_update_work);
+	INIT_DELAYED_WORK(&rc->rsv_alien_bp_work, uwb_rsv_alien_bp_work);
+	rc->bow.can_reserve_extra_mases = true;
+	rc->bow.total_expired = 0;
+	rc->bow.window = UWB_DRP_BACKOFF_WIN_MIN >> 1;
+	init_timer(&rc->bow.timer);
+	rc->bow.timer.function = uwb_rsv_backoff_win_timer;
+	rc->bow.timer.data     = (unsigned long)&rc->bow;
 
 	bitmap_complement(rc->uwb_dev.streams, rc->uwb_dev.streams, UWB_NUM_STREAMS);
 }
@@ -667,14 +984,6 @@
 
 void uwb_rsv_cleanup(struct uwb_rc *rc)
 {
-	struct uwb_rsv *rsv, *t;
-
-	mutex_lock(&rc->rsvs_mutex);
-	list_for_each_entry_safe(rsv, t, &rc->reservations, rc_node) {
-		uwb_rsv_remove(rsv);
-	}
-	mutex_unlock(&rc->rsvs_mutex);
-
-	cancel_work_sync(&rc->rsv_update_work);
+	uwb_rsv_remove_all(rc);
 	destroy_workqueue(rc->rsv_workq);
 }
diff --git a/drivers/uwb/umc-bus.c b/drivers/uwb/umc-bus.c
index 2d8d62d..5ad3616 100644
--- a/drivers/uwb/umc-bus.c
+++ b/drivers/uwb/umc-bus.c
@@ -11,23 +11,48 @@
 #include <linux/uwb/umc.h>
 #include <linux/pci.h>
 
-static int umc_bus_unbind_helper(struct device *dev, void *data)
+static int umc_bus_pre_reset_helper(struct device *dev, void *data)
 {
-	struct device *parent = data;
+	int ret = 0;
 
-	if (dev->parent == parent && dev->driver)
-		device_release_driver(dev);
-	return 0;
+	if (dev->driver) {
+		struct umc_dev *umc = to_umc_dev(dev);
+		struct umc_driver *umc_drv = to_umc_driver(dev->driver);
+
+		if (umc_drv->pre_reset)
+			ret = umc_drv->pre_reset(umc);
+		else
+			device_release_driver(dev);
+	}
+	return ret;
+}
+
+static int umc_bus_post_reset_helper(struct device *dev, void *data)
+{
+	int ret = 0;
+
+	if (dev->driver) {
+		struct umc_dev *umc = to_umc_dev(dev);
+		struct umc_driver *umc_drv = to_umc_driver(dev->driver);
+
+		if (umc_drv->post_reset)
+			ret = umc_drv->post_reset(umc);
+	} else
+		ret = device_attach(dev);
+
+	return ret;
 }
 
 /**
  * umc_controller_reset - reset the whole UMC controller
  * @umc: the UMC device for the radio controller.
  *
- * Drivers will be unbound from all UMC devices belonging to the
- * controller and then the radio controller will be rebound.  The
- * radio controller is expected to do a full hardware reset when it is
- * probed.
+ * The drivers for all the capabilities of the controller will either
+ * have their pre_reset methods called or be unbound from their device.
+ * Then all post_reset methods will be called, or the drivers will be
+ * rebound.
+ *
+ * Radio controllers must provide pre_reset and post_reset methods and
+ * reset the hardware in their start method.
  *
  * If this is called while a probe() or remove() is in progress it
  * will return -EAGAIN and not perform the reset.
@@ -35,14 +60,13 @@
 int umc_controller_reset(struct umc_dev *umc)
 {
 	struct device *parent = umc->dev.parent;
-	int ret;
+	int ret = 0;
 
-	if (down_trylock(&parent->sem))
+	if(down_trylock(&parent->sem))
 		return -EAGAIN;
-	bus_for_each_dev(&umc_bus_type, NULL, parent, umc_bus_unbind_helper);
-	ret = device_attach(&umc->dev);
-	if (ret == 1)
-		ret = 0;
+	ret = device_for_each_child(parent, parent, umc_bus_pre_reset_helper);
+	if (ret >= 0)
+		device_for_each_child(parent, parent, umc_bus_post_reset_helper);
 	up(&parent->sem);
 
 	return ret;
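
/*
 * Sketch of a UMC capability driver opting in to the new reset
 * sequence.  Only the pre_reset/post_reset members are taken from this
 * patch; the other fields and the example_* names are illustrative.
 */
static int example_umc_pre_reset(struct umc_dev *umc)
{
	/* quiesce the capability; a radio controller would call
	 * uwb_rc_pre_reset() from here */
	return 0;
}

static int example_umc_post_reset(struct umc_dev *umc)
{
	/* bring the capability back up; a radio controller would call
	 * uwb_rc_post_reset() from here */
	return 0;
}

static struct umc_driver example_umc_driver = {
	.name       = "example",
	.pre_reset  = example_umc_pre_reset,
	.post_reset = example_umc_post_reset,
	/* .cap_id, .probe, .remove, ... as before */
};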
@@ -75,10 +99,10 @@
 	if (!dev->driver)
 		ret = device_attach(dev);
 
-	return ret < 0 ? ret : 0;
+	return ret;
 }
 
-static void umc_bus_rescan(void)
+static void umc_bus_rescan(struct device *parent)
 {
 	int err;
 
@@ -86,7 +110,7 @@
 	 * We can't use bus_rescan_devices() here as it deadlocks when
 	 * it tries to retake the dev->parent semaphore.
 	 */
-	err = bus_for_each_dev(&umc_bus_type, NULL, NULL, umc_bus_rescan_helper);
+	err = device_for_each_child(parent, NULL, umc_bus_rescan_helper);
 	if (err < 0)
 		printk(KERN_WARNING "%s: rescan of bus failed: %d\n",
 		       KBUILD_MODNAME, err);
@@ -120,7 +144,7 @@
 	if (err)
 		put_device(dev);
 	else
-		umc_bus_rescan();
+		umc_bus_rescan(dev->parent);
 
 	return err;
 }
diff --git a/drivers/uwb/umc-dev.c b/drivers/uwb/umc-dev.c
index aa44e1c..1fc7d82 100644
--- a/drivers/uwb/umc-dev.c
+++ b/drivers/uwb/umc-dev.c
@@ -7,8 +7,6 @@
  */
 #include <linux/kernel.h>
 #include <linux/uwb/umc.h>
-#define D_LOCAL 0
-#include <linux/uwb/debug.h>
 
 static void umc_device_release(struct device *dev)
 {
@@ -31,8 +29,7 @@
 
 	umc = kzalloc(sizeof(struct umc_dev), GFP_KERNEL);
 	if (umc) {
-		snprintf(umc->dev.bus_id, sizeof(umc->dev.bus_id), "%s-%d",
-			 parent->bus_id, n);
+		dev_set_name(&umc->dev, "%s-%d", dev_name(parent), n);
 		umc->dev.parent  = parent;
 		umc->dev.bus     = &umc_bus_type;
 		umc->dev.release = umc_device_release;
@@ -54,8 +51,6 @@
 {
 	int err;
 
-	d_fnstart(3, &umc->dev, "(umc_dev %p)\n", umc);
-
 	err = request_resource(umc->resource.parent, &umc->resource);
 	if (err < 0) {
 		dev_err(&umc->dev, "can't allocate resource range "
@@ -69,13 +64,11 @@
 	err = device_register(&umc->dev);
 	if (err < 0)
 		goto error_device_register;
-	d_fnend(3, &umc->dev, "(umc_dev %p) = 0\n", umc);
 	return 0;
 
 error_device_register:
 	release_resource(&umc->resource);
 error_request_resource:
-	d_fnend(3, &umc->dev, "(umc_dev %p) = %d\n", umc, err);
 	return err;
 }
 EXPORT_SYMBOL_GPL(umc_device_register);
@@ -95,10 +88,8 @@
 	if (!umc)
 		return;
 	dev = get_device(&umc->dev);
-	d_fnstart(3, dev, "(umc_dev %p)\n", umc);
 	device_unregister(&umc->dev);
 	release_resource(&umc->resource);
-	d_fnend(3, dev, "(umc_dev %p) = void\n", umc);
 	put_device(dev);
 }
 EXPORT_SYMBOL_GPL(umc_device_unregister);
diff --git a/drivers/uwb/uwb-debug.c b/drivers/uwb/uwb-debug.c
index 6d232c3..4a42993 100644
--- a/drivers/uwb/uwb-debug.c
+++ b/drivers/uwb/uwb-debug.c
@@ -4,6 +4,7 @@
  *
  * Copyright (C) 2005-2006 Intel Corporation
  * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ * Copyright (C) 2008 Cambridge Silicon Radio Ltd.
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License version
@@ -33,31 +34,9 @@
 #include <linux/seq_file.h>
 
 #include <linux/uwb/debug-cmd.h>
-#define D_LOCAL 0
-#include <linux/uwb/debug.h>
 
 #include "uwb-internal.h"
 
-void dump_bytes(struct device *dev, const void *_buf, size_t rsize)
-{
-	const char *buf = _buf;
-	char line[32];
-	size_t offset = 0;
-	int cnt, cnt2;
-	for (cnt = 0; cnt < rsize; cnt += 8) {
-		size_t rtop = rsize - cnt < 8 ? rsize - cnt : 8;
-		for (offset = cnt2 = 0; cnt2 < rtop; cnt2++) {
-			offset += scnprintf(line + offset, sizeof(line) - offset,
-					    "%02x ", buf[cnt + cnt2] & 0xff);
-		}
-		if (dev)
-			dev_info(dev, "%s\n", line);
-		else
-			printk(KERN_INFO "%s\n", line);
-	}
-}
-EXPORT_SYMBOL_GPL(dump_bytes);
-
 /*
  * Debug interface
  *
@@ -84,26 +63,23 @@
 	struct dentry *reservations_f;
 	struct dentry *accept_f;
 	struct dentry *drp_avail_f;
+	spinlock_t list_lock;
 };
 
 static struct dentry *root_dir;
 
 static void uwb_dbg_rsv_cb(struct uwb_rsv *rsv)
 {
-	struct uwb_rc *rc = rsv->rc;
-	struct device *dev = &rc->uwb_dev.dev;
-	struct uwb_dev_addr devaddr;
-	char owner[UWB_ADDR_STRSIZE], target[UWB_ADDR_STRSIZE];
+	struct uwb_dbg *dbg = rsv->pal_priv;
 
-	uwb_dev_addr_print(owner, sizeof(owner), &rsv->owner->dev_addr);
-	if (rsv->target.type == UWB_RSV_TARGET_DEV)
-		devaddr = rsv->target.dev->dev_addr;
-	else
-		devaddr = rsv->target.devaddr;
-	uwb_dev_addr_print(target, sizeof(target), &devaddr);
+	uwb_rsv_dump("debug", rsv);
 
-	dev_dbg(dev, "debug: rsv %s -> %s: %s\n",
-		owner, target, uwb_rsv_state_str(rsv->state));
+	if (rsv->state == UWB_RSV_STATE_NONE) {
+		spin_lock(&dbg->list_lock);
+		list_del(&rsv->pal_node);
+		spin_unlock(&dbg->list_lock);
+		uwb_rsv_destroy(rsv);
+	}
 }
 
 static int cmd_rsv_establish(struct uwb_rc *rc,
@@ -119,26 +95,27 @@
 	if (target == NULL)
 		return -ENODEV;
 
-	rsv = uwb_rsv_create(rc, uwb_dbg_rsv_cb, NULL);
+	rsv = uwb_rsv_create(rc, uwb_dbg_rsv_cb, rc->dbg);
 	if (rsv == NULL) {
 		uwb_dev_put(target);
 		return -ENOMEM;
 	}
 
-	rsv->owner       = &rc->uwb_dev;
-	rsv->target.type = UWB_RSV_TARGET_DEV;
-	rsv->target.dev  = target;
-	rsv->type        = cmd->type;
-	rsv->max_mas     = cmd->max_mas;
-	rsv->min_mas     = cmd->min_mas;
-	rsv->sparsity    = cmd->sparsity;
+	rsv->target.type  = UWB_RSV_TARGET_DEV;
+	rsv->target.dev   = target;
+	rsv->type         = cmd->type;
+	rsv->max_mas      = cmd->max_mas;
+	rsv->min_mas      = cmd->min_mas;
+	rsv->max_interval = cmd->max_interval;
 
 	ret = uwb_rsv_establish(rsv);
 	if (ret)
 		uwb_rsv_destroy(rsv);
-	else
+	else {
+		spin_lock(&(rc->dbg)->list_lock);
 		list_add_tail(&rsv->pal_node, &rc->dbg->rsvs);
-
+		spin_unlock(&(rc->dbg)->list_lock);
+	}
 	return ret;
 }
 
@@ -148,21 +125,40 @@
 	struct uwb_rsv *rsv, *found = NULL;
 	int i = 0;
 
+	spin_lock(&(rc->dbg)->list_lock);
+
 	list_for_each_entry(rsv, &rc->dbg->rsvs, pal_node) {
 		if (i == cmd->index) {
 			found = rsv;
+			uwb_rsv_get(found);
 			break;
 		}
+		i++;
 	}
+
+	spin_unlock(&(rc->dbg)->list_lock);
+
 	if (!found)
 		return -EINVAL;
 
-	list_del(&found->pal_node);
 	uwb_rsv_terminate(found);
+	uwb_rsv_put(found);
 
 	return 0;
 }
 
+static int cmd_ie_add(struct uwb_rc *rc, struct uwb_dbg_cmd_ie *ie_to_add)
+{
+	return uwb_rc_ie_add(rc,
+			     (const struct uwb_ie_hdr *) ie_to_add->data,
+			     ie_to_add->len);
+}
+
+static int cmd_ie_rm(struct uwb_rc *rc, struct uwb_dbg_cmd_ie *ie_to_rm)
+{
+	return uwb_rc_ie_rm(rc, ie_to_rm->data[0]);
+}
+
 static int command_open(struct inode *inode, struct file *file)
 {
 	file->private_data = inode->i_private;
@@ -175,8 +171,8 @@
 {
 	struct uwb_rc *rc = file->private_data;
 	struct uwb_dbg_cmd cmd;
-	int ret;
-
+	int ret = 0;
+	
 	if (len != sizeof(struct uwb_dbg_cmd))
 		return -EINVAL;
 
@@ -190,6 +186,18 @@
 	case UWB_DBG_CMD_RSV_TERMINATE:
 		ret = cmd_rsv_terminate(rc, &cmd.rsv_terminate);
 		break;
+	case UWB_DBG_CMD_IE_ADD:
+		ret = cmd_ie_add(rc, &cmd.ie_add);
+		break;
+	case UWB_DBG_CMD_IE_RM:
+		ret = cmd_ie_rm(rc, &cmd.ie_rm);
+		break;
+	case UWB_DBG_CMD_RADIO_START:
+		ret = uwb_radio_start(&rc->dbg->pal);
+		break;
+	case UWB_DBG_CMD_RADIO_STOP:
+		uwb_radio_stop(&rc->dbg->pal);
+		break;
 	default:
 		return -EINVAL;
 	}
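
/*
 * User-space sketch of driving one of the new commands through the
 * debugfs "command" file.  The struct layout comes from
 * <linux/uwb/debug-cmd.h>; the 'type' field name and the location of
 * the command file are assumptions.
 */
#include <fcntl.h>
#include <unistd.h>
#include <string.h>
#include <linux/uwb/debug-cmd.h>

static int uwb_dbg_cmd_radio_start(const char *command_file)
{
	struct uwb_dbg_cmd cmd;
	int fd, ret = -1;

	memset(&cmd, 0, sizeof(cmd));
	cmd.type = UWB_DBG_CMD_RADIO_START;

	/* the kernel requires the write to be exactly sizeof(cmd) */
	fd = open(command_file, O_WRONLY);
	if (fd < 0)
		return -1;
	if (write(fd, &cmd, sizeof(cmd)) == sizeof(cmd))
		ret = 0;
	close(fd);
	return ret;
}
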
@@ -283,12 +291,26 @@
 	.owner   = THIS_MODULE,
 };
 
-static void uwb_dbg_new_rsv(struct uwb_rsv *rsv)
+static void uwb_dbg_channel_changed(struct uwb_pal *pal, int channel)
 {
-	struct uwb_rc *rc = rsv->rc;
+	struct device *dev = &pal->rc->uwb_dev.dev;
 
-	if (rc->dbg->accept)
-		uwb_rsv_accept(rsv, uwb_dbg_rsv_cb, NULL);
+	if (channel > 0)
+		dev_info(dev, "debug: channel %d started\n", channel);
+	else
+		dev_info(dev, "debug: channel stopped\n");
+}
+
+static void uwb_dbg_new_rsv(struct uwb_pal *pal, struct uwb_rsv *rsv)
+{
+	struct uwb_dbg *dbg = container_of(pal, struct uwb_dbg, pal);
+
+	if (dbg->accept) {
+		spin_lock(&dbg->list_lock);
+		list_add_tail(&rsv->pal_node, &dbg->rsvs);
+		spin_unlock(&dbg->list_lock);
+		uwb_rsv_accept(rsv, uwb_dbg_rsv_cb, dbg);
+	}
 }
 
 /**
@@ -302,10 +324,14 @@
 		return;
 
 	INIT_LIST_HEAD(&rc->dbg->rsvs);
+	spin_lock_init(&(rc->dbg)->list_lock);
 
 	uwb_pal_init(&rc->dbg->pal);
+	rc->dbg->pal.rc = rc;
+	rc->dbg->pal.channel_changed = uwb_dbg_channel_changed;
 	rc->dbg->pal.new_rsv = uwb_dbg_new_rsv;
-	uwb_pal_register(rc, &rc->dbg->pal);
+	uwb_pal_register(&rc->dbg->pal);
+
 	if (root_dir) {
 		rc->dbg->root_d = debugfs_create_dir(dev_name(&rc->uwb_dev.dev),
 						     root_dir);
@@ -325,7 +351,7 @@
 }
 
 /**
- * uwb_dbg_add_rc - remove a radio controller's debug interface
+ * uwb_dbg_del_rc - remove a radio controller's debug interface
  * @rc: the radio controller
  */
 void uwb_dbg_del_rc(struct uwb_rc *rc)
@@ -336,10 +362,10 @@
 		return;
 
 	list_for_each_entry_safe(rsv, t, &rc->dbg->rsvs, pal_node) {
-		uwb_rsv_destroy(rsv);
+		uwb_rsv_terminate(rsv);
 	}
 
-	uwb_pal_unregister(rc, &rc->dbg->pal);
+	uwb_pal_unregister(&rc->dbg->pal);
 
 	if (root_dir) {
 		debugfs_remove(rc->dbg->drp_avail_f);
@@ -365,3 +391,16 @@
 {
 	debugfs_remove(root_dir);
 }
+
+/**
+ * uwb_dbg_create_pal_dir - create a debugfs directory for a PAL
+ * @pal: The PAL.
+ */
+struct dentry *uwb_dbg_create_pal_dir(struct uwb_pal *pal)
+{
+	struct uwb_rc *rc = pal->rc;
+
+	if (root_dir && rc->dbg && rc->dbg->root_d && pal->name)
+		return debugfs_create_dir(pal->name, rc->dbg->root_d);
+	return NULL;
+}
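+
+/*
+ * Sketch of a PAL using the helper above: it sets a name before
+ * registering and then gets its own directory under the radio
+ * controller's debugfs root.  The PAL name, the example_* names and
+ * the exported value are illustrative only.
+ */
+static u32 example_foo;	/* illustrative value exported via debugfs */
+
+static void example_pal_debugfs(struct uwb_pal *pal)
+{
+	struct dentry *dir;
+
+	pal->name = "example";	/* hypothetical PAL name */
+	dir = uwb_dbg_create_pal_dir(pal);
+	if (dir != NULL)
+		debugfs_create_u32("foo", 0600, dir, &example_foo);
+}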
diff --git a/drivers/uwb/uwb-internal.h b/drivers/uwb/uwb-internal.h
index 2ad307d..d5bcfc1 100644
--- a/drivers/uwb/uwb-internal.h
+++ b/drivers/uwb/uwb-internal.h
@@ -66,14 +66,14 @@
 		       unsigned channel, enum uwb_scan_type type,
 		       unsigned bpst_offset);
 extern int uwb_rc_send_all_drp_ie(struct uwb_rc *rc);
-extern ssize_t uwb_rc_print_IEs(struct uwb_rc *rc, char *, size_t);
-extern void uwb_rc_ie_init(struct uwb_rc *);
-extern void uwb_rc_ie_init(struct uwb_rc *);
-extern ssize_t uwb_rc_ie_setup(struct uwb_rc *);
-extern void uwb_rc_ie_release(struct uwb_rc *);
-extern int uwb_rc_ie_add(struct uwb_rc *,
-			 const struct uwb_ie_hdr *, size_t);
-extern int uwb_rc_ie_rm(struct uwb_rc *, enum uwb_ie);
+
+void uwb_rc_ie_init(struct uwb_rc *);
+int uwb_rc_ie_setup(struct uwb_rc *);
+void uwb_rc_ie_release(struct uwb_rc *);
+int uwb_ie_dump_hex(const struct uwb_ie_hdr *ies, size_t len,
+		    char *buf, size_t size);
+int uwb_rc_set_ie(struct uwb_rc *, struct uwb_rc_cmd_set_ie *);
+
 
 extern const char *uwb_rc_strerror(unsigned code);
 
@@ -92,6 +92,12 @@
 
 struct uwb_rc_neh;
 
+extern int uwb_rc_cmd_async(struct uwb_rc *rc, const char *cmd_name,
+			    struct uwb_rccb *cmd, size_t cmd_size,
+			    u8 expected_type, u16 expected_event,
+			    uwb_rc_cmd_cb_f cb, void *arg);
+
+
 void uwb_rc_neh_create(struct uwb_rc *rc);
 void uwb_rc_neh_destroy(struct uwb_rc *rc);
 
@@ -106,7 +112,69 @@
 extern int uwb_est_create(void);
 extern void uwb_est_destroy(void);
 
+/*
+ * UWB conflicting alien reservations
+ */
+struct uwb_cnflt_alien {
+	struct uwb_rc *rc;
+	struct list_head rc_node;
+	struct uwb_mas_bm mas;
+	struct timer_list timer;
+	struct work_struct cnflt_update_work;
+};
 
+enum uwb_uwb_rsv_alloc_result {
+	UWB_RSV_ALLOC_FOUND = 0,
+	UWB_RSV_ALLOC_NOT_FOUND,
+};
+
+enum uwb_rsv_mas_status {
+	UWB_RSV_MAS_NOT_AVAIL = 1,
+	UWB_RSV_MAS_SAFE,
+	UWB_RSV_MAS_UNSAFE,
+};
+
+struct uwb_rsv_col_set_info {
+	unsigned char start_col;
+	unsigned char interval;
+	unsigned char safe_mas_per_col;
+	unsigned char unsafe_mas_per_col;
+};
+
+struct uwb_rsv_col_info {
+	unsigned char max_avail_safe;
+	unsigned char max_avail_unsafe;
+	unsigned char highest_mas[UWB_MAS_PER_ZONE];
+	struct uwb_rsv_col_set_info csi;
+};
+
+struct uwb_rsv_row_info {
+	unsigned char avail[UWB_MAS_PER_ZONE];
+	unsigned char free_rows;
+	unsigned char used_rows;
+};
+
+/*
+ * UWB find allocation
+ */
+struct uwb_rsv_alloc_info {
+	unsigned char bm[UWB_MAS_PER_ZONE * UWB_NUM_ZONES];
+	struct uwb_rsv_col_info ci[UWB_NUM_ZONES];
+	struct uwb_rsv_row_info ri;
+	struct uwb_mas_bm *not_available;
+	struct uwb_mas_bm *result;
+	int min_mas;
+	int max_mas;
+	int max_interval;
+	int total_allocated_mases;
+	int safe_allocated_mases;
+	int unsafe_allocated_mases;
+	int interval;
+};
+
+int uwb_rsv_find_best_allocation(struct uwb_rsv *rsv, struct uwb_mas_bm *available, 
+				 struct uwb_mas_bm *result);
+void uwb_rsv_handle_drp_avail_change(struct uwb_rc *rc);
 /*
  * UWB Events & management daemon
  */
@@ -160,13 +228,14 @@
 	};
 };
 
-extern void uwbd_start(void);
-extern void uwbd_stop(void);
+extern void uwbd_start(struct uwb_rc *rc);
+extern void uwbd_stop(struct uwb_rc *rc);
 extern struct uwb_event *uwb_event_alloc(size_t, gfp_t gfp_mask);
 extern void uwbd_event_queue(struct uwb_event *);
 void uwbd_flush(struct uwb_rc *rc);
 
 /* UWB event handlers */
+extern int uwbd_evt_handle_rc_ie_rcv(struct uwb_event *);
 extern int uwbd_evt_handle_rc_beacon(struct uwb_event *);
 extern int uwbd_evt_handle_rc_beacon_size(struct uwb_event *);
 extern int uwbd_evt_handle_rc_bpoie_change(struct uwb_event *);
@@ -193,15 +262,6 @@
 
 extern unsigned long beacon_timeout_ms;
 
-/** Beacon cache list */
-struct uwb_beca {
-	struct list_head list;
-	size_t entries;
-	struct mutex mutex;
-};
-
-extern struct uwb_beca uwb_beca;
-
 /**
  * Beacon cache entry
  *
@@ -228,9 +288,6 @@
 struct uwb_beacon_frame;
 extern ssize_t uwb_bce_print_IEs(struct uwb_dev *, struct uwb_beca_e *,
 				 char *, size_t);
-extern struct uwb_beca_e *__uwb_beca_add(struct uwb_rc_evt_beacon *,
-					 struct uwb_beacon_frame *,
-					 unsigned long);
 
 extern void uwb_bce_kfree(struct kref *_bce);
 static inline void uwb_bce_get(struct uwb_beca_e *bce)
@@ -241,14 +298,19 @@
 {
 	kref_put(&bce->refcnt, uwb_bce_kfree);
 }
-extern void uwb_beca_purge(void);
-extern void uwb_beca_release(void);
+extern void uwb_beca_purge(struct uwb_rc *rc);
+extern void uwb_beca_release(struct uwb_rc *rc);
 
 struct uwb_dev *uwb_dev_get_by_devaddr(struct uwb_rc *rc,
 				       const struct uwb_dev_addr *devaddr);
 struct uwb_dev *uwb_dev_get_by_macaddr(struct uwb_rc *rc,
 				       const struct uwb_mac_addr *macaddr);
 
+int uwb_radio_setup(struct uwb_rc *rc);
+void uwb_radio_reset_state(struct uwb_rc *rc);
+void uwb_radio_shutdown(struct uwb_rc *rc);
+int uwb_radio_force_channel(struct uwb_rc *rc, int channel);
+
 /* -- UWB Sysfs representation */
 extern struct class uwb_rc_class;
 extern struct device_attribute dev_attr_mac_address;
@@ -259,18 +321,29 @@
 void uwb_rsv_init(struct uwb_rc *rc);
 int uwb_rsv_setup(struct uwb_rc *rc);
 void uwb_rsv_cleanup(struct uwb_rc *rc);
+void uwb_rsv_remove_all(struct uwb_rc *rc);
+void uwb_rsv_get(struct uwb_rsv *rsv);
+void uwb_rsv_put(struct uwb_rsv *rsv);
+bool uwb_rsv_has_two_drp_ies(struct uwb_rsv *rsv);
+void uwb_rsv_dump(char *text, struct uwb_rsv *rsv);
+int uwb_rsv_try_move(struct uwb_rsv *rsv, struct uwb_mas_bm *available);
+void uwb_rsv_backoff_win_timer(unsigned long arg);
+void uwb_rsv_backoff_win_increment(struct uwb_rc *rc);
+int uwb_rsv_status(struct uwb_rsv *rsv);
+int uwb_rsv_companion_status(struct uwb_rsv *rsv);
 
 void uwb_rsv_set_state(struct uwb_rsv *rsv, enum uwb_rsv_state new_state);
 void uwb_rsv_remove(struct uwb_rsv *rsv);
 struct uwb_rsv *uwb_rsv_find(struct uwb_rc *rc, struct uwb_dev *src,
 			     struct uwb_ie_drp *drp_ie);
 void uwb_rsv_sched_update(struct uwb_rc *rc);
+void uwb_rsv_queue_update(struct uwb_rc *rc);
 
-void uwb_drp_handle_timeout(struct uwb_rsv *rsv);
 int uwb_drp_ie_update(struct uwb_rsv *rsv);
 void uwb_drp_ie_to_bm(struct uwb_mas_bm *bm, const struct uwb_ie_drp *drp_ie);
 
 void uwb_drp_avail_init(struct uwb_rc *rc);
+void uwb_drp_available(struct uwb_rc *rc, struct uwb_mas_bm *avail);
 int  uwb_drp_avail_reserve_pending(struct uwb_rc *rc, struct uwb_mas_bm *mas);
 void uwb_drp_avail_reserve(struct uwb_rc *rc, struct uwb_mas_bm *mas);
 void uwb_drp_avail_release(struct uwb_rc *rc, struct uwb_mas_bm *mas);
@@ -289,8 +362,7 @@
 void uwb_dbg_exit(void);
 void uwb_dbg_add_rc(struct uwb_rc *rc);
 void uwb_dbg_del_rc(struct uwb_rc *rc);
-
-/* Workarounds for version specific stuff */
+struct dentry *uwb_dbg_create_pal_dir(struct uwb_pal *pal);
 
 static inline void uwb_dev_lock(struct uwb_dev *uwb_dev)
 {
diff --git a/drivers/uwb/uwbd.c b/drivers/uwb/uwbd.c
index 7890841..57bd6bf 100644
--- a/drivers/uwb/uwbd.c
+++ b/drivers/uwb/uwbd.c
@@ -68,17 +68,13 @@
  *
  * Handler functions are called normally uwbd_evt_handle_*().
  */
-
 #include <linux/kthread.h>
 #include <linux/module.h>
 #include <linux/freezer.h>
+
 #include "uwb-internal.h"
 
-#define D_LOCAL 1
-#include <linux/uwb/debug.h>
-
-
-/**
+/*
  * UWBD Event handler function signature
  *
  * Return !0 if the event needs not to be freed (ie the handler
@@ -101,9 +97,12 @@
 	const char *name;
 };
 
-/** Table of handlers for and properties of the UWBD Radio Control Events */
-static
-struct uwbd_event uwbd_events[] = {
+/* Table of handlers for and properties of the UWBD Radio Control Events */
+static struct uwbd_event uwbd_urc_events[] = {
+	[UWB_RC_EVT_IE_RCV] = {
+		.handler = uwbd_evt_handle_rc_ie_rcv,
+		.name = "IE_RECEIVED"
+	},
 	[UWB_RC_EVT_BEACON] = {
 		.handler = uwbd_evt_handle_rc_beacon,
 		.name = "BEACON_RECEIVED"
@@ -142,23 +141,15 @@
 	size_t size;
 };
 
-#define UWBD_EVT_TYPE_HANDLER(n,a) {		\
-	.name = (n),				\
-	.uwbd_events = (a),			\
-	.size = sizeof(a)/sizeof((a)[0])	\
-}
-
-
-/** Table of handlers for each UWBD Event type. */
-static
-struct uwbd_evt_type_handler uwbd_evt_type_handlers[] = {
-	[UWB_RC_CET_GENERAL] = UWBD_EVT_TYPE_HANDLER("RC", uwbd_events)
+/* Table of handlers for each UWBD Event type. */
+static struct uwbd_evt_type_handler uwbd_urc_evt_type_handlers[] = {
+	[UWB_RC_CET_GENERAL] = {
+		.name        = "URC",
+		.uwbd_events = uwbd_urc_events,
+		.size        = ARRAY_SIZE(uwbd_urc_events),
+	},
 };
 
-static const
-size_t uwbd_evt_type_handlers_len =
-	sizeof(uwbd_evt_type_handlers) / sizeof(uwbd_evt_type_handlers[0]);
-
 static const struct uwbd_event uwbd_message_handlers[] = {
 	[UWB_EVT_MSG_RESET] = {
 		.handler = uwbd_msg_handle_reset,
@@ -166,9 +157,7 @@
 	},
 };
 
-static DEFINE_MUTEX(uwbd_event_mutex);
-
-/**
+/*
  * Handle an URC event passed to the UWB Daemon
  *
  * @evt: the event to handle
@@ -188,6 +177,7 @@
 static
 int uwbd_event_handle_urc(struct uwb_event *evt)
 {
+	int result = -EINVAL;
 	struct uwbd_evt_type_handler *type_table;
 	uwbd_evt_handler_f handler;
 	u8 type, context;
@@ -197,26 +187,24 @@
 	event = le16_to_cpu(evt->notif.rceb->wEvent);
 	context = evt->notif.rceb->bEventContext;
 
-	if (type > uwbd_evt_type_handlers_len) {
-		printk(KERN_ERR "UWBD: event type %u: unknown (too high)\n", type);
-		return -EINVAL;
-	}
-	type_table = &uwbd_evt_type_handlers[type];
-	if (type_table->uwbd_events == NULL) {
-		printk(KERN_ERR "UWBD: event type %u: unknown\n", type);
-		return -EINVAL;
-	}
-	if (event > type_table->size) {
-		printk(KERN_ERR "UWBD: event %s[%u]: unknown (too high)\n",
-		       type_table->name, event);
-		return -EINVAL;
-	}
+	if (type > ARRAY_SIZE(uwbd_urc_evt_type_handlers))
+		goto out;
+	type_table = &uwbd_urc_evt_type_handlers[type];
+	if (type_table->uwbd_events == NULL)
+		goto out;
+	if (event > type_table->size)
+		goto out;
 	handler = type_table->uwbd_events[event].handler;
-	if (handler == NULL) {
-		printk(KERN_ERR "UWBD: event %s[%u]: unknown\n", type_table->name, event);
-		return -EINVAL;
-	}
-	return (*handler)(evt);
+	if (handler == NULL)
+		goto out;
+
+	result = (*handler)(evt);
+out:
+	if (result < 0)
+		dev_err(&evt->rc->uwb_dev.dev,
+			"UWBD: event 0x%02x/%04x/%02x, handling failed: %d\n",
+			type, event, context, result);
+	return result;
 }
 
 static void uwbd_event_handle_message(struct uwb_event *evt)
@@ -231,19 +219,10 @@
 		return;
 	}
 
-	/* If this is a reset event we need to drop the
-	 * uwbd_event_mutex or it deadlocks when the reset handler
-	 * attempts to flush the uwbd events. */
-	if (evt->message == UWB_EVT_MSG_RESET)
-		mutex_unlock(&uwbd_event_mutex);
-
 	result = uwbd_message_handlers[evt->message].handler(evt);
 	if (result < 0)
 		dev_err(&rc->uwb_dev.dev, "UWBD: '%s' message failed: %d\n",
 			uwbd_message_handlers[evt->message].name, result);
-
-	if (evt->message == UWB_EVT_MSG_RESET)
-		mutex_lock(&uwbd_event_mutex);
 }
 
 static void uwbd_event_handle(struct uwb_event *evt)
@@ -271,20 +250,6 @@
 
 	__uwb_rc_put(rc);	/* for the __uwb_rc_get() in uwb_rc_notif_cb() */
 }
-/* The UWB Daemon */
-
-
-/** Daemon's PID: used to decide if we can queue or not */
-static int uwbd_pid;
-/** Daemon's task struct for managing the kthread */
-static struct task_struct *uwbd_task;
-/** Daemon's waitqueue for waiting for new events */
-static DECLARE_WAIT_QUEUE_HEAD(uwbd_wq);
-/** Daemon's list of events; we queue/dequeue here */
-static struct list_head uwbd_event_list = LIST_HEAD_INIT(uwbd_event_list);
-/** Daemon's list lock to protect concurent access */
-static DEFINE_SPINLOCK(uwbd_event_list_lock);
-
 
 /**
  * UWB Daemon
@@ -298,65 +263,58 @@
  * FIXME: should change so we don't have a 1HZ timer all the time, but
  *        only if there are devices.
  */
-static int uwbd(void *unused)
+static int uwbd(void *param)
 {
+	struct uwb_rc *rc = param;
 	unsigned long flags;
-	struct list_head list = LIST_HEAD_INIT(list);
-	struct uwb_event *evt, *nxt;
+	struct uwb_event *evt;
 	int should_stop = 0;
+
 	while (1) {
 		wait_event_interruptible_timeout(
-			uwbd_wq,
-			!list_empty(&uwbd_event_list)
+			rc->uwbd.wq,
+			!list_empty(&rc->uwbd.event_list)
 			  || (should_stop = kthread_should_stop()),
 			HZ);
 		if (should_stop)
 			break;
 		try_to_freeze();
 
-		mutex_lock(&uwbd_event_mutex);
-		spin_lock_irqsave(&uwbd_event_list_lock, flags);
-		list_splice_init(&uwbd_event_list, &list);
-		spin_unlock_irqrestore(&uwbd_event_list_lock, flags);
-		list_for_each_entry_safe(evt, nxt, &list, list_node) {
+		spin_lock_irqsave(&rc->uwbd.event_list_lock, flags);
+		if (!list_empty(&rc->uwbd.event_list)) {
+			evt = list_first_entry(&rc->uwbd.event_list, struct uwb_event, list_node);
 			list_del(&evt->list_node);
+		} else
+			evt = NULL;
+		spin_unlock_irqrestore(&rc->uwbd.event_list_lock, flags);
+
+		if (evt) {
 			uwbd_event_handle(evt);
 			kfree(evt);
 		}
-		mutex_unlock(&uwbd_event_mutex);
 
-		uwb_beca_purge();	/* Purge devices that left */
+		uwb_beca_purge(rc);	/* Purge devices that left */
 	}
 	return 0;
 }
 
 
 /** Start the UWB daemon */
-void uwbd_start(void)
+void uwbd_start(struct uwb_rc *rc)
 {
-	uwbd_task = kthread_run(uwbd, NULL, "uwbd");
-	if (uwbd_task == NULL)
+	rc->uwbd.task = kthread_run(uwbd, rc, "uwbd");
+	if (rc->uwbd.task == NULL)
 		printk(KERN_ERR "UWB: Cannot start management daemon; "
 		       "UWB won't work\n");
 	else
-		uwbd_pid = uwbd_task->pid;
+		rc->uwbd.pid = rc->uwbd.task->pid;
 }
 
 /* Stop the UWB daemon and free any unprocessed events */
-void uwbd_stop(void)
+void uwbd_stop(struct uwb_rc *rc)
 {
-	unsigned long flags;
-	struct uwb_event *evt, *nxt;
-	kthread_stop(uwbd_task);
-	spin_lock_irqsave(&uwbd_event_list_lock, flags);
-	uwbd_pid = 0;
-	list_for_each_entry_safe(evt, nxt, &uwbd_event_list, list_node) {
-		if (evt->type == UWB_EVT_TYPE_NOTIF)
-			kfree(evt->notif.rceb);
-		kfree(evt);
-	}
-	spin_unlock_irqrestore(&uwbd_event_list_lock, flags);
-	uwb_beca_release();
+	kthread_stop(rc->uwbd.task);
+	uwbd_flush(rc);
 }
 
 /*
@@ -373,18 +331,20 @@
  */
 void uwbd_event_queue(struct uwb_event *evt)
 {
+	struct uwb_rc *rc = evt->rc;
 	unsigned long flags;
-	spin_lock_irqsave(&uwbd_event_list_lock, flags);
-	if (uwbd_pid != 0) {
-		list_add(&evt->list_node, &uwbd_event_list);
-		wake_up_all(&uwbd_wq);
+
+	spin_lock_irqsave(&rc->uwbd.event_list_lock, flags);
+	if (rc->uwbd.pid != 0) {
+		list_add(&evt->list_node, &rc->uwbd.event_list);
+		wake_up_all(&rc->uwbd.wq);
 	} else {
 		__uwb_rc_put(evt->rc);
 		if (evt->type == UWB_EVT_TYPE_NOTIF)
 			kfree(evt->notif.rceb);
 		kfree(evt);
 	}
-	spin_unlock_irqrestore(&uwbd_event_list_lock, flags);
+	spin_unlock_irqrestore(&rc->uwbd.event_list_lock, flags);
 	return;
 }
 
@@ -392,10 +352,8 @@
 {
 	struct uwb_event *evt, *nxt;
 
-	mutex_lock(&uwbd_event_mutex);
-
-	spin_lock_irq(&uwbd_event_list_lock);
-	list_for_each_entry_safe(evt, nxt, &uwbd_event_list, list_node) {
+	spin_lock_irq(&rc->uwbd.event_list_lock);
+	list_for_each_entry_safe(evt, nxt, &rc->uwbd.event_list, list_node) {
 		if (evt->rc == rc) {
 			__uwb_rc_put(rc);
 			list_del(&evt->list_node);
@@ -404,7 +362,5 @@
 			kfree(evt);
 		}
 	}
-	spin_unlock_irq(&uwbd_event_list_lock);
-
-	mutex_unlock(&uwbd_event_mutex);
+	spin_unlock_irq(&rc->uwbd.event_list_lock);
 }
diff --git a/drivers/uwb/whc-rc.c b/drivers/uwb/whc-rc.c
index 1711dea..19a1dd1 100644
--- a/drivers/uwb/whc-rc.c
+++ b/drivers/uwb/whc-rc.c
@@ -39,7 +39,6 @@
  * them to the hw and transfer the replies/notifications back to the
  * UWB stack through the UWB daemon (UWBD).
  */
-#include <linux/version.h>
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/pci.h>
@@ -49,10 +48,8 @@
 #include <linux/uwb.h>
 #include <linux/uwb/whci.h>
 #include <linux/uwb/umc.h>
-#include "uwb-internal.h"
 
-#define D_LOCAL 0
-#include <linux/uwb/debug.h>
+#include "uwb-internal.h"
 
 /**
  * Descriptor for an instance of the UWB Radio Control Driver that
@@ -98,13 +95,8 @@
 	struct device *dev = &whcrc->umc_dev->dev;
 	u32 urccmd;
 
-	d_fnstart(3, dev, "(%p, %p, %zu)\n", uwb_rc, cmd, cmd_size);
-	might_sleep();
-
-	if (cmd_size >= 4096) {
-		result = -E2BIG;
-		goto error;
-	}
+	if (cmd_size >= 4096)
+		return -EINVAL;
 
 	/*
 	 * If the URC is halted, then the hardware has reset itself.
@@ -115,16 +107,14 @@
 	if (le_readl(whcrc->rc_base + URCSTS) & URCSTS_HALTED) {
 		dev_err(dev, "requesting reset of halted radio controller\n");
 		uwb_rc_reset_all(uwb_rc);
-		result = -EIO;
-		goto error;
+		return -EIO;
 	}
 
 	result = wait_event_timeout(whcrc->cmd_wq,
 		!(le_readl(whcrc->rc_base + URCCMD) & URCCMD_ACTIVE), HZ/2);
 	if (result == 0) {
 		dev_err(dev, "device is not ready to execute commands\n");
-		result = -ETIMEDOUT;
-		goto error;
+		return -ETIMEDOUT;
 	}
 
 	memmove(whcrc->cmd_buf, cmd, cmd_size);
@@ -137,10 +127,7 @@
 		  whcrc->rc_base + URCCMD);
 	spin_unlock(&whcrc->irq_lock);
 
-error:
-	d_fnend(3, dev, "(%p, %p, %zu) = %d\n",
-		uwb_rc, cmd, cmd_size, result);
-	return result;
+	return 0;
 }
 
 static int whcrc_reset(struct uwb_rc *rc)
@@ -167,34 +154,25 @@
 static
 void whcrc_enable_events(struct whcrc *whcrc)
 {
-	struct device *dev = &whcrc->umc_dev->dev;
 	u32 urccmd;
 
-	d_fnstart(4, dev, "(whcrc %p)\n", whcrc);
-
 	le_writeq(whcrc->evt_dma_buf, whcrc->rc_base + URCEVTADDR);
 
 	spin_lock(&whcrc->irq_lock);
 	urccmd = le_readl(whcrc->rc_base + URCCMD) & ~URCCMD_ACTIVE;
 	le_writel(urccmd | URCCMD_EARV, whcrc->rc_base + URCCMD);
 	spin_unlock(&whcrc->irq_lock);
-
-	d_fnend(4, dev, "(whcrc %p) = void\n", whcrc);
 }
 
 static void whcrc_event_work(struct work_struct *work)
 {
 	struct whcrc *whcrc = container_of(work, struct whcrc, event_work);
-	struct device *dev = &whcrc->umc_dev->dev;
 	size_t size;
 	u64 urcevtaddr;
 
 	urcevtaddr = le_readq(whcrc->rc_base + URCEVTADDR);
 	size = urcevtaddr & URCEVTADDR_OFFSET_MASK;
 
-	d_printf(3, dev, "received %zu octet event\n", size);
-	d_dump(4, dev, whcrc->evt_buf, size > 32 ? 32 : size);
-
 	uwb_rc_neh_grok(whcrc->uwb_rc, whcrc->evt_buf, size);
 	whcrc_enable_events(whcrc);
 }
@@ -217,22 +195,15 @@
 		return IRQ_NONE;
 	le_writel(urcsts & URCSTS_INT_MASK, whcrc->rc_base + URCSTS);
 
-	d_printf(4, dev, "acked 0x%08x, urcsts 0x%08x\n",
-		 le_readl(whcrc->rc_base + URCSTS), urcsts);
-
 	if (urcsts & URCSTS_HSE) {
 		dev_err(dev, "host system error -- hardware halted\n");
 		/* FIXME: do something sensible here */
 		goto out;
 	}
-	if (urcsts & URCSTS_ER) {
-		d_printf(3, dev, "ER: event ready\n");
+	if (urcsts & URCSTS_ER)
 		schedule_work(&whcrc->event_work);
-	}
-	if (urcsts & URCSTS_RCI) {
-		d_printf(3, dev, "RCI: ready to execute another command\n");
+	if (urcsts & URCSTS_RCI)
 		wake_up_all(&whcrc->cmd_wq);
-	}
 out:
 	return IRQ_HANDLED;
 }
@@ -251,8 +222,7 @@
 	whcrc->area = umc_dev->resource.start;
 	whcrc->rc_len = umc_dev->resource.end - umc_dev->resource.start + 1;
 	result = -EBUSY;
-	if (request_mem_region(whcrc->area, whcrc->rc_len, KBUILD_MODNAME)
-	    == NULL) {
+	if (request_mem_region(whcrc->area, whcrc->rc_len, KBUILD_MODNAME) == NULL) {
 		dev_err(dev, "can't request URC region (%zu bytes @ 0x%lx): %d\n",
 			whcrc->rc_len, whcrc->area, result);
 		goto error_request_region;
@@ -287,8 +257,6 @@
 		dev_err(dev, "Can't allocate evt transfer buffer\n");
 		goto error_evt_buffer;
 	}
-	d_printf(3, dev, "UWB RC Interface: %zu bytes at 0x%p, irq %u\n",
-		 whcrc->rc_len, whcrc->rc_base, umc_dev->irq);
 	return 0;
 
 error_evt_buffer:
@@ -333,47 +301,23 @@
 static int whcrc_start_rc(struct uwb_rc *rc)
 {
 	struct whcrc *whcrc = rc->priv;
-	int result = 0;
 	struct device *dev = &whcrc->umc_dev->dev;
-	unsigned long start, duration;
 
 	/* Reset the thing */
 	le_writel(URCCMD_RESET, whcrc->rc_base + URCCMD);
-	if (d_test(3))
-		start = jiffies;
 	if (whci_wait_for(dev, whcrc->rc_base + URCCMD, URCCMD_RESET, 0,
-			  5000, "device to reset at init") < 0) {
-		result = -EBUSY;
-		goto error;
-	} else if (d_test(3)) {
-		duration = jiffies - start;
-		if (duration > msecs_to_jiffies(40))
-			dev_err(dev, "Device took %ums to "
-				     "reset. MAX expected: 40ms\n",
-				     jiffies_to_msecs(duration));
-	}
+			  5000, "hardware reset") < 0)
+		return -EBUSY;
 
 	/* Set the event buffer, start the controller (enable IRQs later) */
 	le_writel(0, whcrc->rc_base + URCINTR);
 	le_writel(URCCMD_RS, whcrc->rc_base + URCCMD);
-	result = -ETIMEDOUT;
-	if (d_test(3))
-		start = jiffies;
 	if (whci_wait_for(dev, whcrc->rc_base + URCSTS, URCSTS_HALTED, 0,
-			  5000, "device to start") < 0)
-		goto error;
-	if (d_test(3)) {
-		duration = jiffies - start;
-		if (duration > msecs_to_jiffies(40))
-			dev_err(dev, "Device took %ums to start. "
-				     "MAX expected: 40ms\n",
-				     jiffies_to_msecs(duration));
-	}
+			  5000, "radio controller start") < 0)
+		return -ETIMEDOUT;
 	whcrc_enable_events(whcrc);
-	result = 0;
 	le_writel(URCINTR_EN_ALL, whcrc->rc_base + URCINTR);
-error:
-	return result;
+	return 0;
 }
 
 
@@ -395,7 +339,7 @@
 
 	le_writel(0, whcrc->rc_base + URCCMD);
 	whci_wait_for(&umc_dev->dev, whcrc->rc_base + URCSTS,
-		      URCSTS_HALTED, 0, 40, "URCSTS.HALTED");
+		      URCSTS_HALTED, URCSTS_HALTED, 100, "radio controller stop");
 }
 
 static void whcrc_init(struct whcrc *whcrc)
@@ -421,7 +365,6 @@
 	struct whcrc *whcrc;
 	struct device *dev = &umc_dev->dev;
 
-	d_fnstart(3, dev, "(umc_dev %p)\n", umc_dev);
 	result = -ENOMEM;
 	uwb_rc = uwb_rc_alloc();
 	if (uwb_rc == NULL) {
@@ -453,7 +396,6 @@
 	if (result < 0)
 		goto error_rc_add;
 	umc_set_drvdata(umc_dev, whcrc);
-	d_fnend(3, dev, "(umc_dev %p) = 0\n", umc_dev);
 	return 0;
 
 error_rc_add:
@@ -463,7 +405,6 @@
 error_alloc:
 	uwb_rc_put(uwb_rc);
 error_rc_alloc:
-	d_fnend(3, dev, "(umc_dev %p) = %d\n", umc_dev, result);
 	return result;
 }
 
@@ -486,7 +427,24 @@
 	whcrc_release_rc_umc(whcrc);
 	kfree(whcrc);
 	uwb_rc_put(uwb_rc);
-	d_printf(1, &umc_dev->dev, "freed whcrc %p\n", whcrc);
+}
+
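+/*
+ * UMC bus reset hooks: pass pre- and post-reset notifications from the
+ * bus on to the UWB stack via uwb_rc_pre_reset()/uwb_rc_post_reset().
+ */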
+static int whcrc_pre_reset(struct umc_dev *umc)
+{
+	struct whcrc *whcrc = umc_get_drvdata(umc);
+	struct uwb_rc *uwb_rc = whcrc->uwb_rc;
+
+	uwb_rc_pre_reset(uwb_rc);
+	return 0;
+}
+
+static int whcrc_post_reset(struct umc_dev *umc)
+{
+	struct whcrc *whcrc = umc_get_drvdata(umc);
+	struct uwb_rc *uwb_rc = whcrc->uwb_rc;
+
+	uwb_rc_post_reset(uwb_rc);
+	return 0;
 }
 
 /* PCI device ID's that we handle [so it gets loaded] */
@@ -497,10 +455,12 @@
 MODULE_DEVICE_TABLE(pci, whcrc_id_table);
 
 static struct umc_driver whcrc_driver = {
-	.name   = "whc-rc",
-	.cap_id = UMC_CAP_ID_WHCI_RC,
-	.probe  = whcrc_probe,
-	.remove = whcrc_remove,
+	.name       = "whc-rc",
+	.cap_id     = UMC_CAP_ID_WHCI_RC,
+	.probe      = whcrc_probe,
+	.remove     = whcrc_remove,
+	.pre_reset  = whcrc_pre_reset,
+	.post_reset = whcrc_post_reset,
 };
 
 static int __init whcrc_driver_init(void)
diff --git a/drivers/uwb/whci.c b/drivers/uwb/whci.c
index 3df2388..1f8964e 100644
--- a/drivers/uwb/whci.c
+++ b/drivers/uwb/whci.c
@@ -67,11 +67,11 @@
 		val = le_readl(reg);
 		if ((val & mask) == result)
 			break;
-		msleep(10);
 		if (t >= max_ms) {
-			dev_err(dev, "timed out waiting for %s ", tag);
+			dev_err(dev, "%s timed out\n", tag);
 			return -ETIMEDOUT;
 		}
+		msleep(10);
 		t += 10;
 	}
 	return 0;
@@ -111,7 +111,7 @@
 		+ UWBCAPDATA_TO_OFFSET(capdata);
 	umc->resource.end    = umc->resource.start
 		+ (n == 0 ? 0x20 : UWBCAPDATA_TO_SIZE(capdata)) - 1;
-	umc->resource.name   = umc->dev.bus_id;
+	umc->resource.name   = dev_name(&umc->dev);
 	umc->resource.flags  = card->pci->resource[bar].flags;
 	umc->resource.parent = &card->pci->resource[bar];
 	umc->irq             = card->pci->irq;
diff --git a/drivers/uwb/wlp/eda.c b/drivers/uwb/wlp/eda.c
index 10985fa..69e0200 100644
--- a/drivers/uwb/wlp/eda.c
+++ b/drivers/uwb/wlp/eda.c
@@ -51,9 +51,7 @@
  * the tag and address of the transmitting neighbor.
  */
 
-#define D_LOCAL 5
 #include <linux/netdevice.h>
-#include <linux/uwb/debug.h>
 #include <linux/etherdevice.h>
 #include <linux/wlp.h>
 #include "wlp-internal.h"
@@ -304,7 +302,6 @@
 {
 	int result = 0;
 	struct wlp *wlp = container_of(eda, struct wlp, eda);
-	struct device *dev = &wlp->rc->uwb_dev.dev;
 	struct wlp_eda_node *itr;
 	unsigned long flags;
 	int found = 0;
@@ -313,26 +310,14 @@
 	list_for_each_entry(itr, &eda->cache, list_node) {
 		if (!memcmp(itr->virt_addr, virt_addr,
 			   sizeof(itr->virt_addr))) {
-			d_printf(6, dev, "EDA: looking for %pM hit %02x:%02x "
-			       "wss %p tag 0x%02x state %u\n",
-			       virt_addr,
-			       itr->dev_addr.data[1],
-			       itr->dev_addr.data[0], itr->wss,
-			       itr->tag, itr->state);
 			result = (*function)(wlp, itr, priv);
 			*dev_addr = itr->dev_addr;
 			found = 1;
 			break;
-		} else
-			d_printf(6, dev, "EDA: looking for %pM against %pM miss\n",
-			         virt_addr, itr->virt_addr);
+		}
 	}
-	if (!found) {
-		if (printk_ratelimit())
-			dev_err(dev, "EDA: Eth addr %pM not found.\n",
-				virt_addr);
+	if (!found)
 		result = -ENODEV;
-	}
 	spin_unlock_irqrestore(&eda->lock, flags);
 	return result;
 }
diff --git a/drivers/uwb/wlp/messages.c b/drivers/uwb/wlp/messages.c
index a64cb82..aa42fce 100644
--- a/drivers/uwb/wlp/messages.c
+++ b/drivers/uwb/wlp/messages.c
@@ -24,8 +24,7 @@
  */
 
 #include <linux/wlp.h>
-#define D_LOCAL 6
-#include <linux/uwb/debug.h>
+
 #include "wlp-internal.h"
 
 static
@@ -105,24 +104,18 @@
 #define wlp_set(type, type_code, name)					\
 static size_t wlp_set_##name(struct wlp_attr_##name *attr, type value)	\
 {									\
-	d_fnstart(6, NULL, "(attribute %p)\n", attr);			\
 	wlp_set_attr_hdr(&attr->hdr, type_code,				\
 			 sizeof(*attr) - sizeof(struct wlp_attr_hdr));	\
 	attr->name = value;						\
-	d_dump(6, NULL, attr, sizeof(*attr));				\
-	d_fnend(6, NULL, "(attribute %p)\n", attr);			\
 	return sizeof(*attr);						\
 }
 
 #define wlp_pset(type, type_code, name)					\
 static size_t wlp_set_##name(struct wlp_attr_##name *attr, type value)	\
 {									\
-	d_fnstart(6, NULL, "(attribute %p)\n", attr);			\
 	wlp_set_attr_hdr(&attr->hdr, type_code,				\
 			 sizeof(*attr) - sizeof(struct wlp_attr_hdr));	\
 	attr->name = *value;						\
-	d_dump(6, NULL, attr, sizeof(*attr));				\
-	d_fnend(6, NULL, "(attribute %p)\n", attr);			\
 	return sizeof(*attr);						\
 }
 
@@ -139,11 +132,8 @@
 static size_t wlp_set_##name(struct wlp_attr_##name *attr, type value,	\
 				size_t len)				\
 {									\
-	d_fnstart(6, NULL, "(attribute %p)\n", attr);			\
 	wlp_set_attr_hdr(&attr->hdr, type_code, len);			\
 	memcpy(attr->name, value, len);					\
-	d_dump(6, NULL, attr, sizeof(*attr) + len);			\
-	d_fnend(6, NULL, "(attribute %p)\n", attr);			\
 	return sizeof(*attr) + len;					\
 }
 
@@ -182,7 +172,7 @@
 	size_t datalen;
 	void *ptr = attr->wss_info;
 	size_t used = sizeof(*attr);
-	d_fnstart(6, NULL, "(attribute %p)\n", attr);
+
 	datalen = sizeof(struct wlp_wss_info) + strlen(wss->name);
 	wlp_set_attr_hdr(&attr->hdr, WLP_ATTR_WSS_INFO, datalen);
 	used = wlp_set_wssid(ptr, &wss->wssid);
@@ -190,9 +180,6 @@
 	used += wlp_set_accept_enrl(ptr + used, wss->accept_enroll);
 	used += wlp_set_wss_sec_status(ptr + used, wss->secure_status);
 	used += wlp_set_wss_bcast(ptr + used, &wss->bcast);
-	d_dump(6, NULL, attr, sizeof(*attr) + datalen);
-	d_fnend(6, NULL, "(attribute %p, used %d)\n",
-		attr, (int)(sizeof(*attr) + used));
 	return sizeof(*attr) + used;
 }
 
@@ -414,7 +401,6 @@
 	size_t used = 0;
 	ssize_t result = -EINVAL;
 
-	d_printf(6, dev, "WLP: WSS info: Retrieving WSS name\n");
 	result = wlp_get_wss_name(wlp, ptr, info->name, buflen);
 	if (result < 0) {
 		dev_err(dev, "WLP: unable to obtain WSS name from "
@@ -422,7 +408,7 @@
 		goto error_parse;
 	}
 	used += result;
-	d_printf(6, dev, "WLP: WSS info: Retrieving accept enroll\n");
+
 	result = wlp_get_accept_enrl(wlp, ptr + used, &info->accept_enroll,
 				     buflen - used);
 	if (result < 0) {
@@ -437,7 +423,7 @@
 		goto error_parse;
 	}
 	used += result;
-	d_printf(6, dev, "WLP: WSS info: Retrieving secure status\n");
+
 	result = wlp_get_wss_sec_status(wlp, ptr + used, &info->sec_status,
 					buflen - used);
 	if (result < 0) {
@@ -452,7 +438,7 @@
 		goto error_parse;
 	}
 	used += result;
-	d_printf(6, dev, "WLP: WSS info: Retrieving broadcast\n");
+
 	result = wlp_get_wss_bcast(wlp, ptr + used, &info->bcast,
 				   buflen - used);
 	if (result < 0) {
@@ -530,7 +516,7 @@
 	len = result;
 	used = sizeof(*attr);
 	ptr = attr;
-	d_printf(6, dev, "WLP: WSS info: Retrieving WSSID\n");
+
 	result = wlp_get_wssid(wlp, ptr + used, wssid, buflen - used);
 	if (result < 0) {
 		dev_err(dev, "WLP: unable to obtain WSSID from WSS info.\n");
@@ -553,8 +539,6 @@
 		goto out;
 	}
 	result = used;
-	d_printf(6, dev, "WLP: Successfully parsed WLP information "
-		 "attribute. used %zu bytes\n", used);
 out:
 	return result;
 }
@@ -598,8 +582,6 @@
 	struct wlp_wssid_e *wssid_e;
 	char buf[WLP_WSS_UUID_STRSIZE];
 
-	d_fnstart(6, dev, "wlp %p, attr %p, neighbor %p, wss %p, buflen %d \n",
-		  wlp, attr, neighbor, wss, (int)buflen);
 	if (buflen < 0)
 		goto out;
 
@@ -638,8 +620,7 @@
 			wss->accept_enroll = wss_info.accept_enroll;
 			wss->state = WLP_WSS_STATE_PART_ENROLLED;
 			wlp_wss_uuid_print(buf, sizeof(buf), &wssid);
-			d_printf(2, dev, "WLP: Found WSS %s. Enrolling.\n",
-				 buf);
+			dev_dbg(dev, "WLP: Found WSS %s. Enrolling.\n", buf);
 		} else {
 			wssid_e = wlp_create_wssid_e(wlp, neighbor);
 			if (wssid_e == NULL) {
@@ -660,9 +641,6 @@
 	if (result < 0 && !enroll) /* this was a discovery */
 		wlp_remove_neighbor_tmp_info(neighbor);
 out:
-	d_fnend(6, dev, "wlp %p, attr %p, neighbor %p, wss %p, buflen %d, "
-		"result %d \n", wlp, attr, neighbor, wss, (int)buflen,
-		(int)result);
 	return result;
 
 }
@@ -718,7 +696,6 @@
 	struct sk_buff *_skb;
 	void *d1_itr;
 
-	d_fnstart(6, dev, "wlp %p\n", wlp);
 	if (wlp->dev_info == NULL) {
 		result = __wlp_setup_device_info(wlp);
 		if (result < 0) {
@@ -728,24 +705,6 @@
 		}
 	}
 	info = wlp->dev_info;
-	d_printf(6, dev, "Local properties:\n"
-		 "Device name (%d bytes): %s\n"
-		 "Model name (%d bytes): %s\n"
-		 "Manufacturer (%d bytes): %s\n"
-		 "Model number (%d bytes): %s\n"
-		 "Serial number (%d bytes): %s\n"
-		 "Primary device type: \n"
-		 " Category: %d \n"
-		 " OUI: %02x:%02x:%02x \n"
-		 " OUI Subdivision: %u \n",
-		 (int)strlen(info->name), info->name,
-		 (int)strlen(info->model_name), info->model_name,
-		 (int)strlen(info->manufacturer), info->manufacturer,
-		 (int)strlen(info->model_nr),  info->model_nr,
-		 (int)strlen(info->serial), info->serial,
-		 info->prim_dev_type.category,
-		 info->prim_dev_type.OUI[0], info->prim_dev_type.OUI[1],
-		 info->prim_dev_type.OUI[2], info->prim_dev_type.OUIsubdiv);
 	_skb = dev_alloc_skb(sizeof(*_d1)
 		      + sizeof(struct wlp_attr_uuid_e)
 		      + sizeof(struct wlp_attr_wss_sel_mthd)
@@ -768,7 +727,6 @@
 		goto error;
 	}
 	_d1 = (void *) _skb->data;
-	d_printf(6, dev, "D1 starts at %p \n", _d1);
 	_d1->hdr.mux_hdr = cpu_to_le16(WLP_PROTOCOL_ID);
 	_d1->hdr.type = WLP_FRAME_ASSOCIATION;
 	_d1->type = WLP_ASSOC_D1;
@@ -791,25 +749,8 @@
 	used += wlp_set_prim_dev_type(d1_itr + used, &info->prim_dev_type);
 	used += wlp_set_wlp_assc_err(d1_itr + used, WLP_ASSOC_ERROR_NONE);
 	skb_put(_skb, sizeof(*_d1) + used);
-	d_printf(6, dev, "D1 message:\n");
-	d_dump(6, dev, _d1, sizeof(*_d1)
-		     + sizeof(struct wlp_attr_uuid_e)
-		     + sizeof(struct wlp_attr_wss_sel_mthd)
-		     + sizeof(struct wlp_attr_dev_name)
-		     + strlen(info->name)
-		     + sizeof(struct wlp_attr_manufacturer)
-		     + strlen(info->manufacturer)
-		     + sizeof(struct wlp_attr_model_name)
-		     + strlen(info->model_name)
-		     + sizeof(struct wlp_attr_model_nr)
-		     + strlen(info->model_nr)
-		     + sizeof(struct wlp_attr_serial)
-		     + strlen(info->serial)
-		     + sizeof(struct wlp_attr_prim_dev_type)
-		     + sizeof(struct wlp_attr_wlp_assc_err));
 	*skb = _skb;
 error:
-	d_fnend(6, dev, "wlp %p, result = %d\n", wlp, result);
 	return result;
 }
 
@@ -837,7 +778,6 @@
 	void *d2_itr;
 	size_t mem_needed;
 
-	d_fnstart(6, dev, "wlp %p\n", wlp);
 	if (wlp->dev_info == NULL) {
 		result = __wlp_setup_device_info(wlp);
 		if (result < 0) {
@@ -847,24 +787,6 @@
 		}
 	}
 	info = wlp->dev_info;
-	d_printf(6, dev, "Local properties:\n"
-		 "Device name (%d bytes): %s\n"
-		 "Model name (%d bytes): %s\n"
-		 "Manufacturer (%d bytes): %s\n"
-		 "Model number (%d bytes): %s\n"
-		 "Serial number (%d bytes): %s\n"
-		 "Primary device type: \n"
-		 " Category: %d \n"
-		 " OUI: %02x:%02x:%02x \n"
-		 " OUI Subdivision: %u \n",
-		 (int)strlen(info->name), info->name,
-		 (int)strlen(info->model_name), info->model_name,
-		 (int)strlen(info->manufacturer), info->manufacturer,
-		 (int)strlen(info->model_nr),  info->model_nr,
-		 (int)strlen(info->serial), info->serial,
-		 info->prim_dev_type.category,
-		 info->prim_dev_type.OUI[0], info->prim_dev_type.OUI[1],
-		 info->prim_dev_type.OUI[2], info->prim_dev_type.OUIsubdiv);
 	mem_needed = sizeof(*_d2)
 		      + sizeof(struct wlp_attr_uuid_e)
 		      + sizeof(struct wlp_attr_uuid_r)
@@ -892,7 +814,6 @@
 		goto error;
 	}
 	_d2 = (void *) _skb->data;
-	d_printf(6, dev, "D2 starts at %p \n", _d2);
 	_d2->hdr.mux_hdr = cpu_to_le16(WLP_PROTOCOL_ID);
 	_d2->hdr.type = WLP_FRAME_ASSOCIATION;
 	_d2->type = WLP_ASSOC_D2;
@@ -917,11 +838,8 @@
 	used += wlp_set_prim_dev_type(d2_itr + used, &info->prim_dev_type);
 	used += wlp_set_wlp_assc_err(d2_itr + used, WLP_ASSOC_ERROR_NONE);
 	skb_put(_skb, sizeof(*_d2) + used);
-	d_printf(6, dev, "D2 message:\n");
-	d_dump(6, dev, _d2, mem_needed);
 	*skb = _skb;
 error:
-	d_fnend(6, dev, "wlp %p, result = %d\n", wlp, result);
 	return result;
 }
 
@@ -947,7 +865,6 @@
 	struct sk_buff *_skb;
 	struct wlp_nonce tmp;
 
-	d_fnstart(6, dev, "wlp %p\n", wlp);
 	_skb = dev_alloc_skb(sizeof(*f0));
 	if (_skb == NULL) {
 		dev_err(dev, "WLP: Unable to allocate memory for F0 "
@@ -955,7 +872,6 @@
 		goto error_alloc;
 	}
 	f0 = (void *) _skb->data;
-	d_printf(6, dev, "F0 starts at %p \n", f0);
 	f0->f0_hdr.hdr.mux_hdr = cpu_to_le16(WLP_PROTOCOL_ID);
 	f0->f0_hdr.hdr.type = WLP_FRAME_ASSOCIATION;
 	f0->f0_hdr.type = WLP_ASSOC_F0;
@@ -969,7 +885,6 @@
 	*skb = _skb;
 	result = 0;
 error_alloc:
-	d_fnend(6, dev, "wlp %p, result %d \n", wlp, result);
 	return result;
 }
 
@@ -1242,12 +1157,9 @@
 	enum wlp_wss_sel_mthd sel_mthd = 0;
 	struct wlp_device_info dev_info;
 	enum wlp_assc_error assc_err;
-	char uuid[WLP_WSS_UUID_STRSIZE];
 	struct sk_buff *resp = NULL;
 
 	/* Parse D1 frame */
-	d_fnstart(6, dev, "WLP: handle D1 frame. wlp = %p, skb = %p\n",
-		  wlp, skb);
 	mutex_lock(&wss->mutex);
 	mutex_lock(&wlp->mutex); /* to access wlp->uuid */
 	memset(&dev_info, 0, sizeof(dev_info));
@@ -1258,30 +1170,6 @@
 		kfree_skb(skb);
 		goto out;
 	}
-	wlp_wss_uuid_print(uuid, sizeof(uuid), &uuid_e);
-	d_printf(6, dev, "From D1 frame:\n"
-		 "UUID-E: %s\n"
-		 "Selection method: %d\n"
-		 "Device name (%d bytes): %s\n"
-		 "Model name (%d bytes): %s\n"
-		 "Manufacturer (%d bytes): %s\n"
-		 "Model number (%d bytes): %s\n"
-		 "Serial number (%d bytes): %s\n"
-		 "Primary device type: \n"
-		 " Category: %d \n"
-		 " OUI: %02x:%02x:%02x \n"
-		 " OUI Subdivision: %u \n",
-		 uuid, sel_mthd,
-		 (int)strlen(dev_info.name), dev_info.name,
-		 (int)strlen(dev_info.model_name), dev_info.model_name,
-		 (int)strlen(dev_info.manufacturer), dev_info.manufacturer,
-		 (int)strlen(dev_info.model_nr),  dev_info.model_nr,
-		 (int)strlen(dev_info.serial), dev_info.serial,
-		 dev_info.prim_dev_type.category,
-		 dev_info.prim_dev_type.OUI[0],
-		 dev_info.prim_dev_type.OUI[1],
-		 dev_info.prim_dev_type.OUI[2],
-		 dev_info.prim_dev_type.OUIsubdiv);
 
 	kfree_skb(skb);
 	if (!wlp_uuid_is_set(&wlp->uuid)) {
@@ -1316,7 +1204,6 @@
 	kfree(frame_ctx);
 	mutex_unlock(&wlp->mutex);
 	mutex_unlock(&wss->mutex);
-	d_fnend(6, dev, "WLP: handle D1 frame. wlp = %p\n", wlp);
 }
 
 /**
@@ -1546,10 +1433,8 @@
 	void *ptr = skb->data;
 	size_t len = skb->len;
 	size_t used;
-	char buf[WLP_WSS_UUID_STRSIZE];
 	struct wlp_frame_assoc *assoc = ptr;
 
-	d_fnstart(6, dev, "wlp %p, skb %p \n", wlp, skb);
 	used = sizeof(*assoc);
 	result = wlp_get_wssid(wlp, ptr + used, wssid, len - used);
 	if (result < 0) {
@@ -1572,14 +1457,7 @@
 			wlp_assoc_frame_str(assoc->type));
 		goto error_parse;
 	}
-	wlp_wss_uuid_print(buf, sizeof(buf), wssid);
-	d_printf(6, dev, "WLP: parsed: WSSID %s, tag 0x%02x, virt "
-		 "%02x:%02x:%02x:%02x:%02x:%02x \n", buf, *tag,
-		 virt_addr->data[0], virt_addr->data[1], virt_addr->data[2],
-		 virt_addr->data[3], virt_addr->data[4], virt_addr->data[5]);
-
 error_parse:
-	d_fnend(6, dev, "wlp %p, skb %p, result = %d \n", wlp, skb, result);
 	return result;
 }
 
@@ -1600,7 +1478,6 @@
 	} *c;
 	struct sk_buff *_skb;
 
-	d_fnstart(6, dev, "wlp %p, wss %p \n", wlp, wss);
 	_skb = dev_alloc_skb(sizeof(*c));
 	if (_skb == NULL) {
 		dev_err(dev, "WLP: Unable to allocate memory for C1/C2 "
@@ -1608,7 +1485,6 @@
 		goto error_alloc;
 	}
 	c = (void *) _skb->data;
-	d_printf(6, dev, "C1/C2 starts at %p \n", c);
 	c->c_hdr.hdr.mux_hdr = cpu_to_le16(WLP_PROTOCOL_ID);
 	c->c_hdr.hdr.type = WLP_FRAME_ASSOCIATION;
 	c->c_hdr.type = type;
@@ -1616,12 +1492,9 @@
 	wlp_set_msg_type(&c->c_hdr.msg_type, type);
 	wlp_set_wssid(&c->wssid, &wss->wssid);
 	skb_put(_skb, sizeof(*c));
-	d_printf(6, dev, "C1/C2 message:\n");
-	d_dump(6, dev, c, sizeof(*c));
 	*skb = _skb;
 	result = 0;
 error_alloc:
-	d_fnend(6, dev, "wlp %p, wss %p, result %d \n", wlp, wss, result);
 	return result;
 }
 
@@ -1660,7 +1533,6 @@
 	} *c;
 	struct sk_buff *_skb;
 
-	d_fnstart(6, dev, "wlp %p, wss %p \n", wlp, wss);
 	_skb = dev_alloc_skb(sizeof(*c));
 	if (_skb == NULL) {
 		dev_err(dev, "WLP: Unable to allocate memory for C3/C4 "
@@ -1668,7 +1540,6 @@
 		goto error_alloc;
 	}
 	c = (void *) _skb->data;
-	d_printf(6, dev, "C3/C4 starts at %p \n", c);
 	c->c_hdr.hdr.mux_hdr = cpu_to_le16(WLP_PROTOCOL_ID);
 	c->c_hdr.hdr.type = WLP_FRAME_ASSOCIATION;
 	c->c_hdr.type = type;
@@ -1678,12 +1549,9 @@
 	wlp_set_wss_tag(&c->wss_tag, wss->tag);
 	wlp_set_wss_virt(&c->wss_virt, &wss->virtual_addr);
 	skb_put(_skb, sizeof(*c));
-	d_printf(6, dev, "C3/C4 message:\n");
-	d_dump(6, dev, c, sizeof(*c));
 	*skb = _skb;
 	result = 0;
 error_alloc:
-	d_fnend(6, dev, "wlp %p, wss %p, result %d \n", wlp, wss, result);
 	return result;
 }
 
@@ -1709,10 +1577,7 @@
 	struct device *dev = &wlp->rc->uwb_dev.dev;			\
 	int result;							\
 	struct sk_buff *skb = NULL;					\
-	d_fnstart(6, dev, "wlp %p, wss %p, neighbor: %02x:%02x\n",	\
-		  wlp, wss, dev_addr->data[1], dev_addr->data[0]);	\
-	d_printf(6, dev, "WLP: Constructing %s frame. \n",		\
-		 wlp_assoc_frame_str(id));				\
+									\
 	/* Build the frame */						\
 	result = wlp_build_assoc_##type(wlp, wss, &skb);		\
 	if (result < 0) {						\
@@ -1721,9 +1586,6 @@
 		goto error_build_assoc;					\
 	}								\
 	/* Send the frame */						\
-	d_printf(6, dev, "Transmitting %s frame to %02x:%02x \n",	\
-		 wlp_assoc_frame_str(id),				\
-		 dev_addr->data[1], dev_addr->data[0]);			\
 	BUG_ON(wlp->xmit_frame == NULL);				\
 	result = wlp->xmit_frame(wlp, skb, dev_addr);			\
 	if (result < 0) {						\
@@ -1740,8 +1602,6 @@
 	/* We could try again ... */					\
 	dev_kfree_skb_any(skb);/*we need to free if tx fails*/		\
 error_build_assoc:							\
-	d_fnend(6, dev, "wlp %p, wss %p, neighbor: %02x:%02x\n",	\
-		wlp, wss, dev_addr->data[1], dev_addr->data[0]);	\
 	return result;							\
 }
 
@@ -1794,12 +1654,9 @@
 	struct uwb_dev_addr *src = &frame_ctx->src;
 	int result;
 	struct wlp_uuid wssid;
-	char buf[WLP_WSS_UUID_STRSIZE];
 	struct sk_buff *resp = NULL;
 
 	/* Parse C1 frame */
-	d_fnstart(6, dev, "WLP: handle C1 frame. wlp = %p, c1 = %p\n",
-		  wlp, c1);
 	mutex_lock(&wss->mutex);
 	result = wlp_get_wssid(wlp, (void *)c1 + sizeof(*c1), &wssid,
 			       len - sizeof(*c1));
@@ -1807,12 +1664,8 @@
 		dev_err(dev, "WLP: unable to obtain WSSID from C1 frame.\n");
 		goto out;
 	}
-	wlp_wss_uuid_print(buf, sizeof(buf), &wssid);
-	d_printf(6, dev, "Received C1 frame with WSSID %s \n", buf);
 	if (!memcmp(&wssid, &wss->wssid, sizeof(wssid))
 	    && wss->state == WLP_WSS_STATE_ACTIVE) {
-		d_printf(6, dev, "WSSID from C1 frame is known locally "
-			 "and is active\n");
 		/* Construct C2 frame */
 		result = wlp_build_assoc_c2(wlp, wss, &resp);
 		if (result < 0) {
@@ -1820,8 +1673,6 @@
 			goto out;
 		}
 	} else {
-		d_printf(6, dev, "WSSID from C1 frame is not known locally "
-			 "or is not active\n");
 		/* Construct F0 frame */
 		result = wlp_build_assoc_f0(wlp, &resp, WLP_ASSOC_ERROR_INV);
 		if (result < 0) {
@@ -1830,8 +1681,6 @@
 		}
 	}
 	/* Send C2 frame */
-	d_printf(6, dev, "Transmitting response (C2/F0) frame to %02x:%02x \n",
-		 src->data[1], src->data[0]);
 	BUG_ON(wlp->xmit_frame == NULL);
 	result = wlp->xmit_frame(wlp, resp, src);
 	if (result < 0) {
@@ -1846,7 +1695,6 @@
 	kfree_skb(frame_ctx->skb);
 	kfree(frame_ctx);
 	mutex_unlock(&wss->mutex);
-	d_fnend(6, dev, "WLP: handle C1 frame. wlp = %p\n", wlp);
 }
 
 /**
@@ -1868,27 +1716,20 @@
 	struct sk_buff *skb = frame_ctx->skb;
 	struct uwb_dev_addr *src = &frame_ctx->src;
 	int result;
-	char buf[WLP_WSS_UUID_STRSIZE];
 	struct sk_buff *resp = NULL;
 	struct wlp_uuid wssid;
 	u8 tag;
 	struct uwb_mac_addr virt_addr;
 
 	/* Parse C3 frame */
-	d_fnstart(6, dev, "WLP: handle C3 frame. wlp = %p, skb = %p\n",
-		  wlp, skb);
 	mutex_lock(&wss->mutex);
 	result = wlp_parse_c3c4_frame(wlp, skb, &wssid, &tag, &virt_addr);
 	if (result < 0) {
 		dev_err(dev, "WLP: unable to obtain values from C3 frame.\n");
 		goto out;
 	}
-	wlp_wss_uuid_print(buf, sizeof(buf), &wssid);
-	d_printf(6, dev, "Received C3 frame with WSSID %s \n", buf);
 	if (!memcmp(&wssid, &wss->wssid, sizeof(wssid))
 	    && wss->state >= WLP_WSS_STATE_ACTIVE) {
-		d_printf(6, dev, "WSSID from C3 frame is known locally "
-			 "and is active\n");
 		result = wlp_eda_update_node(&wlp->eda, src, wss,
 					     (void *) virt_addr.data, tag,
 					     WLP_WSS_CONNECTED);
@@ -1913,8 +1754,6 @@
 			}
 		}
 	} else {
-		d_printf(6, dev, "WSSID from C3 frame is not known locally "
-			 "or is not active\n");
 		/* Construct F0 frame */
 		result = wlp_build_assoc_f0(wlp, &resp, WLP_ASSOC_ERROR_INV);
 		if (result < 0) {
@@ -1923,8 +1762,6 @@
 		}
 	}
 	/* Send C4 frame */
-	d_printf(6, dev, "Transmitting response (C4/F0) frame to %02x:%02x \n",
-		 src->data[1], src->data[0]);
 	BUG_ON(wlp->xmit_frame == NULL);
 	result = wlp->xmit_frame(wlp, resp, src);
 	if (result < 0) {
@@ -1939,8 +1776,6 @@
 	kfree_skb(frame_ctx->skb);
 	kfree(frame_ctx);
 	mutex_unlock(&wss->mutex);
-	d_fnend(6, dev, "WLP: handle C3 frame. wlp = %p, skb = %p\n",
-		wlp, skb);
 }
 
 
diff --git a/drivers/uwb/wlp/sysfs.c b/drivers/uwb/wlp/sysfs.c
index 1bb9b1f..0370399 100644
--- a/drivers/uwb/wlp/sysfs.c
+++ b/drivers/uwb/wlp/sysfs.c
@@ -23,8 +23,8 @@
  * FIXME: Docs
  *
  */
-
 #include <linux/wlp.h>
+
 #include "wlp-internal.h"
 
 static
diff --git a/drivers/uwb/wlp/txrx.c b/drivers/uwb/wlp/txrx.c
index c701bd1..cd20357 100644
--- a/drivers/uwb/wlp/txrx.c
+++ b/drivers/uwb/wlp/txrx.c
@@ -26,12 +26,10 @@
 
 #include <linux/etherdevice.h>
 #include <linux/wlp.h>
-#define D_LOCAL 5
-#include <linux/uwb/debug.h>
+
 #include "wlp-internal.h"
 
-
-/**
+/*
  * Direct incoming association msg to correct parsing routine
  *
  * We only expect D1, E1, C1, C3 messages as new. All other incoming
@@ -48,35 +46,31 @@
 	struct device *dev = &wlp->rc->uwb_dev.dev;
 	struct wlp_frame_assoc *assoc = (void *) skb->data;
 	struct wlp_assoc_frame_ctx *frame_ctx;
-	d_fnstart(5, dev, "wlp %p, skb %p\n", wlp, skb);
+
 	frame_ctx = kmalloc(sizeof(*frame_ctx), GFP_ATOMIC);
 	if (frame_ctx == NULL) {
 		dev_err(dev, "WLP: Unable to allocate memory for association "
 			"frame handling.\n");
 		kfree_skb(skb);
-		goto out;
+		return;
 	}
 	frame_ctx->wlp = wlp;
 	frame_ctx->skb = skb;
 	frame_ctx->src = *src;
 	switch (assoc->type) {
 	case WLP_ASSOC_D1:
-		d_printf(5, dev, "Received a D1 frame.\n");
 		INIT_WORK(&frame_ctx->ws, wlp_handle_d1_frame);
 		schedule_work(&frame_ctx->ws);
 		break;
 	case WLP_ASSOC_E1:
-		d_printf(5, dev, "Received a E1 frame. FIXME?\n");
 		kfree_skb(skb); /* Temporary until we handle it */
 		kfree(frame_ctx); /* Temporary until we handle it */
 		break;
 	case WLP_ASSOC_C1:
-		d_printf(5, dev, "Received a C1 frame.\n");
 		INIT_WORK(&frame_ctx->ws, wlp_handle_c1_frame);
 		schedule_work(&frame_ctx->ws);
 		break;
 	case WLP_ASSOC_C3:
-		d_printf(5, dev, "Received a C3 frame.\n");
 		INIT_WORK(&frame_ctx->ws, wlp_handle_c3_frame);
 		schedule_work(&frame_ctx->ws);
 		break;
@@ -87,11 +81,9 @@
 		kfree(frame_ctx);
 		break;
 	}
-out:
-	d_fnend(5, dev, "wlp %p\n", wlp);
 }
 
-/**
+/*
  * Process incoming association frame
  *
  * Although it could be possible to deal with some incoming association
@@ -112,7 +104,6 @@
 	struct wlp_frame_assoc *assoc = (void *) skb->data;
 	struct wlp_session *session = wlp->session;
 	u8 version;
-	d_fnstart(5, dev, "wlp %p, skb %p\n", wlp, skb);
 
 	if (wlp_get_version(wlp, &assoc->version, &version,
 			    sizeof(assoc->version)) < 0)
@@ -150,14 +141,12 @@
 	} else {
 		wlp_direct_assoc_frame(wlp, skb, src);
 	}
-	d_fnend(5, dev, "wlp %p\n", wlp);
 	return;
 error:
 	kfree_skb(skb);
-	d_fnend(5, dev, "wlp %p\n", wlp);
 }
 
-/**
+/*
  * Verify incoming frame is from connected neighbor, prep to pass to WLP client
  *
  * Verification proceeds according to WLP 0.99 [7.3.1]. The source address
@@ -176,7 +165,6 @@
 	struct wlp_eda_node eda_entry;
 	struct wlp_frame_std_abbrv_hdr *hdr = (void *) skb->data;
 
-	d_fnstart(6, dev, "wlp %p, skb %p \n", wlp, skb);
 	/*verify*/
 	result = wlp_copy_eda_node(&wlp->eda, src, &eda_entry);
 	if (result < 0) {
@@ -207,11 +195,10 @@
 	/*prep*/
 	skb_pull(skb, sizeof(*hdr));
 out:
-	d_fnend(6, dev, "wlp %p, skb %p, result = %d \n", wlp, skb, result);
 	return result;
 }
 
-/**
+/*
  * Receive a WLP frame from device
  *
  * @returns: 1 if calling function should free the skb
@@ -226,14 +213,12 @@
 	struct wlp_frame_hdr *hdr;
 	int result = 0;
 
-	d_fnstart(6, dev, "skb (%p), len (%u)\n", skb, len);
 	if (len < sizeof(*hdr)) {
 		dev_err(dev, "Not enough data to parse WLP header.\n");
 		result = -EINVAL;
 		goto out;
 	}
 	hdr = ptr;
-	d_dump(6, dev, hdr, sizeof(*hdr));
 	if (le16_to_cpu(hdr->mux_hdr) != WLP_PROTOCOL_ID) {
 		dev_err(dev, "Not a WLP frame type.\n");
 		result = -EINVAL;
@@ -270,7 +255,6 @@
 				"WLP header.\n");
 			goto out;
 		}
-		d_printf(5, dev, "Association frame received.\n");
 		wlp_receive_assoc_frame(wlp, skb, src);
 		break;
 	default:
@@ -283,13 +267,12 @@
 		kfree_skb(skb);
 		result = 0;
 	}
-	d_fnend(6, dev, "skb (%p)\n", skb);
 	return result;
 }
 EXPORT_SYMBOL_GPL(wlp_receive_frame);
 
 
-/**
+/*
  * Verify frame from network stack, prepare for further transmission
  *
  * @skb:   the socket buffer that needs to be prepared for transmission (it
@@ -343,9 +326,7 @@
 	int result = -EINVAL;
 	struct ethhdr *eth_hdr = (void *) skb->data;
 
-	d_fnstart(6, dev, "wlp (%p), skb (%p) \n", wlp, skb);
 	if (is_broadcast_ether_addr(eth_hdr->h_dest)) {
-		d_printf(6, dev, "WLP: handling broadcast frame. \n");
 		result = wlp_eda_for_each(&wlp->eda, wlp_wss_send_copy, skb);
 		if (result < 0) {
 			if (printk_ratelimit())
@@ -357,7 +338,6 @@
 		result = 1;
 		/* Frame will be transmitted by WLP. */
 	} else {
-		d_printf(6, dev, "WLP: handling unicast frame. \n");
 		result = wlp_eda_for_virtual(&wlp->eda, eth_hdr->h_dest, dst,
 					     wlp_wss_prep_hdr, skb);
 		if (unlikely(result < 0)) {
@@ -368,7 +348,6 @@
 		}
 	}
 out:
-	d_fnend(6, dev, "wlp (%p), skb (%p). result = %d \n", wlp, skb, result);
 	return result;
 }
 EXPORT_SYMBOL_GPL(wlp_prepare_tx_frame);
diff --git a/drivers/uwb/wlp/wlp-internal.h b/drivers/uwb/wlp/wlp-internal.h
index 1c94fab..3e8d5de 100644
--- a/drivers/uwb/wlp/wlp-internal.h
+++ b/drivers/uwb/wlp/wlp-internal.h
@@ -42,10 +42,6 @@
 extern struct kobj_type wss_ktype;
 extern struct attribute_group wss_attr_group;
 
-extern int uwb_rc_ie_add(struct uwb_rc *, const struct uwb_ie_hdr *, size_t);
-extern int uwb_rc_ie_rm(struct uwb_rc *, enum uwb_ie);
-
-
 /* This should be changed to a dynamic array where entries are sorted
  * by eth_addr and search is done in a binary form
  *
diff --git a/drivers/uwb/wlp/wlp-lc.c b/drivers/uwb/wlp/wlp-lc.c
index 0799402e..13db739 100644
--- a/drivers/uwb/wlp/wlp-lc.c
+++ b/drivers/uwb/wlp/wlp-lc.c
@@ -21,12 +21,9 @@
  *
  * FIXME: docs
  */
-
 #include <linux/wlp.h>
-#define D_LOCAL 6
-#include <linux/uwb/debug.h>
-#include "wlp-internal.h"
 
+#include "wlp-internal.h"
 
 static
 void wlp_neighbor_init(struct wlp_neighbor_e *neighbor)
@@ -61,11 +58,6 @@
 static
 void __wlp_fill_device_info(struct wlp *wlp)
 {
-	struct device *dev = &wlp->rc->uwb_dev.dev;
-
-	BUG_ON(wlp->fill_device_info == NULL);
-	d_printf(6, dev, "Retrieving device information "
-			 "from device driver.\n");
 	wlp->fill_device_info(wlp, wlp->dev_info);
 }
 
@@ -127,7 +119,7 @@
 	}
 }
 
-/**
+/*
  * Populate WLP neighborhood cache with neighbor information
  *
  * A new neighbor is found. If it is discoverable then we add it to the
@@ -141,10 +133,7 @@
 	int discoverable;
 	struct wlp_neighbor_e *neighbor;
 
-	d_fnstart(6, &dev->dev, "uwb %p \n", dev);
-	d_printf(6, &dev->dev, "Found neighbor device %02x:%02x \n",
-		 dev->dev_addr.data[1], dev->dev_addr.data[0]);
-	/**
+	/*
 	 * FIXME:
 	 * Use contents of WLP IE found in beacon cache to determine if
 	 * neighbor is discoverable.
@@ -167,7 +156,6 @@
 		list_add(&neighbor->node, &wlp->neighbors);
 	}
 error_no_mem:
-	d_fnend(6, &dev->dev, "uwb %p, result = %d \n", dev, result);
 	return result;
 }
 
@@ -255,8 +243,6 @@
 		dev_err(dev, "Unable to send D1 frame to neighbor "
 			"%02x:%02x (%d)\n", dev_addr->data[1],
 			dev_addr->data[0], result);
-		d_printf(6, dev, "Add placeholders into buffer next to "
-			 "neighbor information we have (dev address).\n");
 		goto out;
 	}
 	/* Create session, wait for response */
@@ -284,8 +270,6 @@
 	/* Parse message in session->data: it will be either D2 or F0 */
 	skb = session.data;
 	resp = (void *) skb->data;
-	d_printf(6, dev, "Received response to D1 frame. \n");
-	d_dump(6, dev, skb->data, skb->len > 72 ? 72 : skb->len);
 
 	if (resp->type == WLP_ASSOC_F0) {
 		result = wlp_parse_f0(wlp, skb);
@@ -337,10 +321,9 @@
 	struct device *dev = &wlp->rc->uwb_dev.dev;
 	char buf[WLP_WSS_UUID_STRSIZE];
 	struct uwb_dev_addr *dev_addr = &neighbor->uwb_dev->dev_addr;
+
 	wlp_wss_uuid_print(buf, sizeof(buf), wssid);
-	d_fnstart(6, dev, "wlp %p, neighbor %p, wss %p, wssid %p (%s)\n",
-		  wlp, neighbor, wss, wssid, buf);
-	d_printf(6, dev, "Complete me.\n");
+
 	result =  wlp_d1d2_exchange(wlp, neighbor, wss, wssid);
 	if (result < 0) {
 		dev_err(dev, "WLP: D1/D2 message exchange for enrollment "
@@ -360,13 +343,10 @@
 		goto error;
 	} else {
 		wss->state = WLP_WSS_STATE_ENROLLED;
-		d_printf(2, dev, "WLP: Success Enrollment into unsecure WSS "
-			 "%s using neighbor %02x:%02x. \n", buf,
-			 dev_addr->data[1], dev_addr->data[0]);
+		dev_dbg(dev, "WLP: Successfully enrolled into unsecure WSS "
+			"%s using neighbor %02x:%02x.\n",
+			buf, dev_addr->data[1], dev_addr->data[0]);
 	}
-
-	d_fnend(6, dev, "wlp %p, neighbor %p, wss %p, wssid %p (%s)\n",
-		  wlp, neighbor, wss, wssid, buf);
 out:
 	return result;
 error:
@@ -449,7 +429,6 @@
 	int result = 0;
 	struct device *dev = &wlp->rc->uwb_dev.dev;
 
-	d_fnstart(6, dev, "wlp %p \n", wlp);
 	mutex_lock(&wlp->nbmutex);
 	/* Clear current neighborhood cache. */
 	__wlp_neighbors_release(wlp);
@@ -469,7 +448,6 @@
 	}
 error_dev_for_each:
 	mutex_unlock(&wlp->nbmutex);
-	d_fnend(6, dev, "wlp %p \n", wlp);
 	return result;
 }
 
@@ -492,9 +470,6 @@
 	int result;
 	switch (event) {
 	case UWB_NOTIF_ONAIR:
-		d_printf(6, dev, "UWB device %02x:%02x is onair\n",
-				uwb_dev->dev_addr.data[1],
-				uwb_dev->dev_addr.data[0]);
 		result = wlp_eda_create_node(&wlp->eda,
 					     uwb_dev->mac_addr.data,
 					     &uwb_dev->dev_addr);
@@ -505,18 +480,11 @@
 				uwb_dev->dev_addr.data[0]);
 		break;
 	case UWB_NOTIF_OFFAIR:
-		d_printf(6, dev, "UWB device %02x:%02x is offair\n",
-				uwb_dev->dev_addr.data[1],
-				uwb_dev->dev_addr.data[0]);
 		wlp_eda_rm_node(&wlp->eda, &uwb_dev->dev_addr);
 		mutex_lock(&wlp->nbmutex);
-		list_for_each_entry_safe(neighbor, next, &wlp->neighbors,
-					 node) {
-			if (neighbor->uwb_dev == uwb_dev) {
-				d_printf(6, dev, "Removing device from "
-					 "neighborhood.\n");
+		list_for_each_entry_safe(neighbor, next, &wlp->neighbors, node) {
+			if (neighbor->uwb_dev == uwb_dev)
 				__wlp_neighbor_release(neighbor);
-			}
 		}
 		mutex_unlock(&wlp->nbmutex);
 		break;
@@ -526,38 +494,47 @@
 	}
 }
 
-int wlp_setup(struct wlp *wlp, struct uwb_rc *rc)
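+/*
+ * UWB PAL channel-changed callback: drop the network carrier while the
+ * radio controller reports no channel (channel < 0), restore it otherwise.
+ */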
+static void wlp_channel_changed(struct uwb_pal *pal, int channel)
 {
-	struct device *dev = &rc->uwb_dev.dev;
+	struct wlp *wlp = container_of(pal, struct wlp, pal);
+
+	if (channel < 0)
+		netif_carrier_off(wlp->ndev);
+	else
+		netif_carrier_on(wlp->ndev);
+}
+
+int wlp_setup(struct wlp *wlp, struct uwb_rc *rc, struct net_device *ndev)
+{
 	int result;
 
-	d_fnstart(6, dev, "wlp %p\n", wlp);
 	BUG_ON(wlp->fill_device_info == NULL);
 	BUG_ON(wlp->xmit_frame == NULL);
 	BUG_ON(wlp->stop_queue == NULL);
 	BUG_ON(wlp->start_queue == NULL);
+
 	wlp->rc = rc;
+	wlp->ndev = ndev;
 	wlp_eda_init(&wlp->eda);/* Set up address cache */
 	wlp->uwb_notifs_handler.cb = wlp_uwb_notifs_cb;
 	wlp->uwb_notifs_handler.data = wlp;
 	uwb_notifs_register(rc, &wlp->uwb_notifs_handler);
 
 	uwb_pal_init(&wlp->pal);
-	result = uwb_pal_register(rc, &wlp->pal);
+	wlp->pal.rc = rc;
+	wlp->pal.channel_changed = wlp_channel_changed;
+	result = uwb_pal_register(&wlp->pal);
 	if (result < 0)
 		uwb_notifs_deregister(wlp->rc, &wlp->uwb_notifs_handler);
 
-	d_fnend(6, dev, "wlp %p, result = %d\n", wlp, result);
 	return result;
 }
 EXPORT_SYMBOL_GPL(wlp_setup);
 
 void wlp_remove(struct wlp *wlp)
 {
-	struct device *dev = &wlp->rc->uwb_dev.dev;
-	d_fnstart(6, dev, "wlp %p\n", wlp);
 	wlp_neighbors_release(wlp);
-	uwb_pal_unregister(wlp->rc, &wlp->pal);
+	uwb_pal_unregister(&wlp->pal);
 	uwb_notifs_deregister(wlp->rc, &wlp->uwb_notifs_handler);
 	wlp_eda_release(&wlp->eda);
 	mutex_lock(&wlp->mutex);
@@ -565,9 +542,6 @@
 		kfree(wlp->dev_info);
 	mutex_unlock(&wlp->mutex);
 	wlp->rc = NULL;
-	/* We have to use NULL here because this function can be called
-	 * when the device disappeared. */
-	d_fnend(6, NULL, "wlp %p\n", wlp);
 }
 EXPORT_SYMBOL_GPL(wlp_remove);
 
diff --git a/drivers/uwb/wlp/wss-lc.c b/drivers/uwb/wlp/wss-lc.c
index 96b18c9..5913c7a 100644
--- a/drivers/uwb/wlp/wss-lc.c
+++ b/drivers/uwb/wlp/wss-lc.c
@@ -43,14 +43,11 @@
  * 	wlp_wss_release()
  * 		wlp_wss_reset()
  */
-
 #include <linux/etherdevice.h> /* for is_valid_ether_addr */
 #include <linux/skbuff.h>
 #include <linux/wlp.h>
-#define D_LOCAL 5
-#include <linux/uwb/debug.h>
-#include "wlp-internal.h"
 
+#include "wlp-internal.h"
 
 size_t wlp_wss_key_print(char *buf, size_t bufsize, u8 *key)
 {
@@ -116,9 +113,6 @@
  */
 void wlp_wss_reset(struct wlp_wss *wss)
 {
-	struct wlp *wlp = container_of(wss, struct wlp, wss);
-	struct device *dev = &wlp->rc->uwb_dev.dev;
-	d_fnstart(5, dev, "wss (%p) \n", wss);
 	memset(&wss->wssid, 0, sizeof(wss->wssid));
 	wss->hash = 0;
 	memset(&wss->name[0], 0, sizeof(wss->name));
@@ -127,7 +121,6 @@
 	memset(&wss->master_key[0], 0, sizeof(wss->master_key));
 	wss->tag = 0;
 	wss->state = WLP_WSS_STATE_NONE;
-	d_fnend(5, dev, "wss (%p) \n", wss);
 }
 
 /**
@@ -145,7 +138,6 @@
 	struct device *dev = &wlp->rc->uwb_dev.dev;
 	int result;
 
-	d_fnstart(5, dev, "wss (%p), wssid: %s\n", wss, wssid_str);
 	result = kobject_set_name(&wss->kobj, "wss-%s", wssid_str);
 	if (result < 0)
 		return result;
@@ -162,7 +154,6 @@
 			result);
 		goto error_sysfs_create_group;
 	}
-	d_fnend(5, dev, "Completed. result = %d \n", result);
 	return 0;
 error_sysfs_create_group:
 
@@ -214,22 +205,14 @@
 	struct wlp *wlp = container_of(wss, struct wlp, wss);
 	struct device *dev = &wlp->rc->uwb_dev.dev;
 	struct wlp_neighbor_e *neighbor;
-	char buf[WLP_WSS_UUID_STRSIZE];
 	int result = -ENXIO;
 	struct uwb_dev_addr *dev_addr;
 
-	wlp_wss_uuid_print(buf, sizeof(buf), wssid);
-	d_fnstart(5, dev, "wss %p, wssid %s, registrar %02x:%02x \n",
-		  wss, buf, dest->data[1], dest->data[0]);
 	mutex_lock(&wlp->nbmutex);
 	list_for_each_entry(neighbor, &wlp->neighbors, node) {
 		dev_addr = &neighbor->uwb_dev->dev_addr;
 		if (!memcmp(dest, dev_addr, sizeof(*dest))) {
-			d_printf(5, dev, "Neighbor %02x:%02x is valid, "
-				 "enrolling. \n",
-				 dev_addr->data[1], dev_addr->data[0]);
-			result = wlp_enroll_neighbor(wlp, neighbor, wss,
-						     wssid);
+			result = wlp_enroll_neighbor(wlp, neighbor, wss, wssid);
 			break;
 		}
 	}
@@ -237,8 +220,6 @@
 		dev_err(dev, "WLP: Cannot find neighbor %02x:%02x. \n",
 			dest->data[1], dest->data[0]);
 	mutex_unlock(&wlp->nbmutex);
-	d_fnend(5, dev, "wss %p, wssid %s, registrar %02x:%02x, result %d \n",
-		  wss, buf, dest->data[1], dest->data[0], result);
 	return result;
 }
 
@@ -260,16 +241,11 @@
 	char buf[WLP_WSS_UUID_STRSIZE];
 	int result = -ENXIO;
 
-	wlp_wss_uuid_print(buf, sizeof(buf), wssid);
-	d_fnstart(5, dev, "wss %p, wssid %s \n", wss, buf);
 	mutex_lock(&wlp->nbmutex);
 	list_for_each_entry(neighbor, &wlp->neighbors, node) {
 		list_for_each_entry(wssid_e, &neighbor->wssid, node) {
 			if (!memcmp(wssid, &wssid_e->wssid, sizeof(*wssid))) {
-				d_printf(5, dev, "Found WSSID %s in neighbor "
-					 "%02x:%02x cache. \n", buf,
-					 neighbor->uwb_dev->dev_addr.data[1],
-					 neighbor->uwb_dev->dev_addr.data[0]);
 				result = wlp_enroll_neighbor(wlp, neighbor,
 							     wss, wssid);
 				if (result == 0) /* enrollment success */
@@ -279,10 +255,11 @@
 		}
 	}
 out:
-	if (result == -ENXIO)
+	if (result == -ENXIO) {
+		wlp_wss_uuid_print(buf, sizeof(buf), wssid);
 		dev_err(dev, "WLP: Cannot find WSSID %s in cache. \n", buf);
+	}
 	mutex_unlock(&wlp->nbmutex);
-	d_fnend(5, dev, "wss %p, wssid %s, result %d \n", wss, buf, result);
 	return result;
 }
 
@@ -307,27 +284,22 @@
 	struct uwb_dev_addr bcast = {.data = {0xff, 0xff} };
 
 	wlp_wss_uuid_print(buf, sizeof(buf), wssid);
+
 	if (wss->state != WLP_WSS_STATE_NONE) {
 		dev_err(dev, "WLP: Already enrolled in WSS %s.\n", buf);
 		result = -EEXIST;
 		goto error;
 	}
-	if (!memcmp(&bcast, devaddr, sizeof(bcast))) {
-		d_printf(5, dev, "Request to enroll in discovered WSS "
-			 "with WSSID %s \n", buf);
+	if (!memcmp(&bcast, devaddr, sizeof(bcast)))
 		result = wlp_wss_enroll_discovered(wss, wssid);
-	} else {
-		d_printf(5, dev, "Request to enroll in WSSID %s with "
-			 "registrar %02x:%02x\n", buf, devaddr->data[1],
-			 devaddr->data[0]);
+	else
 		result = wlp_wss_enroll_target(wss, wssid, devaddr);
-	}
 	if (result < 0) {
 		dev_err(dev, "WLP: Unable to enroll into WSS %s, result %d \n",
 			buf, result);
 		goto error;
 	}
-	d_printf(2, dev, "Successfully enrolled into WSS %s \n", buf);
+	dev_dbg(dev, "Successfully enrolled into WSS %s\n", buf);
 	result = wlp_wss_sysfs_add(wss, buf);
 	if (result < 0) {
 		dev_err(dev, "WLP: Unable to set up sysfs for WSS kobject.\n");
@@ -363,7 +335,6 @@
 		u8 hash; /* only include one hash */
 	} ie_data;
 
-	d_fnstart(5, dev, "Activating WSS %p. \n", wss);
 	BUG_ON(wss->state != WLP_WSS_STATE_ENROLLED);
 	wss->hash = wlp_wss_comp_wssid_hash(&wss->wssid);
 	wss->tag = wss->hash;
@@ -382,7 +353,6 @@
 	wss->state = WLP_WSS_STATE_ACTIVE;
 	result = 0;
 error_wlp_ie:
-	d_fnend(5, dev, "Activating WSS %p, result = %d \n", wss, result);
 	return result;
 }
 
@@ -405,7 +375,6 @@
 	int result = 0;
 	char buf[WLP_WSS_UUID_STRSIZE];
 
-	d_fnstart(5, dev, "Enrollment and activation requested. \n");
 	mutex_lock(&wss->mutex);
 	result = wlp_wss_enroll(wss, wssid, devaddr);
 	if (result < 0) {
@@ -424,7 +393,6 @@
 error_activate:
 error_enroll:
 	mutex_unlock(&wss->mutex);
-	d_fnend(5, dev, "Completed. result = %d \n", result);
 	return result;
 }
 
@@ -447,11 +415,9 @@
 	struct device *dev = &wlp->rc->uwb_dev.dev;
 	int result = 0;
 	char buf[WLP_WSS_UUID_STRSIZE];
-	d_fnstart(5, dev, "Request to create new WSS.\n");
+
 	result = wlp_wss_uuid_print(buf, sizeof(buf), wssid);
-	d_printf(5, dev, "Request to create WSS: WSSID=%s, name=%s, "
-		 "sec_status=%u, accepting enrollment=%u \n",
-		 buf, name, sec_status, accept);
+
 	if (!mutex_trylock(&wss->mutex)) {
 		dev_err(dev, "WLP: WLP association session in progress.\n");
 		return -EBUSY;
@@ -498,7 +464,6 @@
 	result = 0;
 out:
 	mutex_unlock(&wss->mutex);
-	d_fnend(5, dev, "Completed. result = %d \n", result);
 	return result;
 }
 
@@ -520,16 +485,12 @@
 {
 	int result = 0;
 	struct device *dev = &wlp->rc->uwb_dev.dev;
-	char buf[WLP_WSS_UUID_STRSIZE];
 	DECLARE_COMPLETION_ONSTACK(completion);
 	struct wlp_session session;
 	struct sk_buff  *skb;
 	struct wlp_frame_assoc *resp;
 	struct wlp_uuid wssid;
 
-	wlp_wss_uuid_print(buf, sizeof(buf), &wss->wssid);
-	d_fnstart(5, dev, "wlp %p, wss %p (wssid %s), neighbor %02x:%02x \n",
-		  wlp, wss, buf, dev_addr->data[1], dev_addr->data[0]);
 	mutex_lock(&wlp->mutex);
 	/* Send C1 association frame */
 	result = wlp_send_assoc_frame(wlp, wss, dev_addr, WLP_ASSOC_C1);
@@ -565,8 +526,6 @@
 	/* Parse message in session->data: it will be either C2 or F0 */
 	skb = session.data;
 	resp = (void *) skb->data;
-	d_printf(5, dev, "Received response to C1 frame. \n");
-	d_dump(5, dev, skb->data, skb->len > 72 ? 72 : skb->len);
 	if (resp->type == WLP_ASSOC_F0) {
 		result = wlp_parse_f0(wlp, skb);
 		if (result < 0)
@@ -584,11 +543,9 @@
 		result = 0;
 		goto error_resp_parse;
 	}
-	if (!memcmp(&wssid, &wss->wssid, sizeof(wssid))) {
-		d_printf(5, dev, "WSSID in C2 frame matches local "
-			 "active WSS.\n");
+	if (!memcmp(&wssid, &wss->wssid, sizeof(wssid)))
 		result = 1;
-	} else {
+	else {
 		dev_err(dev, "WLP: Received a C2 frame without matching "
 			"WSSID.\n");
 		result = 0;
@@ -598,8 +555,6 @@
 out:
 	wlp->session = NULL;
 	mutex_unlock(&wlp->mutex);
-	d_fnend(5, dev, "wlp %p, wss %p (wssid %s), neighbor %02x:%02x \n",
-		  wlp, wss, buf, dev_addr->data[1], dev_addr->data[0]);
 	return result;
 }
 
@@ -620,16 +575,8 @@
 {
 	struct device *dev = &wlp->rc->uwb_dev.dev;
 	int result = 0;
-	char buf[WLP_WSS_UUID_STRSIZE];
-	wlp_wss_uuid_print(buf, sizeof(buf), wssid);
-	d_fnstart(5, dev, "wlp %p, wss %p, wssid %s, tag %u, virtual "
-		  "%02x:%02x:%02x:%02x:%02x:%02x \n", wlp, wss, buf, *tag,
-		  virt_addr->data[0], virt_addr->data[1], virt_addr->data[2],
-		  virt_addr->data[3], virt_addr->data[4], virt_addr->data[5]);
 
 	if (!memcmp(wssid, &wss->wssid, sizeof(*wssid))) {
-		d_printf(5, dev, "WSSID from neighbor frame matches local "
-			 "active WSS.\n");
 		/* Update EDA cache */
 		result = wlp_eda_update_node(&wlp->eda, dev_addr, wss,
 					     (void *) virt_addr->data, *tag,
@@ -638,18 +585,9 @@
 			dev_err(dev, "WLP: Unable to update EDA cache "
 				"with new connected neighbor information.\n");
 	} else {
-		dev_err(dev, "WLP: Neighbor does not have matching "
-			"WSSID.\n");
+		dev_err(dev, "WLP: Neighbor does not have matching WSSID.\n");
 		result = -EINVAL;
 	}
-
-	d_fnend(5, dev, "wlp %p, wss %p, wssid %s, tag %u, virtual "
-		  "%02x:%02x:%02x:%02x:%02x:%02x, result = %d \n",
-		  wlp, wss, buf, *tag,
-		  virt_addr->data[0], virt_addr->data[1], virt_addr->data[2],
-		  virt_addr->data[3], virt_addr->data[4], virt_addr->data[5],
-		  result);
-
 	return result;
 }
 
@@ -665,7 +603,6 @@
 {
 	int result;
 	struct device *dev = &wlp->rc->uwb_dev.dev;
-	char buf[WLP_WSS_UUID_STRSIZE];
 	struct wlp_uuid wssid;
 	u8 tag;
 	struct uwb_mac_addr virt_addr;
@@ -674,9 +611,6 @@
 	struct wlp_frame_assoc *resp;
 	struct sk_buff *skb;
 
-	wlp_wss_uuid_print(buf, sizeof(buf), &wss->wssid);
-	d_fnstart(5, dev, "wlp %p, wss %p (wssid %s), neighbor %02x:%02x \n",
-		  wlp, wss, buf, dev_addr->data[1], dev_addr->data[0]);
 	mutex_lock(&wlp->mutex);
 	/* Send C3 association frame */
 	result = wlp_send_assoc_frame(wlp, wss, dev_addr, WLP_ASSOC_C3);
@@ -711,8 +645,6 @@
 	/* Parse message in session->data: it will be either C4 or F0 */
 	skb = session.data;
 	resp = (void *) skb->data;
-	d_printf(5, dev, "Received response to C3 frame. \n");
-	d_dump(5, dev, skb->data, skb->len > 72 ? 72 : skb->len);
 	if (resp->type == WLP_ASSOC_F0) {
 		result = wlp_parse_f0(wlp, skb);
 		if (result < 0)
@@ -744,8 +676,6 @@
 					  WLP_WSS_CONNECT_FAILED);
 	wlp->session = NULL;
 	mutex_unlock(&wlp->mutex);
-	d_fnend(5, dev, "wlp %p, wss %p (wssid %s), neighbor %02x:%02x \n",
-		  wlp, wss, buf, dev_addr->data[1], dev_addr->data[0]);
 	return result;
 }
 
@@ -780,12 +710,8 @@
 	struct wlp_wss *wss = &wlp->wss;
 	int result;
 	struct device *dev = &wlp->rc->uwb_dev.dev;
-	char buf[WLP_WSS_UUID_STRSIZE];
 
 	mutex_lock(&wss->mutex);
-	wlp_wss_uuid_print(buf, sizeof(buf), &wss->wssid);
-	d_fnstart(5, dev, "wlp %p, wss %p (wssid %s), neighbor %02x:%02x \n",
-		  wlp, wss, buf, dev_addr->data[1], dev_addr->data[0]);
 	if (wss->state < WLP_WSS_STATE_ACTIVE) {
 		if (printk_ratelimit())
 			dev_err(dev, "WLP: Attempting to connect with "
@@ -836,7 +762,6 @@
 	BUG_ON(wlp->start_queue == NULL);
 	wlp->start_queue(wlp);
 	mutex_unlock(&wss->mutex);
-	d_fnend(5, dev, "wlp %p, wss %p (wssid %s)\n", wlp, wss, buf);
 }
 
 /**
@@ -855,7 +780,6 @@
 	struct sk_buff *skb = _skb;
 	struct wlp_frame_std_abbrv_hdr *std_hdr;
 
-	d_fnstart(6, dev, "wlp %p \n", wlp);
 	if (eda_entry->state == WLP_WSS_CONNECTED) {
 		/* Add WLP header */
 		BUG_ON(skb_headroom(skb) < sizeof(*std_hdr));
@@ -873,7 +797,6 @@
 				dev_addr->data[0]);
 		result = -EINVAL;
 	}
-	d_fnend(6, dev, "wlp %p \n", wlp);
 	return result;
 }
 
@@ -893,16 +816,9 @@
 {
 	int result = 0;
 	struct device *dev = &wlp->rc->uwb_dev.dev;
-	struct uwb_dev_addr *dev_addr = &eda_entry->dev_addr;
-	unsigned char *eth_addr = eda_entry->eth_addr;
 	struct sk_buff *skb = _skb;
 	struct wlp_assoc_conn_ctx *conn_ctx;
 
-	d_fnstart(5, dev, "wlp %p\n", wlp);
-	d_printf(5, dev, "To neighbor %02x:%02x with eth "
-		  "%02x:%02x:%02x:%02x:%02x:%02x\n", dev_addr->data[1],
-		  dev_addr->data[0], eth_addr[0], eth_addr[1], eth_addr[2],
-		  eth_addr[3], eth_addr[4], eth_addr[5]);
 	if (eda_entry->state == WLP_WSS_UNCONNECTED) {
 		/* We don't want any more packets while we set up connection */
 		BUG_ON(wlp->stop_queue == NULL);
@@ -929,12 +845,9 @@
 			 "previously. Not retrying. \n");
 		result = -ENONET;
 		goto out;
-	} else { /* eda_entry->state == WLP_WSS_CONNECTED */
-		d_printf(5, dev, "Neighbor is connected, preparing frame.\n");
+	} else /* eda_entry->state == WLP_WSS_CONNECTED */
 		result = wlp_wss_prep_hdr(wlp, eda_entry, skb);
-	}
 out:
-	d_fnend(5, dev, "wlp %p, result = %d \n", wlp, result);
 	return result;
 }
 
@@ -957,8 +870,6 @@
 	struct sk_buff *copy;
 	struct uwb_dev_addr *dev_addr = &eda_entry->dev_addr;
 
-	d_fnstart(5, dev, "to neighbor %02x:%02x, skb (%p) \n",
-		  dev_addr->data[1], dev_addr->data[0], skb);
 	copy = skb_copy(skb, GFP_ATOMIC);
 	if (copy == NULL) {
 		if (printk_ratelimit())
@@ -988,8 +899,6 @@
 		dev_kfree_skb_irq(copy);/*we need to free if tx fails */
 	}
 out:
-	d_fnend(5, dev, "to neighbor %02x:%02x \n", dev_addr->data[1],
-		  dev_addr->data[0]);
 	return result;
 }
 
@@ -1005,7 +914,7 @@
 	struct wlp *wlp = container_of(wss, struct wlp, wss);
 	struct device *dev = &wlp->rc->uwb_dev.dev;
 	int result = 0;
-	d_fnstart(5, dev, "wss (%p) \n", wss);
+
 	mutex_lock(&wss->mutex);
 	wss->kobj.parent = &net_dev->dev.kobj;
 	if (!is_valid_ether_addr(net_dev->dev_addr)) {
@@ -1018,7 +927,6 @@
 	       sizeof(wss->virtual_addr.data));
 out:
 	mutex_unlock(&wss->mutex);
-	d_fnend(5, dev, "wss (%p) \n", wss);
 	return result;
 }
 EXPORT_SYMBOL_GPL(wlp_wss_setup);
@@ -1035,8 +943,7 @@
 void wlp_wss_remove(struct wlp_wss *wss)
 {
 	struct wlp *wlp = container_of(wss, struct wlp, wss);
-	struct device *dev = &wlp->rc->uwb_dev.dev;
-	d_fnstart(5, dev, "wss (%p) \n", wss);
+
 	mutex_lock(&wss->mutex);
 	if (wss->state == WLP_WSS_STATE_ACTIVE)
 		uwb_rc_ie_rm(wlp->rc, UWB_IE_WLP);
@@ -1050,6 +957,5 @@
 	wlp_eda_release(&wlp->eda);
 	wlp_eda_init(&wlp->eda);
 	mutex_unlock(&wss->mutex);
-	d_fnend(5, dev, "wss (%p) \n", wss);
 }
 EXPORT_SYMBOL_GPL(wlp_wss_remove);
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index e26733a9..eb0dfde 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -585,7 +585,7 @@
 	spin_unlock(&irq_mapping_update_lock);
 
 	/* new event channels are always bound to cpu 0 */
-	irq_set_affinity(irq, cpumask_of_cpu(0));
+	irq_set_affinity(irq, cpumask_of(0));
 
 	/* Unmask the event channel. */
 	enable_irq(irq);
@@ -614,9 +614,9 @@
 }
 
 
-static void set_affinity_irq(unsigned irq, cpumask_t dest)
+static void set_affinity_irq(unsigned irq, const struct cpumask *dest)
 {
-	unsigned tcpu = first_cpu(dest);
+	unsigned tcpu = cpumask_first(dest);
 	rebind_irq_to_cpu(irq, tcpu);
 }
 
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index c16d9be..3bbdb9d 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -79,9 +79,12 @@
 	if (IS_ERR(anon_inode_inode))
 		return -ENODEV;
 
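+	/*
+	 * Pin the module that provides these fops; the reference is
+	 * dropped again on the error paths below.
+	 */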
+	if (fops->owner && !try_module_get(fops->owner))
+		return -ENOENT;
+
 	error = get_unused_fd_flags(flags);
 	if (error < 0)
-		return error;
+		goto err_module;
 	fd = error;
 
 	/*
@@ -128,6 +131,8 @@
 	dput(dentry);
 err_put_unused_fd:
 	put_unused_fd(fd);
+err_module:
+	module_put(fops->owner);
 	return error;
 }
 EXPORT_SYMBOL_GPL(anon_inode_getfd);
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 5d61b7c..fff96e1 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -27,25 +27,32 @@
 #define DEVPTS_SUPER_MAGIC 0x1cd1
 
 #define DEVPTS_DEFAULT_MODE 0600
+/*
+ * ptmx is a new node in /dev/pts and will be unused in legacy (single-
+ * instance) mode. To prevent surprises in user space, set permissions of
+ * ptmx to 0. Use 'chmod' or remount with '-o ptmxmode' to set meaningful
+ * permissions.
+ */
+#define DEVPTS_DEFAULT_PTMX_MODE 0000
 #define PTMX_MINOR	2
 
 extern int pty_limit;			/* Config limit on Unix98 ptys */
-static DEFINE_IDA(allocated_ptys);
 static DEFINE_MUTEX(allocated_ptys_lock);
 
 static struct vfsmount *devpts_mnt;
-static struct dentry *devpts_root;
 
-static struct {
+struct pts_mount_opts {
 	int setuid;
 	int setgid;
 	uid_t   uid;
 	gid_t   gid;
 	umode_t mode;
-} config = {.mode = DEVPTS_DEFAULT_MODE};
+	umode_t ptmxmode;
+	int newinstance;
+};
 
 enum {
-	Opt_uid, Opt_gid, Opt_mode,
+	Opt_uid, Opt_gid, Opt_mode, Opt_ptmxmode, Opt_newinstance,
 	Opt_err
 };
 
@@ -53,18 +60,50 @@
 	{Opt_uid, "uid=%u"},
 	{Opt_gid, "gid=%u"},
 	{Opt_mode, "mode=%o"},
+#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
+	{Opt_ptmxmode, "ptmxmode=%o"},
+	{Opt_newinstance, "newinstance"},
+#endif
 	{Opt_err, NULL}
 };
 
-static int devpts_remount(struct super_block *sb, int *flags, char *data)
+struct pts_fs_info {
+	struct ida allocated_ptys;
+	struct pts_mount_opts mount_opts;
+	struct dentry *ptmx_dentry;
+};
+
+static inline struct pts_fs_info *DEVPTS_SB(struct super_block *sb)
+{
+	return sb->s_fs_info;
+}
+
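+/*
+ * Map a pty inode to its devpts superblock: the inode's own superblock
+ * for a multi-instance mount, otherwise the initial devpts mount.
+ */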
+static inline struct super_block *pts_sb_from_inode(struct inode *inode)
+{
+#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
+	if (inode->i_sb->s_magic == DEVPTS_SUPER_MAGIC)
+		return inode->i_sb;
+#endif
+	return devpts_mnt->mnt_sb;
+}
+
+#define PARSE_MOUNT	0
+#define PARSE_REMOUNT	1
+
+static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts)
 {
 	char *p;
 
-	config.setuid  = 0;
-	config.setgid  = 0;
-	config.uid     = 0;
-	config.gid     = 0;
-	config.mode    = DEVPTS_DEFAULT_MODE;
+	opts->setuid  = 0;
+	opts->setgid  = 0;
+	opts->uid     = 0;
+	opts->gid     = 0;
+	opts->mode    = DEVPTS_DEFAULT_MODE;
+	opts->ptmxmode = DEVPTS_DEFAULT_PTMX_MODE;
+
+	/* newinstance makes sense only on initial mount */
+	if (op == PARSE_MOUNT)
+		opts->newinstance = 0;
 
 	while ((p = strsep(&data, ",")) != NULL) {
 		substring_t args[MAX_OPT_ARGS];
@@ -79,20 +118,32 @@
 		case Opt_uid:
 			if (match_int(&args[0], &option))
 				return -EINVAL;
-			config.uid = option;
-			config.setuid = 1;
+			opts->uid = option;
+			opts->setuid = 1;
 			break;
 		case Opt_gid:
 			if (match_int(&args[0], &option))
 				return -EINVAL;
-			config.gid = option;
-			config.setgid = 1;
+			opts->gid = option;
+			opts->setgid = 1;
 			break;
 		case Opt_mode:
 			if (match_octal(&args[0], &option))
 				return -EINVAL;
-			config.mode = option & S_IALLUGO;
+			opts->mode = option & S_IALLUGO;
 			break;
+#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
+		case Opt_ptmxmode:
+			if (match_octal(&args[0], &option))
+				return -EINVAL;
+			opts->ptmxmode = option & S_IALLUGO;
+			break;
+		case Opt_newinstance:
+			/* newinstance makes sense only on initial mount */
+			if (op == PARSE_MOUNT)
+				opts->newinstance = 1;
+			break;
+#endif
 		default:
 			printk(KERN_ERR "devpts: called with bogus options\n");
 			return -EINVAL;
@@ -102,13 +153,108 @@
 	return 0;
 }
 
+#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
+static int mknod_ptmx(struct super_block *sb)
+{
+	int mode;
+	int rc = -ENOMEM;
+	struct dentry *dentry;
+	struct inode *inode;
+	struct dentry *root = sb->s_root;
+	struct pts_fs_info *fsi = DEVPTS_SB(sb);
+	struct pts_mount_opts *opts = &fsi->mount_opts;
+
+	mutex_lock(&root->d_inode->i_mutex);
+
+	/* If we have already created ptmx node, return */
+	if (fsi->ptmx_dentry) {
+		rc = 0;
+		goto out;
+	}
+
+	dentry = d_alloc_name(root, "ptmx");
+	if (!dentry) {
+		printk(KERN_NOTICE "Unable to alloc dentry for ptmx node\n");
+		goto out;
+	}
+
+	/*
+	 * Create a new 'ptmx' node in this mount of devpts.
+	 */
+	inode = new_inode(sb);
+	if (!inode) {
+		printk(KERN_ERR "Unable to alloc inode for ptmx node\n");
+		dput(dentry);
+		goto out;
+	}
+
+	inode->i_ino = 2;
+	inode->i_uid = inode->i_gid = 0;
+	inode->i_blocks = 0;
+	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+
+	mode = S_IFCHR|opts->ptmxmode;
+	init_special_inode(inode, mode, MKDEV(TTYAUX_MAJOR, 2));
+
+	d_add(dentry, inode);
+
+	fsi->ptmx_dentry = dentry;
+	rc = 0;
+
+	printk(KERN_DEBUG "Created ptmx node in devpts ino %lu\n",
+			inode->i_ino);
+out:
+	mutex_unlock(&root->d_inode->i_mutex);
+	return rc;
+}
+
+static void update_ptmx_mode(struct pts_fs_info *fsi)
+{
+	struct inode *inode;
+	if (fsi->ptmx_dentry) {
+		inode = fsi->ptmx_dentry->d_inode;
+		inode->i_mode = S_IFCHR|fsi->mount_opts.ptmxmode;
+	}
+}
+#else
+static inline void update_ptmx_mode(struct pts_fs_info *fsi)
+{
+	return;
+}
+#endif
+
+static int devpts_remount(struct super_block *sb, int *flags, char *data)
+{
+	int err;
+	struct pts_fs_info *fsi = DEVPTS_SB(sb);
+	struct pts_mount_opts *opts = &fsi->mount_opts;
+
+	err = parse_mount_options(data, PARSE_REMOUNT, opts);
+
+	/*
+	 * parse_mount_options() restores options to default values
+	 * before parsing and may have changed ptmxmode. So, update the
+	 * mode in the inode too. Bogus options don't fail the remount,
+	 * so do this even on error return.
+	 */
+	update_ptmx_mode(fsi);
+
+	return err;
+}
+
 static int devpts_show_options(struct seq_file *seq, struct vfsmount *vfs)
 {
-	if (config.setuid)
-		seq_printf(seq, ",uid=%u", config.uid);
-	if (config.setgid)
-		seq_printf(seq, ",gid=%u", config.gid);
-	seq_printf(seq, ",mode=%03o", config.mode);
+	struct pts_fs_info *fsi = DEVPTS_SB(vfs->mnt_sb);
+	struct pts_mount_opts *opts = &fsi->mount_opts;
+
+	if (opts->setuid)
+		seq_printf(seq, ",uid=%u", opts->uid);
+	if (opts->setgid)
+		seq_printf(seq, ",gid=%u", opts->gid);
+	seq_printf(seq, ",mode=%03o", opts->mode);
+#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
+	seq_printf(seq, ",ptmxmode=%03o", opts->ptmxmode);
+#endif
 
 	return 0;
 }
@@ -119,10 +265,25 @@
 	.show_options	= devpts_show_options,
 };
 
+static void *new_pts_fs_info(void)
+{
+	struct pts_fs_info *fsi;
+
+	fsi = kzalloc(sizeof(struct pts_fs_info), GFP_KERNEL);
+	if (!fsi)
+		return NULL;
+
+	ida_init(&fsi->allocated_ptys);
+	fsi->mount_opts.mode = DEVPTS_DEFAULT_MODE;
+	fsi->mount_opts.ptmxmode = DEVPTS_DEFAULT_PTMX_MODE;
+
+	return fsi;
+}
+
 static int
 devpts_fill_super(struct super_block *s, void *data, int silent)
 {
-	struct inode * inode;
+	struct inode *inode;
 
 	s->s_blocksize = 1024;
 	s->s_blocksize_bits = 10;
@@ -130,9 +291,13 @@
 	s->s_op = &devpts_sops;
 	s->s_time_gran = 1;
 
+	s->s_fs_info = new_pts_fs_info();
+	if (!s->s_fs_info)
+		goto fail;
+
 	inode = new_inode(s);
 	if (!inode)
-		goto fail;
+		goto free_fsi;
 	inode->i_ino = 1;
 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
 	inode->i_blocks = 0;
@@ -142,27 +307,226 @@
 	inode->i_fop = &simple_dir_operations;
 	inode->i_nlink = 2;
 
-	devpts_root = s->s_root = d_alloc_root(inode);
+	s->s_root = d_alloc_root(inode);
 	if (s->s_root)
 		return 0;
-	
-	printk("devpts: get root dentry failed\n");
+
+	printk(KERN_ERR "devpts: get root dentry failed\n");
 	iput(inode);
+
+free_fsi:
+	kfree(s->s_fs_info);
 fail:
 	return -ENOMEM;
 }
 
+#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
+static int compare_init_pts_sb(struct super_block *s, void *p)
+{
+	if (devpts_mnt)
+		return devpts_mnt->mnt_sb == s;
+	return 0;
+}
+
+/*
+ * Safely parse the mount options in @data and update @opts.
+ *
+ * devpts ends up parsing options two times during mount, due to the
+ * two modes of operation it supports. The first parse occurs in
+ * devpts_get_sb() when determining the mode (single-instance or
+ * multi-instance mode). The second parse happens in devpts_remount()
+ * or new_pts_mount() depending on the mode.
+ *
+ * Parsing the options modifies @data, making any subsequent parse of
+ * the same buffer incorrect. So make a local copy of @data and parse that.
+ *
+ * Return: 0 On success, -errno on error
+ */
+static int safe_parse_mount_options(void *data, struct pts_mount_opts *opts)
+{
+	int rc;
+	void *datacp;
+
+	if (!data)
+		return 0;
+
+	/* Use kstrdup() ?  */
+	datacp = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!datacp)
+		return -ENOMEM;
+
+	memcpy(datacp, data, PAGE_SIZE);
+	rc = parse_mount_options((char *)datacp, PARSE_MOUNT, opts);
+	kfree(datacp);
+
+	return rc;
+}
+
+/*
+ * Mount a new (private) instance of devpts.  PTYs created in this
+ * instance are independent of the PTYs in other devpts instances.
+ */
+static int new_pts_mount(struct file_system_type *fs_type, int flags,
+		void *data, struct vfsmount *mnt)
+{
+	int err;
+	struct pts_fs_info *fsi;
+	struct pts_mount_opts *opts;
+
+	printk(KERN_NOTICE "devpts: newinstance mount\n");
+
+	err = get_sb_nodev(fs_type, flags, data, devpts_fill_super, mnt);
+	if (err)
+		return err;
+
+	fsi = DEVPTS_SB(mnt->mnt_sb);
+	opts = &fsi->mount_opts;
+
+	err = parse_mount_options(data, PARSE_MOUNT, opts);
+	if (err)
+		goto fail;
+
+	err = mknod_ptmx(mnt->mnt_sb);
+	if (err)
+		goto fail;
+
+	return 0;
+
+fail:
+	dput(mnt->mnt_sb->s_root);
+	deactivate_super(mnt->mnt_sb);
+	return err;
+}
+
+/*
+ * Check if 'newinstance' mount option was specified in @data.
+ *
+ * Return: -errno  	on error (e.g. invalid mount options specified)
+ * 	 : 1 		if 'newinstance' mount option was specified
+ * 	 : 0 		if 'newinstance' mount option was NOT specified
+ */
+static int is_new_instance_mount(void *data)
+{
+	int rc;
+	struct pts_mount_opts opts;
+
+	if (!data)
+		return 0;
+
+	rc = safe_parse_mount_options(data, &opts);
+	if (!rc)
+		rc = opts.newinstance;
+
+	return rc;
+}
+
+/*
+ * get_init_pts_sb()
+ *
+ *     This interface is needed to support multiple namespace semantics in
+ *     devpts while preserving backward compatibility of the current 'single-
+ *     namespace' semantics, i.e. all mounts of devpts without the 'newinstance'
+ *     mount option should bind to the initial kernel mount, like
+ *     get_sb_single().
+ *
+ *     Mounts with 'newinstance' option create a new private namespace.
+ *
+ *     But for single-mount semantics, devpts cannot use get_sb_single(),
+ *     because get_sb_single()/sget() find and use the super-block from
+ *     the most recent mount of devpts. But that recent mount may be a
+ *     'newinstance' mount and get_sb_single() would pick the newinstance
+ *     super-block instead of the initial super-block.
+ *
+ *     This interface is identical to get_sb_single() except that it
+ *     consistently selects the 'single-namespace' superblock even in the
+ *     presence of the private namespace (i.e. 'newinstance') super-blocks.
+ */
+static int get_init_pts_sb(struct file_system_type *fs_type, int flags,
+		void *data, struct vfsmount *mnt)
+{
+	struct super_block *s;
+	int error;
+
+	s = sget(fs_type, compare_init_pts_sb, set_anon_super, NULL);
+	if (IS_ERR(s))
+		return PTR_ERR(s);
+
+	if (!s->s_root) {
+		s->s_flags = flags;
+		error = devpts_fill_super(s, data, flags & MS_SILENT ? 1 : 0);
+		if (error) {
+			up_write(&s->s_umount);
+			deactivate_super(s);
+			return error;
+		}
+		s->s_flags |= MS_ACTIVE;
+	}
+	do_remount_sb(s, flags, data, 0);
+	return simple_set_mnt(mnt, s);
+}
+
+/*
+ * Mount or remount the initial kernel mount of devpts. This type of
+ * mount maintains the legacy, single-instance semantics, while the
+ * kernel still allows multiple-instances.
+ */
+static int init_pts_mount(struct file_system_type *fs_type, int flags,
+		void *data, struct vfsmount *mnt)
+{
+	int err;
+
+	err = get_init_pts_sb(fs_type, flags, data, mnt);
+	if (err)
+		return err;
+
+	err = mknod_ptmx(mnt->mnt_sb);
+	if (err) {
+		dput(mnt->mnt_sb->s_root);
+		deactivate_super(mnt->mnt_sb);
+	}
+
+	return err;
+}
+
 static int devpts_get_sb(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
+	int new;
+
+	new = is_new_instance_mount(data);
+	if (new < 0)
+		return new;
+
+	if (new)
+		return new_pts_mount(fs_type, flags, data, mnt);
+
+	return init_pts_mount(fs_type, flags, data, mnt);
+}
+#else
+/*
+ * This supports only the legacy single-instance semantics (no
+ * multiple-instance semantics)
+ */
+static int devpts_get_sb(struct file_system_type *fs_type, int flags,
+		const char *dev_name, void *data, struct vfsmount *mnt)
+{
 	return get_sb_single(fs_type, flags, data, devpts_fill_super, mnt);
 }
+#endif
+
+static void devpts_kill_sb(struct super_block *sb)
+{
+	struct pts_fs_info *fsi = DEVPTS_SB(sb);
+
+	kfree(fsi);
+	kill_litter_super(sb);
+}
 
 static struct file_system_type devpts_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "devpts",
 	.get_sb		= devpts_get_sb,
-	.kill_sb	= kill_anon_super,
+	.kill_sb	= devpts_kill_sb,
 };
 
 /*
@@ -172,16 +536,17 @@
 
 int devpts_new_index(struct inode *ptmx_inode)
 {
+	struct super_block *sb = pts_sb_from_inode(ptmx_inode);
+	struct pts_fs_info *fsi = DEVPTS_SB(sb);
 	int index;
 	int ida_ret;
 
 retry:
-	if (!ida_pre_get(&allocated_ptys, GFP_KERNEL)) {
+	if (!ida_pre_get(&fsi->allocated_ptys, GFP_KERNEL))
 		return -ENOMEM;
-	}
 
 	mutex_lock(&allocated_ptys_lock);
-	ida_ret = ida_get_new(&allocated_ptys, &index);
+	ida_ret = ida_get_new(&fsi->allocated_ptys, &index);
 	if (ida_ret < 0) {
 		mutex_unlock(&allocated_ptys_lock);
 		if (ida_ret == -EAGAIN)
@@ -190,7 +555,7 @@
 	}
 
 	if (index >= pty_limit) {
-		ida_remove(&allocated_ptys, index);
+		ida_remove(&fsi->allocated_ptys, index);
 		mutex_unlock(&allocated_ptys_lock);
 		return -EIO;
 	}
@@ -200,18 +565,26 @@
 
 void devpts_kill_index(struct inode *ptmx_inode, int idx)
 {
+	struct super_block *sb = pts_sb_from_inode(ptmx_inode);
+	struct pts_fs_info *fsi = DEVPTS_SB(sb);
+
 	mutex_lock(&allocated_ptys_lock);
-	ida_remove(&allocated_ptys, idx);
+	ida_remove(&fsi->allocated_ptys, idx);
 	mutex_unlock(&allocated_ptys_lock);
 }
 
 int devpts_pty_new(struct inode *ptmx_inode, struct tty_struct *tty)
 {
-	int number = tty->index; /* tty layer puts index from devpts_new_index() in here */
+	/* tty layer puts index from devpts_new_index() in here */
+	int number = tty->index;
 	struct tty_driver *driver = tty->driver;
 	dev_t device = MKDEV(driver->major, driver->minor_start+number);
 	struct dentry *dentry;
-	struct inode *inode = new_inode(devpts_mnt->mnt_sb);
+	struct super_block *sb = pts_sb_from_inode(ptmx_inode);
+	struct inode *inode = new_inode(sb);
+	struct dentry *root = sb->s_root;
+	struct pts_fs_info *fsi = DEVPTS_SB(sb);
+	struct pts_mount_opts *opts = &fsi->mount_opts;
 	char s[12];
 
 	/* We're supposed to be given the slave end of a pty */
@@ -221,25 +594,25 @@
 	if (!inode)
 		return -ENOMEM;
 
-	inode->i_ino = number+2;
-	inode->i_uid = config.setuid ? config.uid : current_fsuid();
-	inode->i_gid = config.setgid ? config.gid : current_fsgid();
+	inode->i_ino = number + 3;
+	inode->i_uid = opts->setuid ? opts->uid : current_fsuid();
+	inode->i_gid = opts->setgid ? opts->gid : current_fsgid();
 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
-	init_special_inode(inode, S_IFCHR|config.mode, device);
+	init_special_inode(inode, S_IFCHR|opts->mode, device);
 	inode->i_private = tty;
 	tty->driver_data = inode;
 
 	sprintf(s, "%d", number);
 
-	mutex_lock(&devpts_root->d_inode->i_mutex);
+	mutex_lock(&root->d_inode->i_mutex);
 
-	dentry = d_alloc_name(devpts_root, s);
+	dentry = d_alloc_name(root, s);
 	if (!IS_ERR(dentry)) {
 		d_add(dentry, inode);
-		fsnotify_create(devpts_root->d_inode, dentry);
+		fsnotify_create(root->d_inode, dentry);
 	}
 
-	mutex_unlock(&devpts_root->d_inode->i_mutex);
+	mutex_unlock(&root->d_inode->i_mutex);
 
 	return 0;
 }
@@ -256,20 +629,27 @@
 void devpts_pty_kill(struct tty_struct *tty)
 {
 	struct inode *inode = tty->driver_data;
+	struct super_block *sb = pts_sb_from_inode(inode);
+	struct dentry *root = sb->s_root;
 	struct dentry *dentry;
 
 	BUG_ON(inode->i_rdev == MKDEV(TTYAUX_MAJOR, PTMX_MINOR));
 
-	mutex_lock(&devpts_root->d_inode->i_mutex);
+	mutex_lock(&root->d_inode->i_mutex);
 
 	dentry = d_find_alias(inode);
-	if (dentry && !IS_ERR(dentry)) {
+	if (IS_ERR(dentry))
+		goto out;
+
+	if (dentry) {
 		inode->i_nlink--;
 		d_delete(dentry);
-		dput(dentry);
+		dput(dentry);	/* d_alloc_name() in devpts_pty_new() */
 	}
 
-	mutex_unlock(&devpts_root->d_inode->i_mutex);
+	dput(dentry);		/* d_find_alias above */
+out:
+	mutex_unlock(&root->d_inode->i_mutex);
 }
 
 static int __init init_devpts_fs(void)
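
As an aside, not part of this patch: once CONFIG_DEVPTS_MULTIPLE_INSTANCES is
enabled, the new 'newinstance' option is what userspace (e.g. a container
manager) passes to mount(2) to get a private pty index space and ptmx node.
A minimal sketch, assuming /dev/pts is the desired mount point and 0666 is an
acceptable ptmx mode:

	#include <stdio.h>
	#include <sys/mount.h>

	int main(void)
	{
		/* Each 'newinstance' mount is independent of all other devpts mounts. */
		if (mount("devpts", "/dev/pts", "devpts", 0,
			  "newinstance,ptmxmode=0666") < 0) {
			perror("mount devpts");
			return 1;
		}
		return 0;
	}

Mounts made without 'newinstance' still bind to the initial kernel mount via
init_pts_mount(), so existing setups keep the legacy single-instance behaviour.
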
diff --git a/include/asm-generic/topology.h b/include/asm-generic/topology.h
index 54bbf6e..0e9e2bc 100644
--- a/include/asm-generic/topology.h
+++ b/include/asm-generic/topology.h
@@ -40,6 +40,9 @@
 #ifndef node_to_cpumask
 #define node_to_cpumask(node)	((void)node, cpu_online_map)
 #endif
+#ifndef cpumask_of_node
+#define cpumask_of_node(node)	((void)node, cpu_online_mask)
+#endif
 #ifndef node_to_first_cpu
 #define node_to_first_cpu(node)	((void)(node),0)
 #endif
@@ -54,9 +57,18 @@
 				)
 #endif
 
+#ifndef cpumask_of_pcibus
+#define cpumask_of_pcibus(bus)	(pcibus_to_node(bus) == -1 ?		\
+				 cpu_all_mask :				\
+				 cpumask_of_node(pcibus_to_node(bus)))
+#endif
+
 #endif	/* CONFIG_NUMA */
 
-/* returns pointer to cpumask for specified node */
+/*
+ * returns pointer to cpumask for specified node
+ * Deprecated: use "const struct cpumask *mask = cpumask_of_node(node)"
+ */
 #ifndef node_to_cpumask_ptr
 
 #define	node_to_cpumask_ptr(v, node) 					\
diff --git a/include/asm-m32r/smp.h b/include/asm-m32r/smp.h
index c5dd669..b96a6d2 100644
--- a/include/asm-m32r/smp.h
+++ b/include/asm-m32r/smp.h
@@ -63,8 +63,6 @@
 #define raw_smp_processor_id()	(current_thread_info()->cpu)
 
 extern cpumask_t cpu_callout_map;
-extern cpumask_t cpu_possible_map;
-extern cpumask_t cpu_present_map;
 
 static __inline__ int hard_smp_processor_id(void)
 {
diff --git a/include/linux/8250_pci.h b/include/linux/8250_pci.h
index 3209dd4..b24ff08 100644
--- a/include/linux/8250_pci.h
+++ b/include/linux/8250_pci.h
@@ -31,7 +31,7 @@
 struct serial_private;
 
 struct serial_private *
-pciserial_init_ports(struct pci_dev *dev, struct pciserial_board *board);
+pciserial_init_ports(struct pci_dev *dev, const struct pciserial_board *board);
 void pciserial_remove_ports(struct serial_private *priv);
 void pciserial_suspend_ports(struct serial_private *priv);
 void pciserial_resume_ports(struct serial_private *priv);
diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index ed3a5d4..cea1536 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -82,13 +82,13 @@
 	int			shift;
 	int			rating;
 	int			irq;
-	cpumask_t		cpumask;
+	const struct cpumask	*cpumask;
 	int			(*set_next_event)(unsigned long evt,
 						  struct clock_event_device *);
 	void			(*set_mode)(enum clock_event_mode mode,
 					    struct clock_event_device *);
 	void			(*event_handler)(struct clock_event_device *);
-	void			(*broadcast)(cpumask_t mask);
+	void			(*broadcast)(const struct cpumask *mask);
 	struct list_head	list;
 	enum clock_event_mode	mode;
 	ktime_t			next_event;
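
With ->cpumask now a pointer, a per-cpu tick driver points it at the static
per-cpu mask instead of copying a cpumask_t into every clock_event_device.
A hedged sketch of the registration side (the function name is illustrative;
only the two calls shown are kernel API):

	static void example_register_timer(struct clock_event_device *evt, int cpu)
	{
		evt->cpumask = cpumask_of(cpu);		/* pointer assignment, no copy */
		clockevents_register_device(evt);
	}
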
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 5c8351b..af40f8e 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -61,3 +61,8 @@
 #define  noinline			__attribute__((noinline))
 #define __attribute_const__		__attribute__((__const__))
 #define __maybe_unused			__attribute__((unused))
+
+#define __gcc_header(x) #x
+#define _gcc_header(x) __gcc_header(linux/compiler-gcc##x.h)
+#define gcc_header(x) _gcc_header(x)
+#include gcc_header(__GNUC__)
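
The two levels of indirection above exist so that __GNUC__ is expanded to its
numeric value before it is pasted and stringized; compiler.h then needs no
explicit version ladder. An illustrative expansion for a GCC 4 build:

	/*
	 * #include gcc_header(__GNUC__)
	 *   -> _gcc_header(4)                       __GNUC__ expands first
	 *   -> __gcc_header(linux/compiler-gcc4.h)  ## pastes "gcc" and "4"
	 *   -> #include "linux/compiler-gcc4.h"     # stringizes the path
	 */
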
diff --git a/include/linux/compiler-gcc3.h b/include/linux/compiler-gcc3.h
index e5eb795..2befe65 100644
--- a/include/linux/compiler-gcc3.h
+++ b/include/linux/compiler-gcc3.h
@@ -2,9 +2,6 @@
 #error "Please don't include <linux/compiler-gcc3.h> directly, include <linux/compiler.h> instead."
 #endif
 
-/* These definitions are for GCC v3.x.  */
-#include <linux/compiler-gcc.h>
-
 #if __GNUC_MINOR__ >= 3
 # define __used			__attribute__((__used__))
 #else
diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h
index 974f5b7..0999271 100644
--- a/include/linux/compiler-gcc4.h
+++ b/include/linux/compiler-gcc4.h
@@ -2,8 +2,10 @@
 #error "Please don't include <linux/compiler-gcc4.h> directly, include <linux/compiler.h> instead."
 #endif
 
-/* These definitions are for GCC v4.x.  */
-#include <linux/compiler-gcc.h>
+/* GCC 4.1.[01] miscompiles __weak */
+#if __GNUC_MINOR__ == 1 && __GNUC_PATCHLEVEL__ <= 1
+# error Your version of gcc miscompiles the __weak directive
+#endif
 
 #define __used			__attribute__((__used__))
 #define __must_check 		__attribute__((warn_unused_result))
@@ -16,7 +18,7 @@
  */
 #define uninitialized_var(x) x = x
 
-#if !(__GNUC__ == 4 && __GNUC_MINOR__ < 3)
+#if __GNUC_MINOR__ >= 3
 /* Mark functions as cold. gcc will assume any path leading to a call
    to them will be unlikely.  This means a lot of manual unlikely()s
    are unnecessary now for any paths leading to the usual suspects
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index ea7c6be..d95da10 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -36,12 +36,8 @@
 
 #ifdef __KERNEL__
 
-#if __GNUC__ >= 4
-# include <linux/compiler-gcc4.h>
-#elif __GNUC__ == 3 && __GNUC_MINOR__ >= 2
-# include <linux/compiler-gcc3.h>
-#else
-# error Sorry, your compiler is too old/not recognized.
+#ifdef __GNUC__
+#include <linux/compiler-gcc.h>
 #endif
 
 #define notrace __attribute__((no_instrument_function))
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 21e1dd4..d4bf526 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -339,36 +339,6 @@
 #endif
 #define	CPUMASK_PTR(v, m) 	cpumask_t *v = &(m->v)
 
-#define cpumask_scnprintf(buf, len, src) \
-			__cpumask_scnprintf((buf), (len), &(src), NR_CPUS)
-static inline int __cpumask_scnprintf(char *buf, int len,
-					const cpumask_t *srcp, int nbits)
-{
-	return bitmap_scnprintf(buf, len, srcp->bits, nbits);
-}
-
-#define cpumask_parse_user(ubuf, ulen, dst) \
-			__cpumask_parse_user((ubuf), (ulen), &(dst), NR_CPUS)
-static inline int __cpumask_parse_user(const char __user *buf, int len,
-					cpumask_t *dstp, int nbits)
-{
-	return bitmap_parse_user(buf, len, dstp->bits, nbits);
-}
-
-#define cpulist_scnprintf(buf, len, src) \
-			__cpulist_scnprintf((buf), (len), &(src), NR_CPUS)
-static inline int __cpulist_scnprintf(char *buf, int len,
-					const cpumask_t *srcp, int nbits)
-{
-	return bitmap_scnlistprintf(buf, len, srcp->bits, nbits);
-}
-
-#define cpulist_parse(buf, dst) __cpulist_parse((buf), &(dst), NR_CPUS)
-static inline int __cpulist_parse(const char *buf, cpumask_t *dstp, int nbits)
-{
-	return bitmap_parselist(buf, dstp->bits, nbits);
-}
-
 #define cpu_remap(oldbit, old, new) \
 		__cpu_remap((oldbit), &(old), &(new), NR_CPUS)
 static inline int __cpu_remap(int oldbit,
@@ -540,9 +510,6 @@
 	[BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD	\
 }
 
-/* This produces more efficient code. */
-#define nr_cpumask_bits	NR_CPUS
-
 #else /* NR_CPUS > BITS_PER_LONG */
 
 #define CPU_BITS_ALL						\
@@ -550,10 +517,16 @@
 	[0 ... BITS_TO_LONGS(NR_CPUS)-2] = ~0UL,		\
 	[BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD		\
 }
-
-#define nr_cpumask_bits	nr_cpu_ids
 #endif /* NR_CPUS > BITS_PER_LONG */
 
+#ifdef CONFIG_CPUMASK_OFFSTACK
+/* Assuming NR_CPUS is huge, a runtime limit is more efficient.  Also,
+ * not all bits may be allocated. */
+#define nr_cpumask_bits	nr_cpu_ids
+#else
+#define nr_cpumask_bits	NR_CPUS
+#endif
+
 /* verify cpu argument to cpumask_* operators */
 static inline unsigned int cpumask_check(unsigned int cpu)
 {
@@ -946,6 +919,63 @@
 #define cpumask_of(cpu) (get_cpu_mask(cpu))
 
 /**
+ * cpumask_scnprintf - print a cpumask into a string as comma-separated hex
+ * @buf: the buffer to sprintf into
+ * @len: the length of the buffer
+ * @srcp: the cpumask to print
+ *
+ * If len is zero, returns zero.  Otherwise returns the length of the
+ * (nul-terminated) @buf string.
+ */
+static inline int cpumask_scnprintf(char *buf, int len,
+				    const struct cpumask *srcp)
+{
+	return bitmap_scnprintf(buf, len, srcp->bits, nr_cpumask_bits);
+}
+
+/**
+ * cpumask_parse_user - extract a cpumask from a user string
+ * @buf: the buffer to extract from
+ * @len: the length of the buffer
+ * @dstp: the cpumask to set.
+ *
+ * Returns -errno, or 0 for success.
+ */
+static inline int cpumask_parse_user(const char __user *buf, int len,
+				     struct cpumask *dstp)
+{
+	return bitmap_parse_user(buf, len, dstp->bits, nr_cpumask_bits);
+}
+
+/**
+ * cpulist_scnprintf - print a cpumask into a string as comma-separated list
+ * @buf: the buffer to sprintf into
+ * @len: the length of the buffer
+ * @srcp: the cpumask to print
+ *
+ * If len is zero, returns zero.  Otherwise returns the length of the
+ * (nul-terminated) @buf string.
+ */
+static inline int cpulist_scnprintf(char *buf, int len,
+				    const struct cpumask *srcp)
+{
+	return bitmap_scnlistprintf(buf, len, srcp->bits, nr_cpumask_bits);
+}
+
+/**
+ * cpulist_parse - extract a cpumask from a string of ranges
+ * @buf: the buffer to extract from
+ * @dstp: the cpumask to set.
+ *
+ * Returns -errno, or 0 for success.
+ */
+static inline int cpulist_parse(const char *buf, struct cpumask *dstp)
+{
+	return bitmap_parselist(buf, dstp->bits, nr_cpumask_bits);
+}
+
+/**
  * to_cpumask - convert an NR_CPUS bitmap to a struct cpumask *
  * @bitmap: the bitmap
  *
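
The replacements above take struct cpumask pointers and are bounded by
nr_cpumask_bits rather than NR_CPUS. A minimal usage sketch (the buffer size
and the "0-3,8" cpu list are illustrative):

	char buf[128];
	cpumask_var_t mask;

	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;

	if (cpulist_parse("0-3,8", mask) == 0)			/* ranges -> bitmap */
		cpumask_scnprintf(buf, sizeof(buf), mask);	/* bitmap -> hex string */

	free_cpumask_var(mask);

cpumask_var_t is either a one-element array or a heap pointer depending on
CONFIG_CPUMASK_OFFSTACK, so it passes cleanly to the const struct cpumask *
parameters in both configurations.
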
diff --git a/include/linux/generic_serial.h b/include/linux/generic_serial.h
index 4cc9139..fadff28 100644
--- a/include/linux/generic_serial.h
+++ b/include/linux/generic_serial.h
@@ -21,7 +21,6 @@
   void                    (*enable_tx_interrupts) (void *);
   void                    (*disable_rx_interrupts) (void *);
   void                    (*enable_rx_interrupts) (void *);
-  int                     (*get_CD) (void *);
   void                    (*shutdown_port) (void*);
   int                     (*set_real_termios) (void*);
   int                     (*chars_in_buffer) (void*);
diff --git a/include/linux/ide.h b/include/linux/ide.h
index e99c56d..db5ef8a 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -32,13 +32,6 @@
 # define SUPPORT_VLB_SYNC 1
 #endif
 
-/*
- * Used to indicate "no IRQ", should be a value that cannot be an IRQ
- * number.
- */
- 
-#define IDE_NO_IRQ		(-1)
-
 typedef unsigned char	byte;	/* used everywhere */
 
 /*
@@ -403,6 +396,7 @@
  * This is used for several packet commands (not for READ/WRITE commands).
  */
 #define IDE_PC_BUFFER_SIZE	256
+#define ATAPI_WAIT_PC		(60 * HZ)
 
 struct ide_atapi_pc {
 	/* actual packet bytes */
@@ -480,53 +474,53 @@
 
 	/* ide-cd */
 	/* Drive cannot eject the disc. */
-	IDE_AFLAG_NO_EJECT		= (1 << 3),
+	IDE_AFLAG_NO_EJECT		= (1 << 1),
 	/* Drive is a pre ATAPI 1.2 drive. */
-	IDE_AFLAG_PRE_ATAPI12		= (1 << 4),
+	IDE_AFLAG_PRE_ATAPI12		= (1 << 2),
 	/* TOC addresses are in BCD. */
-	IDE_AFLAG_TOCADDR_AS_BCD	= (1 << 5),
+	IDE_AFLAG_TOCADDR_AS_BCD	= (1 << 3),
 	/* TOC track numbers are in BCD. */
-	IDE_AFLAG_TOCTRACKS_AS_BCD	= (1 << 6),
+	IDE_AFLAG_TOCTRACKS_AS_BCD	= (1 << 4),
 	/*
 	 * Drive does not provide data in multiples of SECTOR_SIZE
 	 * when more than one interrupt is needed.
 	 */
-	IDE_AFLAG_LIMIT_NFRAMES		= (1 << 7),
+	IDE_AFLAG_LIMIT_NFRAMES		= (1 << 5),
 	/* Saved TOC information is current. */
-	IDE_AFLAG_TOC_VALID		= (1 << 9),
+	IDE_AFLAG_TOC_VALID		= (1 << 6),
 	/* We think that the drive door is locked. */
-	IDE_AFLAG_DOOR_LOCKED		= (1 << 10),
+	IDE_AFLAG_DOOR_LOCKED		= (1 << 7),
 	/* SET_CD_SPEED command is unsupported. */
-	IDE_AFLAG_NO_SPEED_SELECT	= (1 << 11),
-	IDE_AFLAG_VERTOS_300_SSD	= (1 << 12),
-	IDE_AFLAG_VERTOS_600_ESD	= (1 << 13),
-	IDE_AFLAG_SANYO_3CD		= (1 << 14),
-	IDE_AFLAG_FULL_CAPS_PAGE	= (1 << 15),
-	IDE_AFLAG_PLAY_AUDIO_OK		= (1 << 16),
-	IDE_AFLAG_LE_SPEED_FIELDS	= (1 << 17),
+	IDE_AFLAG_NO_SPEED_SELECT	= (1 << 8),
+	IDE_AFLAG_VERTOS_300_SSD	= (1 << 9),
+	IDE_AFLAG_VERTOS_600_ESD	= (1 << 10),
+	IDE_AFLAG_SANYO_3CD		= (1 << 11),
+	IDE_AFLAG_FULL_CAPS_PAGE	= (1 << 12),
+	IDE_AFLAG_PLAY_AUDIO_OK		= (1 << 13),
+	IDE_AFLAG_LE_SPEED_FIELDS	= (1 << 14),
 
 	/* ide-floppy */
 	/* Avoid commands not supported in Clik drive */
-	IDE_AFLAG_CLIK_DRIVE		= (1 << 19),
+	IDE_AFLAG_CLIK_DRIVE		= (1 << 15),
 	/* Requires BH algorithm for packets */
-	IDE_AFLAG_ZIP_DRIVE		= (1 << 20),
+	IDE_AFLAG_ZIP_DRIVE		= (1 << 16),
 	/* Supports format progress report */
-	IDE_AFLAG_SRFP			= (1 << 22),
+	IDE_AFLAG_SRFP			= (1 << 17),
 
 	/* ide-tape */
-	IDE_AFLAG_IGNORE_DSC		= (1 << 23),
+	IDE_AFLAG_IGNORE_DSC		= (1 << 18),
 	/* 0 When the tape position is unknown */
-	IDE_AFLAG_ADDRESS_VALID		= (1 <<	24),
+	IDE_AFLAG_ADDRESS_VALID		= (1 <<	19),
 	/* Device already opened */
-	IDE_AFLAG_BUSY			= (1 << 25),
+	IDE_AFLAG_BUSY			= (1 << 20),
 	/* Attempt to auto-detect the current user block size */
-	IDE_AFLAG_DETECT_BS		= (1 << 26),
+	IDE_AFLAG_DETECT_BS		= (1 << 21),
 	/* Currently on a filemark */
-	IDE_AFLAG_FILEMARK		= (1 << 27),
+	IDE_AFLAG_FILEMARK		= (1 << 22),
 	/* 0 = no tape is loaded, so we don't rewind after ejecting */
-	IDE_AFLAG_MEDIUM_PRESENT	= (1 << 28),
+	IDE_AFLAG_MEDIUM_PRESENT	= (1 << 23),
 
-	IDE_AFLAG_NO_AUTOCLOSE		= (1 << 29),
+	IDE_AFLAG_NO_AUTOCLOSE		= (1 << 24),
 };
 
 /* device flags */
@@ -565,28 +559,26 @@
 	IDE_DFLAG_NODMA			= (1 << 16),
 	/* powermanagment told us not to do anything, so sleep nicely */
 	IDE_DFLAG_BLOCKED		= (1 << 17),
-	/* ide-scsi emulation */
-	IDE_DFLAG_SCSI			= (1 << 18),
 	/* sleeping & sleep field valid */
-	IDE_DFLAG_SLEEPING		= (1 << 19),
-	IDE_DFLAG_POST_RESET		= (1 << 20),
-	IDE_DFLAG_UDMA33_WARNED		= (1 << 21),
-	IDE_DFLAG_LBA48			= (1 << 22),
+	IDE_DFLAG_SLEEPING		= (1 << 18),
+	IDE_DFLAG_POST_RESET		= (1 << 19),
+	IDE_DFLAG_UDMA33_WARNED		= (1 << 20),
+	IDE_DFLAG_LBA48			= (1 << 21),
 	/* status of write cache */
-	IDE_DFLAG_WCACHE		= (1 << 23),
+	IDE_DFLAG_WCACHE		= (1 << 22),
 	/* used for ignoring ATA_DF */
-	IDE_DFLAG_NOWERR		= (1 << 24),
+	IDE_DFLAG_NOWERR		= (1 << 23),
 	/* retrying in PIO */
-	IDE_DFLAG_DMA_PIO_RETRY		= (1 << 25),
-	IDE_DFLAG_LBA			= (1 << 26),
+	IDE_DFLAG_DMA_PIO_RETRY		= (1 << 24),
+	IDE_DFLAG_LBA			= (1 << 25),
 	/* don't unload heads */
-	IDE_DFLAG_NO_UNLOAD		= (1 << 27),
+	IDE_DFLAG_NO_UNLOAD		= (1 << 26),
 	/* heads unloaded, please don't reset port */
-	IDE_DFLAG_PARKED		= (1 << 28),
-	IDE_DFLAG_MEDIA_CHANGED		= (1 << 29),
+	IDE_DFLAG_PARKED		= (1 << 27),
+	IDE_DFLAG_MEDIA_CHANGED		= (1 << 28),
 	/* write protect */
-	IDE_DFLAG_WP			= (1 << 30),
-	IDE_DFLAG_FORMAT_IN_PROGRESS	= (1 << 31),
+	IDE_DFLAG_WP			= (1 << 29),
+	IDE_DFLAG_FORMAT_IN_PROGRESS	= (1 << 30),
 };
 
 struct ide_drive_s {
@@ -610,8 +602,6 @@
 	unsigned long dev_flags;
 
 	unsigned long sleep;		/* sleep until this time */
-	unsigned long service_start;	/* time we started last request */
-	unsigned long service_time;	/* service time of last request */
 	unsigned long timeout;		/* max time to wait for irq */
 
 	special_t	special;	/* special action flags */
@@ -879,8 +869,6 @@
 
 		/* BOOL: protects all fields below */
 	volatile int busy;
-		/* BOOL: wake us up on timer expiry */
-	unsigned int sleeping	: 1;
 		/* BOOL: polling active & poll_timeout field valid */
 	unsigned int polling	: 1;
 
@@ -1258,14 +1246,11 @@
 void ide_create_request_sense_cmd(ide_drive_t *, struct ide_atapi_pc *);
 void ide_retry_pc(ide_drive_t *, struct gendisk *);
 
-static inline unsigned long ide_scsi_get_timeout(struct ide_atapi_pc *pc)
-{
-	return max_t(unsigned long, WAIT_CMD, pc->timeout - jiffies);
-}
+int ide_cd_expiry(ide_drive_t *);
 
-int ide_scsi_expiry(ide_drive_t *);
+int ide_cd_get_xferlen(struct request *);
 
-ide_startstop_t ide_issue_pc(ide_drive_t *, unsigned int, ide_expiry_t *);
+ide_startstop_t ide_issue_pc(ide_drive_t *);
 
 ide_startstop_t do_rw_taskfile(ide_drive_t *, ide_task_t *);
 
@@ -1287,6 +1272,26 @@
 
 extern void ide_timer_expiry(unsigned long);
 extern irqreturn_t ide_intr(int irq, void *dev_id);
+
+static inline int ide_lock_hwgroup(ide_hwgroup_t *hwgroup)
+{
+	if (hwgroup->busy)
+		return 1;
+
+	hwgroup->busy = 1;
+	/* for atari only */
+	ide_get_lock(ide_intr, hwgroup);
+
+	return 0;
+}
+
+static inline void ide_unlock_hwgroup(ide_hwgroup_t *hwgroup)
+{
+	/* for atari only */
+	ide_release_lock();
+	hwgroup->busy = 0;
+}
+
 extern void do_ide_request(struct request_queue *);
 
 void ide_init_disk(struct gendisk *, ide_drive_t *);
@@ -1533,6 +1538,7 @@
 void ide_undecoded_slave(ide_drive_t *);
 
 void ide_port_apply_params(ide_hwif_t *);
+int ide_sysfs_register_port(ide_hwif_t *);
 
 struct ide_host *ide_host_alloc(const struct ide_port_info *, hw_regs_t **);
 void ide_host_free(struct ide_host *);
@@ -1627,6 +1633,9 @@
 
 #define local_irq_set(flags)	do { local_save_flags((flags)); local_irq_enable_in_hardirq(); } while (0)
 
+char *ide_media_string(ide_drive_t *);
+
+extern struct device_attribute ide_dev_attrs[];
 extern struct bus_type ide_bus_type;
 extern struct class *ide_port_class;
 
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 8cc8ef4..990355f 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -111,13 +111,13 @@
 
 extern cpumask_t irq_default_affinity;
 
-extern int irq_set_affinity(unsigned int irq, cpumask_t cpumask);
+extern int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask);
 extern int irq_can_set_affinity(unsigned int irq);
 extern int irq_select_affinity(unsigned int irq);
 
 #else /* CONFIG_SMP */
 
-static inline int irq_set_affinity(unsigned int irq, cpumask_t cpumask)
+static inline int irq_set_affinity(unsigned int irq, const struct cpumask *m)
 {
 	return -EINVAL;
 }
diff --git a/include/linux/irq.h b/include/linux/irq.h
index d64a6d4..f899b50 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -113,7 +113,8 @@
 	void		(*eoi)(unsigned int irq);
 
 	void		(*end)(unsigned int irq);
-	void		(*set_affinity)(unsigned int irq, cpumask_t dest);
+	void		(*set_affinity)(unsigned int irq,
+					const struct cpumask *dest);
 	int		(*retrigger)(unsigned int irq);
 	int		(*set_type)(unsigned int irq, unsigned int flow_type);
 	int		(*set_wake)(unsigned int irq, unsigned int on);
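
Callers now pass the affinity mask by reference as well. A one-line sketch,
assuming irq and cpu hold valid values obtained elsewhere:

	err = irq_set_affinity(irq, cpumask_of(cpu));	/* no cpumask_t copied by value */
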
diff --git a/include/linux/istallion.h b/include/linux/istallion.h
index 0d18407..7faca98 100644
--- a/include/linux/istallion.h
+++ b/include/linux/istallion.h
@@ -59,9 +59,7 @@
 	unsigned int		devnr;
 	int			baud_base;
 	int			custom_divisor;
-	int			close_delay;
 	int			closing_wait;
-	int			openwaitcnt;
 	int			rc;
 	int			argsize;
 	void			*argp;
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index f18b86f..35525ac 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -83,6 +83,7 @@
 #define KVM_EXIT_S390_SIEIC       13
 #define KVM_EXIT_S390_RESET       14
 #define KVM_EXIT_DCR              15
+#define KVM_EXIT_NMI              16
 
 /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */
 struct kvm_run {
@@ -387,6 +388,14 @@
 #define KVM_CAP_DEVICE_ASSIGNMENT 17
 #endif
 #define KVM_CAP_IOMMU 18
+#if defined(CONFIG_X86)
+#define KVM_CAP_DEVICE_MSI 20
+#endif
+/* Bug in KVM_SET_USER_MEMORY_REGION fixed: */
+#define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21
+#if defined(CONFIG_X86)
+#define KVM_CAP_USER_NMI 22
+#endif
 
 /*
  * ioctls for VM fds
@@ -458,6 +467,8 @@
 #define KVM_S390_INITIAL_RESET    _IO(KVMIO,  0x97)
 #define KVM_GET_MP_STATE          _IOR(KVMIO,  0x98, struct kvm_mp_state)
 #define KVM_SET_MP_STATE          _IOW(KVMIO,  0x99, struct kvm_mp_state)
+/* Available with KVM_CAP_USER_NMI */
+#define KVM_NMI                   _IO(KVMIO,  0x9a)
 
 #define KVM_TRC_INJ_VIRQ         (KVM_TRC_HANDLER + 0x02)
 #define KVM_TRC_REDELIVER_EVT    (KVM_TRC_HANDLER + 0x03)
@@ -500,10 +511,17 @@
 	__u32 guest_irq;
 	__u32 flags;
 	union {
+		struct {
+			__u32 addr_lo;
+			__u32 addr_hi;
+			__u32 data;
+		} guest_msi;
 		__u32 reserved[12];
 	};
 };
 
 #define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
 
+#define KVM_DEV_IRQ_ASSIGN_ENABLE_MSI	(1 << 0)
+
 #endif
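
KVM_NMI is a vcpu ioctl and is only meaningful when the matching capability is
advertised. A hypothetical userspace sketch (kvm_fd and vcpu_fd are assumed to
be open descriptors for /dev/kvm and a vcpu):

	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* Inject an NMI into the guest vcpu if the kernel supports it. */
	if (ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_USER_NMI) > 0)
		ioctl(vcpu_fd, KVM_NMI);
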
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index bb92be2..eafabd5 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -16,6 +16,7 @@
 #include <linux/mm.h>
 #include <linux/preempt.h>
 #include <linux/marker.h>
+#include <linux/msi.h>
 #include <asm/signal.h>
 
 #include <linux/kvm.h>
@@ -306,8 +307,14 @@
 	int host_busnr;
 	int host_devfn;
 	int host_irq;
+	bool host_irq_disabled;
 	int guest_irq;
-	int irq_requested;
+	struct msi_msg guest_msi;
+#define KVM_ASSIGNED_DEV_GUEST_INTX	(1 << 0)
+#define KVM_ASSIGNED_DEV_GUEST_MSI	(1 << 1)
+#define KVM_ASSIGNED_DEV_HOST_INTX	(1 << 8)
+#define KVM_ASSIGNED_DEV_HOST_MSI	(1 << 9)
+	unsigned long irq_requested_type;
 	int irq_source_id;
 	struct pci_dev *dev;
 	struct kvm *kvm;
@@ -316,8 +323,7 @@
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi);
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
 				   struct kvm_irq_ack_notifier *kian);
-void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
-				     struct kvm_irq_ack_notifier *kian);
+void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian);
 int kvm_request_irq_source_id(struct kvm *kvm);
 void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
 
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index b6e6944..218c73b 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1766,6 +1766,7 @@
 #define PCI_DEVICE_ID_SIIG_8S_20x_650	0x2081
 #define PCI_DEVICE_ID_SIIG_8S_20x_850	0x2082
 #define PCI_SUBDEVICE_ID_SIIG_QUARTET_SERIAL	0x2050
+#define PCI_SUBDEVICE_ID_SIIG_DUAL_SERIAL	0x2530
 
 #define PCI_VENDOR_ID_RADISYS		0x1331
 
@@ -1795,6 +1796,7 @@
 #define PCI_DEVICE_ID_SEALEVEL_UCOMM232	0x7202
 #define PCI_DEVICE_ID_SEALEVEL_COMM4	0x7401
 #define PCI_DEVICE_ID_SEALEVEL_COMM8	0x7801
+#define PCI_DEVICE_ID_SEALEVEL_7803	0x7803
 #define PCI_DEVICE_ID_SEALEVEL_UCOMM8	0x7804
 
 #define PCI_VENDOR_ID_HYPERCOPE		0x1365
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8395e71..158d53d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -250,7 +250,7 @@
 extern int runqueue_is_locked(void);
 extern void task_rq_unlock_wait(struct task_struct *p);
 
-extern cpumask_t nohz_cpu_mask;
+extern cpumask_var_t nohz_cpu_mask;
 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ)
 extern int select_nohz_load_balancer(int cpu);
 #else
@@ -758,20 +758,51 @@
 #define SD_SERIALIZE		1024	/* Only a single load balancing instance */
 #define SD_WAKE_IDLE_FAR	2048	/* Gain latency sacrificing cache hit */
 
-#define BALANCE_FOR_MC_POWER	\
-	(sched_smt_power_savings ? SD_POWERSAVINGS_BALANCE : 0)
+enum powersavings_balance_level {
+	POWERSAVINGS_BALANCE_NONE = 0,  /* No power saving load balance */
+	POWERSAVINGS_BALANCE_BASIC,	/* Fill one thread/core/package
+					 * first for long running threads
+					 */
+	POWERSAVINGS_BALANCE_WAKEUP,	/* Also bias task wakeups to semi-idle
+					 * cpu package for power savings
+					 */
+	MAX_POWERSAVINGS_BALANCE_LEVELS
+};
 
-#define BALANCE_FOR_PKG_POWER	\
-	((sched_mc_power_savings || sched_smt_power_savings) ?	\
-	 SD_POWERSAVINGS_BALANCE : 0)
+extern int sched_mc_power_savings, sched_smt_power_savings;
 
-#define test_sd_parent(sd, flag)	((sd->parent &&		\
-					 (sd->parent->flags & flag)) ? 1 : 0)
+static inline int sd_balance_for_mc_power(void)
+{
+	if (sched_smt_power_savings)
+		return SD_POWERSAVINGS_BALANCE;
 
+	return 0;
+}
+
+static inline int sd_balance_for_package_power(void)
+{
+	if (sched_mc_power_savings | sched_smt_power_savings)
+		return SD_POWERSAVINGS_BALANCE;
+
+	return 0;
+}
+
+/*
+ * Optimise SD flags for power savings:
+ * SD_BALANCE_NEWIDLE helps aggressive task consolidation and power savings.
+ * Keep default SD flags if sched_{smt,mc}_power_saving=0
+ */
+
+static inline int sd_power_saving_flags(void)
+{
+	if (sched_mc_power_savings | sched_smt_power_savings)
+		return SD_BALANCE_NEWIDLE;
+
+	return 0;
+}
 
 struct sched_group {
 	struct sched_group *next;	/* Must be a circular list */
-	cpumask_t cpumask;
 
 	/*
 	 * CPU power of this group, SCHED_LOAD_SCALE being max power for a
@@ -784,8 +815,15 @@
 	 * (see include/linux/reciprocal_div.h)
 	 */
 	u32 reciprocal_cpu_power;
+
+	unsigned long cpumask[];
 };
 
+static inline struct cpumask *sched_group_cpus(struct sched_group *sg)
+{
+	return to_cpumask(sg->cpumask);
+}
+
 enum sched_domain_level {
 	SD_LV_NONE = 0,
 	SD_LV_SIBLING,
@@ -809,7 +847,6 @@
 	struct sched_domain *parent;	/* top domain must be null terminated */
 	struct sched_domain *child;	/* bottom domain must be null terminated */
 	struct sched_group *groups;	/* the balancing groups of the domain */
-	cpumask_t span;			/* span of all CPUs in this domain */
 	unsigned long min_interval;	/* Minimum balance interval ms */
 	unsigned long max_interval;	/* Maximum balance interval ms */
 	unsigned int busy_factor;	/* less balancing by factor if busy */
@@ -864,18 +901,35 @@
 #ifdef CONFIG_SCHED_DEBUG
 	char *name;
 #endif
+
+	/* span of all CPUs in this domain */
+	unsigned long span[];
 };
 
-extern void partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
+static inline struct cpumask *sched_domain_span(struct sched_domain *sd)
+{
+	return to_cpumask(sd->span);
+}
+
+extern void partition_sched_domains(int ndoms_new, struct cpumask *doms_new,
 				    struct sched_domain_attr *dattr_new);
 extern int arch_reinit_sched_domains(void);
 
+/* Test a flag in parent sched domain */
+static inline int test_sd_parent(struct sched_domain *sd, int flag)
+{
+	if (sd->parent && (sd->parent->flags & flag))
+		return 1;
+
+	return 0;
+}
+
 #else /* CONFIG_SMP */
 
 struct sched_domain_attr;
 
 static inline void
-partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
+partition_sched_domains(int ndoms_new, struct cpumask *doms_new,
 			struct sched_domain_attr *dattr_new)
 {
 }
@@ -926,7 +980,7 @@
 	void (*task_wake_up) (struct rq *this_rq, struct task_struct *task);
 
 	void (*set_cpus_allowed)(struct task_struct *p,
-				 const cpumask_t *newmask);
+				 const struct cpumask *newmask);
 
 	void (*rq_online)(struct rq *rq);
 	void (*rq_offline)(struct rq *rq);
@@ -1579,12 +1633,12 @@
 
 #ifdef CONFIG_SMP
 extern int set_cpus_allowed_ptr(struct task_struct *p,
-				const cpumask_t *new_mask);
+				const struct cpumask *new_mask);
 #else
 static inline int set_cpus_allowed_ptr(struct task_struct *p,
-				       const cpumask_t *new_mask)
+				       const struct cpumask *new_mask)
 {
-	if (!cpu_isset(0, *new_mask))
+	if (!cpumask_test_cpu(0, new_mask))
 		return -EINVAL;
 	return 0;
 }
@@ -2195,10 +2249,8 @@
 }
 #endif
 
-extern long sched_setaffinity(pid_t pid, const cpumask_t *new_mask);
-extern long sched_getaffinity(pid_t pid, cpumask_t *mask);
-
-extern int sched_mc_power_savings, sched_smt_power_savings;
+extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
+extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
 
 extern void normalize_rt_tasks(void);
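
Because struct sched_group and struct sched_domain now end in flexible cpumask
arrays, code is expected to go through sched_group_cpus() and
sched_domain_span() rather than taking the address of an embedded cpumask_t.
A small sketch of the accessor style (sd is assumed to be a valid
struct sched_domain *):

	int cpu, span_weight = 0, group_weight;

	for_each_cpu(cpu, sched_domain_span(sd))	/* CPUs covered by the domain */
		span_weight++;

	/* weight of the domain's first balancing group */
	group_weight = cpumask_weight(sched_group_cpus(sd->groups));
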
 
diff --git a/include/linux/serial.h b/include/linux/serial.h
index 1ea8d92..9136cc5 100644
--- a/include/linux/serial.h
+++ b/include/linux/serial.h
@@ -10,8 +10,9 @@
 #ifndef _LINUX_SERIAL_H
 #define _LINUX_SERIAL_H
 
-#ifdef __KERNEL__
 #include <linux/types.h>
+
+#ifdef __KERNEL__
 #include <asm/page.h>
 
 /*
diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h
index 3d37c94..d4d2a78 100644
--- a/include/linux/serial_8250.h
+++ b/include/linux/serial_8250.h
@@ -28,6 +28,9 @@
 	unsigned char	iotype;		/* UPIO_* */
 	unsigned char	hub6;
 	upf_t		flags;		/* UPF_* flags */
+	unsigned int	type;		/* If UPF_FIXED_TYPE */
+	unsigned int	(*serial_in)(struct uart_port *, int);
+	void		(*serial_out)(struct uart_port *, int, int);
 };
 
 /*
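
Together with UPF_FIXED_TYPE in serial_core.h, the new type/serial_in/
serial_out fields let a platform register a fully described 8250 port and skip
autoprobing. A hypothetical board-file sketch (the example_* names, address,
IRQ and register shift are illustrative, not from this patch):

	#include <linux/io.h>
	#include <linux/serial_8250.h>
	#include <linux/serial_core.h>

	static unsigned int example_serial_in(struct uart_port *p, int offset)
	{
		return readl(p->membase + (offset << p->regshift));
	}

	static void example_serial_out(struct uart_port *p, int offset, int value)
	{
		writel(value, p->membase + (offset << p->regshift));
	}

	static struct plat_serial8250_port example_uart_data[] = {
		{
			.mapbase	= 0x10000000,		/* hypothetical */
			.irq		= 42,			/* hypothetical */
			.uartclk	= 1843200,
			.regshift	= 3,
			.iotype		= UPIO_MEM32,
			.flags		= UPF_FIXED_TYPE | UPF_SKIP_TEST,
			.type		= PORT_16550A,
			.serial_in	= example_serial_in,
			.serial_out	= example_serial_out,
		},
		{ },
	};
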
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index feb3b93..b419984 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -40,7 +40,8 @@
 #define PORT_NS16550A	14
 #define PORT_XSCALE	15
 #define PORT_RM9000	16	/* PMC-Sierra RM9xxx internal UART */
-#define PORT_MAX_8250	16	/* max port ID */
+#define PORT_OCTEON	17	/* Cavium OCTEON internal UART */
+#define PORT_MAX_8250	17	/* max port ID */
 
 /*
  * ARM specific type numbers.  These are not currently guaranteed
@@ -248,6 +249,8 @@
 	spinlock_t		lock;			/* port lock */
 	unsigned long		iobase;			/* in/out[bwl] */
 	unsigned char __iomem	*membase;		/* read/write[bwl] */
+	unsigned int		(*serial_in)(struct uart_port *, int);
+	void			(*serial_out)(struct uart_port *, int, int);
 	unsigned int		irq;			/* irq number */
 	unsigned int		uartclk;		/* base uart clock */
 	unsigned int		fifosize;		/* tx fifo size */
@@ -293,6 +296,8 @@
 #define UPF_MAGIC_MULTIPLIER	((__force upf_t) (1 << 16))
 #define UPF_CONS_FLOW		((__force upf_t) (1 << 23))
 #define UPF_SHARE_IRQ		((__force upf_t) (1 << 24))
+/* The exact UART type is known and should not be probed.  */
+#define UPF_FIXED_TYPE		((__force upf_t) (1 << 27))
 #define UPF_BOOT_AUTOCONF	((__force upf_t) (1 << 28))
 #define UPF_FIXED_PORT		((__force upf_t) (1 << 29))
 #define UPF_DEAD		((__force upf_t) (1 << 30))
@@ -316,35 +321,13 @@
 };
 
 /*
- * This is the state information which is persistent across opens.
- * The low level driver must not to touch any elements contained
- * within.
- */
-struct uart_state {
-	unsigned int		close_delay;		/* msec */
-	unsigned int		closing_wait;		/* msec */
-
-#define USF_CLOSING_WAIT_INF	(0)
-#define USF_CLOSING_WAIT_NONE	(~0U)
-
-	int			count;
-	int			pm_state;
-	struct uart_info	*info;
-	struct uart_port	*port;
-
-	struct mutex		mutex;
-};
-
-#define UART_XMIT_SIZE	PAGE_SIZE
-
-typedef unsigned int __bitwise__ uif_t;
-
-/*
  * This is the state information which is only valid when the port
- * is open; it may be freed by the core driver once the device has
+ * is open; it may be cleared by the core driver once the device has
  * been closed.  Either the low level driver or the core can modify
  * stuff here.
  */
+typedef unsigned int __bitwise__ uif_t;
+
 struct uart_info {
 	struct tty_port		port;
 	struct circ_buf		xmit;
@@ -366,6 +349,29 @@
 	wait_queue_head_t	delta_msr_wait;
 };
 
+/*
+ * This is the state information which is persistent across opens.
+ * The low level driver must not touch any elements contained
+ * within.
+ */
+struct uart_state {
+	unsigned int		close_delay;		/* msec */
+	unsigned int		closing_wait;		/* msec */
+
+#define USF_CLOSING_WAIT_INF	(0)
+#define USF_CLOSING_WAIT_NONE	(~0U)
+
+	int			count;
+	int			pm_state;
+	struct uart_info	info;
+	struct uart_port	*port;
+
+	struct mutex		mutex;
+};
+
+#define UART_XMIT_SIZE	PAGE_SIZE
+
+
 /* number of characters left in xmit buffer before we ask for more */
 #define WAKEUP_CHARS		256
 
@@ -439,8 +445,13 @@
 #define uart_circ_chars_free(circ)	\
 	(CIRC_SPACE((circ)->head, (circ)->tail, UART_XMIT_SIZE))
 
-#define uart_tx_stopped(portp)		\
-	((portp)->info->port.tty->stopped || (portp)->info->port.tty->hw_stopped)
+static inline int uart_tx_stopped(struct uart_port *port)
+{
+	struct tty_struct *tty = port->info->port.tty;
+	if (tty->stopped || tty->hw_stopped)
+		return 1;
+	return 0;
+}
 
 /*
  * The following are helper functions for the low level drivers.
@@ -451,7 +462,7 @@
 #ifdef SUPPORT_SYSRQ
 	if (port->sysrq) {
 		if (ch && time_before(jiffies, port->sysrq)) {
-			handle_sysrq(ch, port->info ? port->info->port.tty : NULL);
+			handle_sysrq(ch, port->info->port.tty);
 			port->sysrq = 0;
 			return 1;
 		}
diff --git a/include/linux/topology.h b/include/linux/topology.h
index 0c5b5ac..e632d29 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -125,7 +125,8 @@
 				| SD_WAKE_AFFINE	\
 				| SD_WAKE_BALANCE	\
 				| SD_SHARE_PKG_RESOURCES\
-				| BALANCE_FOR_MC_POWER,	\
+				| sd_balance_for_mc_power()\
+				| sd_power_saving_flags(),\
 	.last_balance		= jiffies,		\
 	.balance_interval	= 1,			\
 }
@@ -150,7 +151,8 @@
 				| SD_BALANCE_FORK	\
 				| SD_WAKE_AFFINE	\
 				| SD_WAKE_BALANCE	\
-				| BALANCE_FOR_PKG_POWER,\
+				| sd_balance_for_package_power()\
+				| sd_power_saving_flags(),\
 	.last_balance		= jiffies,		\
 	.balance_interval	= 1,			\
 }
diff --git a/include/linux/tty.h b/include/linux/tty.h
index 3f4954c..fc39db95 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -180,8 +180,17 @@
  * until a hangup so don't use the wrong path.
  */
 
+struct tty_port;
+
+struct tty_port_operations {
+	/* Return 1 if the carrier is raised */
+	int (*carrier_raised)(struct tty_port *port);
+	void (*raise_dtr_rts)(struct tty_port *port);
+};
+	
 struct tty_port {
 	struct tty_struct	*tty;		/* Back pointer */
+	const struct tty_port_operations *ops;	/* Port operations */
 	spinlock_t		lock;		/* Lock protecting tty field */
 	int			blocked_open;	/* Waiting to open */
 	int			count;		/* Usage count */
@@ -253,6 +262,7 @@
 	unsigned int column;
 	unsigned char lnext:1, erasing:1, raw:1, real_raw:1, icanon:1;
 	unsigned char closing:1;
+	unsigned char echo_overrun:1;
 	unsigned short minimum_to_wake;
 	unsigned long overrun_time;
 	int num_overrun;
@@ -262,11 +272,16 @@
 	int read_tail;
 	int read_cnt;
 	unsigned long read_flags[N_TTY_BUF_SIZE/(8*sizeof(unsigned long))];
+	unsigned char *echo_buf;
+	unsigned int echo_pos;
+	unsigned int echo_cnt;
 	int canon_data;
 	unsigned long canon_head;
 	unsigned int canon_column;
 	struct mutex atomic_read_lock;
 	struct mutex atomic_write_lock;
+	struct mutex output_lock;
+	struct mutex echo_lock;
 	unsigned char *write_buf;
 	int write_cnt;
 	spinlock_t read_lock;
@@ -295,6 +310,7 @@
 #define TTY_PUSH 		6	/* n_tty private */
 #define TTY_CLOSING 		7	/* ->close() in progress */
 #define TTY_LDISC 		9	/* Line discipline attached */
+#define TTY_LDISC_CHANGING 	10	/* Line discipline changing */
 #define TTY_HW_COOK_OUT 	14	/* Hardware can do output cooking */
 #define TTY_HW_COOK_IN 		15	/* Hardware can do input cooking */
 #define TTY_PTY_LOCK 		16	/* pty private */
@@ -354,8 +370,7 @@
 extern void tty_driver_flush_buffer(struct tty_struct *tty);
 extern void tty_throttle(struct tty_struct *tty);
 extern void tty_unthrottle(struct tty_struct *tty);
-extern int tty_do_resize(struct tty_struct *tty, struct tty_struct *real_tty,
-						struct winsize *ws);
+extern int tty_do_resize(struct tty_struct *tty, struct winsize *ws);
 extern void tty_shutdown(struct tty_struct *tty);
 extern void tty_free_termios(struct tty_struct *tty);
 extern int is_current_pgrp_orphaned(void);
@@ -421,6 +436,14 @@
 extern void tty_port_free_xmit_buf(struct tty_port *port);
 extern struct tty_struct *tty_port_tty_get(struct tty_port *port);
 extern void tty_port_tty_set(struct tty_port *port, struct tty_struct *tty);
+extern int tty_port_carrier_raised(struct tty_port *port);
+extern void tty_port_raise_dtr_rts(struct tty_port *port);
+extern void tty_port_hangup(struct tty_port *port);
+extern int tty_port_block_til_ready(struct tty_port *port,
+				struct tty_struct *tty, struct file *filp);
+extern int tty_port_close_start(struct tty_port *port,
+				struct tty_struct *tty, struct file *filp);
+extern void tty_port_close_end(struct tty_port *port, struct tty_struct *tty);
 
 extern int tty_register_ldisc(int disc, struct tty_ldisc_ops *new_ldisc);
 extern int tty_unregister_ldisc(int disc);
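
tty_port_block_til_ready(), tty_port_close_start() and friends drive carrier
and DTR/RTS handling through the new tty_port_operations. A hypothetical
driver sketch (struct mydrv_port, the mydrv_* helpers and MYDRV_MSR_DCD are
illustrative, not real APIs):

	static int mydrv_carrier_raised(struct tty_port *port)
	{
		struct mydrv_port *mp = container_of(port, struct mydrv_port, port);

		return (mydrv_read_msr(mp) & MYDRV_MSR_DCD) ? 1 : 0;
	}

	static void mydrv_raise_dtr_rts(struct tty_port *port)
	{
		struct mydrv_port *mp = container_of(port, struct mydrv_port, port);

		mydrv_set_mctrl(mp, TIOCM_DTR | TIOCM_RTS);
	}

	static const struct tty_port_operations mydrv_port_ops = {
		.carrier_raised	= mydrv_carrier_raised,
		.raise_dtr_rts	= mydrv_raise_dtr_rts,
	};

After tty_port_init() the driver points port->ops at mydrv_port_ops; the core
helpers then call back into it instead of each driver open-coding the
block-until-ready and close logic.
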
diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h
index 78416b9..08e0883 100644
--- a/include/linux/tty_driver.h
+++ b/include/linux/tty_driver.h
@@ -196,8 +196,7 @@
  *	Optional: If not provided then the write method is called under
  *	the atomic write lock to keep it serialized with the ldisc.
  *
- * int (*resize)(struct tty_struct *tty, struct tty_struct *real_tty,
- *				unsigned int rows, unsigned int cols);
+ * int (*resize)(struct tty_struct *tty, struct winsize *ws)
  *
  *	Called when a termios request is issued which changes the
  *	requested terminal geometry.
@@ -258,8 +257,7 @@
 	int (*tiocmget)(struct tty_struct *tty, struct file *file);
 	int (*tiocmset)(struct tty_struct *tty, struct file *file,
 			unsigned int set, unsigned int clear);
-	int (*resize)(struct tty_struct *tty, struct tty_struct *real_tty,
-				struct winsize *ws);
+	int (*resize)(struct tty_struct *tty, struct winsize *ws);
 	int (*set_termiox)(struct tty_struct *tty, struct termiox *tnew);
 #ifdef CONFIG_CONSOLE_POLL
 	int (*poll_init)(struct tty_driver *driver, int line, char *options);
diff --git a/include/linux/usb/wusb-wa.h b/include/linux/usb/wusb-wa.h
index a102561..fb7c359 100644
--- a/include/linux/usb/wusb-wa.h
+++ b/include/linux/usb/wusb-wa.h
@@ -51,6 +51,7 @@
 	WUSB_REQ_GET_TIME       = 25,
 	WUSB_REQ_SET_STREAM_IDX = 26,
 	WUSB_REQ_SET_WUSB_MAS   = 27,
+	WUSB_REQ_CHAN_STOP      = 28,
 };
 
 
diff --git a/include/linux/uwb.h b/include/linux/uwb.h
index f9ccbd9..c021289 100644
--- a/include/linux/uwb.h
+++ b/include/linux/uwb.h
@@ -30,6 +30,7 @@
 #include <linux/device.h>
 #include <linux/mutex.h>
 #include <linux/timer.h>
+#include <linux/wait.h>
 #include <linux/workqueue.h>
 #include <linux/uwb/spec.h>
 
@@ -66,6 +67,7 @@
 	struct uwb_dev_addr dev_addr;
 	int beacon_slot;
 	DECLARE_BITMAP(streams, UWB_NUM_STREAMS);
+	DECLARE_BITMAP(last_availability_bm, UWB_NUM_MAS);
 };
 #define to_uwb_dev(d) container_of(d, struct uwb_dev, dev)
 
@@ -86,12 +88,31 @@
 	struct mutex mutex;
 };
 
+/* Beacon cache list */
+struct uwb_beca {
+	struct list_head list;
+	size_t entries;
+	struct mutex mutex;
+};
+
+/* Event handling thread. */
+struct uwbd {
+	int pid;
+	struct task_struct *task;
+	wait_queue_head_t wq;
+	struct list_head event_list;
+	spinlock_t event_list_lock;
+};
+
 /**
  * struct uwb_mas_bm - a bitmap of all MAS in a superframe
  * @bm: a bitmap of length #UWB_NUM_MAS
  */
 struct uwb_mas_bm {
 	DECLARE_BITMAP(bm, UWB_NUM_MAS);
+	DECLARE_BITMAP(unsafe_bm, UWB_NUM_MAS);
+	int safe;
+	int unsafe;
 };
 
 /**
@@ -117,14 +138,24 @@
  * FIXME: further target states TBD.
  */
 enum uwb_rsv_state {
-	UWB_RSV_STATE_NONE,
+	UWB_RSV_STATE_NONE = 0,
 	UWB_RSV_STATE_O_INITIATED,
 	UWB_RSV_STATE_O_PENDING,
 	UWB_RSV_STATE_O_MODIFIED,
 	UWB_RSV_STATE_O_ESTABLISHED,
+	UWB_RSV_STATE_O_TO_BE_MOVED,
+	UWB_RSV_STATE_O_MOVE_EXPANDING,
+	UWB_RSV_STATE_O_MOVE_COMBINING,
+	UWB_RSV_STATE_O_MOVE_REDUCING,
 	UWB_RSV_STATE_T_ACCEPTED,
 	UWB_RSV_STATE_T_DENIED,
+	UWB_RSV_STATE_T_CONFLICT,
 	UWB_RSV_STATE_T_PENDING,
+	UWB_RSV_STATE_T_EXPANDING_ACCEPTED,
+	UWB_RSV_STATE_T_EXPANDING_CONFLICT,
+	UWB_RSV_STATE_T_EXPANDING_PENDING,
+	UWB_RSV_STATE_T_EXPANDING_DENIED,
+	UWB_RSV_STATE_T_RESIZED,
 
 	UWB_RSV_STATE_LAST,
 };
@@ -149,6 +180,12 @@
 	};
 };
 
+struct uwb_rsv_move {
+	struct uwb_mas_bm final_mas;
+	struct uwb_ie_drp *companion_drp_ie;
+	struct uwb_mas_bm companion_mas;
+};
+
 /*
  * Number of streams reserved for reservations targeted at DevAddrs.
  */
@@ -186,6 +223,7 @@
  *
  * @status:         negotiation status
  * @stream:         stream index allocated for this reservation
+ * @tiebreaker:     conflict tiebreaker for this reservation
  * @mas:            reserved MAS
  * @drp_ie:         the DRP IE
  * @ie_valid:       true iff the DRP IE matches the reservation parameters
@@ -201,25 +239,29 @@
 	struct uwb_rc *rc;
 	struct list_head rc_node;
 	struct list_head pal_node;
+	struct kref kref;
 
 	struct uwb_dev *owner;
 	struct uwb_rsv_target target;
 	enum uwb_drp_type type;
 	int max_mas;
 	int min_mas;
-	int sparsity;
+	int max_interval;
 	bool is_multicast;
 
 	uwb_rsv_cb_f callback;
 	void *pal_priv;
 
 	enum uwb_rsv_state state;
+	bool needs_release_companion_mas;
 	u8 stream;
+	u8 tiebreaker;
 	struct uwb_mas_bm mas;
 	struct uwb_ie_drp *drp_ie;
+	struct uwb_rsv_move mv;
 	bool ie_valid;
 	struct timer_list timer;
-	bool expired;
+	struct work_struct handle_timeout_work;
 };
 
 static const
@@ -261,6 +303,13 @@
 	bool ie_valid;
 };
 
+struct uwb_drp_backoff_win {
+	u8 window;
+	u8 n;
+	int total_expired;
+	struct timer_list timer;
+	bool can_reserve_extra_mases;
+};
 
 const char *uwb_rsv_state_str(enum uwb_rsv_state state);
 const char *uwb_rsv_type_str(enum uwb_drp_type type);
@@ -276,6 +325,8 @@
 
 void uwb_rsv_accept(struct uwb_rsv *rsv, uwb_rsv_cb_f cb, void *pal_priv);
 
+void uwb_rsv_get_usable_mas(struct uwb_rsv *orig_rsv, struct uwb_mas_bm *mas);
+
 /**
  * Radio Control Interface instance
  *
@@ -337,23 +388,33 @@
 	u8 ctx_roll;
 
 	int beaconing;			/* Beaconing state [channel number] */
+	int beaconing_forced;
 	int scanning;
 	enum uwb_scan_type scan_type:3;
 	unsigned ready:1;
 	struct uwb_notifs_chain notifs_chain;
+	struct uwb_beca uwb_beca;
 
+	struct uwbd uwbd;
+
+	struct uwb_drp_backoff_win bow;
 	struct uwb_drp_avail drp_avail;
 	struct list_head reservations;
+	struct list_head cnflt_alien_list;
+	struct uwb_mas_bm cnflt_alien_bitmap;
 	struct mutex rsvs_mutex;
+	spinlock_t rsvs_lock;
 	struct workqueue_struct *rsv_workq;
-	struct work_struct rsv_update_work;
 
+	struct delayed_work rsv_update_work;
+	struct delayed_work rsv_alien_bp_work;
+	int set_drp_ie_pending;
 	struct mutex ies_mutex;
 	struct uwb_rc_cmd_set_ie *ies;
 	size_t ies_capacity;
 
-	spinlock_t pal_lock;
 	struct list_head pals;
+	int active_pals;
 
 	struct uwb_dbg *dbg;
 };
@@ -361,11 +422,19 @@
 
 /**
  * struct uwb_pal - a UWB PAL
- * @name:    descriptive name for this PAL (wushc, wlp, etc.).
+ * @name:    descriptive name for this PAL (wusbhc, wlp, etc.).
  * @device:  a device for the PAL.  Used to link the PAL and the radio
  *           controller in sysfs.
+ * @rc:      the radio controller the PAL uses.
+ * @channel_changed: called when the channel used by the radio changes.
+ *           A channel of -1 means the channel has been stopped.
  * @new_rsv: called when a peer requests a reservation (may be NULL if
  *           the PAL cannot accept reservation requests).
+ * @channel: channel being used by the PAL; 0 if the PAL isn't using
+ *           the radio; -1 if the PAL wishes to use the radio but
+ *           cannot.
+ * @debugfs_dir: a debugfs directory which the PAL can use for its own
+ *           debugfs files.
  *
  * A Protocol Adaptation Layer (PAL) is a user of the WiMedia UWB
  * radio platform (e.g., WUSB, WLP or Bluetooth UWB AMP).
@@ -384,12 +453,21 @@
 	struct list_head node;
 	const char *name;
 	struct device *device;
-	void (*new_rsv)(struct uwb_rsv *rsv);
+	struct uwb_rc *rc;
+
+	void (*channel_changed)(struct uwb_pal *pal, int channel);
+	void (*new_rsv)(struct uwb_pal *pal, struct uwb_rsv *rsv);
+
+	int channel;
+	struct dentry *debugfs_dir;
 };
 
 void uwb_pal_init(struct uwb_pal *pal);
-int uwb_pal_register(struct uwb_rc *rc, struct uwb_pal *pal);
-void uwb_pal_unregister(struct uwb_rc *rc, struct uwb_pal *pal);
+int uwb_pal_register(struct uwb_pal *pal);
+void uwb_pal_unregister(struct uwb_pal *pal);
+
+int uwb_radio_start(struct uwb_pal *pal);
+void uwb_radio_stop(struct uwb_pal *pal);
 
 /*
  * General public API
@@ -443,8 +521,6 @@
 		    struct uwb_rccb *cmd, size_t cmd_size,
 		    u8 expected_type, u16 expected_event,
 		    struct uwb_rceb **preply);
-ssize_t uwb_rc_get_ie(struct uwb_rc *, struct uwb_rc_evt_get_ie **);
-int uwb_bg_joined(struct uwb_rc *rc);
 
 size_t __uwb_addr_print(char *, size_t, const unsigned char *, int);
 
@@ -520,6 +596,8 @@
 void uwb_rc_neh_grok(struct uwb_rc *, void *, size_t);
 void uwb_rc_neh_error(struct uwb_rc *, int);
 void uwb_rc_reset_all(struct uwb_rc *rc);
+void uwb_rc_pre_reset(struct uwb_rc *rc);
+void uwb_rc_post_reset(struct uwb_rc *rc);
 
 /**
  * uwb_rsv_is_owner - is the owner of this reservation the RC?
@@ -531,7 +609,9 @@
 }
 
 /**
- * Events generated by UWB that can be passed to any listeners
+ * enum uwb_notifs - UWB events that can be passed to any listeners
+ * @UWB_NOTIF_ONAIR: a new neighbour has joined the beacon group.
+ * @UWB_NOTIF_OFFAIR: a neighbour has left the beacon group.
  *
  * Higher layers can register callback functions with the radio
  * controller using uwb_notifs_register(). The radio controller
@@ -539,8 +619,6 @@
  * nodes when an event occurs.
  */
 enum uwb_notifs {
-	UWB_NOTIF_BG_JOIN = 0,	/* radio controller joined a beacon group */
-	UWB_NOTIF_BG_LEAVE = 1,	/* radio controller left a beacon group */
 	UWB_NOTIF_ONAIR,
 	UWB_NOTIF_OFFAIR,
 };
@@ -652,22 +730,9 @@
 
 /* Information Element handling */
 
-/* For representing the state of writing to a buffer when iterating */
-struct uwb_buf_ctx {
-	char *buf;
-	size_t bytes, size;
-};
-
-typedef int (*uwb_ie_f)(struct uwb_dev *, const struct uwb_ie_hdr *,
-			size_t, void *);
 struct uwb_ie_hdr *uwb_ie_next(void **ptr, size_t *len);
-ssize_t uwb_ie_for_each(struct uwb_dev *uwb_dev, uwb_ie_f fn, void *data,
-			const void *buf, size_t size);
-int uwb_ie_dump_hex(struct uwb_dev *, const struct uwb_ie_hdr *,
-		    size_t, void *);
-int uwb_rc_set_ie(struct uwb_rc *, struct uwb_rc_cmd_set_ie *);
-struct uwb_ie_hdr *uwb_ie_next(void **ptr, size_t *len);
-
+int uwb_rc_ie_add(struct uwb_rc *uwb_rc, const struct uwb_ie_hdr *ies, size_t size);
+int uwb_rc_ie_rm(struct uwb_rc *uwb_rc, enum uwb_ie element_id);
 
 /*
  * Transmission statistics
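
uwb_pal_register() no longer takes the radio controller as a separate
argument: a PAL fills in its struct uwb_pal (including ->rc and, optionally,
the new ->channel_changed hook) before registering, then uses
uwb_radio_start()/uwb_radio_stop() to ask for the radio. A hedged sketch (the
example_* names are illustrative):

	static void example_channel_changed(struct uwb_pal *pal, int channel)
	{
		/* channel == -1 means the radio has stopped. */
	}

	static int example_pal_setup(struct example_pal *epal, struct uwb_rc *rc)
	{
		int ret;

		uwb_pal_init(&epal->pal);
		epal->pal.name = "example";
		epal->pal.rc = rc;
		epal->pal.channel_changed = example_channel_changed;

		ret = uwb_pal_register(&epal->pal);
		if (ret)
			return ret;

		return uwb_radio_start(&epal->pal);
	}
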
diff --git a/include/linux/uwb/debug-cmd.h b/include/linux/uwb/debug-cmd.h
index 1141f41..8da004e 100644
--- a/include/linux/uwb/debug-cmd.h
+++ b/include/linux/uwb/debug-cmd.h
@@ -32,6 +32,10 @@
 enum uwb_dbg_cmd_type {
 	UWB_DBG_CMD_RSV_ESTABLISH = 1,
 	UWB_DBG_CMD_RSV_TERMINATE = 2,
+	UWB_DBG_CMD_IE_ADD = 3,
+	UWB_DBG_CMD_IE_RM = 4,
+	UWB_DBG_CMD_RADIO_START = 5,
+	UWB_DBG_CMD_RADIO_STOP = 6,
 };
 
 struct uwb_dbg_cmd_rsv_establish {
@@ -39,18 +43,25 @@
 	__u8  type;
 	__u16 max_mas;
 	__u16 min_mas;
-	__u8  sparsity;
+	__u8  max_interval;
 };
 
 struct uwb_dbg_cmd_rsv_terminate {
 	int index;
 };
 
+struct uwb_dbg_cmd_ie {
+	__u8 data[128];
+	int len;
+};
+
 struct uwb_dbg_cmd {
 	__u32 type;
 	union {
 		struct uwb_dbg_cmd_rsv_establish rsv_establish;
 		struct uwb_dbg_cmd_rsv_terminate rsv_terminate;
+		struct uwb_dbg_cmd_ie ie_add;
+		struct uwb_dbg_cmd_ie ie_rm;
 	};
 };
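A hedged example of driving the new debug commands from user space; the structure layout comes from this header, but the debugfs node it is written to (shown here as /sys/kernel/debug/uwb/uwb0/command) is an assumption about the uwb debug driver, not something this header defines.

#include <stdint.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <linux/uwb/debug-cmd.h>

/* Ask the radio controller to start including an extra IE in its beacon. */
static int add_test_ie(const uint8_t *ie, int ie_len)
{
	struct uwb_dbg_cmd cmd;
	int fd, ret;

	if (ie_len > (int)sizeof(cmd.ie_add.data))
		return -1;

	memset(&cmd, 0, sizeof(cmd));
	cmd.type = UWB_DBG_CMD_IE_ADD;
	memcpy(cmd.ie_add.data, ie, ie_len);
	cmd.ie_add.len = ie_len;

	fd = open("/sys/kernel/debug/uwb/uwb0/command", O_WRONLY);	/* assumed path */
	if (fd < 0)
		return -1;
	ret = (write(fd, &cmd, sizeof(cmd)) == (ssize_t)sizeof(cmd)) ? 0 : -1;
	close(fd);
	return ret;
}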
 
diff --git a/include/linux/uwb/debug.h b/include/linux/uwb/debug.h
deleted file mode 100644
index a86a73f..0000000
--- a/include/linux/uwb/debug.h
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Ultra Wide Band
- * Debug Support
- *
- * Copyright (C) 2005-2006 Intel Corporation
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA.
- *
- *
- * FIXME: doc
- * Invoke like:
- *
- * #define D_LOCAL 4
- * #include <linux/uwb/debug.h>
- *
- * At the end of your include files.
- */
-#include <linux/types.h>
-
-struct device;
-extern void dump_bytes(struct device *dev, const void *_buf, size_t rsize);
-
-/* Master debug switch; !0 enables, 0 disables */
-#define D_MASTER (!0)
-
-/* Local (per-file) debug switch; #define before #including */
-#ifndef D_LOCAL
-#define D_LOCAL 0
-#endif
-
-#undef __d_printf
-#undef d_fnstart
-#undef d_fnend
-#undef d_printf
-#undef d_dump
-
-#define __d_printf(l, _tag, _dev, f, a...)				\
-do {									\
-	struct device *__dev = (_dev);					\
-	if (D_MASTER && D_LOCAL >= (l)) {				\
-		char __head[64] = "";					\
-		if (_dev != NULL) {					\
-			if ((unsigned long)__dev < 4096)		\
-				printk(KERN_ERR "E: Corrupt dev %p\n",	\
-					__dev);				\
-			else						\
-				snprintf(__head, sizeof(__head),	\
-					 "%s %s: ",			\
-					 dev_driver_string(__dev),	\
-					 __dev->bus_id);		\
-		}							\
-		printk(KERN_ERR "%s%s" _tag ": " f, __head,		\
-			__func__, ## a);				\
-	}								\
-} while (0 && _dev)
-
-#define d_fnstart(l, _dev, f, a...)	\
-	__d_printf(l, " FNSTART", _dev, f, ## a)
-#define d_fnend(l, _dev, f, a...)	\
-	__d_printf(l, " FNEND", _dev, f, ## a)
-#define d_printf(l, _dev, f, a...)	\
-	__d_printf(l, "", _dev, f, ## a)
-#define d_dump(l, _dev, ptr, size)		\
-do {						\
-	struct device *__dev = _dev;		\
-	if (D_MASTER && D_LOCAL >= (l))		\
-		dump_bytes(__dev, ptr, size);	\
-} while (0 && _dev)
-#define d_test(l) (D_MASTER && D_LOCAL >= (l))
diff --git a/include/linux/uwb/spec.h b/include/linux/uwb/spec.h
index 198c15f..b52e44f 100644
--- a/include/linux/uwb/spec.h
+++ b/include/linux/uwb/spec.h
@@ -59,6 +59,11 @@
 #define UWB_MAS_PER_ZONE (UWB_NUM_MAS / UWB_NUM_ZONES)
 
 /*
+ * Number of MAS required before a row can be considered available.
+ */
+#define UWB_USABLE_MAS_PER_ROW (UWB_NUM_ZONES - 1)
+
+/*
  * Number of streams per DRP reservation between a pair of devices.
  *
  * [ECMA-368] section 16.8.6.
@@ -94,6 +99,26 @@
 enum { UWB_MAX_LOST_BEACONS = 3 };
 
 /*
+ * mDRPBackOffWinMin
+ *
+ * The minimum number of superframes to wait before trying to reserve
+ * extra MAS.
+ *
+ * [ECMA-368] section 17.16
+ */
+enum { UWB_DRP_BACKOFF_WIN_MIN = 2 };
+
+/*
+ * mDRPBackOffWinMax
+ *
+ * The maximum number of superframes to wait before trying to reserve
+ * extra MAS.
+ *
+ * [ECMA-368] section 17.16
+ */
+enum { UWB_DRP_BACKOFF_WIN_MAX = 16 };
+
+/*
  * Length of a superframe in microseconds.
  */
 #define UWB_SUPERFRAME_LENGTH_US (UWB_MAS_LENGTH_US * UWB_NUM_MAS)
@@ -200,6 +225,12 @@
 	UWB_DRP_REASON_MODIFIED,
 };
 
+/** Relinquish Request Reason Codes ([ECMA-368] table 113) */
+enum uwb_relinquish_req_reason {
+	UWB_RELINQUISH_REQ_REASON_NON_SPECIFIC = 0,
+	UWB_RELINQUISH_REQ_REASON_OVER_ALLOCATION,
+};
+
 /**
  *  DRP Notification Reason Codes (WHCI 0.95 [3.1.4.9])
  */
@@ -252,6 +283,7 @@
 	UWB_APP_SPEC_PROBE_IE = 15,
 	UWB_IDENTIFICATION_IE = 19,
 	UWB_MASTER_KEY_ID_IE = 20,
+	UWB_RELINQUISH_REQUEST_IE = 21,
 	UWB_IE_WLP = 250, /* WiMedia Logical Link Control Protocol WLP 0.99 */
 	UWB_APP_SPEC_IE = 255,
 };
@@ -365,6 +397,27 @@
 	DECLARE_BITMAP(bmp, UWB_NUM_MAS);
 } __attribute__((packed));
 
+/* Relinquish Request IE ([ECMA-368] section 16.8.19). */
+struct uwb_relinquish_request_ie {
+	struct uwb_ie_hdr	hdr;
+	__le16			relinquish_req_control;
+	struct uwb_dev_addr	dev_addr;
+	struct uwb_drp_alloc	allocs[];
+} __attribute__((packed));
+
+static inline int uwb_ie_relinquish_req_reason_code(struct uwb_relinquish_request_ie *ie)
+{
+	return (le16_to_cpu(ie->relinquish_req_control) >> 0) & 0xf;
+}
+
+static inline void uwb_ie_relinquish_req_set_reason_code(struct uwb_relinquish_request_ie *ie,
+							 int reason_code)
+{
+	u16 ctrl = le16_to_cpu(ie->relinquish_req_control);
+	ctrl = (ctrl & ~(0xf << 0)) | (reason_code << 0);
+	ie->relinquish_req_control = cpu_to_le16(ctrl);
+}
+
 /**
  * The Vendor ID is set to an OUI that indicates the vendor of the device.
  * ECMA-368 [16.8.10]
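As a sketch of how the helpers above are meant to be used when building the new IE; the hdr.element_id/hdr.length assignments assume the usual uwb_ie_hdr layout where length excludes the header, and no DRP allocations are appended here.

static void fill_relinquish_req(struct uwb_relinquish_request_ie *ie,
				const struct uwb_dev_addr *target)
{
	ie->hdr.element_id = UWB_RELINQUISH_REQUEST_IE;
	ie->hdr.length = sizeof(*ie) - sizeof(ie->hdr);
	ie->relinquish_req_control = cpu_to_le16(0);
	uwb_ie_relinquish_req_set_reason_code(ie,
			UWB_RELINQUISH_REQ_REASON_OVER_ALLOCATION);
	ie->dev_addr = *target;
	/* uwb_ie_relinquish_req_reason_code(ie) now reads back bits [3:0],
	 * i.e. UWB_RELINQUISH_REQ_REASON_OVER_ALLOCATION. */
}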
diff --git a/include/linux/uwb/umc.h b/include/linux/uwb/umc.h
index 36a39e3..4b4fc0f 100644
--- a/include/linux/uwb/umc.h
+++ b/include/linux/uwb/umc.h
@@ -89,6 +89,8 @@
 	void (*remove)(struct umc_dev *);
 	int  (*suspend)(struct umc_dev *, pm_message_t state);
 	int  (*resume)(struct umc_dev *);
+	int  (*pre_reset)(struct umc_dev *);
+	int  (*post_reset)(struct umc_dev *);
 
 	struct device_driver driver;
 };
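A hypothetical UMC capability driver wiring up the new reset hooks; only the two callbacks added above are shown, with probe/remove and the capability matching fields omitted.

static int my_umc_pre_reset(struct umc_dev *umc)
{
	/* Quiesce the capability before the whole UWB chip is reset. */
	return 0;
}

static int my_umc_post_reset(struct umc_dev *umc)
{
	/* Reprogram any hardware state that the reset wiped out. */
	return 0;
}

static struct umc_driver my_umc_driver = {
	.pre_reset	= my_umc_pre_reset,
	.post_reset	= my_umc_post_reset,
	/* ... probe, remove, suspend, resume as before ... */
};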
diff --git a/include/linux/wlp.h b/include/linux/wlp.h
index 033545e..ac95ce6 100644
--- a/include/linux/wlp.h
+++ b/include/linux/wlp.h
@@ -646,6 +646,7 @@
 struct wlp {
 	struct mutex mutex;
 	struct uwb_rc *rc;		/* UWB radio controller */
+	struct net_device *ndev;
 	struct uwb_pal pal;
 	struct wlp_eda eda;
 	struct wlp_uuid uuid;
@@ -675,7 +676,7 @@
 static struct wlp_wss_attribute wss_attr_##_name = __ATTR(_name, _mode,	\
 							  _show, _store)
 
-extern int wlp_setup(struct wlp *, struct uwb_rc *);
+extern int wlp_setup(struct wlp *, struct uwb_rc *, struct net_device *ndev);
 extern void wlp_remove(struct wlp *);
 extern ssize_t wlp_neighborhood_show(struct wlp *, char *);
 extern int wlp_wss_setup(struct net_device *, struct wlp_wss *);
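For callers of the changed wlp_setup() prototype, a minimal hypothetical sketch; storing the net_device matches the ndev member added above, presumably so WLP can toggle carrier state as the radio starts and stops.

static int my_wlp_bind(struct wlp *wlp, struct uwb_rc *rc,
		       struct net_device *ndev)
{
	/* wlp_setup() now also records the net_device in wlp->ndev. */
	return wlp_setup(wlp, rc, ndev);
}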
diff --git a/init/Kconfig b/init/Kconfig
index 1362719..f628171 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -924,6 +924,15 @@
 
 endif # MODULES
 
+config INIT_ALL_POSSIBLE
+	bool
+	help
+	  Back when each arch used to define their own cpu_online_map and
+	  cpu_possible_map, some of them chose to initialize cpu_possible_map
+	  with all 1s, and others with all 0s.  When they were centralised,
+	  it was better to provide this option than to break all the archs
+	  and have several arch maintainers pursuing me down dark alleys.
+
 config STOP_MACHINE
 	bool
 	default y
diff --git a/init/main.c b/init/main.c
index f5e64f2..ad8f9f5 100644
--- a/init/main.c
+++ b/init/main.c
@@ -75,15 +75,6 @@
 #include <asm/smp.h>
 #endif
 
-/*
- * This is one of the first .c files built. Error out early if we have compiler
- * trouble.
- */
-
-#if __GNUC__ == 4 && __GNUC_MINOR__ == 1 && __GNUC_PATCHLEVEL__ == 0
-#warning gcc-4.1.0 is known to miscompile the kernel.  A different compiler version is recommended.
-#endif
-
 static int kernel_init(void *);
 
 extern void init_IRQ(void);
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 8ea32e8..bae131a 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -24,19 +24,20 @@
 cpumask_t cpu_present_map __read_mostly;
 EXPORT_SYMBOL(cpu_present_map);
 
-#ifndef CONFIG_SMP
-
 /*
  * Represents all cpu's that are currently online.
  */
-cpumask_t cpu_online_map __read_mostly = CPU_MASK_ALL;
+cpumask_t cpu_online_map __read_mostly;
 EXPORT_SYMBOL(cpu_online_map);
 
+#ifdef CONFIG_INIT_ALL_POSSIBLE
 cpumask_t cpu_possible_map __read_mostly = CPU_MASK_ALL;
+#else
+cpumask_t cpu_possible_map __read_mostly;
+#endif
 EXPORT_SYMBOL(cpu_possible_map);
 
-#else /* CONFIG_SMP */
-
+#ifdef CONFIG_SMP
 /* Serializes the updates to cpu_online_map, cpu_present_map */
 static DEFINE_MUTEX(cpu_add_remove_lock);
 
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 96c0ba1..39c1a4c 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -896,7 +896,7 @@
 	if (!*buf) {
 		cpus_clear(trialcs.cpus_allowed);
 	} else {
-		retval = cpulist_parse(buf, trialcs.cpus_allowed);
+		retval = cpulist_parse(buf, &trialcs.cpus_allowed);
 		if (retval < 0)
 			return retval;
 
@@ -1482,7 +1482,7 @@
 	mask = cs->cpus_allowed;
 	mutex_unlock(&callback_mutex);
 
-	return cpulist_scnprintf(page, PAGE_SIZE, mask);
+	return cpulist_scnprintf(page, PAGE_SIZE, &mask);
 }
 
 static int cpuset_sprintf_memlist(char *page, struct cpuset *cs)
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 6eb3c79..f63c706 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -46,7 +46,7 @@
 	desc->irq_count = 0;
 	desc->irqs_unhandled = 0;
 #ifdef CONFIG_SMP
-	cpus_setall(desc->affinity);
+	cpumask_setall(&desc->affinity);
 #endif
 	spin_unlock_irqrestore(&desc->lock, flags);
 }
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 540f6c4..61c4a9b 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -79,7 +79,7 @@
  *	@cpumask:	cpumask
  *
  */
-int irq_set_affinity(unsigned int irq, cpumask_t cpumask)
+int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long flags;
@@ -91,14 +91,14 @@
 
 #ifdef CONFIG_GENERIC_PENDING_IRQ
 	if (desc->status & IRQ_MOVE_PCNTXT || desc->status & IRQ_DISABLED) {
-		desc->affinity = cpumask;
+		cpumask_copy(&desc->affinity, cpumask);
 		desc->chip->set_affinity(irq, cpumask);
 	} else {
 		desc->status |= IRQ_MOVE_PENDING;
-		desc->pending_mask = cpumask;
+		cpumask_copy(&desc->pending_mask, cpumask);
 	}
 #else
-	desc->affinity = cpumask;
+	cpumask_copy(&desc->affinity, cpumask);
 	desc->chip->set_affinity(irq, cpumask);
 #endif
 	desc->status |= IRQ_AFFINITY_SET;
@@ -112,26 +112,24 @@
  */
 int do_irq_select_affinity(unsigned int irq, struct irq_desc *desc)
 {
-	cpumask_t mask;
-
 	if (!irq_can_set_affinity(irq))
 		return 0;
 
-	cpus_and(mask, cpu_online_map, irq_default_affinity);
-
 	/*
 	 * Preserve an userspace affinity setup, but make sure that
 	 * one of the targets is online.
 	 */
 	if (desc->status & (IRQ_AFFINITY_SET | IRQ_NO_BALANCING)) {
-		if (cpus_intersects(desc->affinity, cpu_online_map))
-			mask = desc->affinity;
+		if (cpumask_any_and(&desc->affinity, cpu_online_mask)
+		    < nr_cpu_ids)
+			goto set_affinity;
 		else
 			desc->status &= ~IRQ_AFFINITY_SET;
 	}
 
-	desc->affinity = mask;
-	desc->chip->set_affinity(irq, mask);
+	cpumask_and(&desc->affinity, cpu_online_mask, &irq_default_affinity);
+set_affinity:
+	desc->chip->set_affinity(irq, &desc->affinity);
 
 	return 0;
 }
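Callers of irq_set_affinity() now pass a pointer rather than a cpumask_t by value; a minimal, hypothetical example of pinning an interrupt to one CPU with the new signature:

/* cpumask_of() yields a const struct cpumask * without putting a
 * full cpumask on the stack, which is the point of this conversion. */
static int pin_irq_to_cpu(unsigned int irq, int cpu)
{
	return irq_set_affinity(irq, cpumask_of(cpu));
}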
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c
index 9db681d..bd72329 100644
--- a/kernel/irq/migration.c
+++ b/kernel/irq/migration.c
@@ -4,7 +4,6 @@
 void move_masked_irq(int irq)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
-	cpumask_t tmp;
 
 	if (likely(!(desc->status & IRQ_MOVE_PENDING)))
 		return;
@@ -19,7 +18,7 @@
 
 	desc->status &= ~IRQ_MOVE_PENDING;
 
-	if (unlikely(cpus_empty(desc->pending_mask)))
+	if (unlikely(cpumask_empty(&desc->pending_mask)))
 		return;
 
 	if (!desc->chip->set_affinity)
@@ -27,8 +26,6 @@
 
 	assert_spin_locked(&desc->lock);
 
-	cpus_and(tmp, desc->pending_mask, cpu_online_map);
-
 	/*
 	 * If there was a valid mask to work with, please
 	 * do the disable, re-program, enable sequence.
@@ -41,10 +38,13 @@
 	 * For correct operation this depends on the caller
 	 * masking the irqs.
 	 */
-	if (likely(!cpus_empty(tmp))) {
-		desc->chip->set_affinity(irq,tmp);
+	if (likely(cpumask_any_and(&desc->pending_mask, cpu_online_mask)
+		   < nr_cpu_ids)) {
+		cpumask_and(&desc->affinity,
+			    &desc->pending_mask, cpu_online_mask);
+		desc->chip->set_affinity(irq, &desc->affinity);
 	}
-	cpus_clear(desc->pending_mask);
+	cpumask_clear(&desc->pending_mask);
 }
 
 void move_native_irq(int irq)
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index f6b3440..d2c0e5e 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -40,33 +40,42 @@
 		const char __user *buffer, size_t count, loff_t *pos)
 {
 	unsigned int irq = (int)(long)PDE(file->f_path.dentry->d_inode)->data;
-	cpumask_t new_value;
+	cpumask_var_t new_value;
 	int err;
 
 	if (!irq_to_desc(irq)->chip->set_affinity || no_irq_affinity ||
 	    irq_balancing_disabled(irq))
 		return -EIO;
 
+	if (!alloc_cpumask_var(&new_value, GFP_KERNEL))
+		return -ENOMEM;
+
 	err = cpumask_parse_user(buffer, count, new_value);
 	if (err)
-		return err;
+		goto free_cpumask;
 
-	if (!is_affinity_mask_valid(new_value))
-		return -EINVAL;
+	if (!is_affinity_mask_valid(*new_value)) {
+		err = -EINVAL;
+		goto free_cpumask;
+	}
 
 	/*
 	 * Do not allow disabling IRQs completely - it's a too easy
 	 * way to make the system unusable accidentally :-) At least
 	 * one online CPU still has to be targeted.
 	 */
-	if (!cpus_intersects(new_value, cpu_online_map))
+	if (!cpumask_intersects(new_value, cpu_online_mask)) {
 		/* Special case for empty set - allow the architecture
 		   code to set default SMP affinity. */
-		return irq_select_affinity_usr(irq) ? -EINVAL : count;
+		err = irq_select_affinity_usr(irq) ? -EINVAL : count;
+	} else {
+		irq_set_affinity(irq, new_value);
+		err = count;
+	}
 
-	irq_set_affinity(irq, new_value);
-
-	return count;
+free_cpumask:
+	free_cpumask_var(new_value);
+	return err;
 }
 
 static int irq_affinity_proc_open(struct inode *inode, struct file *file)
@@ -95,7 +104,7 @@
 	cpumask_t new_value;
 	int err;
 
-	err = cpumask_parse_user(buffer, count, new_value);
+	err = cpumask_parse_user(buffer, count, &new_value);
 	if (err)
 		return err;
 
diff --git a/kernel/profile.c b/kernel/profile.c
index 60adefb..4cb7d68 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -442,7 +442,7 @@
 static int prof_cpu_mask_read_proc(char *page, char **start, off_t off,
 			int count, int *eof, void *data)
 {
-	int len = cpumask_scnprintf(page, count, *(cpumask_t *)data);
+	int len = cpumask_scnprintf(page, count, (cpumask_t *)data);
 	if (count - len < 2)
 		return -EINVAL;
 	len += sprintf(page + len, "\n");
@@ -456,7 +456,7 @@
 	unsigned long full_count = count, err;
 	cpumask_t new_value;
 
-	err = cpumask_parse_user(buffer, count, new_value);
+	err = cpumask_parse_user(buffer, count, &new_value);
 	if (err)
 		return err;
 
diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
index e503a00..c03ca3e 100644
--- a/kernel/rcuclassic.c
+++ b/kernel/rcuclassic.c
@@ -393,7 +393,7 @@
 		 * unnecessarily.
 		 */
 		smp_mb();
-		cpus_andnot(rcp->cpumask, cpu_online_map, nohz_cpu_mask);
+		cpumask_andnot(&rcp->cpumask, cpu_online_mask, nohz_cpu_mask);
 
 		rcp->signaled = 0;
 	}
diff --git a/kernel/sched.c b/kernel/sched.c
index fff1c4a..27ba1d6 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -498,18 +498,26 @@
  */
 struct root_domain {
 	atomic_t refcount;
-	cpumask_t span;
-	cpumask_t online;
+	cpumask_var_t span;
+	cpumask_var_t online;
 
 	/*
 	 * The "RT overload" flag: it gets set if a CPU has more than
 	 * one runnable RT task.
 	 */
-	cpumask_t rto_mask;
+	cpumask_var_t rto_mask;
 	atomic_t rto_count;
 #ifdef CONFIG_SMP
 	struct cpupri cpupri;
 #endif
+#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
+	/*
+	 * Preferred wake up cpu nominated by sched_mc balance that will be
+	 * Preferred wake-up cpu nominated by sched_mc balance, used when
+	 * most cpus in the system are idle, indicating very low overall
+	 * system utilisation. Triggered at POWERSAVINGS_BALANCE_WAKEUP (2).
+	unsigned int sched_mc_preferred_wakeup_cpu;
+#endif
 };
 
 /*
@@ -1514,7 +1522,7 @@
 	struct sched_domain *sd = data;
 	int i;
 
-	for_each_cpu_mask(i, sd->span) {
+	for_each_cpu(i, sched_domain_span(sd)) {
 		/*
 		 * If there are currently no tasks on the cpu pretend there
 		 * is one of average load so that when a new task gets to
@@ -1535,7 +1543,7 @@
 	if (!sd->parent || !(sd->parent->flags & SD_LOAD_BALANCE))
 		shares = tg->shares;
 
-	for_each_cpu_mask(i, sd->span)
+	for_each_cpu(i, sched_domain_span(sd))
 		update_group_shares_cpu(tg, i, shares, rq_weight);
 
 	return 0;
@@ -2101,15 +2109,17 @@
 		int i;
 
 		/* Skip over this group if it has no CPUs allowed */
-		if (!cpus_intersects(group->cpumask, p->cpus_allowed))
+		if (!cpumask_intersects(sched_group_cpus(group),
+					&p->cpus_allowed))
 			continue;
 
-		local_group = cpu_isset(this_cpu, group->cpumask);
+		local_group = cpumask_test_cpu(this_cpu,
+					       sched_group_cpus(group));
 
 		/* Tally up the load of all CPUs in the group */
 		avg_load = 0;
 
-		for_each_cpu_mask_nr(i, group->cpumask) {
+		for_each_cpu(i, sched_group_cpus(group)) {
 			/* Bias balancing toward cpus of our domain */
 			if (local_group)
 				load = source_load(i, load_idx);
@@ -2141,17 +2151,14 @@
  * find_idlest_cpu - find the idlest cpu among the cpus in group.
  */
 static int
-find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu,
-		cpumask_t *tmp)
+find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
 {
 	unsigned long load, min_load = ULONG_MAX;
 	int idlest = -1;
 	int i;
 
 	/* Traverse only the allowed CPUs */
-	cpus_and(*tmp, group->cpumask, p->cpus_allowed);
-
-	for_each_cpu_mask_nr(i, *tmp) {
+	for_each_cpu_and(i, sched_group_cpus(group), &p->cpus_allowed) {
 		load = weighted_cpuload(i);
 
 		if (load < min_load || (load == min_load && i == this_cpu)) {
@@ -2193,7 +2200,6 @@
 		update_shares(sd);
 
 	while (sd) {
-		cpumask_t span, tmpmask;
 		struct sched_group *group;
 		int new_cpu, weight;
 
@@ -2202,14 +2208,13 @@
 			continue;
 		}
 
-		span = sd->span;
 		group = find_idlest_group(sd, t, cpu);
 		if (!group) {
 			sd = sd->child;
 			continue;
 		}
 
-		new_cpu = find_idlest_cpu(group, t, cpu, &tmpmask);
+		new_cpu = find_idlest_cpu(group, t, cpu);
 		if (new_cpu == -1 || new_cpu == cpu) {
 			/* Now try balancing at a lower domain level of cpu */
 			sd = sd->child;
@@ -2218,10 +2223,10 @@
 
 		/* Now try balancing at a lower domain level of new_cpu */
 		cpu = new_cpu;
+		weight = cpumask_weight(sched_domain_span(sd));
 		sd = NULL;
-		weight = cpus_weight(span);
 		for_each_domain(cpu, tmp) {
-			if (weight <= cpus_weight(tmp->span))
+			if (weight <= cpumask_weight(sched_domain_span(tmp)))
 				break;
 			if (tmp->flags & flag)
 				sd = tmp;
@@ -2266,7 +2271,7 @@
 		cpu = task_cpu(p);
 
 		for_each_domain(this_cpu, sd) {
-			if (cpu_isset(cpu, sd->span)) {
+			if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
 				update_shares(sd);
 				break;
 			}
@@ -2315,7 +2320,7 @@
 	else {
 		struct sched_domain *sd;
 		for_each_domain(this_cpu, sd) {
-			if (cpu_isset(cpu, sd->span)) {
+			if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
 				schedstat_inc(sd, ttwu_wake_remote);
 				break;
 			}
@@ -2846,7 +2851,7 @@
 	struct rq *rq;
 
 	rq = task_rq_lock(p, &flags);
-	if (!cpu_isset(dest_cpu, p->cpus_allowed)
+	if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed)
 	    || unlikely(!cpu_active(dest_cpu)))
 		goto out;
 
@@ -2911,7 +2916,7 @@
 	 * 2) cannot be migrated to this CPU due to cpus_allowed, or
 	 * 3) are cache-hot on their current CPU.
 	 */
-	if (!cpu_isset(this_cpu, p->cpus_allowed)) {
+	if (!cpumask_test_cpu(this_cpu, &p->cpus_allowed)) {
 		schedstat_inc(p, se.nr_failed_migrations_affine);
 		return 0;
 	}
@@ -3086,7 +3091,7 @@
 static struct sched_group *
 find_busiest_group(struct sched_domain *sd, int this_cpu,
 		   unsigned long *imbalance, enum cpu_idle_type idle,
-		   int *sd_idle, const cpumask_t *cpus, int *balance)
+		   int *sd_idle, const struct cpumask *cpus, int *balance)
 {
 	struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups;
 	unsigned long max_load, avg_load, total_load, this_load, total_pwr;
@@ -3122,10 +3127,11 @@
 		unsigned long sum_avg_load_per_task;
 		unsigned long avg_load_per_task;
 
-		local_group = cpu_isset(this_cpu, group->cpumask);
+		local_group = cpumask_test_cpu(this_cpu,
+					       sched_group_cpus(group));
 
 		if (local_group)
-			balance_cpu = first_cpu(group->cpumask);
+			balance_cpu = cpumask_first(sched_group_cpus(group));
 
 		/* Tally up the load of all CPUs in the group */
 		sum_weighted_load = sum_nr_running = avg_load = 0;
@@ -3134,13 +3140,8 @@
 		max_cpu_load = 0;
 		min_cpu_load = ~0UL;
 
-		for_each_cpu_mask_nr(i, group->cpumask) {
-			struct rq *rq;
-
-			if (!cpu_isset(i, *cpus))
-				continue;
-
-			rq = cpu_rq(i);
+		for_each_cpu_and(i, sched_group_cpus(group), cpus) {
+			struct rq *rq = cpu_rq(i);
 
 			if (*sd_idle && rq->nr_running)
 				*sd_idle = 0;
@@ -3251,8 +3252,8 @@
 		 */
 		if ((sum_nr_running < min_nr_running) ||
 		    (sum_nr_running == min_nr_running &&
-		     first_cpu(group->cpumask) <
-		     first_cpu(group_min->cpumask))) {
+		     cpumask_first(sched_group_cpus(group)) >
+		     cpumask_first(sched_group_cpus(group_min)))) {
 			group_min = group;
 			min_nr_running = sum_nr_running;
 			min_load_per_task = sum_weighted_load /
@@ -3267,8 +3268,8 @@
 		if (sum_nr_running <= group_capacity - 1) {
 			if (sum_nr_running > leader_nr_running ||
 			    (sum_nr_running == leader_nr_running &&
-			     first_cpu(group->cpumask) >
-			      first_cpu(group_leader->cpumask))) {
+			     cpumask_first(sched_group_cpus(group)) <
+			     cpumask_first(sched_group_cpus(group_leader)))) {
 				group_leader = group;
 				leader_nr_running = sum_nr_running;
 			}
@@ -3394,6 +3395,10 @@
 
 	if (this == group_leader && group_leader != group_min) {
 		*imbalance = min_load_per_task;
+		if (sched_mc_power_savings >= POWERSAVINGS_BALANCE_WAKEUP) {
+			cpu_rq(this_cpu)->rd->sched_mc_preferred_wakeup_cpu =
+				cpumask_first(sched_group_cpus(group_leader));
+		}
 		return group_min;
 	}
 #endif
@@ -3407,16 +3412,16 @@
  */
 static struct rq *
 find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
-		   unsigned long imbalance, const cpumask_t *cpus)
+		   unsigned long imbalance, const struct cpumask *cpus)
 {
 	struct rq *busiest = NULL, *rq;
 	unsigned long max_load = 0;
 	int i;
 
-	for_each_cpu_mask_nr(i, group->cpumask) {
+	for_each_cpu(i, sched_group_cpus(group)) {
 		unsigned long wl;
 
-		if (!cpu_isset(i, *cpus))
+		if (!cpumask_test_cpu(i, cpus))
 			continue;
 
 		rq = cpu_rq(i);
@@ -3446,7 +3451,7 @@
  */
 static int load_balance(int this_cpu, struct rq *this_rq,
 			struct sched_domain *sd, enum cpu_idle_type idle,
-			int *balance, cpumask_t *cpus)
+			int *balance, struct cpumask *cpus)
 {
 	int ld_moved, all_pinned = 0, active_balance = 0, sd_idle = 0;
 	struct sched_group *group;
@@ -3454,7 +3459,7 @@
 	struct rq *busiest;
 	unsigned long flags;
 
-	cpus_setall(*cpus);
+	cpumask_setall(cpus);
 
 	/*
 	 * When power savings policy is enabled for the parent domain, idle
@@ -3514,8 +3519,8 @@
 
 		/* All tasks on this runqueue were pinned by CPU affinity */
 		if (unlikely(all_pinned)) {
-			cpu_clear(cpu_of(busiest), *cpus);
-			if (!cpus_empty(*cpus))
+			cpumask_clear_cpu(cpu_of(busiest), cpus);
+			if (!cpumask_empty(cpus))
 				goto redo;
 			goto out_balanced;
 		}
@@ -3532,7 +3537,8 @@
 			/* don't kick the migration_thread, if the curr
 			 * task on busiest cpu can't be moved to this_cpu
 			 */
-			if (!cpu_isset(this_cpu, busiest->curr->cpus_allowed)) {
+			if (!cpumask_test_cpu(this_cpu,
+					      &busiest->curr->cpus_allowed)) {
 				spin_unlock_irqrestore(&busiest->lock, flags);
 				all_pinned = 1;
 				goto out_one_pinned;
@@ -3607,7 +3613,7 @@
  */
 static int
 load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd,
-			cpumask_t *cpus)
+			struct cpumask *cpus)
 {
 	struct sched_group *group;
 	struct rq *busiest = NULL;
@@ -3616,7 +3622,7 @@
 	int sd_idle = 0;
 	int all_pinned = 0;
 
-	cpus_setall(*cpus);
+	cpumask_setall(cpus);
 
 	/*
 	 * When power savings policy is enabled for the parent domain, idle
@@ -3660,17 +3666,71 @@
 		double_unlock_balance(this_rq, busiest);
 
 		if (unlikely(all_pinned)) {
-			cpu_clear(cpu_of(busiest), *cpus);
-			if (!cpus_empty(*cpus))
+			cpumask_clear_cpu(cpu_of(busiest), cpus);
+			if (!cpumask_empty(cpus))
 				goto redo;
 		}
 	}
 
 	if (!ld_moved) {
+		int active_balance = 0;
+
 		schedstat_inc(sd, lb_failed[CPU_NEWLY_IDLE]);
 		if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
 		    !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
 			return -1;
+
+		if (sched_mc_power_savings < POWERSAVINGS_BALANCE_WAKEUP)
+			return -1;
+
+		if (sd->nr_balance_failed++ < 2)
+			return -1;
+
+		/*
+		 * The only task running on a non-idle cpu can be moved to this
+		 * cpu in an attempt to completely free up the other CPU
+		 * package. The same method used to move tasks in load_balance()
+		 * has been extended for load_balance_newidle() to speed up
+		 * consolidation at sched_mc=POWERSAVINGS_BALANCE_WAKEUP (2).
+		 *
+		 * The package power saving logic comes from
+		 * find_busiest_group().  If there is no imbalance, then
+		 * f_b_g() will return NULL.  However, when sched_mc={1,2},
+		 * f_b_g() will select a group from which a running task may be
+		 * pulled to this cpu in order to make the other package idle.
+		 * If there is no opportunity to make a package idle and if
+		 * there is no imbalance, then f_b_g() will return NULL and no
+		 * action will be taken in load_balance_newidle().
+		 *
+		 * Under normal task pull operation due to imbalance, there
+		 * will be more than one task in the source run queue and
+		 * move_tasks() will succeed.  ld_moved will be true and this
+		 * active balance code will not be triggered.
+		 */
+
+		/* Lock busiest in correct order while this_rq is held */
+		double_lock_balance(this_rq, busiest);
+
+		/*
+		 * don't kick the migration_thread, if the curr
+		 * task on busiest cpu can't be moved to this_cpu
+		 */
+		if (!cpu_isset(this_cpu, busiest->curr->cpus_allowed)) {
+			double_unlock_balance(this_rq, busiest);
+			all_pinned = 1;
+			return ld_moved;
+		}
+
+		if (!busiest->active_balance) {
+			busiest->active_balance = 1;
+			busiest->push_cpu = this_cpu;
+			active_balance = 1;
+		}
+
+		double_unlock_balance(this_rq, busiest);
+		if (active_balance)
+			wake_up_process(busiest->migration_thread);
+
 	} else
 		sd->nr_balance_failed = 0;
 
@@ -3696,7 +3756,10 @@
 	struct sched_domain *sd;
 	int pulled_task = 0;
 	unsigned long next_balance = jiffies + HZ;
-	cpumask_t tmpmask;
+	cpumask_var_t tmpmask;
+
+	if (!alloc_cpumask_var(&tmpmask, GFP_ATOMIC))
+		return;
 
 	for_each_domain(this_cpu, sd) {
 		unsigned long interval;
@@ -3707,7 +3770,7 @@
 		if (sd->flags & SD_BALANCE_NEWIDLE)
 			/* If we've pulled tasks over stop searching: */
 			pulled_task = load_balance_newidle(this_cpu, this_rq,
-							   sd, &tmpmask);
+							   sd, tmpmask);
 
 		interval = msecs_to_jiffies(sd->balance_interval);
 		if (time_after(next_balance, sd->last_balance + interval))
@@ -3722,6 +3785,7 @@
 		 */
 		this_rq->next_balance = next_balance;
 	}
+	free_cpumask_var(tmpmask);
 }
 
 /*
@@ -3759,7 +3823,7 @@
 	/* Search for an sd spanning us and the target CPU. */
 	for_each_domain(target_cpu, sd) {
 		if ((sd->flags & SD_LOAD_BALANCE) &&
-		    cpu_isset(busiest_cpu, sd->span))
+		    cpumask_test_cpu(busiest_cpu, sched_domain_span(sd)))
 				break;
 	}
 
@@ -3778,10 +3842,9 @@
 #ifdef CONFIG_NO_HZ
 static struct {
 	atomic_t load_balancer;
-	cpumask_t cpu_mask;
+	cpumask_var_t cpu_mask;
 } nohz ____cacheline_aligned = {
 	.load_balancer = ATOMIC_INIT(-1),
-	.cpu_mask = CPU_MASK_NONE,
 };
 
 /*
@@ -3809,7 +3872,7 @@
 	int cpu = smp_processor_id();
 
 	if (stop_tick) {
-		cpu_set(cpu, nohz.cpu_mask);
+		cpumask_set_cpu(cpu, nohz.cpu_mask);
 		cpu_rq(cpu)->in_nohz_recently = 1;
 
 		/*
@@ -3823,7 +3886,7 @@
 		}
 
 		/* time for ilb owner also to sleep */
-		if (cpus_weight(nohz.cpu_mask) == num_online_cpus()) {
+		if (cpumask_weight(nohz.cpu_mask) == num_online_cpus()) {
 			if (atomic_read(&nohz.load_balancer) == cpu)
 				atomic_set(&nohz.load_balancer, -1);
 			return 0;
@@ -3836,10 +3899,10 @@
 		} else if (atomic_read(&nohz.load_balancer) == cpu)
 			return 1;
 	} else {
-		if (!cpu_isset(cpu, nohz.cpu_mask))
+		if (!cpumask_test_cpu(cpu, nohz.cpu_mask))
 			return 0;
 
-		cpu_clear(cpu, nohz.cpu_mask);
+		cpumask_clear_cpu(cpu, nohz.cpu_mask);
 
 		if (atomic_read(&nohz.load_balancer) == cpu)
 			if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu)
@@ -3867,7 +3930,11 @@
 	unsigned long next_balance = jiffies + 60*HZ;
 	int update_next_balance = 0;
 	int need_serialize;
-	cpumask_t tmp;
+	cpumask_var_t tmp;
+
+	/* Fails alloc?  Rebalancing probably not a priority right now. */
+	if (!alloc_cpumask_var(&tmp, GFP_ATOMIC))
+		return;
 
 	for_each_domain(cpu, sd) {
 		if (!(sd->flags & SD_LOAD_BALANCE))
@@ -3892,7 +3959,7 @@
 		}
 
 		if (time_after_eq(jiffies, sd->last_balance + interval)) {
-			if (load_balance(cpu, rq, sd, idle, &balance, &tmp)) {
+			if (load_balance(cpu, rq, sd, idle, &balance, tmp)) {
 				/*
 				 * We've pulled tasks over so either we're no
 				 * longer idle, or one of our SMT siblings is
@@ -3926,6 +3993,8 @@
 	 */
 	if (likely(update_next_balance))
 		rq->next_balance = next_balance;
+
+	free_cpumask_var(tmp);
 }
 
 /*
@@ -3950,12 +4019,13 @@
 	 */
 	if (this_rq->idle_at_tick &&
 	    atomic_read(&nohz.load_balancer) == this_cpu) {
-		cpumask_t cpus = nohz.cpu_mask;
 		struct rq *rq;
 		int balance_cpu;
 
-		cpu_clear(this_cpu, cpus);
-		for_each_cpu_mask_nr(balance_cpu, cpus) {
+		for_each_cpu(balance_cpu, nohz.cpu_mask) {
+			if (balance_cpu == this_cpu)
+				continue;
+
 			/*
 			 * If this cpu gets work to do, stop the load balancing
 			 * work being done for other cpus. Next load
@@ -3993,7 +4063,7 @@
 		rq->in_nohz_recently = 0;
 
 		if (atomic_read(&nohz.load_balancer) == cpu) {
-			cpu_clear(cpu, nohz.cpu_mask);
+			cpumask_clear_cpu(cpu, nohz.cpu_mask);
 			atomic_set(&nohz.load_balancer, -1);
 		}
 
@@ -4006,7 +4076,7 @@
 			 * TBD: Traverse the sched domains and nominate
 			 * the nearest cpu in the nohz.cpu_mask.
 			 */
-			int ilb = first_cpu(nohz.cpu_mask);
+			int ilb = cpumask_first(nohz.cpu_mask);
 
 			if (ilb < nr_cpu_ids)
 				resched_cpu(ilb);
@@ -4018,7 +4088,7 @@
 	 * cpus with ticks stopped, is it time for that to stop?
 	 */
 	if (rq->idle_at_tick && atomic_read(&nohz.load_balancer) == cpu &&
-	    cpus_weight(nohz.cpu_mask) == num_online_cpus()) {
+	    cpumask_weight(nohz.cpu_mask) == num_online_cpus()) {
 		resched_cpu(cpu);
 		return;
 	}
@@ -4028,7 +4098,7 @@
 	 * someone else, then no need raise the SCHED_SOFTIRQ
 	 */
 	if (rq->idle_at_tick && atomic_read(&nohz.load_balancer) != cpu &&
-	    cpu_isset(cpu, nohz.cpu_mask))
+	    cpumask_test_cpu(cpu, nohz.cpu_mask))
 		return;
 #endif
 	if (time_after_eq(jiffies, rq->next_balance))
@@ -5401,10 +5471,9 @@
 	return retval;
 }
 
-long sched_setaffinity(pid_t pid, const cpumask_t *in_mask)
+long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
 {
-	cpumask_t cpus_allowed;
-	cpumask_t new_mask = *in_mask;
+	cpumask_var_t cpus_allowed, new_mask;
 	struct task_struct *p;
 	int retval;
 
@@ -5426,6 +5495,14 @@
 	get_task_struct(p);
 	read_unlock(&tasklist_lock);
 
+	if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) {
+		retval = -ENOMEM;
+		goto out_put_task;
+	}
+	if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) {
+		retval = -ENOMEM;
+		goto out_free_cpus_allowed;
+	}
 	retval = -EPERM;
 	if (!check_same_owner(p) && !capable(CAP_SYS_NICE))
 		goto out_unlock;
@@ -5434,37 +5511,41 @@
 	if (retval)
 		goto out_unlock;
 
-	cpuset_cpus_allowed(p, &cpus_allowed);
-	cpus_and(new_mask, new_mask, cpus_allowed);
+	cpuset_cpus_allowed(p, cpus_allowed);
+	cpumask_and(new_mask, in_mask, cpus_allowed);
  again:
-	retval = set_cpus_allowed_ptr(p, &new_mask);
+	retval = set_cpus_allowed_ptr(p, new_mask);
 
 	if (!retval) {
-		cpuset_cpus_allowed(p, &cpus_allowed);
-		if (!cpus_subset(new_mask, cpus_allowed)) {
+		cpuset_cpus_allowed(p, cpus_allowed);
+		if (!cpumask_subset(new_mask, cpus_allowed)) {
 			/*
 			 * We must have raced with a concurrent cpuset
 			 * update. Just reset the cpus_allowed to the
 			 * cpuset's cpus_allowed
 			 */
-			new_mask = cpus_allowed;
+			cpumask_copy(new_mask, cpus_allowed);
 			goto again;
 		}
 	}
 out_unlock:
+	free_cpumask_var(new_mask);
+out_free_cpus_allowed:
+	free_cpumask_var(cpus_allowed);
+out_put_task:
 	put_task_struct(p);
 	put_online_cpus();
 	return retval;
 }
 
 static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len,
-			     cpumask_t *new_mask)
+			     struct cpumask *new_mask)
 {
-	if (len < sizeof(cpumask_t)) {
-		memset(new_mask, 0, sizeof(cpumask_t));
-	} else if (len > sizeof(cpumask_t)) {
-		len = sizeof(cpumask_t);
-	}
+	if (len < cpumask_size())
+		cpumask_clear(new_mask);
+	else if (len > cpumask_size())
+		len = cpumask_size();
+
 	return copy_from_user(new_mask, user_mask_ptr, len) ? -EFAULT : 0;
 }
 
@@ -5477,17 +5558,20 @@
 asmlinkage long sys_sched_setaffinity(pid_t pid, unsigned int len,
 				      unsigned long __user *user_mask_ptr)
 {
-	cpumask_t new_mask;
+	cpumask_var_t new_mask;
 	int retval;
 
-	retval = get_user_cpu_mask(user_mask_ptr, len, &new_mask);
-	if (retval)
-		return retval;
+	if (!alloc_cpumask_var(&new_mask, GFP_KERNEL))
+		return -ENOMEM;
 
-	return sched_setaffinity(pid, &new_mask);
+	retval = get_user_cpu_mask(user_mask_ptr, len, new_mask);
+	if (retval == 0)
+		retval = sched_setaffinity(pid, new_mask);
+	free_cpumask_var(new_mask);
+	return retval;
 }
 
-long sched_getaffinity(pid_t pid, cpumask_t *mask)
+long sched_getaffinity(pid_t pid, struct cpumask *mask)
 {
 	struct task_struct *p;
 	int retval;
@@ -5504,7 +5588,7 @@
 	if (retval)
 		goto out_unlock;
 
-	cpus_and(*mask, p->cpus_allowed, cpu_online_map);
+	cpumask_and(mask, &p->cpus_allowed, cpu_online_mask);
 
 out_unlock:
 	read_unlock(&tasklist_lock);
@@ -5523,19 +5607,24 @@
 				      unsigned long __user *user_mask_ptr)
 {
 	int ret;
-	cpumask_t mask;
+	cpumask_var_t mask;
 
-	if (len < sizeof(cpumask_t))
+	if (len < cpumask_size())
 		return -EINVAL;
 
-	ret = sched_getaffinity(pid, &mask);
-	if (ret < 0)
-		return ret;
+	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
+		return -ENOMEM;
 
-	if (copy_to_user(user_mask_ptr, &mask, sizeof(cpumask_t)))
-		return -EFAULT;
+	ret = sched_getaffinity(pid, mask);
+	if (ret == 0) {
+		if (copy_to_user(user_mask_ptr, mask, cpumask_size()))
+			ret = -EFAULT;
+		else
+			ret = cpumask_size();
+	}
+	free_cpumask_var(mask);
 
-	return sizeof(cpumask_t);
+	return ret;
 }
 
 /**
@@ -5877,7 +5966,7 @@
 	idle->se.exec_start = sched_clock();
 
 	idle->prio = idle->normal_prio = MAX_PRIO;
-	idle->cpus_allowed = cpumask_of_cpu(cpu);
+	cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu));
 	__set_task_cpu(idle, cpu);
 
 	rq->curr = rq->idle = idle;
@@ -5904,9 +5993,9 @@
  * indicates which cpus entered this state. This is used
  * in the rcu update to wait only for active cpus. For system
  * which do not switch off the HZ timer nohz_cpu_mask should
- * always be CPU_MASK_NONE.
+ * always be CPU_BITS_NONE.
  */
-cpumask_t nohz_cpu_mask = CPU_MASK_NONE;
+cpumask_var_t nohz_cpu_mask;
 
 /*
  * Increase the granularity value when there are more CPUs,
@@ -5961,7 +6050,7 @@
  * task must not exit() & deallocate itself prematurely. The
  * call is not atomic; no spinlocks may be held.
  */
-int set_cpus_allowed_ptr(struct task_struct *p, const cpumask_t *new_mask)
+int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
 {
 	struct migration_req req;
 	unsigned long flags;
@@ -5969,13 +6058,13 @@
 	int ret = 0;
 
 	rq = task_rq_lock(p, &flags);
-	if (!cpus_intersects(*new_mask, cpu_online_map)) {
+	if (!cpumask_intersects(new_mask, cpu_online_mask)) {
 		ret = -EINVAL;
 		goto out;
 	}
 
 	if (unlikely((p->flags & PF_THREAD_BOUND) && p != current &&
-		     !cpus_equal(p->cpus_allowed, *new_mask))) {
+		     !cpumask_equal(&p->cpus_allowed, new_mask))) {
 		ret = -EINVAL;
 		goto out;
 	}
@@ -5983,15 +6072,15 @@
 	if (p->sched_class->set_cpus_allowed)
 		p->sched_class->set_cpus_allowed(p, new_mask);
 	else {
-		p->cpus_allowed = *new_mask;
-		p->rt.nr_cpus_allowed = cpus_weight(*new_mask);
+		cpumask_copy(&p->cpus_allowed, new_mask);
+		p->rt.nr_cpus_allowed = cpumask_weight(new_mask);
 	}
 
 	/* Can the task run on the task's current CPU? If so, we're done */
-	if (cpu_isset(task_cpu(p), *new_mask))
+	if (cpumask_test_cpu(task_cpu(p), new_mask))
 		goto out;
 
-	if (migrate_task(p, any_online_cpu(*new_mask), &req)) {
+	if (migrate_task(p, cpumask_any_and(cpu_online_mask, new_mask), &req)) {
 		/* Need help from migration thread: drop lock and wait. */
 		task_rq_unlock(rq, &flags);
 		wake_up_process(rq->migration_thread);
@@ -6033,7 +6122,7 @@
 	if (task_cpu(p) != src_cpu)
 		goto done;
 	/* Affinity changed (again). */
-	if (!cpu_isset(dest_cpu, p->cpus_allowed))
+	if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
 		goto fail;
 
 	on_rq = p->se.on_rq;
@@ -6130,50 +6219,43 @@
  */
 static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
 {
-	unsigned long flags;
-	cpumask_t mask;
-	struct rq *rq;
 	int dest_cpu;
+	/* FIXME: Use cpumask_of_node here. */
+	cpumask_t _nodemask = node_to_cpumask(cpu_to_node(dead_cpu));
+	const struct cpumask *nodemask = &_nodemask;
 
-	do {
-		/* On same node? */
-		mask = node_to_cpumask(cpu_to_node(dead_cpu));
-		cpus_and(mask, mask, p->cpus_allowed);
-		dest_cpu = any_online_cpu(mask);
+again:
+	/* Look for allowed, online CPU in same node. */
+	for_each_cpu_and(dest_cpu, nodemask, cpu_online_mask)
+		if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
+			goto move;
 
-		/* On any allowed CPU? */
-		if (dest_cpu >= nr_cpu_ids)
-			dest_cpu = any_online_cpu(p->cpus_allowed);
+	/* Any allowed, online CPU? */
+	dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_online_mask);
+	if (dest_cpu < nr_cpu_ids)
+		goto move;
 
-		/* No more Mr. Nice Guy. */
-		if (dest_cpu >= nr_cpu_ids) {
-			cpumask_t cpus_allowed;
+	/* No more Mr. Nice Guy. */
+	if (dest_cpu >= nr_cpu_ids) {
+		cpuset_cpus_allowed_locked(p, &p->cpus_allowed);
+		dest_cpu = cpumask_any_and(cpu_online_mask, &p->cpus_allowed);
 
-			cpuset_cpus_allowed_locked(p, &cpus_allowed);
-			/*
-			 * Try to stay on the same cpuset, where the
-			 * current cpuset may be a subset of all cpus.
-			 * The cpuset_cpus_allowed_locked() variant of
-			 * cpuset_cpus_allowed() will not block. It must be
-			 * called within calls to cpuset_lock/cpuset_unlock.
-			 */
-			rq = task_rq_lock(p, &flags);
-			p->cpus_allowed = cpus_allowed;
-			dest_cpu = any_online_cpu(p->cpus_allowed);
-			task_rq_unlock(rq, &flags);
-
-			/*
-			 * Don't tell them about moving exiting tasks or
-			 * kernel threads (both mm NULL), since they never
-			 * leave kernel.
-			 */
-			if (p->mm && printk_ratelimit()) {
-				printk(KERN_INFO "process %d (%s) no "
-				       "longer affine to cpu%d\n",
-					task_pid_nr(p), p->comm, dead_cpu);
-			}
+		/*
+		 * Don't tell them about moving exiting tasks or
+		 * kernel threads (both mm NULL), since they never
+		 * leave kernel.
+		 */
+		if (p->mm && printk_ratelimit()) {
+			printk(KERN_INFO "process %d (%s) no "
+			       "longer affine to cpu%d\n",
+			       task_pid_nr(p), p->comm, dead_cpu);
 		}
-	} while (!__migrate_task_irq(p, dead_cpu, dest_cpu));
+	}
+
+move:
+	/* It can have affinity changed while we were choosing. */
+	if (unlikely(!__migrate_task_irq(p, dead_cpu, dest_cpu)))
+		goto again;
 }
 
 /*
@@ -6185,7 +6267,7 @@
  */
 static void migrate_nr_uninterruptible(struct rq *rq_src)
 {
-	struct rq *rq_dest = cpu_rq(any_online_cpu(*CPU_MASK_ALL_PTR));
+	struct rq *rq_dest = cpu_rq(cpumask_any(cpu_online_mask));
 	unsigned long flags;
 
 	local_irq_save(flags);
@@ -6475,7 +6557,7 @@
 	if (!rq->online) {
 		const struct sched_class *class;
 
-		cpu_set(rq->cpu, rq->rd->online);
+		cpumask_set_cpu(rq->cpu, rq->rd->online);
 		rq->online = 1;
 
 		for_each_class(class) {
@@ -6495,7 +6577,7 @@
 				class->rq_offline(rq);
 		}
 
-		cpu_clear(rq->cpu, rq->rd->online);
+		cpumask_clear_cpu(rq->cpu, rq->rd->online);
 		rq->online = 0;
 	}
 }
@@ -6536,7 +6618,7 @@
 		rq = cpu_rq(cpu);
 		spin_lock_irqsave(&rq->lock, flags);
 		if (rq->rd) {
-			BUG_ON(!cpu_isset(cpu, rq->rd->span));
+			BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
 
 			set_rq_online(rq);
 		}
@@ -6550,7 +6632,7 @@
 			break;
 		/* Unbind it from offline cpu so it can run. Fall thru. */
 		kthread_bind(cpu_rq(cpu)->migration_thread,
-			     any_online_cpu(cpu_online_map));
+			     cpumask_any(cpu_online_mask));
 		kthread_stop(cpu_rq(cpu)->migration_thread);
 		cpu_rq(cpu)->migration_thread = NULL;
 		break;
@@ -6600,7 +6682,7 @@
 		rq = cpu_rq(cpu);
 		spin_lock_irqsave(&rq->lock, flags);
 		if (rq->rd) {
-			BUG_ON(!cpu_isset(cpu, rq->rd->span));
+			BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
 			set_rq_offline(rq);
 		}
 		spin_unlock_irqrestore(&rq->lock, flags);
@@ -6639,13 +6721,13 @@
 #ifdef CONFIG_SCHED_DEBUG
 
 static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
-				  cpumask_t *groupmask)
+				  struct cpumask *groupmask)
 {
 	struct sched_group *group = sd->groups;
 	char str[256];
 
-	cpulist_scnprintf(str, sizeof(str), sd->span);
-	cpus_clear(*groupmask);
+	cpulist_scnprintf(str, sizeof(str), sched_domain_span(sd));
+	cpumask_clear(groupmask);
 
 	printk(KERN_DEBUG "%*s domain %d: ", level, "", level);
 
@@ -6659,11 +6741,11 @@
 
 	printk(KERN_CONT "span %s level %s\n", str, sd->name);
 
-	if (!cpu_isset(cpu, sd->span)) {
+	if (!cpumask_test_cpu(cpu, sched_domain_span(sd))) {
 		printk(KERN_ERR "ERROR: domain->span does not contain "
 				"CPU%d\n", cpu);
 	}
-	if (!cpu_isset(cpu, group->cpumask)) {
+	if (!cpumask_test_cpu(cpu, sched_group_cpus(group))) {
 		printk(KERN_ERR "ERROR: domain->groups does not contain"
 				" CPU%d\n", cpu);
 	}
@@ -6683,31 +6765,32 @@
 			break;
 		}
 
-		if (!cpus_weight(group->cpumask)) {
+		if (!cpumask_weight(sched_group_cpus(group))) {
 			printk(KERN_CONT "\n");
 			printk(KERN_ERR "ERROR: empty group\n");
 			break;
 		}
 
-		if (cpus_intersects(*groupmask, group->cpumask)) {
+		if (cpumask_intersects(groupmask, sched_group_cpus(group))) {
 			printk(KERN_CONT "\n");
 			printk(KERN_ERR "ERROR: repeated CPUs\n");
 			break;
 		}
 
-		cpus_or(*groupmask, *groupmask, group->cpumask);
+		cpumask_or(groupmask, groupmask, sched_group_cpus(group));
 
-		cpulist_scnprintf(str, sizeof(str), group->cpumask);
+		cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group));
 		printk(KERN_CONT " %s", str);
 
 		group = group->next;
 	} while (group != sd->groups);
 	printk(KERN_CONT "\n");
 
-	if (!cpus_equal(sd->span, *groupmask))
+	if (!cpumask_equal(sched_domain_span(sd), groupmask))
 		printk(KERN_ERR "ERROR: groups don't span domain->span\n");
 
-	if (sd->parent && !cpus_subset(*groupmask, sd->parent->span))
+	if (sd->parent &&
+	    !cpumask_subset(groupmask, sched_domain_span(sd->parent)))
 		printk(KERN_ERR "ERROR: parent span is not a superset "
 			"of domain->span\n");
 	return 0;
@@ -6715,7 +6798,7 @@
 
 static void sched_domain_debug(struct sched_domain *sd, int cpu)
 {
-	cpumask_t *groupmask;
+	cpumask_var_t groupmask;
 	int level = 0;
 
 	if (!sd) {
@@ -6725,8 +6808,7 @@
 
 	printk(KERN_DEBUG "CPU%d attaching sched-domain:\n", cpu);
 
-	groupmask = kmalloc(sizeof(cpumask_t), GFP_KERNEL);
-	if (!groupmask) {
+	if (!alloc_cpumask_var(&groupmask, GFP_KERNEL)) {
 		printk(KERN_DEBUG "Cannot load-balance (out of memory)\n");
 		return;
 	}
@@ -6739,7 +6821,7 @@
 		if (!sd)
 			break;
 	}
-	kfree(groupmask);
+	free_cpumask_var(groupmask);
 }
 #else /* !CONFIG_SCHED_DEBUG */
 # define sched_domain_debug(sd, cpu) do { } while (0)
@@ -6747,7 +6829,7 @@
 
 static int sd_degenerate(struct sched_domain *sd)
 {
-	if (cpus_weight(sd->span) == 1)
+	if (cpumask_weight(sched_domain_span(sd)) == 1)
 		return 1;
 
 	/* Following flags need at least 2 groups */
@@ -6778,7 +6860,7 @@
 	if (sd_degenerate(parent))
 		return 1;
 
-	if (!cpus_equal(sd->span, parent->span))
+	if (!cpumask_equal(sched_domain_span(sd), sched_domain_span(parent)))
 		return 0;
 
 	/* Does parent contain flags not in child? */
@@ -6802,6 +6884,16 @@
 	return 1;
 }
 
+static void free_rootdomain(struct root_domain *rd)
+{
+	cpupri_cleanup(&rd->cpupri);
+
+	free_cpumask_var(rd->rto_mask);
+	free_cpumask_var(rd->online);
+	free_cpumask_var(rd->span);
+	kfree(rd);
+}
+
 static void rq_attach_root(struct rq *rq, struct root_domain *rd)
 {
 	unsigned long flags;
@@ -6811,38 +6903,63 @@
 	if (rq->rd) {
 		struct root_domain *old_rd = rq->rd;
 
-		if (cpu_isset(rq->cpu, old_rd->online))
+		if (cpumask_test_cpu(rq->cpu, old_rd->online))
 			set_rq_offline(rq);
 
-		cpu_clear(rq->cpu, old_rd->span);
+		cpumask_clear_cpu(rq->cpu, old_rd->span);
 
 		if (atomic_dec_and_test(&old_rd->refcount))
-			kfree(old_rd);
+			free_rootdomain(old_rd);
 	}
 
 	atomic_inc(&rd->refcount);
 	rq->rd = rd;
 
-	cpu_set(rq->cpu, rd->span);
-	if (cpu_isset(rq->cpu, cpu_online_map))
+	cpumask_set_cpu(rq->cpu, rd->span);
+	if (cpumask_test_cpu(rq->cpu, cpu_online_mask))
 		set_rq_online(rq);
 
 	spin_unlock_irqrestore(&rq->lock, flags);
 }
 
-static void init_rootdomain(struct root_domain *rd)
+static int init_rootdomain(struct root_domain *rd, bool bootmem)
 {
 	memset(rd, 0, sizeof(*rd));
 
-	cpus_clear(rd->span);
-	cpus_clear(rd->online);
+	if (bootmem) {
+		alloc_bootmem_cpumask_var(&def_root_domain.span);
+		alloc_bootmem_cpumask_var(&def_root_domain.online);
+		alloc_bootmem_cpumask_var(&def_root_domain.rto_mask);
+		cpupri_init(&rd->cpupri, true);
+		return 0;
+	}
 
-	cpupri_init(&rd->cpupri);
+	if (!alloc_cpumask_var(&rd->span, GFP_KERNEL))
+		goto free_rd;
+	if (!alloc_cpumask_var(&rd->online, GFP_KERNEL))
+		goto free_span;
+	if (!alloc_cpumask_var(&rd->rto_mask, GFP_KERNEL))
+		goto free_online;
+
+	if (cpupri_init(&rd->cpupri, false) != 0)
+		goto free_rto_mask;
+	return 0;
+
+free_rto_mask:
+	free_cpumask_var(rd->rto_mask);
+free_online:
+	free_cpumask_var(rd->online);
+free_span:
+	free_cpumask_var(rd->span);
+free_rd:
+	kfree(rd);
+	return -ENOMEM;
 }
 
 static void init_defrootdomain(void)
 {
-	init_rootdomain(&def_root_domain);
+	init_rootdomain(&def_root_domain, true);
+
 	atomic_set(&def_root_domain.refcount, 1);
 }
 
@@ -6854,7 +6971,10 @@
 	if (!rd)
 		return NULL;
 
-	init_rootdomain(rd);
+	if (init_rootdomain(rd, false) != 0) {
+		kfree(rd);
+		return NULL;
+	}
 
 	return rd;
 }
@@ -6896,19 +7016,12 @@
 }
 
 /* cpus with isolated domains */
-static cpumask_t cpu_isolated_map = CPU_MASK_NONE;
+static cpumask_var_t cpu_isolated_map;
 
 /* Setup the mask of cpus configured for isolated domains */
 static int __init isolated_cpu_setup(char *str)
 {
-	static int __initdata ints[NR_CPUS];
-	int i;
-
-	str = get_options(str, ARRAY_SIZE(ints), ints);
-	cpus_clear(cpu_isolated_map);
-	for (i = 1; i <= ints[0]; i++)
-		if (ints[i] < NR_CPUS)
-			cpu_set(ints[i], cpu_isolated_map);
+	cpulist_parse(str, cpu_isolated_map);
 	return 1;
 }
 
@@ -6917,42 +7030,43 @@
 /*
  * init_sched_build_groups takes the cpumask we wish to span, and a pointer
  * to a function which identifies what group(along with sched group) a CPU
- * belongs to. The return value of group_fn must be a >= 0 and < NR_CPUS
- * (due to the fact that we keep track of groups covered with a cpumask_t).
+ * belongs to. The return value of group_fn must be >= 0 and < nr_cpu_ids
+ * (due to the fact that we keep track of groups covered with a struct cpumask).
  *
  * init_sched_build_groups will build a circular linked list of the groups
  * covered by the given span, and will set each group's ->cpumask correctly,
  * and ->cpu_power to 0.
  */
 static void
-init_sched_build_groups(const cpumask_t *span, const cpumask_t *cpu_map,
-			int (*group_fn)(int cpu, const cpumask_t *cpu_map,
+init_sched_build_groups(const struct cpumask *span,
+			const struct cpumask *cpu_map,
+			int (*group_fn)(int cpu, const struct cpumask *cpu_map,
 					struct sched_group **sg,
-					cpumask_t *tmpmask),
-			cpumask_t *covered, cpumask_t *tmpmask)
+					struct cpumask *tmpmask),
+			struct cpumask *covered, struct cpumask *tmpmask)
 {
 	struct sched_group *first = NULL, *last = NULL;
 	int i;
 
-	cpus_clear(*covered);
+	cpumask_clear(covered);
 
-	for_each_cpu_mask_nr(i, *span) {
+	for_each_cpu(i, span) {
 		struct sched_group *sg;
 		int group = group_fn(i, cpu_map, &sg, tmpmask);
 		int j;
 
-		if (cpu_isset(i, *covered))
+		if (cpumask_test_cpu(i, covered))
 			continue;
 
-		cpus_clear(sg->cpumask);
+		cpumask_clear(sched_group_cpus(sg));
 		sg->__cpu_power = 0;
 
-		for_each_cpu_mask_nr(j, *span) {
+		for_each_cpu(j, span) {
 			if (group_fn(j, cpu_map, NULL, tmpmask) != group)
 				continue;
 
-			cpu_set(j, *covered);
-			cpu_set(j, sg->cpumask);
+			cpumask_set_cpu(j, covered);
+			cpumask_set_cpu(j, sched_group_cpus(sg));
 		}
 		if (!first)
 			first = sg;
@@ -7016,9 +7130,10 @@
  * should be one that prevents unnecessary balancing, but also spreads tasks
  * out optimally.
  */
-static void sched_domain_node_span(int node, cpumask_t *span)
+static void sched_domain_node_span(int node, struct cpumask *span)
 {
 	nodemask_t used_nodes;
+	/* FIXME: use cpumask_of_node() */
 	node_to_cpumask_ptr(nodemask, node);
 	int i;
 
@@ -7040,18 +7155,33 @@
 int sched_smt_power_savings = 0, sched_mc_power_savings = 0;
 
 /*
+ * The cpus mask in sched_group and sched_domain hangs off the end.
+ * FIXME: use cpumask_var_t or dynamic percpu alloc to avoid wasting space
+ * for nr_cpu_ids < CONFIG_NR_CPUS.
+ */
+struct static_sched_group {
+	struct sched_group sg;
+	DECLARE_BITMAP(cpus, CONFIG_NR_CPUS);
+};
+
+struct static_sched_domain {
+	struct sched_domain sd;
+	DECLARE_BITMAP(span, CONFIG_NR_CPUS);
+};
+
+/*
  * SMT sched-domains:
  */
 #ifdef CONFIG_SCHED_SMT
-static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
-static DEFINE_PER_CPU(struct sched_group, sched_group_cpus);
+static DEFINE_PER_CPU(struct static_sched_domain, cpu_domains);
+static DEFINE_PER_CPU(struct static_sched_group, sched_group_cpus);
 
 static int
-cpu_to_cpu_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg,
-		 cpumask_t *unused)
+cpu_to_cpu_group(int cpu, const struct cpumask *cpu_map,
+		 struct sched_group **sg, struct cpumask *unused)
 {
 	if (sg)
-		*sg = &per_cpu(sched_group_cpus, cpu);
+		*sg = &per_cpu(sched_group_cpus, cpu).sg;
 	return cpu;
 }
 #endif /* CONFIG_SCHED_SMT */
@@ -7060,56 +7190,55 @@
  * multi-core sched-domains:
  */
 #ifdef CONFIG_SCHED_MC
-static DEFINE_PER_CPU(struct sched_domain, core_domains);
-static DEFINE_PER_CPU(struct sched_group, sched_group_core);
+static DEFINE_PER_CPU(struct static_sched_domain, core_domains);
+static DEFINE_PER_CPU(struct static_sched_group, sched_group_core);
 #endif /* CONFIG_SCHED_MC */
 
 #if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT)
 static int
-cpu_to_core_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg,
-		  cpumask_t *mask)
+cpu_to_core_group(int cpu, const struct cpumask *cpu_map,
+		  struct sched_group **sg, struct cpumask *mask)
 {
 	int group;
 
-	*mask = per_cpu(cpu_sibling_map, cpu);
-	cpus_and(*mask, *mask, *cpu_map);
-	group = first_cpu(*mask);
+	cpumask_and(mask, &per_cpu(cpu_sibling_map, cpu), cpu_map);
+	group = cpumask_first(mask);
 	if (sg)
-		*sg = &per_cpu(sched_group_core, group);
+		*sg = &per_cpu(sched_group_core, group).sg;
 	return group;
 }
 #elif defined(CONFIG_SCHED_MC)
 static int
-cpu_to_core_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg,
-		  cpumask_t *unused)
+cpu_to_core_group(int cpu, const struct cpumask *cpu_map,
+		  struct sched_group **sg, struct cpumask *unused)
 {
 	if (sg)
-		*sg = &per_cpu(sched_group_core, cpu);
+		*sg = &per_cpu(sched_group_core, cpu).sg;
 	return cpu;
 }
 #endif
 
-static DEFINE_PER_CPU(struct sched_domain, phys_domains);
-static DEFINE_PER_CPU(struct sched_group, sched_group_phys);
+static DEFINE_PER_CPU(struct static_sched_domain, phys_domains);
+static DEFINE_PER_CPU(struct static_sched_group, sched_group_phys);
 
 static int
-cpu_to_phys_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg,
-		  cpumask_t *mask)
+cpu_to_phys_group(int cpu, const struct cpumask *cpu_map,
+		  struct sched_group **sg, struct cpumask *mask)
 {
 	int group;
 #ifdef CONFIG_SCHED_MC
+	/* FIXME: Use cpu_coregroup_mask. */
 	*mask = cpu_coregroup_map(cpu);
 	cpus_and(*mask, *mask, *cpu_map);
-	group = first_cpu(*mask);
+	group = cpumask_first(mask);
 #elif defined(CONFIG_SCHED_SMT)
-	*mask = per_cpu(cpu_sibling_map, cpu);
-	cpus_and(*mask, *mask, *cpu_map);
-	group = first_cpu(*mask);
+	cpumask_and(mask, &per_cpu(cpu_sibling_map, cpu), cpu_map);
+	group = cpumask_first(mask);
 #else
 	group = cpu;
 #endif
 	if (sg)
-		*sg = &per_cpu(sched_group_phys, group);
+		*sg = &per_cpu(sched_group_phys, group).sg;
 	return group;
 }
 
@@ -7123,19 +7252,21 @@
 static struct sched_group ***sched_group_nodes_bycpu;
 
 static DEFINE_PER_CPU(struct sched_domain, allnodes_domains);
-static DEFINE_PER_CPU(struct sched_group, sched_group_allnodes);
+static DEFINE_PER_CPU(struct static_sched_group, sched_group_allnodes);
 
-static int cpu_to_allnodes_group(int cpu, const cpumask_t *cpu_map,
-				 struct sched_group **sg, cpumask_t *nodemask)
+static int cpu_to_allnodes_group(int cpu, const struct cpumask *cpu_map,
+				 struct sched_group **sg,
+				 struct cpumask *nodemask)
 {
 	int group;
+	/* FIXME: use cpumask_of_node */
+	node_to_cpumask_ptr(pnodemask, cpu_to_node(cpu));
 
-	*nodemask = node_to_cpumask(cpu_to_node(cpu));
-	cpus_and(*nodemask, *nodemask, *cpu_map);
-	group = first_cpu(*nodemask);
+	cpumask_and(nodemask, pnodemask, cpu_map);
+	group = cpumask_first(nodemask);
 
 	if (sg)
-		*sg = &per_cpu(sched_group_allnodes, group);
+		*sg = &per_cpu(sched_group_allnodes, group).sg;
 	return group;
 }
 
@@ -7147,11 +7278,11 @@
 	if (!sg)
 		return;
 	do {
-		for_each_cpu_mask_nr(j, sg->cpumask) {
+		for_each_cpu(j, sched_group_cpus(sg)) {
 			struct sched_domain *sd;
 
-			sd = &per_cpu(phys_domains, j);
-			if (j != first_cpu(sd->groups->cpumask)) {
+			sd = &per_cpu(phys_domains, j).sd;
+			if (j != cpumask_first(sched_group_cpus(sd->groups))) {
 				/*
 				 * Only add "power" once for each
 				 * physical package.
@@ -7168,11 +7299,12 @@
 
 #ifdef CONFIG_NUMA
 /* Free memory allocated for various sched_group structures */
-static void free_sched_groups(const cpumask_t *cpu_map, cpumask_t *nodemask)
+static void free_sched_groups(const struct cpumask *cpu_map,
+			      struct cpumask *nodemask)
 {
 	int cpu, i;
 
-	for_each_cpu_mask_nr(cpu, *cpu_map) {
+	for_each_cpu(cpu, cpu_map) {
 		struct sched_group **sched_group_nodes
 			= sched_group_nodes_bycpu[cpu];
 
@@ -7181,10 +7313,11 @@
 
 		for (i = 0; i < nr_node_ids; i++) {
 			struct sched_group *oldsg, *sg = sched_group_nodes[i];
+			/* FIXME: Use cpumask_of_node */
+			node_to_cpumask_ptr(pnodemask, i);
 
-			*nodemask = node_to_cpumask(i);
-			cpus_and(*nodemask, *nodemask, *cpu_map);
-			if (cpus_empty(*nodemask))
+			cpus_and(*nodemask, *pnodemask, *cpu_map);
+			if (cpumask_empty(nodemask))
 				continue;
 
 			if (sg == NULL)
@@ -7202,7 +7335,8 @@
 	}
 }
 #else /* !CONFIG_NUMA */
-static void free_sched_groups(const cpumask_t *cpu_map, cpumask_t *nodemask)
+static void free_sched_groups(const struct cpumask *cpu_map,
+			      struct cpumask *nodemask)
 {
 }
 #endif /* CONFIG_NUMA */
@@ -7228,7 +7362,7 @@
 
 	WARN_ON(!sd || !sd->groups);
 
-	if (cpu != first_cpu(sd->groups->cpumask))
+	if (cpu != cpumask_first(sched_group_cpus(sd->groups)))
 		return;
 
 	child = sd->child;
@@ -7293,48 +7427,6 @@
  SD_INIT_FUNC(MC)
 #endif
 
-/*
- * To minimize stack usage kmalloc room for cpumasks and share the
- * space as the usage in build_sched_domains() dictates.  Used only
- * if the amount of space is significant.
- */
-struct allmasks {
-	cpumask_t tmpmask;			/* make this one first */
-	union {
-		cpumask_t nodemask;
-		cpumask_t this_sibling_map;
-		cpumask_t this_core_map;
-	};
-	cpumask_t send_covered;
-
-#ifdef CONFIG_NUMA
-	cpumask_t domainspan;
-	cpumask_t covered;
-	cpumask_t notcovered;
-#endif
-};
-
-#if	NR_CPUS > 128
-#define SCHED_CPUMASK_DECLARE(v)	struct allmasks *v
-static inline void sched_cpumask_alloc(struct allmasks **masks)
-{
-	*masks = kmalloc(sizeof(**masks), GFP_KERNEL);
-}
-static inline void sched_cpumask_free(struct allmasks *masks)
-{
-	kfree(masks);
-}
-#else
-#define SCHED_CPUMASK_DECLARE(v)	struct allmasks _v, *v = &_v
-static inline void sched_cpumask_alloc(struct allmasks **masks)
-{ }
-static inline void sched_cpumask_free(struct allmasks *masks)
-{ }
-#endif
-
-#define	SCHED_CPUMASK_VAR(v, a) 	cpumask_t *v = (cpumask_t *) \
-			((unsigned long)(a) + offsetof(struct allmasks, v))
-
 static int default_relax_domain_level = -1;
 
 static int __init setup_relax_domain_level(char *str)
@@ -7374,17 +7466,38 @@
  * Build sched domains for a given set of cpus and attach the sched domains
  * to the individual cpus
  */
-static int __build_sched_domains(const cpumask_t *cpu_map,
+static int __build_sched_domains(const struct cpumask *cpu_map,
 				 struct sched_domain_attr *attr)
 {
-	int i;
+	int i, err = -ENOMEM;
 	struct root_domain *rd;
-	SCHED_CPUMASK_DECLARE(allmasks);
-	cpumask_t *tmpmask;
+	cpumask_var_t nodemask, this_sibling_map, this_core_map, send_covered,
+		tmpmask;
 #ifdef CONFIG_NUMA
+	cpumask_var_t domainspan, covered, notcovered;
 	struct sched_group **sched_group_nodes = NULL;
 	int sd_allnodes = 0;
 
+	if (!alloc_cpumask_var(&domainspan, GFP_KERNEL))
+		goto out;
+	if (!alloc_cpumask_var(&covered, GFP_KERNEL))
+		goto free_domainspan;
+	if (!alloc_cpumask_var(&notcovered, GFP_KERNEL))
+		goto free_covered;
+#endif
+
+	if (!alloc_cpumask_var(&nodemask, GFP_KERNEL))
+		goto free_notcovered;
+	if (!alloc_cpumask_var(&this_sibling_map, GFP_KERNEL))
+		goto free_nodemask;
+	if (!alloc_cpumask_var(&this_core_map, GFP_KERNEL))
+		goto free_this_sibling_map;
+	if (!alloc_cpumask_var(&send_covered, GFP_KERNEL))
+		goto free_this_core_map;
+	if (!alloc_cpumask_var(&tmpmask, GFP_KERNEL))
+		goto free_send_covered;
+
+#ifdef CONFIG_NUMA
 	/*
 	 * Allocate the per-node list of sched groups
 	 */
@@ -7392,54 +7505,37 @@
 				    GFP_KERNEL);
 	if (!sched_group_nodes) {
 		printk(KERN_WARNING "Can not alloc sched group node list\n");
-		return -ENOMEM;
+		goto free_tmpmask;
 	}
 #endif
 
 	rd = alloc_rootdomain();
 	if (!rd) {
 		printk(KERN_WARNING "Cannot alloc root domain\n");
-#ifdef CONFIG_NUMA
-		kfree(sched_group_nodes);
-#endif
-		return -ENOMEM;
+		goto free_sched_groups;
 	}
 
-	/* get space for all scratch cpumask variables */
-	sched_cpumask_alloc(&allmasks);
-	if (!allmasks) {
-		printk(KERN_WARNING "Cannot alloc cpumask array\n");
-		kfree(rd);
 #ifdef CONFIG_NUMA
-		kfree(sched_group_nodes);
-#endif
-		return -ENOMEM;
-	}
-
-	tmpmask = (cpumask_t *)allmasks;
-
-
-#ifdef CONFIG_NUMA
-	sched_group_nodes_bycpu[first_cpu(*cpu_map)] = sched_group_nodes;
+	sched_group_nodes_bycpu[cpumask_first(cpu_map)] = sched_group_nodes;
 #endif
 
 	/*
 	 * Set up domains for cpus specified by the cpu_map.
 	 */
-	for_each_cpu_mask_nr(i, *cpu_map) {
+	for_each_cpu(i, cpu_map) {
 		struct sched_domain *sd = NULL, *p;
-		SCHED_CPUMASK_VAR(nodemask, allmasks);
 
+		/* FIXME: use cpumask_of_node */
 		*nodemask = node_to_cpumask(cpu_to_node(i));
 		cpus_and(*nodemask, *nodemask, *cpu_map);
 
 #ifdef CONFIG_NUMA
-		if (cpus_weight(*cpu_map) >
-				SD_NODES_PER_DOMAIN*cpus_weight(*nodemask)) {
+		if (cpumask_weight(cpu_map) >
+				SD_NODES_PER_DOMAIN*cpumask_weight(nodemask)) {
 			sd = &per_cpu(allnodes_domains, i);
 			SD_INIT(sd, ALLNODES);
 			set_domain_attribute(sd, attr);
-			sd->span = *cpu_map;
+			cpumask_copy(sched_domain_span(sd), cpu_map);
 			cpu_to_allnodes_group(i, cpu_map, &sd->groups, tmpmask);
 			p = sd;
 			sd_allnodes = 1;
@@ -7449,18 +7545,19 @@
 		sd = &per_cpu(node_domains, i);
 		SD_INIT(sd, NODE);
 		set_domain_attribute(sd, attr);
-		sched_domain_node_span(cpu_to_node(i), &sd->span);
+		sched_domain_node_span(cpu_to_node(i), sched_domain_span(sd));
 		sd->parent = p;
 		if (p)
 			p->child = sd;
-		cpus_and(sd->span, sd->span, *cpu_map);
+		cpumask_and(sched_domain_span(sd),
+			    sched_domain_span(sd), cpu_map);
 #endif
 
 		p = sd;
-		sd = &per_cpu(phys_domains, i);
+		sd = &per_cpu(phys_domains, i).sd;
 		SD_INIT(sd, CPU);
 		set_domain_attribute(sd, attr);
-		sd->span = *nodemask;
+		cpumask_copy(sched_domain_span(sd), nodemask);
 		sd->parent = p;
 		if (p)
 			p->child = sd;
@@ -7468,11 +7565,12 @@
 
 #ifdef CONFIG_SCHED_MC
 		p = sd;
-		sd = &per_cpu(core_domains, i);
+		sd = &per_cpu(core_domains, i).sd;
 		SD_INIT(sd, MC);
 		set_domain_attribute(sd, attr);
-		sd->span = cpu_coregroup_map(i);
-		cpus_and(sd->span, sd->span, *cpu_map);
+		*sched_domain_span(sd) = cpu_coregroup_map(i);
+		cpumask_and(sched_domain_span(sd),
+			    sched_domain_span(sd), cpu_map);
 		sd->parent = p;
 		p->child = sd;
 		cpu_to_core_group(i, cpu_map, &sd->groups, tmpmask);
@@ -7480,11 +7578,11 @@
 
 #ifdef CONFIG_SCHED_SMT
 		p = sd;
-		sd = &per_cpu(cpu_domains, i);
+		sd = &per_cpu(cpu_domains, i).sd;
 		SD_INIT(sd, SIBLING);
 		set_domain_attribute(sd, attr);
-		sd->span = per_cpu(cpu_sibling_map, i);
-		cpus_and(sd->span, sd->span, *cpu_map);
+		cpumask_and(sched_domain_span(sd),
+			    &per_cpu(cpu_sibling_map, i), cpu_map);
 		sd->parent = p;
 		p->child = sd;
 		cpu_to_cpu_group(i, cpu_map, &sd->groups, tmpmask);
@@ -7493,13 +7591,10 @@
 
 #ifdef CONFIG_SCHED_SMT
 	/* Set up CPU (sibling) groups */
-	for_each_cpu_mask_nr(i, *cpu_map) {
-		SCHED_CPUMASK_VAR(this_sibling_map, allmasks);
-		SCHED_CPUMASK_VAR(send_covered, allmasks);
-
-		*this_sibling_map = per_cpu(cpu_sibling_map, i);
-		cpus_and(*this_sibling_map, *this_sibling_map, *cpu_map);
-		if (i != first_cpu(*this_sibling_map))
+	for_each_cpu(i, cpu_map) {
+		cpumask_and(this_sibling_map,
+			    &per_cpu(cpu_sibling_map, i), cpu_map);
+		if (i != cpumask_first(this_sibling_map))
 			continue;
 
 		init_sched_build_groups(this_sibling_map, cpu_map,
@@ -7510,13 +7605,11 @@
 
 #ifdef CONFIG_SCHED_MC
 	/* Set up multi-core groups */
-	for_each_cpu_mask_nr(i, *cpu_map) {
-		SCHED_CPUMASK_VAR(this_core_map, allmasks);
-		SCHED_CPUMASK_VAR(send_covered, allmasks);
-
+	for_each_cpu(i, cpu_map) {
+		/* FIXME: Use cpu_coregroup_mask */
 		*this_core_map = cpu_coregroup_map(i);
 		cpus_and(*this_core_map, *this_core_map, *cpu_map);
-		if (i != first_cpu(*this_core_map))
+		if (i != cpumask_first(this_core_map))
 			continue;
 
 		init_sched_build_groups(this_core_map, cpu_map,
@@ -7527,12 +7620,10 @@
 
 	/* Set up physical groups */
 	for (i = 0; i < nr_node_ids; i++) {
-		SCHED_CPUMASK_VAR(nodemask, allmasks);
-		SCHED_CPUMASK_VAR(send_covered, allmasks);
-
+		/* FIXME: Use cpumask_of_node */
 		*nodemask = node_to_cpumask(i);
 		cpus_and(*nodemask, *nodemask, *cpu_map);
-		if (cpus_empty(*nodemask))
+		if (cpumask_empty(nodemask))
 			continue;
 
 		init_sched_build_groups(nodemask, cpu_map,
@@ -7543,8 +7634,6 @@
 #ifdef CONFIG_NUMA
 	/* Set up node groups */
 	if (sd_allnodes) {
-		SCHED_CPUMASK_VAR(send_covered, allmasks);
-
 		init_sched_build_groups(cpu_map, cpu_map,
 					&cpu_to_allnodes_group,
 					send_covered, tmpmask);
@@ -7553,58 +7642,58 @@
 	for (i = 0; i < nr_node_ids; i++) {
 		/* Set up node groups */
 		struct sched_group *sg, *prev;
-		SCHED_CPUMASK_VAR(nodemask, allmasks);
-		SCHED_CPUMASK_VAR(domainspan, allmasks);
-		SCHED_CPUMASK_VAR(covered, allmasks);
 		int j;
 
+		/* FIXME: Use cpumask_of_node */
 		*nodemask = node_to_cpumask(i);
-		cpus_clear(*covered);
+		cpumask_clear(covered);
 
 		cpus_and(*nodemask, *nodemask, *cpu_map);
-		if (cpus_empty(*nodemask)) {
+		if (cpumask_empty(nodemask)) {
 			sched_group_nodes[i] = NULL;
 			continue;
 		}
 
 		sched_domain_node_span(i, domainspan);
-		cpus_and(*domainspan, *domainspan, *cpu_map);
+		cpumask_and(domainspan, domainspan, cpu_map);
 
-		sg = kmalloc_node(sizeof(struct sched_group), GFP_KERNEL, i);
+		sg = kmalloc_node(sizeof(struct sched_group) + cpumask_size(),
+				  GFP_KERNEL, i);
 		if (!sg) {
 			printk(KERN_WARNING "Can not alloc domain group for "
 				"node %d\n", i);
 			goto error;
 		}
 		sched_group_nodes[i] = sg;
-		for_each_cpu_mask_nr(j, *nodemask) {
+		for_each_cpu(j, nodemask) {
 			struct sched_domain *sd;
 
 			sd = &per_cpu(node_domains, j);
 			sd->groups = sg;
 		}
 		sg->__cpu_power = 0;
-		sg->cpumask = *nodemask;
+		cpumask_copy(sched_group_cpus(sg), nodemask);
 		sg->next = sg;
-		cpus_or(*covered, *covered, *nodemask);
+		cpumask_or(covered, covered, nodemask);
 		prev = sg;
 
 		for (j = 0; j < nr_node_ids; j++) {
-			SCHED_CPUMASK_VAR(notcovered, allmasks);
 			int n = (i + j) % nr_node_ids;
+			/* FIXME: Use cpumask_of_node */
 			node_to_cpumask_ptr(pnodemask, n);
 
-			cpus_complement(*notcovered, *covered);
-			cpus_and(*tmpmask, *notcovered, *cpu_map);
-			cpus_and(*tmpmask, *tmpmask, *domainspan);
-			if (cpus_empty(*tmpmask))
+			cpumask_complement(notcovered, covered);
+			cpumask_and(tmpmask, notcovered, cpu_map);
+			cpumask_and(tmpmask, tmpmask, domainspan);
+			if (cpumask_empty(tmpmask))
 				break;
 
-			cpus_and(*tmpmask, *tmpmask, *pnodemask);
-			if (cpus_empty(*tmpmask))
+			cpumask_and(tmpmask, tmpmask, pnodemask);
+			if (cpumask_empty(tmpmask))
 				continue;
 
-			sg = kmalloc_node(sizeof(struct sched_group),
+			sg = kmalloc_node(sizeof(struct sched_group) +
+					  cpumask_size(),
 					  GFP_KERNEL, i);
 			if (!sg) {
 				printk(KERN_WARNING
@@ -7612,9 +7701,9 @@
 				goto error;
 			}
 			sg->__cpu_power = 0;
-			sg->cpumask = *tmpmask;
+			cpumask_copy(sched_group_cpus(sg), tmpmask);
 			sg->next = prev->next;
-			cpus_or(*covered, *covered, *tmpmask);
+			cpumask_or(covered, covered, tmpmask);
 			prev->next = sg;
 			prev = sg;
 		}
@@ -7623,22 +7712,22 @@
 
 	/* Calculate CPU power for physical packages and nodes */
 #ifdef CONFIG_SCHED_SMT
-	for_each_cpu_mask_nr(i, *cpu_map) {
-		struct sched_domain *sd = &per_cpu(cpu_domains, i);
+	for_each_cpu(i, cpu_map) {
+		struct sched_domain *sd = &per_cpu(cpu_domains, i).sd;
 
 		init_sched_groups_power(i, sd);
 	}
 #endif
 #ifdef CONFIG_SCHED_MC
-	for_each_cpu_mask_nr(i, *cpu_map) {
-		struct sched_domain *sd = &per_cpu(core_domains, i);
+	for_each_cpu(i, cpu_map) {
+		struct sched_domain *sd = &per_cpu(core_domains, i).sd;
 
 		init_sched_groups_power(i, sd);
 	}
 #endif
 
-	for_each_cpu_mask_nr(i, *cpu_map) {
-		struct sched_domain *sd = &per_cpu(phys_domains, i);
+	for_each_cpu(i, cpu_map) {
+		struct sched_domain *sd = &per_cpu(phys_domains, i).sd;
 
 		init_sched_groups_power(i, sd);
 	}
@@ -7650,53 +7739,78 @@
 	if (sd_allnodes) {
 		struct sched_group *sg;
 
-		cpu_to_allnodes_group(first_cpu(*cpu_map), cpu_map, &sg,
+		cpu_to_allnodes_group(cpumask_first(cpu_map), cpu_map, &sg,
 								tmpmask);
 		init_numa_sched_groups_power(sg);
 	}
 #endif
 
 	/* Attach the domains */
-	for_each_cpu_mask_nr(i, *cpu_map) {
+	for_each_cpu(i, cpu_map) {
 		struct sched_domain *sd;
 #ifdef CONFIG_SCHED_SMT
-		sd = &per_cpu(cpu_domains, i);
+		sd = &per_cpu(cpu_domains, i).sd;
 #elif defined(CONFIG_SCHED_MC)
-		sd = &per_cpu(core_domains, i);
+		sd = &per_cpu(core_domains, i).sd;
 #else
-		sd = &per_cpu(phys_domains, i);
+		sd = &per_cpu(phys_domains, i).sd;
 #endif
 		cpu_attach_domain(sd, rd, i);
 	}
 
-	sched_cpumask_free(allmasks);
-	return 0;
+	err = 0;
+
+free_tmpmask:
+	free_cpumask_var(tmpmask);
+free_send_covered:
+	free_cpumask_var(send_covered);
+free_this_core_map:
+	free_cpumask_var(this_core_map);
+free_this_sibling_map:
+	free_cpumask_var(this_sibling_map);
+free_nodemask:
+	free_cpumask_var(nodemask);
+free_notcovered:
+#ifdef CONFIG_NUMA
+	free_cpumask_var(notcovered);
+free_covered:
+	free_cpumask_var(covered);
+free_domainspan:
+	free_cpumask_var(domainspan);
+out:
+#endif
+	return err;
+
+free_sched_groups:
+#ifdef CONFIG_NUMA
+	kfree(sched_group_nodes);
+#endif
+	goto free_tmpmask;
 
 #ifdef CONFIG_NUMA
 error:
 	free_sched_groups(cpu_map, tmpmask);
-	sched_cpumask_free(allmasks);
-	kfree(rd);
-	return -ENOMEM;
+	free_rootdomain(rd);
+	goto free_tmpmask;
 #endif
 }
 
-static int build_sched_domains(const cpumask_t *cpu_map)
+static int build_sched_domains(const struct cpumask *cpu_map)
 {
 	return __build_sched_domains(cpu_map, NULL);
 }
 
-static cpumask_t *doms_cur;	/* current sched domains */
+static struct cpumask *doms_cur;	/* current sched domains */
 static int ndoms_cur;		/* number of sched domains in 'doms_cur' */
 static struct sched_domain_attr *dattr_cur;
 				/* attribues of custom domains in 'doms_cur' */
 
 /*
  * Special case: If a kmalloc of a doms_cur partition (array of
- * cpumask_t) fails, then fallback to a single sched domain,
- * as determined by the single cpumask_t fallback_doms.
+ * cpumask) fails, then fallback to a single sched domain,
+ * as determined by the single cpumask fallback_doms.
  */
-static cpumask_t fallback_doms;
+static cpumask_var_t fallback_doms;
 
 /*
  * arch_update_cpu_topology lets virtualized architectures update the
@@ -7713,16 +7827,16 @@
  * For now this just excludes isolated cpus, but could be used to
  * exclude other special cases in the future.
  */
-static int arch_init_sched_domains(const cpumask_t *cpu_map)
+static int arch_init_sched_domains(const struct cpumask *cpu_map)
 {
 	int err;
 
 	arch_update_cpu_topology();
 	ndoms_cur = 1;
-	doms_cur = kmalloc(sizeof(cpumask_t), GFP_KERNEL);
+	doms_cur = kmalloc(cpumask_size(), GFP_KERNEL);
 	if (!doms_cur)
-		doms_cur = &fallback_doms;
-	cpus_andnot(*doms_cur, *cpu_map, cpu_isolated_map);
+		doms_cur = fallback_doms;
+	cpumask_andnot(doms_cur, cpu_map, cpu_isolated_map);
 	dattr_cur = NULL;
 	err = build_sched_domains(doms_cur);
 	register_sched_domain_sysctl();
@@ -7730,8 +7844,8 @@
 	return err;
 }
 
-static void arch_destroy_sched_domains(const cpumask_t *cpu_map,
-				       cpumask_t *tmpmask)
+static void arch_destroy_sched_domains(const struct cpumask *cpu_map,
+				       struct cpumask *tmpmask)
 {
 	free_sched_groups(cpu_map, tmpmask);
 }
@@ -7740,15 +7854,16 @@
  * Detach sched domains from a group of cpus specified in cpu_map
  * These cpus will now be attached to the NULL domain
  */
-static void detach_destroy_domains(const cpumask_t *cpu_map)
+static void detach_destroy_domains(const struct cpumask *cpu_map)
 {
-	cpumask_t tmpmask;
+	/* Safe because the hotplug lock is held. */
+	static DECLARE_BITMAP(tmpmask, CONFIG_NR_CPUS);
 	int i;
 
-	for_each_cpu_mask_nr(i, *cpu_map)
+	for_each_cpu(i, cpu_map)
 		cpu_attach_domain(NULL, &def_root_domain, i);
 	synchronize_sched();
-	arch_destroy_sched_domains(cpu_map, &tmpmask);
+	arch_destroy_sched_domains(cpu_map, to_cpumask(tmpmask));
 }
 
 /* handle null as "default" */
@@ -7773,7 +7888,7 @@
  * doms_new[] to the current sched domain partitioning, doms_cur[].
  * It destroys each deleted domain and builds each new domain.
  *
- * 'doms_new' is an array of cpumask_t's of length 'ndoms_new'.
+ * 'doms_new' is an array of cpumask's of length 'ndoms_new'.
  * The masks don't intersect (don't overlap.) We should setup one
  * sched domain for each mask. CPUs not in any of the cpumasks will
  * not be load balanced. If the same cpumask appears both in the
@@ -7787,13 +7902,14 @@
  * the single partition 'fallback_doms', it also forces the domains
  * to be rebuilt.
  *
- * If doms_new == NULL it will be replaced with cpu_online_map.
+ * If doms_new == NULL it will be replaced with cpu_online_mask.
  * ndoms_new == 0 is a special case for destroying existing domains,
  * and it will not create the default domain.
  *
  * Call with hotplug lock held
  */
-void partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
+/* FIXME: Change to struct cpumask *doms_new[] */
+void partition_sched_domains(int ndoms_new, struct cpumask *doms_new,
 			     struct sched_domain_attr *dattr_new)
 {
 	int i, j, n;
@@ -7812,7 +7928,7 @@
 	/* Destroy deleted domains */
 	for (i = 0; i < ndoms_cur; i++) {
 		for (j = 0; j < n && !new_topology; j++) {
-			if (cpus_equal(doms_cur[i], doms_new[j])
+			if (cpumask_equal(&doms_cur[i], &doms_new[j])
 			    && dattrs_equal(dattr_cur, i, dattr_new, j))
 				goto match1;
 		}
@@ -7824,15 +7940,15 @@
 
 	if (doms_new == NULL) {
 		ndoms_cur = 0;
-		doms_new = &fallback_doms;
-		cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map);
+		doms_new = fallback_doms;
+		cpumask_andnot(&doms_new[0], cpu_online_mask, cpu_isolated_map);
 		WARN_ON_ONCE(dattr_new);
 	}
 
 	/* Build new domains */
 	for (i = 0; i < ndoms_new; i++) {
 		for (j = 0; j < ndoms_cur && !new_topology; j++) {
-			if (cpus_equal(doms_new[i], doms_cur[j])
+			if (cpumask_equal(&doms_new[i], &doms_cur[j])
 			    && dattrs_equal(dattr_new, i, dattr_cur, j))
 				goto match2;
 		}
@@ -7844,7 +7960,7 @@
 	}
 
 	/* Remember the new sched domains */
-	if (doms_cur != &fallback_doms)
+	if (doms_cur != fallback_doms)
 		kfree(doms_cur);
 	kfree(dattr_cur);	/* kfree(NULL) is safe */
 	doms_cur = doms_new;
@@ -7873,14 +7989,25 @@
 static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt)
 {
 	int ret;
+	unsigned int level = 0;
 
-	if (buf[0] != '0' && buf[0] != '1')
+	if (sscanf(buf, "%u", &level) != 1)
+		return -EINVAL;
+
+	/*
+	 * level is always positive, so there is no need to check for
+	 * level < POWERSAVINGS_BALANCE_NONE, which is 0.
+	 * What happens on a 0 or 1 byte write?  Do we need to check
+	 * count as well?
+	 */
+
+	if (level >= MAX_POWERSAVINGS_BALANCE_LEVELS)
 		return -EINVAL;
 
 	if (smt)
-		sched_smt_power_savings = (buf[0] == '1');
+		sched_smt_power_savings = level;
 	else
-		sched_mc_power_savings = (buf[0] == '1');
+		sched_mc_power_savings = level;
 
 	ret = arch_reinit_sched_domains();
 
@@ -7984,7 +8111,9 @@
 
 void __init sched_init_smp(void)
 {
-	cpumask_t non_isolated_cpus;
+	cpumask_var_t non_isolated_cpus;
+
+	alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL);
 
 #if defined(CONFIG_NUMA)
 	sched_group_nodes_bycpu = kzalloc(nr_cpu_ids * sizeof(void **),
@@ -7993,10 +8122,10 @@
 #endif
 	get_online_cpus();
 	mutex_lock(&sched_domains_mutex);
-	arch_init_sched_domains(&cpu_online_map);
-	cpus_andnot(non_isolated_cpus, cpu_possible_map, cpu_isolated_map);
-	if (cpus_empty(non_isolated_cpus))
-		cpu_set(smp_processor_id(), non_isolated_cpus);
+	arch_init_sched_domains(cpu_online_mask);
+	cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map);
+	if (cpumask_empty(non_isolated_cpus))
+		cpumask_set_cpu(smp_processor_id(), non_isolated_cpus);
 	mutex_unlock(&sched_domains_mutex);
 	put_online_cpus();
 
@@ -8011,9 +8140,13 @@
 	init_hrtick();
 
 	/* Move init over to a non-isolated CPU */
-	if (set_cpus_allowed_ptr(current, &non_isolated_cpus) < 0)
+	if (set_cpus_allowed_ptr(current, non_isolated_cpus) < 0)
 		BUG();
 	sched_init_granularity();
+	free_cpumask_var(non_isolated_cpus);
+
+	alloc_cpumask_var(&fallback_doms, GFP_KERNEL);
+	init_sched_rt_class();
 }
 #else
 void __init sched_init_smp(void)
@@ -8328,6 +8461,15 @@
 	 */
 	current->sched_class = &fair_sched_class;
 
+	/* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */
+	alloc_bootmem_cpumask_var(&nohz_cpu_mask);
+#ifdef CONFIG_SMP
+#ifdef CONFIG_NO_HZ
+	alloc_bootmem_cpumask_var(&nohz.cpu_mask);
+#endif
+	alloc_bootmem_cpumask_var(&cpu_isolated_map);
+#endif /* SMP */
+
 	scheduler_running = 1;
 }
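
The sched.c hunks above all follow one pattern: on-stack cpumask_t scratch
variables become cpumask_var_t, allocated up front and released through a
goto-based unwind shared by the success and error paths.  A condensed sketch
of that pattern, for orientation only (example_build() is a hypothetical
name, not code from the patch):

static int example_build(const struct cpumask *cpu_map)
{
	cpumask_var_t nodemask, tmpmask;
	int err = -ENOMEM;

	/* Heap-allocated when CONFIG_CPUMASK_OFFSTACK=y, otherwise a
	 * no-op wrapper around storage embedded in the variable. */
	if (!alloc_cpumask_var(&nodemask, GFP_KERNEL))
		goto out;
	if (!alloc_cpumask_var(&tmpmask, GFP_KERNEL))
		goto free_nodemask;

	/* ... build domains, using nodemask/tmpmask as scratch space ... */
	cpumask_and(tmpmask, cpu_map, cpu_online_mask);
	err = 0;

	free_cpumask_var(tmpmask);
free_nodemask:
	free_cpumask_var(nodemask);
out:
	return err;
}

Because success and failure share the same unwind labels, the rewritten
__build_sched_domains() reports its result through 'err' instead of
returning early from multiple places.
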
 
diff --git a/kernel/sched_cpupri.c b/kernel/sched_cpupri.c
index 52154fe..018b7be 100644
--- a/kernel/sched_cpupri.c
+++ b/kernel/sched_cpupri.c
@@ -67,24 +67,21 @@
  * Returns: (int)bool - CPUs were found
  */
 int cpupri_find(struct cpupri *cp, struct task_struct *p,
-		cpumask_t *lowest_mask)
+		struct cpumask *lowest_mask)
 {
 	int                  idx      = 0;
 	int                  task_pri = convert_prio(p->prio);
 
 	for_each_cpupri_active(cp->pri_active, idx) {
 		struct cpupri_vec *vec  = &cp->pri_to_cpu[idx];
-		cpumask_t mask;
 
 		if (idx >= task_pri)
 			break;
 
-		cpus_and(mask, p->cpus_allowed, vec->mask);
-
-		if (cpus_empty(mask))
+		if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids)
 			continue;
 
-		*lowest_mask = mask;
+		cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask);
 		return 1;
 	}
 
@@ -126,7 +123,7 @@
 		vec->count--;
 		if (!vec->count)
 			clear_bit(oldpri, cp->pri_active);
-		cpu_clear(cpu, vec->mask);
+		cpumask_clear_cpu(cpu, vec->mask);
 
 		spin_unlock_irqrestore(&vec->lock, flags);
 	}
@@ -136,7 +133,7 @@
 
 		spin_lock_irqsave(&vec->lock, flags);
 
-		cpu_set(cpu, vec->mask);
+		cpumask_set_cpu(cpu, vec->mask);
 		vec->count++;
 		if (vec->count == 1)
 			set_bit(newpri, cp->pri_active);
@@ -150,10 +147,11 @@
 /**
  * cpupri_init - initialize the cpupri structure
  * @cp: The cpupri context
+ * @bootmem: true if allocations need to use bootmem
  *
- * Returns: (void)
+ * Returns: -ENOMEM if the per-priority cpumask allocations fail, 0 on success.
  */
-void cpupri_init(struct cpupri *cp)
+int cpupri_init(struct cpupri *cp, bool bootmem)
 {
 	int i;
 
@@ -164,11 +162,30 @@
 
 		spin_lock_init(&vec->lock);
 		vec->count = 0;
-		cpus_clear(vec->mask);
+		if (bootmem)
+			alloc_bootmem_cpumask_var(&vec->mask);
+		else if (!alloc_cpumask_var(&vec->mask, GFP_KERNEL))
+			goto cleanup;
 	}
 
 	for_each_possible_cpu(i)
 		cp->cpu_to_pri[i] = CPUPRI_INVALID;
+	return 0;
+
+cleanup:
+	for (i--; i >= 0; i--)
+		free_cpumask_var(cp->pri_to_cpu[i].mask);
+	return -ENOMEM;
 }
 
+/**
+ * cpupri_cleanup - clean up the cpupri structure
+ * @cp: The cpupri context
+ */
+void cpupri_cleanup(struct cpupri *cp)
+{
+	int i;
 
+	for (i = 0; i < CPUPRI_NR_PRIORITIES; i++)
+		free_cpumask_var(cp->pri_to_cpu[i].mask);
+}
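
Since cpupri_init() can now fail, callers are expected to check its result
and pair it with cpupri_cleanup() on teardown.  A hypothetical caller, for
illustration only (example_setup() and its error handling are assumptions,
not taken from the patch):

static int example_setup(struct cpupri *cp, bool bootmem)
{
	int err;

	err = cpupri_init(cp, bootmem);	/* may return -ENOMEM */
	if (err)
		return err;		/* nothing to clean up yet */

	/* ... use the cpupri structure ... */

	cpupri_cleanup(cp);		/* frees the per-priority cpumasks */
	return 0;
}
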
diff --git a/kernel/sched_cpupri.h b/kernel/sched_cpupri.h
index f25811b0..642a94e 100644
--- a/kernel/sched_cpupri.h
+++ b/kernel/sched_cpupri.h
@@ -14,7 +14,7 @@
 struct cpupri_vec {
 	spinlock_t lock;
 	int        count;
-	cpumask_t  mask;
+	cpumask_var_t mask;
 };
 
 struct cpupri {
@@ -27,7 +27,8 @@
 int  cpupri_find(struct cpupri *cp,
 		 struct task_struct *p, cpumask_t *lowest_mask);
 void cpupri_set(struct cpupri *cp, int cpu, int pri);
-void cpupri_init(struct cpupri *cp);
+int cpupri_init(struct cpupri *cp, bool bootmem);
+void cpupri_cleanup(struct cpupri *cp);
 #else
 #define cpupri_set(cp, cpu, pri) do { } while (0)
 #define cpupri_init() do { } while (0)
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 5ad4440..56c0efe 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1019,16 +1019,33 @@
  * search starts with cpus closest then further out as needed,
  * so we always favor a closer, idle cpu.
  * Domains may include CPUs that are not usable for migration,
- * hence we need to mask them out (cpu_active_map)
+ * hence we need to mask them out (cpu_active_mask)
  *
  * Returns the CPU we should wake onto.
  */
 #if defined(ARCH_HAS_SCHED_WAKE_IDLE)
 static int wake_idle(int cpu, struct task_struct *p)
 {
-	cpumask_t tmp;
 	struct sched_domain *sd;
 	int i;
+	unsigned int chosen_wakeup_cpu;
+	int this_cpu;
+
+	/*
+	 * At POWERSAVINGS_BALANCE_WAKEUP level, if both this_cpu and prev_cpu
+	 * are idle, this is not a kernel thread, and the task's affinity
+	 * allows it to be moved to the preferred cpu, then just move it.
+	 */
+
+	this_cpu = smp_processor_id();
+	chosen_wakeup_cpu =
+		cpu_rq(this_cpu)->rd->sched_mc_preferred_wakeup_cpu;
+
+	if (sched_mc_power_savings >= POWERSAVINGS_BALANCE_WAKEUP &&
+		idle_cpu(cpu) && idle_cpu(this_cpu) &&
+		p->mm && !(p->flags & PF_KTHREAD) &&
+		cpu_isset(chosen_wakeup_cpu, p->cpus_allowed))
+		return chosen_wakeup_cpu;
 
 	/*
 	 * If it is idle, then it is the best cpu to run this task.
@@ -1046,10 +1063,9 @@
 		if ((sd->flags & SD_WAKE_IDLE)
 		    || ((sd->flags & SD_WAKE_IDLE_FAR)
 			&& !task_hot(p, task_rq(p)->clock, sd))) {
-			cpus_and(tmp, sd->span, p->cpus_allowed);
-			cpus_and(tmp, tmp, cpu_active_map);
-			for_each_cpu_mask_nr(i, tmp) {
-				if (idle_cpu(i)) {
+			for_each_cpu_and(i, sched_domain_span(sd),
+					 &p->cpus_allowed) {
+				if (cpu_active(i) && idle_cpu(i)) {
 					if (i != task_cpu(p)) {
 						schedstat_inc(p,
 						       se.nr_wakeups_idle);
@@ -1242,13 +1258,13 @@
 	 * this_cpu and prev_cpu are present in:
 	 */
 	for_each_domain(this_cpu, sd) {
-		if (cpu_isset(prev_cpu, sd->span)) {
+		if (cpumask_test_cpu(prev_cpu, sched_domain_span(sd))) {
 			this_sd = sd;
 			break;
 		}
 	}
 
-	if (unlikely(!cpu_isset(this_cpu, p->cpus_allowed)))
+	if (unlikely(!cpumask_test_cpu(this_cpu, &p->cpus_allowed)))
 		goto out;
 
 	/*
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 51d2af3..833b6d4 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -15,7 +15,7 @@
 	if (!rq->online)
 		return;
 
-	cpu_set(rq->cpu, rq->rd->rto_mask);
+	cpumask_set_cpu(rq->cpu, rq->rd->rto_mask);
 	/*
 	 * Make sure the mask is visible before we set
 	 * the overload count. That is checked to determine
@@ -34,7 +34,7 @@
 
 	/* the order here really doesn't matter */
 	atomic_dec(&rq->rd->rto_count);
-	cpu_clear(rq->cpu, rq->rd->rto_mask);
+	cpumask_clear_cpu(rq->cpu, rq->rd->rto_mask);
 }
 
 static void update_rt_migration(struct rq *rq)
@@ -139,14 +139,14 @@
 }
 
 #ifdef CONFIG_SMP
-static inline cpumask_t sched_rt_period_mask(void)
+static inline const struct cpumask *sched_rt_period_mask(void)
 {
 	return cpu_rq(smp_processor_id())->rd->span;
 }
 #else
-static inline cpumask_t sched_rt_period_mask(void)
+static inline const struct cpumask *sched_rt_period_mask(void)
 {
-	return cpu_online_map;
+	return cpu_online_mask;
 }
 #endif
 
@@ -212,9 +212,9 @@
 	return rt_rq->rt_throttled;
 }
 
-static inline cpumask_t sched_rt_period_mask(void)
+static inline const struct cpumask *sched_rt_period_mask(void)
 {
-	return cpu_online_map;
+	return cpu_online_mask;
 }
 
 static inline
@@ -241,11 +241,11 @@
 	int i, weight, more = 0;
 	u64 rt_period;
 
-	weight = cpus_weight(rd->span);
+	weight = cpumask_weight(rd->span);
 
 	spin_lock(&rt_b->rt_runtime_lock);
 	rt_period = ktime_to_ns(rt_b->rt_period);
-	for_each_cpu_mask_nr(i, rd->span) {
+	for_each_cpu(i, rd->span) {
 		struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
 		s64 diff;
 
@@ -324,7 +324,7 @@
 		/*
 		 * Greedy reclaim, take back as much as we can.
 		 */
-		for_each_cpu_mask(i, rd->span) {
+		for_each_cpu(i, rd->span) {
 			struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
 			s64 diff;
 
@@ -429,13 +429,13 @@
 static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
 {
 	int i, idle = 1;
-	cpumask_t span;
+	const struct cpumask *span;
 
 	if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
 		return 1;
 
 	span = sched_rt_period_mask();
-	for_each_cpu_mask(i, span) {
+	for_each_cpu(i, span) {
 		int enqueue = 0;
 		struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
 		struct rq *rq = rq_of_rt_rq(rt_rq);
@@ -805,17 +805,20 @@
 
 static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
 {
-	cpumask_t mask;
+	cpumask_var_t mask;
 
 	if (rq->curr->rt.nr_cpus_allowed == 1)
 		return;
 
-	if (p->rt.nr_cpus_allowed != 1
-	    && cpupri_find(&rq->rd->cpupri, p, &mask))
+	if (!alloc_cpumask_var(&mask, GFP_ATOMIC))
 		return;
 
-	if (!cpupri_find(&rq->rd->cpupri, rq->curr, &mask))
-		return;
+	if (p->rt.nr_cpus_allowed != 1
+	    && cpupri_find(&rq->rd->cpupri, p, mask))
+		goto free;
+
+	if (!cpupri_find(&rq->rd->cpupri, rq->curr, mask))
+		goto free;
 
 	/*
 	 * There appears to be other cpus that can accept
@@ -824,6 +827,8 @@
 	 */
 	requeue_task_rt(rq, p, 1);
 	resched_task(rq->curr);
+free:
+	free_cpumask_var(mask);
 }
 
 #endif /* CONFIG_SMP */
@@ -914,7 +919,7 @@
 static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
 {
 	if (!task_running(rq, p) &&
-	    (cpu < 0 || cpu_isset(cpu, p->cpus_allowed)) &&
+	    (cpu < 0 || cpumask_test_cpu(cpu, &p->cpus_allowed)) &&
 	    (p->rt.nr_cpus_allowed > 1))
 		return 1;
 	return 0;
@@ -953,7 +958,7 @@
 	return next;
 }
 
-static DEFINE_PER_CPU(cpumask_t, local_cpu_mask);
+static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);
 
 static inline int pick_optimal_cpu(int this_cpu, cpumask_t *mask)
 {
@@ -973,7 +978,7 @@
 static int find_lowest_rq(struct task_struct *task)
 {
 	struct sched_domain *sd;
-	cpumask_t *lowest_mask = &__get_cpu_var(local_cpu_mask);
+	struct cpumask *lowest_mask = __get_cpu_var(local_cpu_mask);
 	int this_cpu = smp_processor_id();
 	int cpu      = task_cpu(task);
 
@@ -988,7 +993,7 @@
 	 * I guess we might want to change cpupri_find() to ignore those
 	 * in the first place.
 	 */
-	cpus_and(*lowest_mask, *lowest_mask, cpu_active_map);
+	cpumask_and(lowest_mask, lowest_mask, cpu_active_mask);
 
 	/*
 	 * At this point we have built a mask of cpus representing the
@@ -998,7 +1003,7 @@
 	 * We prioritize the last cpu that the task executed on since
 	 * it is most likely cache-hot in that location.
 	 */
-	if (cpu_isset(cpu, *lowest_mask))
+	if (cpumask_test_cpu(cpu, lowest_mask))
 		return cpu;
 
 	/*
@@ -1013,7 +1018,8 @@
 			cpumask_t domain_mask;
 			int       best_cpu;
 
-			cpus_and(domain_mask, sd->span, *lowest_mask);
+			cpumask_and(&domain_mask, sched_domain_span(sd),
+				    lowest_mask);
 
 			best_cpu = pick_optimal_cpu(this_cpu,
 						    &domain_mask);
@@ -1054,8 +1060,8 @@
 			 * Also make sure that it wasn't scheduled on its rq.
 			 */
 			if (unlikely(task_rq(task) != rq ||
-				     !cpu_isset(lowest_rq->cpu,
-						task->cpus_allowed) ||
+				     !cpumask_test_cpu(lowest_rq->cpu,
+						       &task->cpus_allowed) ||
 				     task_running(rq, task) ||
 				     !task->se.on_rq)) {
 
@@ -1176,7 +1182,7 @@
 
 	next = pick_next_task_rt(this_rq);
 
-	for_each_cpu_mask_nr(cpu, this_rq->rd->rto_mask) {
+	for_each_cpu(cpu, this_rq->rd->rto_mask) {
 		if (this_cpu == cpu)
 			continue;
 
@@ -1305,9 +1311,9 @@
 }
 
 static void set_cpus_allowed_rt(struct task_struct *p,
-				const cpumask_t *new_mask)
+				const struct cpumask *new_mask)
 {
-	int weight = cpus_weight(*new_mask);
+	int weight = cpumask_weight(new_mask);
 
 	BUG_ON(!rt_task(p));
 
@@ -1328,7 +1334,7 @@
 		update_rt_migration(rq);
 	}
 
-	p->cpus_allowed    = *new_mask;
+	cpumask_copy(&p->cpus_allowed, new_mask);
 	p->rt.nr_cpus_allowed = weight;
 }
 
@@ -1371,6 +1377,14 @@
 	if (!rq->rt.rt_nr_running)
 		pull_rt_task(rq);
 }
+
+static inline void init_sched_rt_class(void)
+{
+	unsigned int i;
+
+	for_each_possible_cpu(i)
+		alloc_cpumask_var(&per_cpu(local_cpu_mask, i), GFP_KERNEL);
+}
 #endif /* CONFIG_SMP */
 
 /*
@@ -1541,3 +1555,4 @@
 	rcu_read_unlock();
 }
 #endif /* CONFIG_SCHED_DEBUG */
+
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h
index 3b01098..f2773b5 100644
--- a/kernel/sched_stats.h
+++ b/kernel/sched_stats.h
@@ -42,7 +42,8 @@
 		for_each_domain(cpu, sd) {
 			enum cpu_idle_type itype;
 
-			cpumask_scnprintf(mask_str, mask_len, sd->span);
+			cpumask_scnprintf(mask_str, mask_len,
+					  sched_domain_span(sd));
 			seq_printf(seq, "domain%d %s", dcount++, mask_str);
 			for (itype = CPU_IDLE; itype < CPU_MAX_IDLE_TYPES;
 					itype++) {
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index bd6be76..6d7dc4e 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -352,7 +352,7 @@
 	if (!data)
 		return -ENOMEM;
 	nla_strlcpy(data, na, len);
-	ret = cpulist_parse(data, *mask);
+	ret = cpulist_parse(data, mask);
 	kfree(data);
 	return ret;
 }
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index f8d9680..ea2f48a 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -166,6 +166,8 @@
 void clockevents_register_device(struct clock_event_device *dev)
 {
 	BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
+	BUG_ON(!dev->cpumask);
+
 	/*
 	 * A nsec2cyc multiplicator of 0 is invalid and we'd crash
 	 * on it, so fix it up and emit a warning:
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index f98a1b7..9590af2 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -150,7 +150,7 @@
 		 */
 		cpu = first_cpu(mask);
 		td = &per_cpu(tick_cpu_device, cpu);
-		td->evtdev->broadcast(mask);
+		td->evtdev->broadcast(&mask);
 	}
 }
 
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index df12434..f8372be 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -136,7 +136,7 @@
  */
 static void tick_setup_device(struct tick_device *td,
 			      struct clock_event_device *newdev, int cpu,
-			      const cpumask_t *cpumask)
+			      const struct cpumask *cpumask)
 {
 	ktime_t next_event;
 	void (*handler)(struct clock_event_device *) = NULL;
@@ -171,8 +171,8 @@
 	 * When the device is not per cpu, pin the interrupt to the
 	 * current cpu:
 	 */
-	if (!cpus_equal(newdev->cpumask, *cpumask))
-		irq_set_affinity(newdev->irq, *cpumask);
+	if (!cpumask_equal(newdev->cpumask, cpumask))
+		irq_set_affinity(newdev->irq, cpumask);
 
 	/*
 	 * When global broadcasting is active, check if the current
@@ -202,14 +202,14 @@
 	spin_lock_irqsave(&tick_device_lock, flags);
 
 	cpu = smp_processor_id();
-	if (!cpu_isset(cpu, newdev->cpumask))
+	if (!cpumask_test_cpu(cpu, newdev->cpumask))
 		goto out_bc;
 
 	td = &per_cpu(tick_cpu_device, cpu);
 	curdev = td->evtdev;
 
 	/* cpu local device ? */
-	if (!cpus_equal(newdev->cpumask, cpumask_of_cpu(cpu))) {
+	if (!cpumask_equal(newdev->cpumask, cpumask_of(cpu))) {
 
 		/*
 		 * If the cpu affinity of the device interrupt can not
@@ -222,7 +222,7 @@
 		 * If we have a cpu local device already, do not replace it
 		 * by a non cpu local device
 		 */
-		if (curdev && cpus_equal(curdev->cpumask, cpumask_of_cpu(cpu)))
+		if (curdev && cpumask_equal(curdev->cpumask, cpumask_of(cpu)))
 			goto out_bc;
 	}
 
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 8f3fc25..76a574b 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -144,7 +144,7 @@
 	if (!ts->tick_stopped)
 		return;
 
-	cpu_clear(cpu, nohz_cpu_mask);
+	cpumask_clear_cpu(cpu, nohz_cpu_mask);
 	now = ktime_get();
 	ts->idle_waketime = now;
 
@@ -301,7 +301,7 @@
 			tick_do_timer_cpu = TICK_DO_TIMER_NONE;
 
 		if (delta_jiffies > 1)
-			cpu_set(cpu, nohz_cpu_mask);
+			cpumask_set_cpu(cpu, nohz_cpu_mask);
 
 		/* Skip reprogram of event if its not changed */
 		if (ts->tick_stopped && ktime_equal(expires, dev->next_event))
@@ -319,7 +319,7 @@
 				/*
 				 * sched tick not stopped!
 				 */
-				cpu_clear(cpu, nohz_cpu_mask);
+				cpumask_clear_cpu(cpu, nohz_cpu_mask);
 				goto out;
 			}
 
@@ -361,7 +361,7 @@
 		 * softirq.
 		 */
 		tick_do_update_jiffies64(ktime_get());
-		cpu_clear(cpu, nohz_cpu_mask);
+		cpumask_clear_cpu(cpu, nohz_cpu_mask);
 	}
 	raise_softirq_irqoff(TIMER_SOFTIRQ);
 out:
@@ -439,7 +439,7 @@
 	select_nohz_load_balancer(0);
 	now = ktime_get();
 	tick_do_update_jiffies64(now);
-	cpu_clear(cpu, nohz_cpu_mask);
+	cpumask_clear_cpu(cpu, nohz_cpu_mask);
 
 	/*
 	 * We stopped the tick in idle. Update process times would miss the
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 4185d52..0e91f43 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2674,7 +2674,7 @@
 
 	mutex_lock(&tracing_cpumask_update_lock);
 
-	len = cpumask_scnprintf(mask_str, count, tracing_cpumask);
+	len = cpumask_scnprintf(mask_str, count, &tracing_cpumask);
 	if (count - len < 2) {
 		count = -EINVAL;
 		goto out_err;
@@ -2695,7 +2695,7 @@
 	int err, cpu;
 
 	mutex_lock(&tracing_cpumask_update_lock);
-	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
+	err = cpumask_parse_user(ubuf, count, &tracing_cpumask_new);
 	if (err)
 		goto err_unlock;
 
diff --git a/lib/Kconfig b/lib/Kconfig
index fd4118e..2ba43c4 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -159,4 +159,11 @@
 config HAVE_LMB
 	boolean
 
+config CPUMASK_OFFSTACK
+	bool "Force CPU masks off stack" if DEBUG_PER_CPU_MAPS
+	help
+	  Use dynamic allocation for cpumask_var_t, instead of putting
+	  them on the stack.  This is a bit more expensive, but avoids
+	  stack overflow.
+
 endmenu
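
For reference, the option roughly toggles between these two shapes of
cpumask_var_t (simplified; the authoritative definitions live in
include/linux/cpumask.h):

#ifdef CONFIG_CPUMASK_OFFSTACK
/* alloc_cpumask_var() really allocates; free_cpumask_var() really frees. */
typedef struct cpumask *cpumask_var_t;
#else
/* The mask lives wherever the variable is declared; alloc/free are no-ops. */
typedef struct cpumask cpumask_var_t[1];
#endif
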
diff --git a/mm/slub.c b/mm/slub.c
index 6cb7ad1..0d861c3 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3642,7 +3642,7 @@
 				len < PAGE_SIZE - 60) {
 			len += sprintf(buf + len, " cpus=");
 			len += cpulist_scnprintf(buf + len, PAGE_SIZE - len - 50,
-					l->cpus);
+					&l->cpus);
 		}
 
 		if (num_online_nodes() > 1 && !nodes_empty(l->nodes) &&
diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c
index e4e2cae..086d5ef 100644
--- a/net/irda/ircomm/ircomm_tty.c
+++ b/net/irda/ircomm/ircomm_tty.c
@@ -371,9 +371,8 @@
 	IRDA_DEBUG(2, "%s()\n", __func__ );
 
 	line = tty->index;
-	if ((line < 0) || (line >= IRCOMM_TTY_PORTS)) {
+	if (line >= IRCOMM_TTY_PORTS)
 		return -ENODEV;
-	}
 
 	/* Check if instance already exists */
 	self = hashbin_lock_find(ircomm_tty, line, NULL);
@@ -405,6 +404,8 @@
 		 * Force TTY into raw mode by default which is usually what
 		 * we want for IrCOMM and IrLPT. This way applications will
 		 * not have to twiddle with printcap etc.
+		 *
+		 * Note this is completely unsafe and doesn't work properly
 		 */
 		tty->termios->c_iflag = 0;
 		tty->termios->c_oflag = 0;
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 53772bb..23b81cf 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -150,10 +150,11 @@
 static void ioapic_inj_nmi(struct kvm_vcpu *vcpu)
 {
 	kvm_inject_nmi(vcpu);
+	kvm_vcpu_kick(vcpu);
 }
 
-static u32 ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
-				       u8 dest_mode)
+u32 kvm_ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
+				    u8 dest_mode)
 {
 	u32 mask = 0;
 	int i;
@@ -207,7 +208,8 @@
 		     "vector=%x trig_mode=%x\n",
 		     dest, dest_mode, delivery_mode, vector, trig_mode);
 
-	deliver_bitmask = ioapic_get_delivery_bitmask(ioapic, dest, dest_mode);
+	deliver_bitmask = kvm_ioapic_get_delivery_bitmask(ioapic, dest,
+							  dest_mode);
 	if (!deliver_bitmask) {
 		ioapic_debug("no target on destination\n");
 		return 0;
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index cd7ae76..49c9581 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -85,5 +85,7 @@
 int kvm_ioapic_init(struct kvm *kvm);
 void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
 void kvm_ioapic_reset(struct kvm_ioapic *ioapic);
+u32 kvm_ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
+				u8 dest_mode);
 
 #endif
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 55ad76e..aa5d1e5 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -61,10 +61,9 @@
 	hlist_add_head(&kian->link, &kvm->arch.irq_ack_notifier_list);
 }
 
-void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
-				     struct kvm_irq_ack_notifier *kian)
+void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian)
 {
-	hlist_del(&kian->link);
+	hlist_del_init(&kian->link);
 }
 
 /* The caller must hold kvm->lock mutex */
@@ -73,11 +72,15 @@
 	unsigned long *bitmap = &kvm->arch.irq_sources_bitmap;
 	int irq_source_id = find_first_zero_bit(bitmap,
 				sizeof(kvm->arch.irq_sources_bitmap));
+
 	if (irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) {
 		printk(KERN_WARNING "kvm: exhaust allocatable IRQ sources!\n");
-		irq_source_id = -EFAULT;
-	} else
-		set_bit(irq_source_id, bitmap);
+		return -EFAULT;
+	}
+
+	ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
+	set_bit(irq_source_id, bitmap);
+
 	return irq_source_id;
 }
 
@@ -85,7 +88,9 @@
 {
 	int i;
 
-	if (irq_source_id <= 0 ||
+	ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
+
+	if (irq_source_id < 0 ||
 	    irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) {
 		printk(KERN_ERR "kvm: IRQ source ID out of range!\n");
 		return;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index a87f45e..fc6127c 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -47,6 +47,10 @@
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 
+#ifdef CONFIG_X86
+#include <asm/msidef.h>
+#endif
+
 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
 #include "coalesced_mmio.h"
 #endif
@@ -60,10 +64,13 @@
 MODULE_AUTHOR("Qumranet");
 MODULE_LICENSE("GPL");
 
+static int msi2intx = 1;
+module_param(msi2intx, bool, 0);
+
 DEFINE_SPINLOCK(kvm_lock);
 LIST_HEAD(vm_list);
 
-static cpumask_t cpus_hardware_enabled;
+static cpumask_var_t cpus_hardware_enabled;
 
 struct kmem_cache *kvm_vcpu_cache;
 EXPORT_SYMBOL_GPL(kvm_vcpu_cache);
@@ -75,9 +82,60 @@
 static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
 			   unsigned long arg);
 
-bool kvm_rebooting;
+static bool kvm_rebooting;
 
 #ifdef KVM_CAP_DEVICE_ASSIGNMENT
+
+#ifdef CONFIG_X86
+static void assigned_device_msi_dispatch(struct kvm_assigned_dev_kernel *dev)
+{
+	int vcpu_id;
+	struct kvm_vcpu *vcpu;
+	struct kvm_ioapic *ioapic = ioapic_irqchip(dev->kvm);
+	int dest_id = (dev->guest_msi.address_lo & MSI_ADDR_DEST_ID_MASK)
+			>> MSI_ADDR_DEST_ID_SHIFT;
+	int vector = (dev->guest_msi.data & MSI_DATA_VECTOR_MASK)
+			>> MSI_DATA_VECTOR_SHIFT;
+	int dest_mode = test_bit(MSI_ADDR_DEST_MODE_SHIFT,
+				(unsigned long *)&dev->guest_msi.address_lo);
+	int trig_mode = test_bit(MSI_DATA_TRIGGER_SHIFT,
+				(unsigned long *)&dev->guest_msi.data);
+	int delivery_mode = test_bit(MSI_DATA_DELIVERY_MODE_SHIFT,
+				(unsigned long *)&dev->guest_msi.data);
+	u32 deliver_bitmask;
+
+	BUG_ON(!ioapic);
+
+	deliver_bitmask = kvm_ioapic_get_delivery_bitmask(ioapic,
+				dest_id, dest_mode);
+	/* IOAPIC delivery mode value is the same as MSI here */
+	switch (delivery_mode) {
+	case IOAPIC_LOWEST_PRIORITY:
+		vcpu = kvm_get_lowest_prio_vcpu(ioapic->kvm, vector,
+				deliver_bitmask);
+		if (vcpu != NULL)
+			kvm_apic_set_irq(vcpu, vector, trig_mode);
+		else
+			printk(KERN_INFO "kvm: null lowest priority vcpu!\n");
+		break;
+	case IOAPIC_FIXED:
+		for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) {
+			if (!(deliver_bitmask & (1 << vcpu_id)))
+				continue;
+			deliver_bitmask &= ~(1 << vcpu_id);
+			vcpu = ioapic->kvm->vcpus[vcpu_id];
+			if (vcpu)
+				kvm_apic_set_irq(vcpu, vector, trig_mode);
+		}
+		break;
+	default:
+		printk(KERN_INFO "kvm: unsupported MSI delivery mode\n");
+	}
+}
+#else
+static void assigned_device_msi_dispatch(struct kvm_assigned_dev_kernel *dev) {}
+#endif
+
 static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
 						      int assigned_dev_id)
 {
@@ -104,9 +162,16 @@
 	 * finer-grained lock, update this
 	 */
 	mutex_lock(&assigned_dev->kvm->lock);
-	kvm_set_irq(assigned_dev->kvm,
-		    assigned_dev->irq_source_id,
-		    assigned_dev->guest_irq, 1);
+	if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_INTX)
+		kvm_set_irq(assigned_dev->kvm,
+			    assigned_dev->irq_source_id,
+			    assigned_dev->guest_irq, 1);
+	else if (assigned_dev->irq_requested_type &
+				KVM_ASSIGNED_DEV_GUEST_MSI) {
+		assigned_device_msi_dispatch(assigned_dev);
+		enable_irq(assigned_dev->host_irq);
+		assigned_dev->host_irq_disabled = false;
+	}
 	mutex_unlock(&assigned_dev->kvm->lock);
 	kvm_put_kvm(assigned_dev->kvm);
 }
@@ -117,8 +182,12 @@
 		(struct kvm_assigned_dev_kernel *) dev_id;
 
 	kvm_get_kvm(assigned_dev->kvm);
+
 	schedule_work(&assigned_dev->interrupt_work);
+
 	disable_irq_nosync(irq);
+	assigned_dev->host_irq_disabled = true;
+
 	return IRQ_HANDLED;
 }
 
@@ -132,19 +201,32 @@
 
 	dev = container_of(kian, struct kvm_assigned_dev_kernel,
 			   ack_notifier);
+
 	kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0);
-	enable_irq(dev->host_irq);
+
+	/* The guest irq may be shared so this ack may be
+	 * from another device.
+	 */
+	if (dev->host_irq_disabled) {
+		enable_irq(dev->host_irq);
+		dev->host_irq_disabled = false;
+	}
 }
 
-static void kvm_free_assigned_device(struct kvm *kvm,
-				     struct kvm_assigned_dev_kernel
-				     *assigned_dev)
+static void kvm_free_assigned_irq(struct kvm *kvm,
+				  struct kvm_assigned_dev_kernel *assigned_dev)
 {
-	if (irqchip_in_kernel(kvm) && assigned_dev->irq_requested)
-		free_irq(assigned_dev->host_irq, (void *)assigned_dev);
+	if (!irqchip_in_kernel(kvm))
+		return;
 
-	kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier);
-	kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
+	kvm_unregister_irq_ack_notifier(&assigned_dev->ack_notifier);
+
+	if (assigned_dev->irq_source_id != -1)
+		kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
+	assigned_dev->irq_source_id = -1;
+
+	if (!assigned_dev->irq_requested_type)
+		return;
 
 	if (cancel_work_sync(&assigned_dev->interrupt_work))
 		/* We had pending work. That means we will have to take
@@ -152,6 +234,23 @@
 		 */
 		kvm_put_kvm(kvm);
 
+	free_irq(assigned_dev->host_irq, (void *)assigned_dev);
+
+	if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI)
+		pci_disable_msi(assigned_dev->dev);
+
+	assigned_dev->irq_requested_type = 0;
+}
+
+
+static void kvm_free_assigned_device(struct kvm *kvm,
+				     struct kvm_assigned_dev_kernel
+				     *assigned_dev)
+{
+	kvm_free_assigned_irq(kvm, assigned_dev);
+
+	pci_reset_function(assigned_dev->dev);
+
 	pci_release_regions(assigned_dev->dev);
 	pci_disable_device(assigned_dev->dev);
 	pci_dev_put(assigned_dev->dev);
@@ -174,6 +273,95 @@
 	}
 }
 
+static int assigned_device_update_intx(struct kvm *kvm,
+			struct kvm_assigned_dev_kernel *adev,
+			struct kvm_assigned_irq *airq)
+{
+	adev->guest_irq = airq->guest_irq;
+	adev->ack_notifier.gsi = airq->guest_irq;
+
+	if (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_INTX)
+		return 0;
+
+	if (irqchip_in_kernel(kvm)) {
+		if (!msi2intx &&
+		    adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) {
+			free_irq(adev->host_irq, (void *)kvm);
+			pci_disable_msi(adev->dev);
+		}
+
+		if (!capable(CAP_SYS_RAWIO))
+			return -EPERM;
+
+		if (airq->host_irq)
+			adev->host_irq = airq->host_irq;
+		else
+			adev->host_irq = adev->dev->irq;
+
+		/* Even though this is PCI, we don't want to use shared
+		 * interrupts. Sharing host devices with guest-assigned devices
+		 * on the same interrupt line is not a happy situation: there
+		 * are going to be long delays in accepting, acking, etc.
+		 */
+		if (request_irq(adev->host_irq, kvm_assigned_dev_intr,
+				0, "kvm_assigned_intx_device", (void *)adev))
+			return -EIO;
+	}
+
+	adev->irq_requested_type = KVM_ASSIGNED_DEV_GUEST_INTX |
+				   KVM_ASSIGNED_DEV_HOST_INTX;
+	return 0;
+}
+
+#ifdef CONFIG_X86
+static int assigned_device_update_msi(struct kvm *kvm,
+			struct kvm_assigned_dev_kernel *adev,
+			struct kvm_assigned_irq *airq)
+{
+	int r;
+
+	if (airq->flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSI) {
+		/* x86 ignores the upper address of the guest MSI message */
+		adev->irq_requested_type |= KVM_ASSIGNED_DEV_GUEST_MSI;
+		adev->irq_requested_type &= ~KVM_ASSIGNED_DEV_GUEST_INTX;
+		adev->guest_msi.address_lo = airq->guest_msi.addr_lo;
+		adev->guest_msi.data = airq->guest_msi.data;
+		adev->ack_notifier.gsi = -1;
+	} else if (msi2intx) {
+		adev->irq_requested_type |= KVM_ASSIGNED_DEV_GUEST_INTX;
+		adev->irq_requested_type &= ~KVM_ASSIGNED_DEV_GUEST_MSI;
+		adev->guest_irq = airq->guest_irq;
+		adev->ack_notifier.gsi = airq->guest_irq;
+	}
+
+	if (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI)
+		return 0;
+
+	if (irqchip_in_kernel(kvm)) {
+		if (!msi2intx) {
+			if (adev->irq_requested_type &
+					KVM_ASSIGNED_DEV_HOST_INTX)
+				free_irq(adev->host_irq, (void *)adev);
+
+			r = pci_enable_msi(adev->dev);
+			if (r)
+				return r;
+		}
+
+		adev->host_irq = adev->dev->irq;
+		if (request_irq(adev->host_irq, kvm_assigned_dev_intr, 0,
+				"kvm_assigned_msi_device", (void *)adev))
+			return -EIO;
+	}
+
+	if (!msi2intx)
+		adev->irq_requested_type = KVM_ASSIGNED_DEV_GUEST_MSI;
+
+	adev->irq_requested_type |= KVM_ASSIGNED_DEV_HOST_MSI;
+	return 0;
+}
+#endif
+
 static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
 				   struct kvm_assigned_irq
 				   *assigned_irq)
@@ -190,49 +378,68 @@
 		return -EINVAL;
 	}
 
-	if (match->irq_requested) {
-		match->guest_irq = assigned_irq->guest_irq;
-		match->ack_notifier.gsi = assigned_irq->guest_irq;
-		mutex_unlock(&kvm->lock);
-		return 0;
+	if (!match->irq_requested_type) {
+		INIT_WORK(&match->interrupt_work,
+				kvm_assigned_dev_interrupt_work_handler);
+		if (irqchip_in_kernel(kvm)) {
+			/* Register ack notifier */
+			match->ack_notifier.gsi = -1;
+			match->ack_notifier.irq_acked =
+					kvm_assigned_dev_ack_irq;
+			kvm_register_irq_ack_notifier(kvm,
+					&match->ack_notifier);
+
+			/* Request IRQ source ID */
+			r = kvm_request_irq_source_id(kvm);
+			if (r < 0)
+				goto out_release;
+			else
+				match->irq_source_id = r;
+
+#ifdef CONFIG_X86
+			/* Determine the host device irq type; the result
+			 * can be read back from dev->msi_enabled */
+			if (msi2intx)
+				pci_enable_msi(match->dev);
+#endif
+		}
 	}
 
-	INIT_WORK(&match->interrupt_work,
-		  kvm_assigned_dev_interrupt_work_handler);
-
-	if (irqchip_in_kernel(kvm)) {
-		if (!capable(CAP_SYS_RAWIO)) {
-			r = -EPERM;
+	if ((!msi2intx &&
+	     (assigned_irq->flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSI)) ||
+	    (msi2intx && match->dev->msi_enabled)) {
+#ifdef CONFIG_X86
+		r = assigned_device_update_msi(kvm, match, assigned_irq);
+		if (r) {
+			printk(KERN_WARNING "kvm: failed to enable "
+					"MSI device!\n");
 			goto out_release;
 		}
-
-		if (assigned_irq->host_irq)
-			match->host_irq = assigned_irq->host_irq;
-		else
-			match->host_irq = match->dev->irq;
-		match->guest_irq = assigned_irq->guest_irq;
-		match->ack_notifier.gsi = assigned_irq->guest_irq;
-		match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;
-		kvm_register_irq_ack_notifier(kvm, &match->ack_notifier);
-		r = kvm_request_irq_source_id(kvm);
-		if (r < 0)
+#else
+		r = -ENOTTY;
+#endif
+	} else if (assigned_irq->host_irq == 0 && match->dev->irq == 0) {
+		/* Host device IRQ 0 means the device doesn't support INTx */
+		if (!msi2intx) {
+			printk(KERN_WARNING
+			       "kvm: wait device to enable MSI!\n");
+			r = 0;
+		} else {
+			printk(KERN_WARNING
+			       "kvm: failed to enable MSI device!\n");
+			r = -ENOTTY;
 			goto out_release;
-		else
-			match->irq_source_id = r;
-
-		/* Even though this is PCI, we don't want to use shared
-		 * interrupts. Sharing host devices with guest-assigned devices
-		 * on the same interrupt line is not a happy situation: there
-		 * are going to be long delays in accepting, acking, etc.
-		 */
-		if (request_irq(match->host_irq, kvm_assigned_dev_intr, 0,
-				"kvm_assigned_device", (void *)match)) {
-			r = -EIO;
+		}
+	} else {
+		/* Non-sharing INTx mode */
+		r = assigned_device_update_intx(kvm, match, assigned_irq);
+		if (r) {
+			printk(KERN_WARNING "kvm: failed to enable "
+					"INTx device!\n");
 			goto out_release;
 		}
 	}
 
-	match->irq_requested = true;
 	mutex_unlock(&kvm->lock);
 	return r;
 out_release:
@@ -283,11 +490,14 @@
 		       __func__);
 		goto out_disable;
 	}
+
+	pci_reset_function(dev);
+
 	match->assigned_dev_id = assigned_dev->assigned_dev_id;
 	match->host_busnr = assigned_dev->busnr;
 	match->host_devfn = assigned_dev->devfn;
 	match->dev = dev;
-
+	match->irq_source_id = -1;
 	match->kvm = kvm;
 
 	list_add(&match->list, &kvm->arch.assigned_dev_head);
@@ -355,58 +565,49 @@
 {
 }
 
-void kvm_flush_remote_tlbs(struct kvm *kvm)
+static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
 {
 	int i, cpu, me;
-	cpumask_t cpus;
+	cpumask_var_t cpus;
+	bool called = true;
 	struct kvm_vcpu *vcpu;
 
+	if (alloc_cpumask_var(&cpus, GFP_ATOMIC))
+		cpumask_clear(cpus);
+
 	me = get_cpu();
-	cpus_clear(cpus);
 	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
 		vcpu = kvm->vcpus[i];
 		if (!vcpu)
 			continue;
-		if (test_and_set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
+		if (test_and_set_bit(req, &vcpu->requests))
 			continue;
 		cpu = vcpu->cpu;
-		if (cpu != -1 && cpu != me)
-			cpu_set(cpu, cpus);
+		if (cpus != NULL && cpu != -1 && cpu != me)
+			cpumask_set_cpu(cpu, cpus);
 	}
-	if (cpus_empty(cpus))
-		goto out;
-	++kvm->stat.remote_tlb_flush;
-	smp_call_function_mask(cpus, ack_flush, NULL, 1);
-out:
+	if (unlikely(cpus == NULL))
+		smp_call_function_many(cpu_online_mask, ack_flush, NULL, 1);
+	else if (!cpumask_empty(cpus))
+		smp_call_function_many(cpus, ack_flush, NULL, 1);
+	else
+		called = false;
 	put_cpu();
+	free_cpumask_var(cpus);
+	return called;
+}
+
+void kvm_flush_remote_tlbs(struct kvm *kvm)
+{
+	if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
+		++kvm->stat.remote_tlb_flush;
 }
 
 void kvm_reload_remote_mmus(struct kvm *kvm)
 {
-	int i, cpu, me;
-	cpumask_t cpus;
-	struct kvm_vcpu *vcpu;
-
-	me = get_cpu();
-	cpus_clear(cpus);
-	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-		vcpu = kvm->vcpus[i];
-		if (!vcpu)
-			continue;
-		if (test_and_set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
-			continue;
-		cpu = vcpu->cpu;
-		if (cpu != -1 && cpu != me)
-			cpu_set(cpu, cpus);
-	}
-	if (cpus_empty(cpus))
-		goto out;
-	smp_call_function_mask(cpus, ack_flush, NULL, 1);
-out:
-	put_cpu();
+	make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD);
 }
 
-
 int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
 {
 	struct page *page;
@@ -710,6 +911,8 @@
 		goto out;
 	if (mem->guest_phys_addr & (PAGE_SIZE - 1))
 		goto out;
+	if (user_alloc && (mem->userspace_addr & (PAGE_SIZE - 1)))
+		goto out;
 	if (mem->slot >= KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS)
 		goto out;
 	if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr)
@@ -821,7 +1024,10 @@
 		goto out_free;
 	}
 
-	kvm_free_physmem_slot(&old, &new);
+	kvm_free_physmem_slot(&old, npages ? &new : NULL);
+	/* Slot deletion case: we have to update the current slot */
+	if (!npages)
+		*memslot = old;
 #ifdef CONFIG_DMAR
 	/* map the pages in iommu page table */
 	r = kvm_iommu_map_pages(kvm, base_gfn, npages);
@@ -918,7 +1124,7 @@
 }
 EXPORT_SYMBOL_GPL(kvm_is_error_hva);
 
-static struct kvm_memory_slot *__gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
+struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn)
 {
 	int i;
 
@@ -931,11 +1137,12 @@
 	}
 	return NULL;
 }
+EXPORT_SYMBOL_GPL(gfn_to_memslot_unaliased);
 
 struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
 {
 	gfn = unalias_gfn(kvm, gfn);
-	return __gfn_to_memslot(kvm, gfn);
+	return gfn_to_memslot_unaliased(kvm, gfn);
 }
 
 int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
@@ -959,7 +1166,7 @@
 	struct kvm_memory_slot *slot;
 
 	gfn = unalias_gfn(kvm, gfn);
-	slot = __gfn_to_memslot(kvm, gfn);
+	slot = gfn_to_memslot_unaliased(kvm, gfn);
 	if (!slot)
 		return bad_hva();
 	return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE);
@@ -1210,7 +1417,7 @@
 	struct kvm_memory_slot *memslot;
 
 	gfn = unalias_gfn(kvm, gfn);
-	memslot = __gfn_to_memslot(kvm, gfn);
+	memslot = gfn_to_memslot_unaliased(kvm, gfn);
 	if (memslot && memslot->dirty_bitmap) {
 		unsigned long rel_gfn = gfn - memslot->base_gfn;
 
@@ -1295,7 +1502,7 @@
 	return 0;
 }
 
-static const struct file_operations kvm_vcpu_fops = {
+static struct file_operations kvm_vcpu_fops = {
 	.release        = kvm_vcpu_release,
 	.unlocked_ioctl = kvm_vcpu_ioctl,
 	.compat_ioctl   = kvm_vcpu_ioctl,
@@ -1689,7 +1896,7 @@
 	return 0;
 }
 
-static const struct file_operations kvm_vm_fops = {
+static struct file_operations kvm_vm_fops = {
 	.release        = kvm_vm_release,
 	.unlocked_ioctl = kvm_vm_ioctl,
 	.compat_ioctl   = kvm_vm_ioctl,
@@ -1711,6 +1918,18 @@
 	return fd;
 }
 
+static long kvm_dev_ioctl_check_extension_generic(long arg)
+{
+	switch (arg) {
+	case KVM_CAP_USER_MEMORY:
+	case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
+		return 1;
+	default:
+		break;
+	}
+	return kvm_dev_ioctl_check_extension(arg);
+}
+
 static long kvm_dev_ioctl(struct file *filp,
 			  unsigned int ioctl, unsigned long arg)
 {
@@ -1730,7 +1949,7 @@
 		r = kvm_dev_ioctl_create_vm();
 		break;
 	case KVM_CHECK_EXTENSION:
-		r = kvm_dev_ioctl_check_extension(arg);
+		r = kvm_dev_ioctl_check_extension_generic(arg);
 		break;
 	case KVM_GET_VCPU_MMAP_SIZE:
 		r = -EINVAL;
@@ -1771,9 +1990,9 @@
 {
 	int cpu = raw_smp_processor_id();
 
-	if (cpu_isset(cpu, cpus_hardware_enabled))
+	if (cpumask_test_cpu(cpu, cpus_hardware_enabled))
 		return;
-	cpu_set(cpu, cpus_hardware_enabled);
+	cpumask_set_cpu(cpu, cpus_hardware_enabled);
 	kvm_arch_hardware_enable(NULL);
 }
 
@@ -1781,9 +2000,9 @@
 {
 	int cpu = raw_smp_processor_id();
 
-	if (!cpu_isset(cpu, cpus_hardware_enabled))
+	if (!cpumask_test_cpu(cpu, cpus_hardware_enabled))
 		return;
-	cpu_clear(cpu, cpus_hardware_enabled);
+	cpumask_clear_cpu(cpu, cpus_hardware_enabled);
 	kvm_arch_hardware_disable(NULL);
 }
 
@@ -2017,9 +2236,14 @@
 
 	bad_pfn = page_to_pfn(bad_page);
 
+	if (!alloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) {
+		r = -ENOMEM;
+		goto out_free_0;
+	}
+
 	r = kvm_arch_hardware_setup();
 	if (r < 0)
-		goto out_free_0;
+		goto out_free_0a;
 
 	for_each_online_cpu(cpu) {
 		smp_call_function_single(cpu,
@@ -2053,6 +2277,8 @@
 	}
 
 	kvm_chardev_ops.owner = module;
+	kvm_vm_fops.owner = module;
+	kvm_vcpu_fops.owner = module;
 
 	r = misc_register(&kvm_dev);
 	if (r) {
@@ -2062,6 +2288,9 @@
 
 	kvm_preempt_ops.sched_in = kvm_sched_in;
 	kvm_preempt_ops.sched_out = kvm_sched_out;
+#ifndef CONFIG_X86
+	msi2intx = 0;
+#endif
 
 	return 0;
 
@@ -2078,6 +2307,8 @@
 	on_each_cpu(hardware_disable, NULL, 1);
 out_free_1:
 	kvm_arch_hardware_unsetup();
+out_free_0a:
+	free_cpumask_var(cpus_hardware_enabled);
 out_free_0:
 	__free_page(bad_page);
 out:
@@ -2101,6 +2332,7 @@
 	kvm_arch_hardware_unsetup();
 	kvm_arch_exit();
 	kvm_exit_debug();
+	free_cpumask_var(cpus_hardware_enabled);
 	__free_page(bad_page);
 }
 EXPORT_SYMBOL_GPL(kvm_exit);
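
make_all_cpus_request() above degrades gracefully when the GFP_ATOMIC
cpumask allocation fails: it notifies every online CPU instead of only the
targeted ones.  A stripped-down sketch of that fallback (example_kick() and
fn are placeholders, not patch code; preemption is assumed disabled around
the cross-call, as the patch arranges with get_cpu()/put_cpu()):

static void example_kick(void (*fn)(void *info))
{
	cpumask_var_t cpus;

	if (!alloc_cpumask_var(&cpus, GFP_ATOMIC)) {
		/* Out of memory: over-notify rather than fail. */
		smp_call_function_many(cpu_online_mask, fn, NULL, 1);
		return;
	}

	cpumask_clear(cpus);
	/* ... set only the CPUs that actually need the IPI ... */
	if (!cpumask_empty(cpus))
		smp_call_function_many(cpus, fn, NULL, 1);

	free_cpumask_var(cpus);
}
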
diff --git a/virt/kvm/kvm_trace.c b/virt/kvm/kvm_trace.c
index 41dcc84..f598744 100644
--- a/virt/kvm/kvm_trace.c
+++ b/virt/kvm/kvm_trace.c
@@ -252,6 +252,7 @@
 			struct kvm_trace_probe *p = &kvm_trace_probes[i];
 			marker_probe_unregister(p->name, p->probe_func, p);
 		}
+		marker_synchronize_unregister();
 
 		relay_close(kt->rchan);
 		debugfs_remove(kt->lost_file);