Merge tag 'v3.11-rc7' into stable/for-linus-3.12

Linux 3.11-rc7

We need git commit 28817e9de4f039a1a8c1fe1df2fa2df524626b9e:
Author: Chuck Anderson <chuck.anderson@oracle.com>
Date:   Tue Aug 6 15:12:19 2013 -0700

    xen/smp: initialize IPI vectors before marking CPU online

* tag 'v3.11-rc7': (443 commits)
  Linux 3.11-rc7
  ARC: [lib] strchr breakage in Big-endian configuration
  VFS: collect_mounts() should return an ERR_PTR
  bfs: iget_locked() doesn't return an ERR_PTR
  efs: iget_locked() doesn't return an ERR_PTR()
  proc: kill the extra proc_readfd_common()->dir_emit_dots()
  cope with potentially long ->d_dname() output for shmem/hugetlb
  usb: phy: fix build breakage
  USB: OHCI: add missing PCI PM callbacks to ohci-pci.c
  staging: comedi: bug-fix NULL pointer dereference on failed attach
  lib/lz4: correct the LZ4 license
  memcg: get rid of swapaccount leftovers
  nilfs2: fix issue with counting number of bio requests for BIO_EOPNOTSUPP error detection
  nilfs2: remove double bio_put() in nilfs_end_bio_write() for BIO_EOPNOTSUPP error
  drivers/platform/olpc/olpc-ec.c: initialise earlier
  ipv4: expose IPV4_DEVCONF
  ipv6: handle Redirect ICMP Message with no Redirected Header option
  be2net: fix disabling TX in be_close()
  Revert "ACPI / video: Always call acpi_video_init_brightness() on init"
  Revert "genetlink: fix family dump race"
  ...

Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
diff --git a/CREDITS b/CREDITS
index 206d0fc..646a0a9 100644
--- a/CREDITS
+++ b/CREDITS
@@ -1120,6 +1120,7 @@
 D: Improved mmap and munmap handling
 D: General mm minor tidyups
 D: autofs v4 maintainer
+D: Xen subsystem
 S: 987 Alabama St
 S: San Francisco
 S: CA, 94110
diff --git a/Documentation/tpm/xen-tpmfront.txt b/Documentation/tpm/xen-tpmfront.txt
new file mode 100644
index 0000000..69346de
--- /dev/null
+++ b/Documentation/tpm/xen-tpmfront.txt
@@ -0,0 +1,113 @@
+Virtual TPM interface for Xen
+
+Authors: Matthew Fioravante (JHUAPL), Daniel De Graaf (NSA)
+
+This document describes the virtual Trusted Platform Module (vTPM) subsystem
+for Xen. The reader is assumed to be familiar with building and installing Xen
+and Linux, and to have a basic understanding of the TPM and vTPM concepts.
+
+INTRODUCTION
+
+The goal of this work is to provide TPM functionality to a virtual guest
+operating system (in Xen terms, a DomU).  This allows programs to interact with
+a TPM in a virtual system the same way they interact with a TPM on the physical
+system.  Each guest gets its own unique, emulated, software TPM.  However, each
+vTPM's secrets (keys, NVRAM, etc.) are managed by a vTPM Manager domain, which
+seals the secrets to the physical TPM.  If the process of creating each of
+these domains (manager, vTPM, and guest) is trusted, the vTPM subsystem extends
+the chain of trust rooted in the hardware TPM to virtual machines in Xen.  Each
+major component of vTPM is implemented as a separate domain, providing secure
+separation guaranteed by the hypervisor.  The vTPM domains are implemented in
+mini-os to reduce memory and processor overhead.
+
+This mini-os vTPM subsystem was built on top of the previous vTPM work done by
+IBM and Intel Corporation.
+
+
+DESIGN OVERVIEW
+---------------
+
+The architecture of vTPM is described below:
+
++------------------+
+|    Linux DomU    | ...
+|       |  ^       |
+|       v  |       |
+|   xen-tpmfront   |
++------------------+
+        |  ^
+        v  |
++------------------+
+| mini-os/tpmback  |
+|       |  ^       |
+|       v  |       |
+|  vtpm-stubdom    | ...
+|       |  ^       |
+|       v  |       |
+| mini-os/tpmfront |
++------------------+
+        |  ^
+        v  |
++------------------+
+| mini-os/tpmback  |
+|       |  ^       |
+|       v  |       |
+| vtpmmgr-stubdom  |
+|       |  ^       |
+|       v  |       |
+| mini-os/tpm_tis  |
++------------------+
+        |  ^
+        v  |
++------------------+
+|   Hardware TPM   |
++------------------+
+
+ * Linux DomU: The Linux based guest that wants to use a vTPM. There may be
+	       more than one of these.
+
+ * xen-tpmfront.ko: Linux kernel virtual TPM frontend driver. This driver
+                    provides vTPM access to a Linux-based DomU.
+
+ * mini-os/tpmback: Mini-os TPM backend driver. The Linux frontend driver
+		    connects to this backend driver to facilitate communications
+		    between the Linux DomU and its vTPM. This driver is also
+		    used by vtpmmgr-stubdom to communicate with vtpm-stubdom.
+
+ * vtpm-stubdom: A mini-os stub domain that implements a vTPM. There is a
+		 one-to-one mapping between running vtpm-stubdom instances and
+                 logical vTPMs on the system. The vTPM Platform Configuration
+                 Registers (PCRs) are normally all initialized to zero.
+
+ * mini-os/tpmfront: Mini-os TPM frontend driver. The vTPM mini-os domain
+		     vtpm-stubdom uses this driver to communicate with
+		     vtpmmgr-stubdom. This driver is also used in mini-os
+		     domains such as pv-grub that talk to the vTPM domain.
+
+ * vtpmmgr-stubdom: A mini-os domain that implements the vTPM manager. There is
+		    only one vTPM manager and it should be running during the
+		    entire lifetime of the machine.  This domain regulates
+		    access to the physical TPM on the system and secures the
+		    persistent state of each vTPM.
+
+ * mini-os/tpm_tis: Mini-os TPM version 1.2 TPM Interface Specification (TIS)
+                    driver. This driver is used by vtpmmgr-stubdom to talk
+                    directly to the hardware TPM. Communication is facilitated
+                    by mapping hardware memory pages into vtpmmgr-stubdom.
+
+ * Hardware TPM: The physical TPM that is soldered onto the motherboard.
+
+
+INTEGRATION WITH XEN
+--------------------
+
+Support for the vTPM driver was added to the libxl toolstack in Xen 4.3.  See
+the Xen documentation (docs/misc/vtpm.txt) for details on setting up the vTPM
+and vTPM Manager stub domains.  Once the stub domains are running, a vTPM
+device is set up in the same manner as a disk or network device in the
+domain's configuration file (an illustrative sketch follows this file).
+
+In order to use features such as IMA that require a TPM to be loaded prior to
+the initrd, the xen-tpmfront driver must be built into the kernel.  If such
+features are not needed, the driver can be compiled as a module and will be
+loaded as usual.
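
A hedged illustration of the paragraph above (the backend domain name and the
UUID placeholder are assumptions, not values from this patch; docs/misc/vtpm.txt
in the Xen source has the authoritative syntax): once vtpmmgr-stubdom and a
vtpm-stubdom are running, the guest's xl configuration gains a vtpm line
alongside its disk and vif lines.

    # Hypothetical xl config fragment for a Linux DomU (sketch only)
    name = "guest"
    vtpm = [ "backend=domu-vtpm,uuid=<uuid-of-this-vtpm>" ]
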
diff --git a/MAINTAINERS b/MAINTAINERS
index 8197fbd..94aa87dc 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9247,9 +9247,9 @@
 
 XEN HYPERVISOR INTERFACE
 M:	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
-M:	Jeremy Fitzhardinge <jeremy@goop.org>
-L:	xen-devel@lists.xensource.com (moderated for non-subscribers)
-L:	virtualization@lists.linux-foundation.org
+M:	Boris Ostrovsky <boris.ostrovsky@oracle.com>
+M:	David Vrabel <david.vrabel@citrix.com>
+L:	xen-devel@lists.xenproject.org (moderated for non-subscribers)
 S:	Supported
 F:	arch/x86/xen/
 F:	drivers/*/xen-*front.c
@@ -9260,35 +9260,35 @@
 
 XEN HYPERVISOR ARM
 M:	Stefano Stabellini <stefano.stabellini@eu.citrix.com>
-L:	xen-devel@lists.xensource.com (moderated for non-subscribers)
+L:	xen-devel@lists.xenproject.org (moderated for non-subscribers)
 S:	Supported
 F:	arch/arm/xen/
 F:	arch/arm/include/asm/xen/
 
 XEN HYPERVISOR ARM64
 M:	Stefano Stabellini <stefano.stabellini@eu.citrix.com>
-L:	xen-devel@lists.xensource.com (moderated for non-subscribers)
+L:	xen-devel@lists.xenproject.org (moderated for non-subscribers)
 S:	Supported
 F:	arch/arm64/xen/
 F:	arch/arm64/include/asm/xen/
 
 XEN NETWORK BACKEND DRIVER
 M:	Ian Campbell <ian.campbell@citrix.com>
-L:	xen-devel@lists.xensource.com (moderated for non-subscribers)
+L:	xen-devel@lists.xenproject.org (moderated for non-subscribers)
 L:	netdev@vger.kernel.org
 S:	Supported
 F:	drivers/net/xen-netback/*
 
 XEN PCI SUBSYSTEM
 M:	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
-L:	xen-devel@lists.xensource.com (moderated for non-subscribers)
+L:	xen-devel@lists.xenproject.org (moderated for non-subscribers)
 S:	Supported
 F:	arch/x86/pci/*xen*
 F:	drivers/pci/*xen*
 
 XEN SWIOTLB SUBSYSTEM
 M:	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
-L:	xen-devel@lists.xensource.com (moderated for non-subscribers)
+L:	xen-devel@lists.xenproject.org (moderated for non-subscribers)
 S:	Supported
 F:	arch/x86/xen/*swiotlb*
 F:	drivers/xen/*swiotlb*
diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c
index 8a6295c..83e4f95 100644
--- a/arch/arm/xen/enlighten.c
+++ b/arch/arm/xen/enlighten.c
@@ -21,6 +21,8 @@
 #include <linux/of.h>
 #include <linux/of_irq.h>
 #include <linux/of_address.h>
+#include <linux/cpuidle.h>
+#include <linux/cpufreq.h>
 
 #include <linux/mm.h>
 
@@ -267,18 +269,28 @@
 	if (!xen_initial_domain())
 		xenbus_probe(NULL);
 
+	/*
+	 * Make sure board-specific code will not set up ops for
+	 * cpuidle and cpufreq.
+	 */
+	disable_cpuidle();
+	disable_cpufreq();
+
 	return 0;
 }
 core_initcall(xen_guest_init);
 
 static int __init xen_pm_init(void)
 {
+	if (!xen_domain())
+		return -ENODEV;
+
 	pm_power_off = xen_power_off;
 	arm_pm_restart = xen_restart;
 
 	return 0;
 }
-subsys_initcall(xen_pm_init);
+late_initcall(xen_pm_init);
 
 static irqreturn_t xen_arm_callback(int irq, void *arg)
 {
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b32ebf9..b1fb846 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -632,6 +632,7 @@
 config PARAVIRT_SPINLOCKS
 	bool "Paravirtualization layer for spinlocks"
 	depends on PARAVIRT && SMP
+	select UNINLINE_SPIN_UNLOCK
 	---help---
 	  Paravirtualized spinlocks allow a pvops backend to replace the
 	  spinlock implementation with something virtualization-friendly
@@ -656,6 +657,15 @@
 	  underlying device model, the host provides the guest with
 	  timing infrastructure such as time of day, and system time
 
+config KVM_DEBUG_FS
+	bool "Enable debug information for KVM Guests in debugfs"
+	depends on KVM_GUEST && DEBUG_FS
+	default n
+	---help---
+	  This option enables collection of various statistics for a KVM guest.
+	  Statistics are displayed in the debugfs filesystem. Enabling this
+	  option may incur significant overhead.
+
 source "arch/x86/lguest/Kconfig"
 
 config PARAVIRT_TIME_ACCOUNTING
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 695399f..427afcb 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -118,10 +118,20 @@
 void kvm_async_pf_task_wake(u32 token);
 u32 kvm_read_and_reset_pf_reason(void);
 extern void kvm_disable_steal_time(void);
-#else
-#define kvm_guest_init() do { } while (0)
+
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+void __init kvm_spinlock_init(void);
+#else /* !CONFIG_PARAVIRT_SPINLOCKS */
+static inline void kvm_spinlock_init(void)
+{
+}
+#endif /* CONFIG_PARAVIRT_SPINLOCKS */
+
+#else /* CONFIG_KVM_GUEST */
+#define kvm_guest_init() do {} while (0)
 #define kvm_async_pf_task_wait(T) do {} while(0)
 #define kvm_async_pf_task_wake(T) do {} while(0)
+
 static inline u32 kvm_read_and_reset_pf_reason(void)
 {
 	return 0;
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index cfdc9ee..401f350 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -712,36 +712,16 @@
 
 #if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS)
 
-static inline int arch_spin_is_locked(struct arch_spinlock *lock)
+static __always_inline void __ticket_lock_spinning(struct arch_spinlock *lock,
+							__ticket_t ticket)
 {
-	return PVOP_CALL1(int, pv_lock_ops.spin_is_locked, lock);
+	PVOP_VCALLEE2(pv_lock_ops.lock_spinning, lock, ticket);
 }
 
-static inline int arch_spin_is_contended(struct arch_spinlock *lock)
+static __always_inline void __ticket_unlock_kick(struct arch_spinlock *lock,
+							__ticket_t ticket)
 {
-	return PVOP_CALL1(int, pv_lock_ops.spin_is_contended, lock);
-}
-#define arch_spin_is_contended	arch_spin_is_contended
-
-static __always_inline void arch_spin_lock(struct arch_spinlock *lock)
-{
-	PVOP_VCALL1(pv_lock_ops.spin_lock, lock);
-}
-
-static __always_inline void arch_spin_lock_flags(struct arch_spinlock *lock,
-						  unsigned long flags)
-{
-	PVOP_VCALL2(pv_lock_ops.spin_lock_flags, lock, flags);
-}
-
-static __always_inline int arch_spin_trylock(struct arch_spinlock *lock)
-{
-	return PVOP_CALL1(int, pv_lock_ops.spin_trylock, lock);
-}
-
-static __always_inline void arch_spin_unlock(struct arch_spinlock *lock)
-{
-	PVOP_VCALL1(pv_lock_ops.spin_unlock, lock);
+	PVOP_VCALL2(pv_lock_ops.unlock_kick, lock, ticket);
 }
 
 #endif
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 0db1fca..04ac40e 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -327,13 +327,15 @@
 };
 
 struct arch_spinlock;
+#ifdef CONFIG_SMP
+#include <asm/spinlock_types.h>
+#else
+typedef u16 __ticket_t;
+#endif
+
 struct pv_lock_ops {
-	int (*spin_is_locked)(struct arch_spinlock *lock);
-	int (*spin_is_contended)(struct arch_spinlock *lock);
-	void (*spin_lock)(struct arch_spinlock *lock);
-	void (*spin_lock_flags)(struct arch_spinlock *lock, unsigned long flags);
-	int (*spin_trylock)(struct arch_spinlock *lock);
-	void (*spin_unlock)(struct arch_spinlock *lock);
+	struct paravirt_callee_save lock_spinning;
+	void (*unlock_kick)(struct arch_spinlock *lock, __ticket_t ticket);
 };
 
 /* This contains all the paravirt structures: we get a convenient
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index e3ddd7d..8963bfe 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -1,11 +1,14 @@
 #ifndef _ASM_X86_SPINLOCK_H
 #define _ASM_X86_SPINLOCK_H
 
+#include <linux/jump_label.h>
 #include <linux/atomic.h>
 #include <asm/page.h>
 #include <asm/processor.h>
 #include <linux/compiler.h>
 #include <asm/paravirt.h>
+#include <asm/bitops.h>
+
 /*
  * Your basic SMP spinlocks, allowing only a single CPU anywhere
  *
@@ -34,6 +37,31 @@
 # define UNLOCK_LOCK_PREFIX
 #endif
 
+/* How long a lock should spin before we consider blocking */
+#define SPIN_THRESHOLD	(1 << 15)
+
+extern struct static_key paravirt_ticketlocks_enabled;
+static __always_inline bool static_key_false(struct static_key *key);
+
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+
+static inline void __ticket_enter_slowpath(arch_spinlock_t *lock)
+{
+	set_bit(0, (volatile unsigned long *)&lock->tickets.tail);
+}
+
+#else  /* !CONFIG_PARAVIRT_SPINLOCKS */
+static __always_inline void __ticket_lock_spinning(arch_spinlock_t *lock,
+							__ticket_t ticket)
+{
+}
+static inline void __ticket_unlock_kick(arch_spinlock_t *lock,
+							__ticket_t ticket)
+{
+}
+
+#endif /* CONFIG_PARAVIRT_SPINLOCKS */
+
 /*
  * Ticket locks are conceptually two parts, one indicating the current head of
  * the queue, and the other indicating the current tail. The lock is acquired
@@ -47,90 +75,108 @@
  * in the high part, because a wide xadd increment of the low part would carry
  * up and contaminate the high part.
  */
-static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock)
+static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
 {
-	register struct __raw_tickets inc = { .tail = 1 };
+	register struct __raw_tickets inc = { .tail = TICKET_LOCK_INC };
 
 	inc = xadd(&lock->tickets, inc);
+	if (likely(inc.head == inc.tail))
+		goto out;
 
+	inc.tail &= ~TICKET_SLOWPATH_FLAG;
 	for (;;) {
-		if (inc.head == inc.tail)
-			break;
-		cpu_relax();
-		inc.head = ACCESS_ONCE(lock->tickets.head);
+		unsigned count = SPIN_THRESHOLD;
+
+		do {
+			if (ACCESS_ONCE(lock->tickets.head) == inc.tail)
+				goto out;
+			cpu_relax();
+		} while (--count);
+		__ticket_lock_spinning(lock, inc.tail);
 	}
-	barrier();		/* make sure nothing creeps before the lock is taken */
+out:	barrier();	/* make sure nothing creeps before the lock is taken */
 }
 
-static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
+static __always_inline int arch_spin_trylock(arch_spinlock_t *lock)
 {
 	arch_spinlock_t old, new;
 
 	old.tickets = ACCESS_ONCE(lock->tickets);
-	if (old.tickets.head != old.tickets.tail)
+	if (old.tickets.head != (old.tickets.tail & ~TICKET_SLOWPATH_FLAG))
 		return 0;
 
-	new.head_tail = old.head_tail + (1 << TICKET_SHIFT);
+	new.head_tail = old.head_tail + (TICKET_LOCK_INC << TICKET_SHIFT);
 
 	/* cmpxchg is a full barrier, so nothing can move before it */
 	return cmpxchg(&lock->head_tail, old.head_tail, new.head_tail) == old.head_tail;
 }
 
-static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)
+static inline void __ticket_unlock_slowpath(arch_spinlock_t *lock,
+					    arch_spinlock_t old)
 {
-	__add(&lock->tickets.head, 1, UNLOCK_LOCK_PREFIX);
+	arch_spinlock_t new;
+
+	BUILD_BUG_ON(((__ticket_t)NR_CPUS) != NR_CPUS);
+
+	/* Perform the unlock on the "before" copy */
+	old.tickets.head += TICKET_LOCK_INC;
+
+	/* Clear the slowpath flag */
+	new.head_tail = old.head_tail & ~(TICKET_SLOWPATH_FLAG << TICKET_SHIFT);
+
+	/*
+	 * If the lock is uncontended, clear the flag - use cmpxchg in
+	 * case it changes behind our back though.
+	 */
+	if (new.tickets.head != new.tickets.tail ||
+	    cmpxchg(&lock->head_tail, old.head_tail,
+					new.head_tail) != old.head_tail) {
+		/*
+		 * Lock still has someone queued for it, so wake up an
+		 * appropriate waiter.
+		 */
+		__ticket_unlock_kick(lock, old.tickets.head);
+	}
 }
 
-static inline int __ticket_spin_is_locked(arch_spinlock_t *lock)
+static __always_inline void arch_spin_unlock(arch_spinlock_t *lock)
+{
+	if (TICKET_SLOWPATH_FLAG &&
+	    static_key_false(&paravirt_ticketlocks_enabled)) {
+		arch_spinlock_t prev;
+
+		prev = *lock;
+		add_smp(&lock->tickets.head, TICKET_LOCK_INC);
+
+		/* add_smp() is a full mb() */
+
+		if (unlikely(lock->tickets.tail & TICKET_SLOWPATH_FLAG))
+			__ticket_unlock_slowpath(lock, prev);
+	} else
+		__add(&lock->tickets.head, TICKET_LOCK_INC, UNLOCK_LOCK_PREFIX);
+}
+
+static inline int arch_spin_is_locked(arch_spinlock_t *lock)
 {
 	struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);
 
 	return tmp.tail != tmp.head;
 }
 
-static inline int __ticket_spin_is_contended(arch_spinlock_t *lock)
+static inline int arch_spin_is_contended(arch_spinlock_t *lock)
 {
 	struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);
 
-	return (__ticket_t)(tmp.tail - tmp.head) > 1;
-}
-
-#ifndef CONFIG_PARAVIRT_SPINLOCKS
-
-static inline int arch_spin_is_locked(arch_spinlock_t *lock)
-{
-	return __ticket_spin_is_locked(lock);
-}
-
-static inline int arch_spin_is_contended(arch_spinlock_t *lock)
-{
-	return __ticket_spin_is_contended(lock);
+	return (__ticket_t)(tmp.tail - tmp.head) > TICKET_LOCK_INC;
 }
 #define arch_spin_is_contended	arch_spin_is_contended
 
-static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
-{
-	__ticket_spin_lock(lock);
-}
-
-static __always_inline int arch_spin_trylock(arch_spinlock_t *lock)
-{
-	return __ticket_spin_trylock(lock);
-}
-
-static __always_inline void arch_spin_unlock(arch_spinlock_t *lock)
-{
-	__ticket_spin_unlock(lock);
-}
-
 static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock,
 						  unsigned long flags)
 {
 	arch_spin_lock(lock);
 }
 
-#endif	/* CONFIG_PARAVIRT_SPINLOCKS */
-
 static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
 {
 	while (arch_spin_is_locked(lock))
diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h
index ad0ad07..4f1bea1 100644
--- a/arch/x86/include/asm/spinlock_types.h
+++ b/arch/x86/include/asm/spinlock_types.h
@@ -1,13 +1,17 @@
 #ifndef _ASM_X86_SPINLOCK_TYPES_H
 #define _ASM_X86_SPINLOCK_TYPES_H
 
-#ifndef __LINUX_SPINLOCK_TYPES_H
-# error "please don't include this file directly"
-#endif
-
 #include <linux/types.h>
 
-#if (CONFIG_NR_CPUS < 256)
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+#define __TICKET_LOCK_INC	2
+#define TICKET_SLOWPATH_FLAG	((__ticket_t)1)
+#else
+#define __TICKET_LOCK_INC	1
+#define TICKET_SLOWPATH_FLAG	((__ticket_t)0)
+#endif
+
+#if (CONFIG_NR_CPUS < (256 / __TICKET_LOCK_INC))
 typedef u8  __ticket_t;
 typedef u16 __ticketpair_t;
 #else
@@ -15,6 +19,8 @@
 typedef u32 __ticketpair_t;
 #endif
 
+#define TICKET_LOCK_INC	((__ticket_t)__TICKET_LOCK_INC)
+
 #define TICKET_SHIFT	(sizeof(__ticket_t) * 8)
 
 typedef struct arch_spinlock {
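
A minimal userspace C sketch of the encoding defined by the hunk above (an
illustration, not kernel code; it assumes the NR_CPUS < 128 case where
__ticket_t is u8): with CONFIG_PARAVIRT_SPINLOCKS, tickets advance in steps of
TICKET_LOCK_INC == 2, leaving bit 0 of the tail free to carry
TICKET_SLOWPATH_FLAG, the mark a blocked waiter sets so that arch_spin_unlock()
knows it must kick someone.

    #include <stdio.h>
    #include <stdint.h>

    typedef uint8_t __ticket_t;                  /* NR_CPUS < 128 case */
    #define TICKET_LOCK_INC      ((__ticket_t)2) /* CONFIG_PARAVIRT_SPINLOCKS */
    #define TICKET_SLOWPATH_FLAG ((__ticket_t)1)

    struct raw_tickets { __ticket_t head, tail; };

    int main(void)
    {
            struct raw_tickets t = { .head = 0, .tail = 0 };

            t.tail += TICKET_LOCK_INC;      /* first CPU takes a ticket (owner) */
            t.tail += TICKET_LOCK_INC;      /* second CPU queues behind it */
            t.tail |= TICKET_SLOWPATH_FLAG; /* a waiter blocks in the hypervisor */

            /* The unlocker masks the flag off before comparing head and tail. */
            printf("head=%u tail=%u slowpath=%u\n",
                   (unsigned)t.head,
                   (unsigned)(t.tail & ~TICKET_SLOWPATH_FLAG),
                   (unsigned)(t.tail & TICKET_SLOWPATH_FLAG));
            return 0;
    }
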
diff --git a/arch/x86/include/asm/xen/events.h b/arch/x86/include/asm/xen/events.h
index ca842f2..608a79d 100644
--- a/arch/x86/include/asm/xen/events.h
+++ b/arch/x86/include/asm/xen/events.h
@@ -7,6 +7,7 @@
 	XEN_CALL_FUNCTION_SINGLE_VECTOR,
 	XEN_SPIN_UNLOCK_VECTOR,
 	XEN_IRQ_WORK_VECTOR,
+	XEN_NMI_VECTOR,
 
 	XEN_NR_IPIS,
 };
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index 06fdbd9..94dc8ca 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -23,6 +23,7 @@
 #define KVM_FEATURE_ASYNC_PF		4
 #define KVM_FEATURE_STEAL_TIME		5
 #define KVM_FEATURE_PV_EOI		6
+#define KVM_FEATURE_PV_UNHALT		7
 
 /* The last 8 bits are used to indicate how to interpret the flags field
  * in pvclock structure. If no bits are set, all flags are ignored.
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index a96d32c..56e2fa4 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -34,6 +34,7 @@
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/kprobes.h>
+#include <linux/debugfs.h>
 #include <asm/timer.h>
 #include <asm/cpu.h>
 #include <asm/traps.h>
@@ -419,6 +420,7 @@
 	WARN_ON(kvm_register_clock("primary cpu clock"));
 	kvm_guest_cpu_init();
 	native_smp_prepare_boot_cpu();
+	kvm_spinlock_init();
 }
 
 static void kvm_guest_cpu_online(void *dummy)
@@ -523,3 +525,263 @@
 	return 0;
 }
 arch_initcall(activate_jump_labels);
+
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+
+/* Kick a cpu by its apicid. Used to wake up a halted vcpu */
+static void kvm_kick_cpu(int cpu)
+{
+	int apicid;
+	unsigned long flags = 0;
+
+	apicid = per_cpu(x86_cpu_to_apicid, cpu);
+	kvm_hypercall2(KVM_HC_KICK_CPU, flags, apicid);
+}
+
+enum kvm_contention_stat {
+	TAKEN_SLOW,
+	TAKEN_SLOW_PICKUP,
+	RELEASED_SLOW,
+	RELEASED_SLOW_KICKED,
+	NR_CONTENTION_STATS
+};
+
+#ifdef CONFIG_KVM_DEBUG_FS
+#define HISTO_BUCKETS	30
+
+static struct kvm_spinlock_stats
+{
+	u32 contention_stats[NR_CONTENTION_STATS];
+	u32 histo_spin_blocked[HISTO_BUCKETS+1];
+	u64 time_blocked;
+} spinlock_stats;
+
+static u8 zero_stats;
+
+static inline void check_zero(void)
+{
+	u8 ret;
+	u8 old;
+
+	old = ACCESS_ONCE(zero_stats);
+	if (unlikely(old)) {
+		ret = cmpxchg(&zero_stats, old, 0);
+		/* This ensures only one fellow resets the stat */
+		if (ret == old)
+			memset(&spinlock_stats, 0, sizeof(spinlock_stats));
+	}
+}
+
+static inline void add_stats(enum kvm_contention_stat var, u32 val)
+{
+	check_zero();
+	spinlock_stats.contention_stats[var] += val;
+}
+
+
+static inline u64 spin_time_start(void)
+{
+	return sched_clock();
+}
+
+static void __spin_time_accum(u64 delta, u32 *array)
+{
+	unsigned index;
+
+	index = ilog2(delta);
+	check_zero();
+
+	if (index < HISTO_BUCKETS)
+		array[index]++;
+	else
+		array[HISTO_BUCKETS]++;
+}
+
+static inline void spin_time_accum_blocked(u64 start)
+{
+	u32 delta;
+
+	delta = sched_clock() - start;
+	__spin_time_accum(delta, spinlock_stats.histo_spin_blocked);
+	spinlock_stats.time_blocked += delta;
+}
+
+static struct dentry *d_spin_debug;
+static struct dentry *d_kvm_debug;
+
+struct dentry *kvm_init_debugfs(void)
+{
+	d_kvm_debug = debugfs_create_dir("kvm", NULL);
+	if (!d_kvm_debug)
+		printk(KERN_WARNING "Could not create 'kvm' debugfs directory\n");
+
+	return d_kvm_debug;
+}
+
+static int __init kvm_spinlock_debugfs(void)
+{
+	struct dentry *d_kvm;
+
+	d_kvm = kvm_init_debugfs();
+	if (d_kvm == NULL)
+		return -ENOMEM;
+
+	d_spin_debug = debugfs_create_dir("spinlocks", d_kvm);
+
+	debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats);
+
+	debugfs_create_u32("taken_slow", 0444, d_spin_debug,
+		   &spinlock_stats.contention_stats[TAKEN_SLOW]);
+	debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug,
+		   &spinlock_stats.contention_stats[TAKEN_SLOW_PICKUP]);
+
+	debugfs_create_u32("released_slow", 0444, d_spin_debug,
+		   &spinlock_stats.contention_stats[RELEASED_SLOW]);
+	debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug,
+		   &spinlock_stats.contention_stats[RELEASED_SLOW_KICKED]);
+
+	debugfs_create_u64("time_blocked", 0444, d_spin_debug,
+			   &spinlock_stats.time_blocked);
+
+	debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug,
+		     spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1);
+
+	return 0;
+}
+fs_initcall(kvm_spinlock_debugfs);
+#else  /* !CONFIG_KVM_DEBUG_FS */
+static inline void add_stats(enum kvm_contention_stat var, u32 val)
+{
+}
+
+static inline u64 spin_time_start(void)
+{
+	return 0;
+}
+
+static inline void spin_time_accum_blocked(u64 start)
+{
+}
+#endif  /* CONFIG_KVM_DEBUG_FS */
+
+struct kvm_lock_waiting {
+	struct arch_spinlock *lock;
+	__ticket_t want;
+};
+
+/* cpus 'waiting' on a spinlock to become available */
+static cpumask_t waiting_cpus;
+
+/* Track spinlock on which a cpu is waiting */
+static DEFINE_PER_CPU(struct kvm_lock_waiting, klock_waiting);
+
+static void kvm_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
+{
+	struct kvm_lock_waiting *w;
+	int cpu;
+	u64 start;
+	unsigned long flags;
+
+	if (in_nmi())
+		return;
+
+	w = &__get_cpu_var(klock_waiting);
+	cpu = smp_processor_id();
+	start = spin_time_start();
+
+	/*
+	 * Make sure an interrupt handler can't upset things in a
+	 * partially setup state.
+	 */
+	local_irq_save(flags);
+
+	/*
+	 * The ordering protocol on this is that the "lock" pointer
+	 * may only be set non-NULL if the "want" ticket is correct.
+	 * If we're updating "want", we must first clear "lock".
+	 */
+	w->lock = NULL;
+	smp_wmb();
+	w->want = want;
+	smp_wmb();
+	w->lock = lock;
+
+	add_stats(TAKEN_SLOW, 1);
+
+	/*
+	 * This uses set_bit, which is atomic but we should not rely on its
+	 * reordering guarantees. So a barrier is needed after this call.
+	 */
+	cpumask_set_cpu(cpu, &waiting_cpus);
+
+	barrier();
+
+	/*
+	 * Mark entry to slowpath before doing the pickup test to make
+	 * sure we don't deadlock with an unlocker.
+	 */
+	__ticket_enter_slowpath(lock);
+
+	/*
+	 * Check again to make sure it didn't become free while
+	 * we weren't looking.
+	 */
+	if (ACCESS_ONCE(lock->tickets.head) == want) {
+		add_stats(TAKEN_SLOW_PICKUP, 1);
+		goto out;
+	}
+
+	/*
+	 * Halt until it's our turn and we are kicked. Note that we do a safe
+	 * halt in the irq-enabled case, to avoid hanging if the lock info is
+	 * overwritten in an irq spinlock slowpath and no spurious interrupt
+	 * occurs to save us.
+	 */
+	if (arch_irqs_disabled_flags(flags))
+		halt();
+	else
+		safe_halt();
+
+out:
+	cpumask_clear_cpu(cpu, &waiting_cpus);
+	w->lock = NULL;
+	local_irq_restore(flags);
+	spin_time_accum_blocked(start);
+}
+PV_CALLEE_SAVE_REGS_THUNK(kvm_lock_spinning);
+
+/* Kick vcpu waiting on @lock->head to reach value @ticket */
+static void kvm_unlock_kick(struct arch_spinlock *lock, __ticket_t ticket)
+{
+	int cpu;
+
+	add_stats(RELEASED_SLOW, 1);
+	for_each_cpu(cpu, &waiting_cpus) {
+		const struct kvm_lock_waiting *w = &per_cpu(klock_waiting, cpu);
+		if (ACCESS_ONCE(w->lock) == lock &&
+		    ACCESS_ONCE(w->want) == ticket) {
+			add_stats(RELEASED_SLOW_KICKED, 1);
+			kvm_kick_cpu(cpu);
+			break;
+		}
+	}
+}
+
+/*
+ * Set up pv_lock_ops to exploit KVM_FEATURE_PV_UNHALT if present.
+ */
+void __init kvm_spinlock_init(void)
+{
+	if (!kvm_para_available())
+		return;
+	/* Does host kernel support KVM_FEATURE_PV_UNHALT? */
+	if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT))
+		return;
+
+	printk(KERN_INFO "KVM setup paravirtual spinlock\n");
+
+	static_key_slow_inc(&paravirt_ticketlocks_enabled);
+
+	pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(kvm_lock_spinning);
+	pv_lock_ops.unlock_kick = kvm_unlock_kick;
+}
+#endif	/* CONFIG_PARAVIRT_SPINLOCKS */
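
With the new CONFIG_KVM_DEBUG_FS option enabled, the counters registered by
kvm_spinlock_debugfs() above appear under the kvm/spinlocks directory in
debugfs. A hedged usage sketch (it assumes debugfs is mounted at the
conventional /sys/kernel/debug; the file names come from the hunk above):

    # Sketch: inspect PV-spinlock contention counters inside a KVM guest
    mount -t debugfs none /sys/kernel/debug 2>/dev/null || true
    cat /sys/kernel/debug/kvm/spinlocks/taken_slow
    cat /sys/kernel/debug/kvm/spinlocks/released_slow_kicked
    # Writing a non-zero value to zero_stats arms a reset of all counters
    echo 1 > /sys/kernel/debug/kvm/spinlocks/zero_stats
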
diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c
index 676b8c7..bbb6c73 100644
--- a/arch/x86/kernel/paravirt-spinlocks.c
+++ b/arch/x86/kernel/paravirt-spinlocks.c
@@ -4,25 +4,17 @@
  */
 #include <linux/spinlock.h>
 #include <linux/module.h>
+#include <linux/jump_label.h>
 
 #include <asm/paravirt.h>
 
-static inline void
-default_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags)
-{
-	arch_spin_lock(lock);
-}
-
 struct pv_lock_ops pv_lock_ops = {
 #ifdef CONFIG_SMP
-	.spin_is_locked = __ticket_spin_is_locked,
-	.spin_is_contended = __ticket_spin_is_contended,
-
-	.spin_lock = __ticket_spin_lock,
-	.spin_lock_flags = default_spin_lock_flags,
-	.spin_trylock = __ticket_spin_trylock,
-	.spin_unlock = __ticket_spin_unlock,
+	.lock_spinning = __PV_IS_CALLEE_SAVE(paravirt_nop),
+	.unlock_kick = paravirt_nop,
 #endif
 };
 EXPORT_SYMBOL(pv_lock_ops);
 
+struct static_key paravirt_ticketlocks_enabled = STATIC_KEY_INIT_FALSE;
+EXPORT_SYMBOL(paravirt_ticketlocks_enabled);
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 193097e..15939e8 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -427,8 +427,7 @@
 
 	if (!xen_initial_domain())
 		cpuid_leaf1_edx_mask &=
-			~((1 << X86_FEATURE_APIC) |  /* disable local APIC */
-			  (1 << X86_FEATURE_ACPI));  /* disable ACPI */
+			~((1 << X86_FEATURE_ACPI));  /* disable ACPI */
 
 	cpuid_leaf1_ecx_mask &= ~(1 << (X86_FEATURE_X2APIC % 32));
 
@@ -735,8 +734,7 @@
 		addr = (unsigned long)xen_int3;
 	else if (addr == (unsigned long)stack_segment)
 		addr = (unsigned long)xen_stack_segment;
-	else if (addr == (unsigned long)double_fault ||
-		 addr == (unsigned long)nmi) {
+	else if (addr == (unsigned long)double_fault) {
 		/* Don't need to handle these */
 		return 0;
 #ifdef CONFIG_X86_MCE
@@ -747,7 +745,12 @@
 		 */
 		;
 #endif
-	} else {
+	} else if (addr == (unsigned long)nmi)
+		/*
+		 * Use the native version as well.
+		 */
+		;
+	else {
 		/* Some other trap using IST? */
 		if (WARN_ON(val->ist != 0))
 			return 0;
@@ -1710,6 +1713,8 @@
 
 	xen_hvm_init_shared_info();
 
+	xen_panic_handler_init();
+
 	if (xen_feature(XENFEAT_hvm_callback_vector))
 		xen_have_vector_callback = 1;
 	xen_hvm_smp_init();
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
index 01a4dc0..0da7f86 100644
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -47,23 +47,18 @@
 	/* convert from IF type flag */
 	flags = !(flags & X86_EFLAGS_IF);
 
-	/* There's a one instruction preempt window here.  We need to
-	   make sure we're don't switch CPUs between getting the vcpu
-	   pointer and updating the mask. */
+	/* See xen_irq_enable() for why preemption must be disabled. */
 	preempt_disable();
 	vcpu = this_cpu_read(xen_vcpu);
 	vcpu->evtchn_upcall_mask = flags;
-	preempt_enable_no_resched();
-
-	/* Doesn't matter if we get preempted here, because any
-	   pending event will get dealt with anyway. */
 
 	if (flags == 0) {
-		preempt_check_resched();
 		barrier(); /* unmask then check (avoid races) */
 		if (unlikely(vcpu->evtchn_upcall_pending))
 			xen_force_evtchn_callback();
-	}
+		preempt_enable();
+	} else
+		preempt_enable_no_resched();
 }
 PV_CALLEE_SAVE_REGS_THUNK(xen_restore_fl);
 
@@ -82,10 +77,12 @@
 {
 	struct vcpu_info *vcpu;
 
-	/* We don't need to worry about being preempted here, since
-	   either a) interrupts are disabled, so no preemption, or b)
-	   the caller is confused and is trying to re-enable interrupts
-	   on an indeterminate processor. */
+	/*
+	 * We may be preempted as soon as vcpu->evtchn_upcall_mask is
+	 * cleared, so disable preemption to ensure we check for
+	 * events on the VCPU we are still running on.
+	 */
+	preempt_disable();
 
 	vcpu = this_cpu_read(xen_vcpu);
 	vcpu->evtchn_upcall_mask = 0;
@@ -96,6 +93,8 @@
 	barrier(); /* unmask then check (avoid races) */
 	if (unlikely(vcpu->evtchn_upcall_pending))
 		xen_force_evtchn_callback();
+
+	preempt_enable();
 }
 PV_CALLEE_SAVE_REGS_THUNK(xen_irq_enable);
 
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 95fb2aa..8b901e8 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -161,6 +161,7 @@
 #include <asm/xen/page.h>
 #include <asm/xen/hypercall.h>
 #include <asm/xen/hypervisor.h>
+#include <xen/balloon.h>
 #include <xen/grant_table.h>
 
 #include "multicalls.h"
@@ -967,7 +968,10 @@
 	if (kmap_op != NULL) {
 		if (!PageHighMem(page)) {
 			struct multicall_space mcs;
-			struct gnttab_unmap_grant_ref *unmap_op;
+			struct gnttab_unmap_and_replace *unmap_op;
+			struct page *scratch_page = get_balloon_scratch_page();
+			unsigned long scratch_page_address = (unsigned long)
+				__va(page_to_pfn(scratch_page) << PAGE_SHIFT);
 
 			/*
 			 * It might be that we queued all the m2p grant table
@@ -986,25 +990,31 @@
 				printk(KERN_WARNING "m2p_remove_override: "
 						"pfn %lx mfn %lx, failed to modify kernel mappings",
 						pfn, mfn);
+				put_balloon_scratch_page();
 				return -1;
 			}
 
-			mcs = xen_mc_entry(
-					sizeof(struct gnttab_unmap_grant_ref));
+			xen_mc_batch();
+
+			mcs = __xen_mc_entry(
+					sizeof(struct gnttab_unmap_and_replace));
 			unmap_op = mcs.args;
 			unmap_op->host_addr = kmap_op->host_addr;
+			unmap_op->new_addr = scratch_page_address;
 			unmap_op->handle = kmap_op->handle;
-			unmap_op->dev_bus_addr = 0;
 
 			MULTI_grant_table_op(mcs.mc,
-					GNTTABOP_unmap_grant_ref, unmap_op, 1);
+					GNTTABOP_unmap_and_replace, unmap_op, 1);
+
+			mcs = __xen_mc_entry(0);
+			MULTI_update_va_mapping(mcs.mc, scratch_page_address,
+					pfn_pte(page_to_pfn(scratch_page),
+					PAGE_KERNEL_RO), 0);
 
 			xen_mc_issue(PARAVIRT_LAZY_MMU);
 
-			set_pte_at(&init_mm, address, ptep,
-					pfn_pte(pfn, PAGE_KERNEL));
-			__flush_tlb_single(address);
 			kmap_op->host_addr = 0;
+			put_balloon_scratch_page();
 		}
 	}
 
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 8f3eea6..09f3059 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -33,6 +33,9 @@
 /* These are code, but not functions.  Defined in entry.S */
 extern const char xen_hypervisor_callback[];
 extern const char xen_failsafe_callback[];
+#ifdef CONFIG_X86_64
+extern const char nmi[];
+#endif
 extern void xen_sysenter_target(void);
 extern void xen_syscall_target(void);
 extern void xen_syscall32_target(void);
@@ -215,13 +218,19 @@
 	unsigned long pfn;
 
 	/*
-	 * If the PFNs are currently mapped, the VA mapping also needs
-	 * to be updated to be 1:1.
+	 * If the PFNs are currently mapped, clear the mappings
+	 * (except for the ISA region which must be 1:1 mapped) to
+	 * release the refcounts (in Xen) on the original frames.
 	 */
-	for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++)
+	for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++) {
+		pte_t pte = __pte_ma(0);
+
+		if (pfn < PFN_UP(ISA_END_ADDRESS))
+			pte = mfn_pte(pfn, PAGE_KERNEL_IO);
+
 		(void)HYPERVISOR_update_va_mapping(
-			(unsigned long)__va(pfn << PAGE_SHIFT),
-			mfn_pte(pfn, PAGE_KERNEL_IO), 0);
+			(unsigned long)__va(pfn << PAGE_SHIFT), pte, 0);
+	}
 
 	if (start_pfn < nr_pages)
 		*released += xen_release_chunk(
@@ -547,7 +556,13 @@
 	}
 #endif /* CONFIG_X86_64 */
 }
-
+void __cpuinit xen_enable_nmi(void)
+{
+#ifdef CONFIG_X86_64
+	if (register_callback(CALLBACKTYPE_nmi, nmi))
+		BUG();
+#endif
+}
 void __init xen_arch_setup(void)
 {
 	xen_panic_handler_init();
@@ -565,7 +580,7 @@
 
 	xen_enable_sysenter();
 	xen_enable_syscall();
-
+	xen_enable_nmi();
 #ifdef CONFIG_ACPI
 	if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
 		printk(KERN_INFO "ACPI in unprivileged domain disabled\n");
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index b81c88e..9235842 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -279,6 +279,7 @@
 
 	xen_filter_cpu_maps();
 	xen_setup_vcpu_info_placement();
+	xen_init_spinlocks();
 }
 
 static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
@@ -572,6 +573,12 @@
 	case IRQ_WORK_VECTOR:
 		xen_vector = XEN_IRQ_WORK_VECTOR;
 		break;
+#ifdef CONFIG_X86_64
+	case NMI_VECTOR:
+	case APIC_DM_NMI: /* Some use that instead of NMI_VECTOR */
+		xen_vector = XEN_NMI_VECTOR;
+		break;
+#endif
 	default:
 		xen_vector = -1;
 		printk(KERN_ERR "xen: vector 0x%x is not implemented\n",
@@ -680,7 +687,6 @@
 {
 	smp_ops = xen_smp_ops;
 	xen_fill_possible_map();
-	xen_init_spinlocks();
 }
 
 static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus)
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index cf3caee..0438b93 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -17,45 +17,44 @@
 #include "xen-ops.h"
 #include "debugfs.h"
 
+enum xen_contention_stat {
+	TAKEN_SLOW,
+	TAKEN_SLOW_PICKUP,
+	TAKEN_SLOW_SPURIOUS,
+	RELEASED_SLOW,
+	RELEASED_SLOW_KICKED,
+	NR_CONTENTION_STATS
+};
+
+
 #ifdef CONFIG_XEN_DEBUG_FS
+#define HISTO_BUCKETS	30
 static struct xen_spinlock_stats
 {
-	u64 taken;
-	u32 taken_slow;
-	u32 taken_slow_nested;
-	u32 taken_slow_pickup;
-	u32 taken_slow_spurious;
-	u32 taken_slow_irqenable;
-
-	u64 released;
-	u32 released_slow;
-	u32 released_slow_kicked;
-
-#define HISTO_BUCKETS	30
-	u32 histo_spin_total[HISTO_BUCKETS+1];
-	u32 histo_spin_spinning[HISTO_BUCKETS+1];
+	u32 contention_stats[NR_CONTENTION_STATS];
 	u32 histo_spin_blocked[HISTO_BUCKETS+1];
-
-	u64 time_total;
-	u64 time_spinning;
 	u64 time_blocked;
 } spinlock_stats;
 
 static u8 zero_stats;
 
-static unsigned lock_timeout = 1 << 10;
-#define TIMEOUT lock_timeout
-
 static inline void check_zero(void)
 {
-	if (unlikely(zero_stats)) {
-		memset(&spinlock_stats, 0, sizeof(spinlock_stats));
-		zero_stats = 0;
+	u8 ret;
+	u8 old = ACCESS_ONCE(zero_stats);
+	if (unlikely(old)) {
+		ret = cmpxchg(&zero_stats, old, 0);
+		/* This ensures only one fellow resets the stat */
+		if (ret == old)
+			memset(&spinlock_stats, 0, sizeof(spinlock_stats));
 	}
 }
 
-#define ADD_STATS(elem, val)			\
-	do { check_zero(); spinlock_stats.elem += (val); } while(0)
+static inline void add_stats(enum xen_contention_stat var, u32 val)
+{
+	check_zero();
+	spinlock_stats.contention_stats[var] += val;
+}
 
 static inline u64 spin_time_start(void)
 {
@@ -74,22 +73,6 @@
 		array[HISTO_BUCKETS]++;
 }
 
-static inline void spin_time_accum_spinning(u64 start)
-{
-	u32 delta = xen_clocksource_read() - start;
-
-	__spin_time_accum(delta, spinlock_stats.histo_spin_spinning);
-	spinlock_stats.time_spinning += delta;
-}
-
-static inline void spin_time_accum_total(u64 start)
-{
-	u32 delta = xen_clocksource_read() - start;
-
-	__spin_time_accum(delta, spinlock_stats.histo_spin_total);
-	spinlock_stats.time_total += delta;
-}
-
 static inline void spin_time_accum_blocked(u64 start)
 {
 	u32 delta = xen_clocksource_read() - start;
@@ -99,19 +82,15 @@
 }
 #else  /* !CONFIG_XEN_DEBUG_FS */
 #define TIMEOUT			(1 << 10)
-#define ADD_STATS(elem, val)	do { (void)(val); } while(0)
+static inline void add_stats(enum xen_contention_stat var, u32 val)
+{
+}
 
 static inline u64 spin_time_start(void)
 {
 	return 0;
 }
 
-static inline void spin_time_accum_total(u64 start)
-{
-}
-static inline void spin_time_accum_spinning(u64 start)
-{
-}
 static inline void spin_time_accum_blocked(u64 start)
 {
 }
@@ -134,227 +113,123 @@
 	asm(LOCK_PREFIX " decw %0" : "+m" ((xl)->spinners) : : "memory");
 #endif
 
-struct xen_spinlock {
-	unsigned char lock;		/* 0 -> free; 1 -> locked */
-	xen_spinners_t spinners;	/* count of waiting cpus */
+struct xen_lock_waiting {
+	struct arch_spinlock *lock;
+	__ticket_t want;
 };
 
-static int xen_spin_is_locked(struct arch_spinlock *lock)
-{
-	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
-
-	return xl->lock != 0;
-}
-
-static int xen_spin_is_contended(struct arch_spinlock *lock)
-{
-	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
-
-	/* Not strictly true; this is only the count of contended
-	   lock-takers entering the slow path. */
-	return xl->spinners != 0;
-}
-
-static int xen_spin_trylock(struct arch_spinlock *lock)
-{
-	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
-	u8 old = 1;
-
-	asm("xchgb %b0,%1"
-	    : "+q" (old), "+m" (xl->lock) : : "memory");
-
-	return old == 0;
-}
-
-static DEFINE_PER_CPU(char *, irq_name);
 static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
-static DEFINE_PER_CPU(struct xen_spinlock *, lock_spinners);
+static DEFINE_PER_CPU(char *, irq_name);
+static DEFINE_PER_CPU(struct xen_lock_waiting, lock_waiting);
+static cpumask_t waiting_cpus;
 
-/*
- * Mark a cpu as interested in a lock.  Returns the CPU's previous
- * lock of interest, in case we got preempted by an interrupt.
- */
-static inline struct xen_spinlock *spinning_lock(struct xen_spinlock *xl)
+static void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
 {
-	struct xen_spinlock *prev;
-
-	prev = __this_cpu_read(lock_spinners);
-	__this_cpu_write(lock_spinners, xl);
-
-	wmb();			/* set lock of interest before count */
-
-	inc_spinners(xl);
-
-	return prev;
-}
-
-/*
- * Mark a cpu as no longer interested in a lock.  Restores previous
- * lock of interest (NULL for none).
- */
-static inline void unspinning_lock(struct xen_spinlock *xl, struct xen_spinlock *prev)
-{
-	dec_spinners(xl);
-	wmb();			/* decrement count before restoring lock */
-	__this_cpu_write(lock_spinners, prev);
-}
-
-static noinline int xen_spin_lock_slow(struct arch_spinlock *lock, bool irq_enable)
-{
-	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
-	struct xen_spinlock *prev;
 	int irq = __this_cpu_read(lock_kicker_irq);
-	int ret;
+	struct xen_lock_waiting *w = &__get_cpu_var(lock_waiting);
+	int cpu = smp_processor_id();
 	u64 start;
+	unsigned long flags;
 
 	/* If kicker interrupts not initialized yet, just spin */
 	if (irq == -1)
-		return 0;
+		return;
 
 	start = spin_time_start();
 
-	/* announce we're spinning */
-	prev = spinning_lock(xl);
+	/*
+	 * Make sure an interrupt handler can't upset things in a
+	 * partially setup state.
+	 */
+	local_irq_save(flags);
+	/*
+	 * We don't really care if we're overwriting some other
+	 * (lock,want) pair, as that would mean that we're currently
+	 * in an interrupt context, and the outer context had
+	 * interrupts enabled.  That has already kicked the VCPU out
+	 * of xen_poll_irq(), so it will just return spuriously and
+	 * retry with newly setup (lock,want).
+	 *
+	 * The ordering protocol on this is that the "lock" pointer
+	 * may only be set non-NULL if the "want" ticket is correct.
+	 * If we're updating "want", we must first clear "lock".
+	 */
+	w->lock = NULL;
+	smp_wmb();
+	w->want = want;
+	smp_wmb();
+	w->lock = lock;
 
-	ADD_STATS(taken_slow, 1);
-	ADD_STATS(taken_slow_nested, prev != NULL);
+	/* This uses set_bit, which is atomic and therefore a barrier */
+	cpumask_set_cpu(cpu, &waiting_cpus);
+	add_stats(TAKEN_SLOW, 1);
 
-	do {
-		unsigned long flags;
+	/* clear pending */
+	xen_clear_irq_pending(irq);
 
-		/* clear pending */
-		xen_clear_irq_pending(irq);
+	/* Only check lock once pending cleared */
+	barrier();
 
-		/* check again make sure it didn't become free while
-		   we weren't looking  */
-		ret = xen_spin_trylock(lock);
-		if (ret) {
-			ADD_STATS(taken_slow_pickup, 1);
+	/*
+	 * Mark entry to slowpath before doing the pickup test to make
+	 * sure we don't deadlock with an unlocker.
+	 */
+	__ticket_enter_slowpath(lock);
 
-			/*
-			 * If we interrupted another spinlock while it
-			 * was blocking, make sure it doesn't block
-			 * without rechecking the lock.
-			 */
-			if (prev != NULL)
-				xen_set_irq_pending(irq);
-			goto out;
-		}
+	/*
+	 * Check again to make sure it didn't become free while
+	 * we weren't looking.
+	 */
+	if (ACCESS_ONCE(lock->tickets.head) == want) {
+		add_stats(TAKEN_SLOW_PICKUP, 1);
+		goto out;
+	}
 
-		flags = arch_local_save_flags();
-		if (irq_enable) {
-			ADD_STATS(taken_slow_irqenable, 1);
-			raw_local_irq_enable();
-		}
+	/* Allow interrupts while blocked */
+	local_irq_restore(flags);
 
-		/*
-		 * Block until irq becomes pending.  If we're
-		 * interrupted at this point (after the trylock but
-		 * before entering the block), then the nested lock
-		 * handler guarantees that the irq will be left
-		 * pending if there's any chance the lock became free;
-		 * xen_poll_irq() returns immediately if the irq is
-		 * pending.
-		 */
-		xen_poll_irq(irq);
+	/*
+	 * If an interrupt happens here, it will leave the wakeup irq
+	 * pending, which will cause xen_poll_irq() to return
+	 * immediately.
+	 */
 
-		raw_local_irq_restore(flags);
+	/* Block until irq becomes pending (or perhaps a spurious wakeup) */
+	xen_poll_irq(irq);
+	add_stats(TAKEN_SLOW_SPURIOUS, !xen_test_irq_pending(irq));
 
-		ADD_STATS(taken_slow_spurious, !xen_test_irq_pending(irq));
-	} while (!xen_test_irq_pending(irq)); /* check for spurious wakeups */
+	local_irq_save(flags);
 
 	kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq));
-
 out:
-	unspinning_lock(xl, prev);
+	cpumask_clear_cpu(cpu, &waiting_cpus);
+	w->lock = NULL;
+
+	local_irq_restore(flags);
+
 	spin_time_accum_blocked(start);
-
-	return ret;
 }
+PV_CALLEE_SAVE_REGS_THUNK(xen_lock_spinning);
 
-static inline void __xen_spin_lock(struct arch_spinlock *lock, bool irq_enable)
-{
-	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
-	unsigned timeout;
-	u8 oldval;
-	u64 start_spin;
-
-	ADD_STATS(taken, 1);
-
-	start_spin = spin_time_start();
-
-	do {
-		u64 start_spin_fast = spin_time_start();
-
-		timeout = TIMEOUT;
-
-		asm("1: xchgb %1,%0\n"
-		    "   testb %1,%1\n"
-		    "   jz 3f\n"
-		    "2: rep;nop\n"
-		    "   cmpb $0,%0\n"
-		    "   je 1b\n"
-		    "   dec %2\n"
-		    "   jnz 2b\n"
-		    "3:\n"
-		    : "+m" (xl->lock), "=q" (oldval), "+r" (timeout)
-		    : "1" (1)
-		    : "memory");
-
-		spin_time_accum_spinning(start_spin_fast);
-
-	} while (unlikely(oldval != 0 &&
-			  (TIMEOUT == ~0 || !xen_spin_lock_slow(lock, irq_enable))));
-
-	spin_time_accum_total(start_spin);
-}
-
-static void xen_spin_lock(struct arch_spinlock *lock)
-{
-	__xen_spin_lock(lock, false);
-}
-
-static void xen_spin_lock_flags(struct arch_spinlock *lock, unsigned long flags)
-{
-	__xen_spin_lock(lock, !raw_irqs_disabled_flags(flags));
-}
-
-static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl)
+static void xen_unlock_kick(struct arch_spinlock *lock, __ticket_t next)
 {
 	int cpu;
 
-	ADD_STATS(released_slow, 1);
+	add_stats(RELEASED_SLOW, 1);
 
-	for_each_online_cpu(cpu) {
-		/* XXX should mix up next cpu selection */
-		if (per_cpu(lock_spinners, cpu) == xl) {
-			ADD_STATS(released_slow_kicked, 1);
+	for_each_cpu(cpu, &waiting_cpus) {
+		const struct xen_lock_waiting *w = &per_cpu(lock_waiting, cpu);
+
+		/* Make sure we read lock before want */
+		if (ACCESS_ONCE(w->lock) == lock &&
+		    ACCESS_ONCE(w->want) == next) {
+			add_stats(RELEASED_SLOW_KICKED, 1);
 			xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR);
+			break;
 		}
 	}
 }
 
-static void xen_spin_unlock(struct arch_spinlock *lock)
-{
-	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
-
-	ADD_STATS(released, 1);
-
-	smp_wmb();		/* make sure no writes get moved after unlock */
-	xl->lock = 0;		/* release lock */
-
-	/*
-	 * Make sure unlock happens before checking for waiting
-	 * spinners.  We need a strong barrier to enforce the
-	 * write-read ordering to different memory locations, as the
-	 * CPU makes no implied guarantees about their ordering.
-	 */
-	mb();
-
-	if (unlikely(xl->spinners))
-		xen_spin_unlock_slow(xl);
-}
-
 static irqreturn_t dummy_handler(int irq, void *dev_id)
 {
 	BUG();
@@ -408,6 +283,8 @@
 	per_cpu(irq_name, cpu) = NULL;
 }
 
+static bool xen_pvspin __initdata = true;
+
 void __init xen_init_spinlocks(void)
 {
 	/*
@@ -417,16 +294,24 @@
 	if (xen_hvm_domain())
 		return;
 
-	BUILD_BUG_ON(sizeof(struct xen_spinlock) > sizeof(arch_spinlock_t));
+	if (!xen_pvspin) {
+		printk(KERN_DEBUG "xen: PV spinlocks disabled\n");
+		return;
+	}
 
-	pv_lock_ops.spin_is_locked = xen_spin_is_locked;
-	pv_lock_ops.spin_is_contended = xen_spin_is_contended;
-	pv_lock_ops.spin_lock = xen_spin_lock;
-	pv_lock_ops.spin_lock_flags = xen_spin_lock_flags;
-	pv_lock_ops.spin_trylock = xen_spin_trylock;
-	pv_lock_ops.spin_unlock = xen_spin_unlock;
+	static_key_slow_inc(&paravirt_ticketlocks_enabled);
+
+	pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(xen_lock_spinning);
+	pv_lock_ops.unlock_kick = xen_unlock_kick;
 }
 
+static __init int xen_parse_nopvspin(char *arg)
+{
+	xen_pvspin = false;
+	return 0;
+}
+early_param("xen_nopvspin", xen_parse_nopvspin);
+
 #ifdef CONFIG_XEN_DEBUG_FS
 
 static struct dentry *d_spin_debug;
@@ -442,37 +327,21 @@
 
 	debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats);
 
-	debugfs_create_u32("timeout", 0644, d_spin_debug, &lock_timeout);
-
-	debugfs_create_u64("taken", 0444, d_spin_debug, &spinlock_stats.taken);
 	debugfs_create_u32("taken_slow", 0444, d_spin_debug,
-			   &spinlock_stats.taken_slow);
-	debugfs_create_u32("taken_slow_nested", 0444, d_spin_debug,
-			   &spinlock_stats.taken_slow_nested);
+			   &spinlock_stats.contention_stats[TAKEN_SLOW]);
 	debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug,
-			   &spinlock_stats.taken_slow_pickup);
+			   &spinlock_stats.contention_stats[TAKEN_SLOW_PICKUP]);
 	debugfs_create_u32("taken_slow_spurious", 0444, d_spin_debug,
-			   &spinlock_stats.taken_slow_spurious);
-	debugfs_create_u32("taken_slow_irqenable", 0444, d_spin_debug,
-			   &spinlock_stats.taken_slow_irqenable);
+			   &spinlock_stats.contention_stats[TAKEN_SLOW_SPURIOUS]);
 
-	debugfs_create_u64("released", 0444, d_spin_debug, &spinlock_stats.released);
 	debugfs_create_u32("released_slow", 0444, d_spin_debug,
-			   &spinlock_stats.released_slow);
+			   &spinlock_stats.contention_stats[RELEASED_SLOW]);
 	debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug,
-			   &spinlock_stats.released_slow_kicked);
+			   &spinlock_stats.contention_stats[RELEASED_SLOW_KICKED]);
 
-	debugfs_create_u64("time_spinning", 0444, d_spin_debug,
-			   &spinlock_stats.time_spinning);
 	debugfs_create_u64("time_blocked", 0444, d_spin_debug,
 			   &spinlock_stats.time_blocked);
-	debugfs_create_u64("time_total", 0444, d_spin_debug,
-			   &spinlock_stats.time_total);
 
-	debugfs_create_u32_array("histo_total", 0444, d_spin_debug,
-				spinlock_stats.histo_spin_total, HISTO_BUCKETS + 1);
-	debugfs_create_u32_array("histo_spinning", 0444, d_spin_debug,
-				spinlock_stats.histo_spin_spinning, HISTO_BUCKETS + 1);
 	debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug,
 				spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1);
 
diff --git a/drivers/char/tpm/Kconfig b/drivers/char/tpm/Kconfig
index dbfd564..94c0c74 100644
--- a/drivers/char/tpm/Kconfig
+++ b/drivers/char/tpm/Kconfig
@@ -91,4 +91,16 @@
         To compile this driver as a module, choose M here; the module will be
         called tpm_stm_st33_i2c.
 
+config TCG_XEN
+	tristate "XEN TPM Interface"
+	depends on TCG_TPM && XEN
+	select XEN_XENBUS_FRONTEND
+	---help---
+	  If you want to make TPM support available to a Xen user domain,
+	  say Yes and it will be accessible from within Linux. See
+	  the manpages for xl, xl.conf, and docs/misc/vtpm.txt in
+	  the Xen source repository for more details.
+	  To compile this driver as a module, choose M here; the module
+	  will be called xen-tpmfront.
+
 endif # TCG_TPM
diff --git a/drivers/char/tpm/Makefile b/drivers/char/tpm/Makefile
index a3736c9..eb41ff9 100644
--- a/drivers/char/tpm/Makefile
+++ b/drivers/char/tpm/Makefile
@@ -18,3 +18,4 @@
 obj-$(CONFIG_TCG_INFINEON) += tpm_infineon.o
 obj-$(CONFIG_TCG_IBMVTPM) += tpm_ibmvtpm.o
 obj-$(CONFIG_TCG_ST33_I2C) += tpm_i2c_stm_st33.o
+obj-$(CONFIG_TCG_XEN) += xen-tpmfront.o
diff --git a/drivers/char/tpm/xen-tpmfront.c b/drivers/char/tpm/xen-tpmfront.c
new file mode 100644
index 0000000..7a7929b
--- /dev/null
+++ b/drivers/char/tpm/xen-tpmfront.c
@@ -0,0 +1,473 @@
+/*
+ * Implementation of the Xen vTPM device frontend
+ *
+ * Author:  Daniel De Graaf <dgdegra@tycho.nsa.gov>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2,
+ * as published by the Free Software Foundation.
+ */
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <xen/events.h>
+#include <xen/interface/io/tpmif.h>
+#include <xen/grant_table.h>
+#include <xen/xenbus.h>
+#include <xen/page.h>
+#include "tpm.h"
+
+struct tpm_private {
+	struct tpm_chip *chip;
+	struct xenbus_device *dev;
+
+	struct vtpm_shared_page *shr;
+
+	unsigned int evtchn;
+	int ring_ref;
+	domid_t backend_id;
+};
+
+enum status_bits {
+	VTPM_STATUS_RUNNING  = 0x1,
+	VTPM_STATUS_IDLE     = 0x2,
+	VTPM_STATUS_RESULT   = 0x4,
+	VTPM_STATUS_CANCELED = 0x8,
+};
+
+static u8 vtpm_status(struct tpm_chip *chip)
+{
+	struct tpm_private *priv = TPM_VPRIV(chip);
+	switch (priv->shr->state) {
+	case VTPM_STATE_IDLE:
+		return VTPM_STATUS_IDLE | VTPM_STATUS_CANCELED;
+	case VTPM_STATE_FINISH:
+		return VTPM_STATUS_IDLE | VTPM_STATUS_RESULT;
+	case VTPM_STATE_SUBMIT:
+	case VTPM_STATE_CANCEL: /* cancel requested, not yet canceled */
+		return VTPM_STATUS_RUNNING;
+	default:
+		return 0;
+	}
+}
+
+static bool vtpm_req_canceled(struct tpm_chip *chip, u8 status)
+{
+	return status & VTPM_STATUS_CANCELED;
+}
+
+static void vtpm_cancel(struct tpm_chip *chip)
+{
+	struct tpm_private *priv = TPM_VPRIV(chip);
+	priv->shr->state = VTPM_STATE_CANCEL;
+	wmb();
+	notify_remote_via_evtchn(priv->evtchn);
+}
+
+static unsigned int shr_data_offset(struct vtpm_shared_page *shr)
+{
+	return sizeof(*shr) + sizeof(u32) * shr->nr_extra_pages;
+}
+
+static int vtpm_send(struct tpm_chip *chip, u8 *buf, size_t count)
+{
+	struct tpm_private *priv = TPM_VPRIV(chip);
+	struct vtpm_shared_page *shr = priv->shr;
+	unsigned int offset = shr_data_offset(shr);
+
+	u32 ordinal;
+	unsigned long duration;
+
+	if (offset > PAGE_SIZE)
+		return -EINVAL;
+
+	if (offset + count > PAGE_SIZE)
+		return -EINVAL;
+
+	/* Wait for completion of any existing command or cancellation */
+	if (wait_for_tpm_stat(chip, VTPM_STATUS_IDLE, chip->vendor.timeout_c,
+			&chip->vendor.read_queue, true) < 0) {
+		vtpm_cancel(chip);
+		return -ETIME;
+	}
+
+	memcpy(offset + (u8 *)shr, buf, count);
+	shr->length = count;
+	barrier();
+	shr->state = VTPM_STATE_SUBMIT;
+	wmb();
+	notify_remote_via_evtchn(priv->evtchn);
+
+	ordinal = be32_to_cpu(((struct tpm_input_header*)buf)->ordinal);
+	duration = tpm_calc_ordinal_duration(chip, ordinal);
+
+	if (wait_for_tpm_stat(chip, VTPM_STATUS_IDLE, duration,
+			&chip->vendor.read_queue, true) < 0) {
+		/* got a signal or timeout, try to cancel */
+		vtpm_cancel(chip);
+		return -ETIME;
+	}
+
+	return count;
+}
+
+static int vtpm_recv(struct tpm_chip *chip, u8 *buf, size_t count)
+{
+	struct tpm_private *priv = TPM_VPRIV(chip);
+	struct vtpm_shared_page *shr = priv->shr;
+	unsigned int offset = shr_data_offset(shr);
+	size_t length = shr->length;
+
+	if (shr->state == VTPM_STATE_IDLE)
+		return -ECANCELED;
+
+	/* In theory the wait at the end of _send makes this one unnecessary */
+	if (wait_for_tpm_stat(chip, VTPM_STATUS_RESULT, chip->vendor.timeout_c,
+			&chip->vendor.read_queue, true) < 0) {
+		vtpm_cancel(chip);
+		return -ETIME;
+	}
+
+	if (offset > PAGE_SIZE)
+		return -EIO;
+
+	if (offset + length > PAGE_SIZE)
+		length = PAGE_SIZE - offset;
+
+	if (length > count)
+		length = count;
+
+	memcpy(buf, offset + (u8 *)shr, length);
+
+	return length;
+}
+
+ssize_t tpm_show_locality(struct device *dev, struct device_attribute *attr,
+			  char *buf)
+{
+	struct tpm_chip *chip = dev_get_drvdata(dev);
+	struct tpm_private *priv = TPM_VPRIV(chip);
+	u8 locality = priv->shr->locality;
+
+	return sprintf(buf, "%d\n", locality);
+}
+
+ssize_t tpm_store_locality(struct device *dev, struct device_attribute *attr,
+			const char *buf, size_t len)
+{
+	struct tpm_chip *chip = dev_get_drvdata(dev);
+	struct tpm_private *priv = TPM_VPRIV(chip);
+	u8 val;
+
+	int rv = kstrtou8(buf, 0, &val);
+	if (rv)
+		return rv;
+
+	priv->shr->locality = val;
+
+	return len;
+}
+
+static const struct file_operations vtpm_ops = {
+	.owner = THIS_MODULE,
+	.llseek = no_llseek,
+	.open = tpm_open,
+	.read = tpm_read,
+	.write = tpm_write,
+	.release = tpm_release,
+};
+
+static DEVICE_ATTR(pubek, S_IRUGO, tpm_show_pubek, NULL);
+static DEVICE_ATTR(pcrs, S_IRUGO, tpm_show_pcrs, NULL);
+static DEVICE_ATTR(enabled, S_IRUGO, tpm_show_enabled, NULL);
+static DEVICE_ATTR(active, S_IRUGO, tpm_show_active, NULL);
+static DEVICE_ATTR(owned, S_IRUGO, tpm_show_owned, NULL);
+static DEVICE_ATTR(temp_deactivated, S_IRUGO, tpm_show_temp_deactivated,
+		NULL);
+static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps, NULL);
+static DEVICE_ATTR(cancel, S_IWUSR | S_IWGRP, NULL, tpm_store_cancel);
+static DEVICE_ATTR(durations, S_IRUGO, tpm_show_durations, NULL);
+static DEVICE_ATTR(timeouts, S_IRUGO, tpm_show_timeouts, NULL);
+static DEVICE_ATTR(locality, S_IRUGO | S_IWUSR, tpm_show_locality,
+		tpm_store_locality);
+
+static struct attribute *vtpm_attrs[] = {
+	&dev_attr_pubek.attr,
+	&dev_attr_pcrs.attr,
+	&dev_attr_enabled.attr,
+	&dev_attr_active.attr,
+	&dev_attr_owned.attr,
+	&dev_attr_temp_deactivated.attr,
+	&dev_attr_caps.attr,
+	&dev_attr_cancel.attr,
+	&dev_attr_durations.attr,
+	&dev_attr_timeouts.attr,
+	&dev_attr_locality.attr,
+	NULL,
+};
+
+static struct attribute_group vtpm_attr_grp = {
+	.attrs = vtpm_attrs,
+};
+
+#define TPM_LONG_TIMEOUT   (10 * 60 * HZ)
+
+static const struct tpm_vendor_specific tpm_vtpm = {
+	.status = vtpm_status,
+	.recv = vtpm_recv,
+	.send = vtpm_send,
+	.cancel = vtpm_cancel,
+	.req_complete_mask = VTPM_STATUS_IDLE | VTPM_STATUS_RESULT,
+	.req_complete_val  = VTPM_STATUS_IDLE | VTPM_STATUS_RESULT,
+	.req_canceled      = vtpm_req_canceled,
+	.attr_group = &vtpm_attr_grp,
+	.miscdev = {
+		.fops = &vtpm_ops,
+	},
+	.duration = {
+		TPM_LONG_TIMEOUT,
+		TPM_LONG_TIMEOUT,
+		TPM_LONG_TIMEOUT,
+	},
+};
+
+static irqreturn_t tpmif_interrupt(int dummy, void *dev_id)
+{
+	struct tpm_private *priv = dev_id;
+
+	switch (priv->shr->state) {
+	case VTPM_STATE_IDLE:
+	case VTPM_STATE_FINISH:
+		wake_up_interruptible(&priv->chip->vendor.read_queue);
+		break;
+	case VTPM_STATE_SUBMIT:
+	case VTPM_STATE_CANCEL:
+	default:
+		break;
+	}
+	return IRQ_HANDLED;
+}
+
+static int setup_chip(struct device *dev, struct tpm_private *priv)
+{
+	struct tpm_chip *chip;
+
+	chip = tpm_register_hardware(dev, &tpm_vtpm);
+	if (!chip)
+		return -ENODEV;
+
+	init_waitqueue_head(&chip->vendor.read_queue);
+
+	priv->chip = chip;
+	TPM_VPRIV(chip) = priv;
+
+	return 0;
+}
+
+/* caller must clean up in case of errors */
+static int setup_ring(struct xenbus_device *dev, struct tpm_private *priv)
+{
+	struct xenbus_transaction xbt;
+	const char *message = NULL;
+	int rv;
+
+	priv->shr = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
+	if (!priv->shr) {
+		xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
+		return -ENOMEM;
+	}
+
+	rv = xenbus_grant_ring(dev, virt_to_mfn(priv->shr));
+	if (rv < 0)
+		return rv;
+
+	priv->ring_ref = rv;
+
+	rv = xenbus_alloc_evtchn(dev, &priv->evtchn);
+	if (rv)
+		return rv;
+
+	rv = bind_evtchn_to_irqhandler(priv->evtchn, tpmif_interrupt, 0,
+				       "tpmif", priv);
+	if (rv <= 0) {
+		xenbus_dev_fatal(dev, rv, "allocating TPM irq");
+		return rv;
+	}
+	priv->chip->vendor.irq = rv;
+
+ again:
+	rv = xenbus_transaction_start(&xbt);
+	if (rv) {
+		xenbus_dev_fatal(dev, rv, "starting transaction");
+		return rv;
+	}
+
+	rv = xenbus_printf(xbt, dev->nodename,
+			"ring-ref", "%u", priv->ring_ref);
+	if (rv) {
+		message = "writing ring-ref";
+		goto abort_transaction;
+	}
+
+	rv = xenbus_printf(xbt, dev->nodename, "event-channel", "%u",
+			priv->evtchn);
+	if (rv) {
+		message = "writing event-channel";
+		goto abort_transaction;
+	}
+
+	rv = xenbus_printf(xbt, dev->nodename, "feature-protocol-v2", "1");
+	if (rv) {
+		message = "writing feature-protocol-v2";
+		goto abort_transaction;
+	}
+
+	rv = xenbus_transaction_end(xbt, 0);
+	if (rv == -EAGAIN)
+		goto again;
+	if (rv) {
+		xenbus_dev_fatal(dev, rv, "completing transaction");
+		return rv;
+	}
+
+	xenbus_switch_state(dev, XenbusStateInitialised);
+
+	return 0;
+
+ abort_transaction:
+	xenbus_transaction_end(xbt, 1);
+	if (message)
+		xenbus_dev_error(dev, rv, "%s", message);
+
+	return rv;
+}
+
+static void ring_free(struct tpm_private *priv)
+{
+	if (!priv)
+		return;
+
+	if (priv->ring_ref)
+		gnttab_end_foreign_access(priv->ring_ref, 0,
+				(unsigned long)priv->shr);
+	else
+		free_page((unsigned long)priv->shr);
+
+	if (priv->chip && priv->chip->vendor.irq)
+		unbind_from_irqhandler(priv->chip->vendor.irq, priv);
+
+	kfree(priv);
+}
+
+static int tpmfront_probe(struct xenbus_device *dev,
+		const struct xenbus_device_id *id)
+{
+	struct tpm_private *priv;
+	int rv;
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv) {
+		xenbus_dev_fatal(dev, -ENOMEM, "allocating priv structure");
+		return -ENOMEM;
+	}
+
+	rv = setup_chip(&dev->dev, priv);
+	if (rv) {
+		kfree(priv);
+		return rv;
+	}
+
+	rv = setup_ring(dev, priv);
+	if (rv) {
+		tpm_remove_hardware(&dev->dev);
+		ring_free(priv);
+		return rv;
+	}
+
+	tpm_get_timeouts(priv->chip);
+
+	dev_set_drvdata(&dev->dev, priv->chip);
+
+	return rv;
+}
+
+static int tpmfront_remove(struct xenbus_device *dev)
+{
+	struct tpm_chip *chip = dev_get_drvdata(&dev->dev);
+	struct tpm_private *priv = TPM_VPRIV(chip);
+	tpm_remove_hardware(&dev->dev);
+	ring_free(priv);
+	TPM_VPRIV(chip) = NULL;
+	return 0;
+}
+
+static int tpmfront_resume(struct xenbus_device *dev)
+{
+	/* A suspend/resume/migrate will interrupt a vTPM anyway */
+	tpmfront_remove(dev);
+	return tpmfront_probe(dev, NULL);
+}
+
+static void backend_changed(struct xenbus_device *dev,
+		enum xenbus_state backend_state)
+{
+	int val;
+
+	switch (backend_state) {
+	case XenbusStateInitialised:
+	case XenbusStateConnected:
+		if (dev->state == XenbusStateConnected)
+			break;
+
+		if (xenbus_scanf(XBT_NIL, dev->otherend,
+				"feature-protocol-v2", "%d", &val) < 0)
+			val = 0;
+		if (!val) {
+			xenbus_dev_fatal(dev, -EINVAL,
+					"vTPM protocol 2 required");
+			return;
+		}
+		xenbus_switch_state(dev, XenbusStateConnected);
+		break;
+
+	case XenbusStateClosing:
+	case XenbusStateClosed:
+		device_unregister(&dev->dev);
+		xenbus_frontend_closed(dev);
+		break;
+	default:
+		break;
+	}
+}
+
+static const struct xenbus_device_id tpmfront_ids[] = {
+	{ "vtpm" },
+	{ "" }
+};
+MODULE_ALIAS("xen:vtpm");
+
+static DEFINE_XENBUS_DRIVER(tpmfront, ,
+		.probe = tpmfront_probe,
+		.remove = tpmfront_remove,
+		.resume = tpmfront_resume,
+		.otherend_changed = backend_changed,
+	);
+
+static int __init xen_tpmfront_init(void)
+{
+	if (!xen_domain())
+		return -ENODEV;
+
+	return xenbus_register_frontend(&tpmfront_driver);
+}
+module_init(xen_tpmfront_init);
+
+static void __exit xen_tpmfront_exit(void)
+{
+	xenbus_unregister_driver(&tpmfront_driver);
+}
+module_exit(xen_tpmfront_exit);
+
+MODULE_AUTHOR("Daniel De Graaf <dgdegra@tycho.nsa.gov>");
+MODULE_DESCRIPTION("Xen vTPM Driver");
+MODULE_LICENSE("GPL");
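
The send path above copies the command immediately after the shared-page
header and any grant-ID slots (shr_data_offset()), bounds-checks it against
the page, and only then flips the state to SUBMIT. A minimal user-space
sketch of that layout math, assuming a 4K page and the vtpm_shared_page
layout from the tpmif.h header added later in this patch; main() and the
fake command bytes are illustrative only:

	#include <assert.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	#define PAGE_SIZE 4096

	struct vtpm_shared_page {
		uint32_t length;        /* request/response length in bytes */
		uint8_t state;
		uint8_t locality;
		uint8_t pad;
		uint8_t nr_extra_pages; /* extra pages for long packets */
		uint32_t extra_pages[]; /* grant IDs */
	};

	/* Mirrors shr_data_offset(): data follows the header and grant IDs. */
	static size_t data_offset(const struct vtpm_shared_page *shr)
	{
		return sizeof(*shr) + sizeof(uint32_t) * shr->nr_extra_pages;
	}

	int main(void)
	{
		static _Alignas(uint32_t) uint8_t page[PAGE_SIZE];
		struct vtpm_shared_page *shr = (void *)page;
		const uint8_t cmd[] = { 0x80, 0x01 };   /* fake TPM tag bytes */
		size_t off = data_offset(shr);

		/* The same two checks vtpm_send() performs before copying. */
		assert(off <= PAGE_SIZE);
		assert(off + sizeof(cmd) <= PAGE_SIZE);
		memcpy(page + off, cmd, sizeof(cmd));
		shr->length = sizeof(cmd);
		printf("command lands at offset %zu\n", off);
		return 0;
	}
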
diff --git a/drivers/tty/hvc/hvc_xen.c b/drivers/tty/hvc/hvc_xen.c
index 682210d..e61c36c 100644
--- a/drivers/tty/hvc/hvc_xen.c
+++ b/drivers/tty/hvc/hvc_xen.c
@@ -208,7 +208,7 @@
 
 	info = vtermno_to_xencons(HVC_COOKIE);
 	if (!info) {
-		info = kzalloc(sizeof(struct xencons_info), GFP_KERNEL | __GFP_ZERO);
+		info = kzalloc(sizeof(struct xencons_info), GFP_KERNEL);
 		if (!info)
 			return -ENOMEM;
 	} else if (info->intf != NULL) {
@@ -257,7 +257,7 @@
 
 	info = vtermno_to_xencons(HVC_COOKIE);
 	if (!info) {
-		info = kzalloc(sizeof(struct xencons_info), GFP_KERNEL | __GFP_ZERO);
+		info = kzalloc(sizeof(struct xencons_info), GFP_KERNEL);
 		if (!info)
 			return -ENOMEM;
 	} else if (info->intf != NULL) {
@@ -284,7 +284,7 @@
 
 	info = vtermno_to_xencons(HVC_COOKIE);
 	if (!info) {
-		info = kzalloc(sizeof(struct xencons_info), GFP_KERNEL | __GFP_ZERO);
+		info = kzalloc(sizeof(struct xencons_info), GFP_KERNEL);
 		if (!info)
 			return -ENOMEM;
 	}
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index 2a2ef97..3101cf6 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -38,6 +38,7 @@
 
 #define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
 
+#include <linux/cpu.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/errno.h>
@@ -52,6 +53,7 @@
 #include <linux/notifier.h>
 #include <linux/memory.h>
 #include <linux/memory_hotplug.h>
+#include <linux/percpu-defs.h>
 
 #include <asm/page.h>
 #include <asm/pgalloc.h>
@@ -90,6 +92,8 @@
 
 /* We increase/decrease in batches which fit in a page */
 static xen_pfn_t frame_list[PAGE_SIZE / sizeof(unsigned long)];
+static DEFINE_PER_CPU(struct page *, balloon_scratch_page);
+
 
 /* List of ballooned pages, threaded through the mem_map array. */
 static LIST_HEAD(ballooned_pages);
@@ -412,7 +416,8 @@
 		if (xen_pv_domain() && !PageHighMem(page)) {
 			ret = HYPERVISOR_update_va_mapping(
 				(unsigned long)__va(pfn << PAGE_SHIFT),
-				__pte_ma(0), 0);
+				pfn_pte(page_to_pfn(__get_cpu_var(balloon_scratch_page)),
+					PAGE_KERNEL_RO), 0);
 			BUG_ON(ret);
 		}
 #endif
@@ -425,7 +430,13 @@
 	/* No more mappings: invalidate P2M and add to balloon. */
 	for (i = 0; i < nr_pages; i++) {
 		pfn = mfn_to_pfn(frame_list[i]);
-		__set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
+		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+			unsigned long p;
+			struct page *pg;
+			pg = __get_cpu_var(balloon_scratch_page);
+			p = page_to_pfn(pg);
+			__set_phys_to_machine(pfn, pfn_to_mfn(p));
+		}
 		balloon_append(pfn_to_page(pfn));
 	}
 
@@ -480,6 +491,18 @@
 	mutex_unlock(&balloon_mutex);
 }
 
+struct page *get_balloon_scratch_page(void)
+{
+	struct page *ret = get_cpu_var(balloon_scratch_page);
+	BUG_ON(ret == NULL);
+	return ret;
+}
+
+void put_balloon_scratch_page(void)
+{
+	put_cpu_var(balloon_scratch_page);
+}
+
 /* Resets the Xen limit, sets new target, and kicks off processing. */
 void balloon_set_new_target(unsigned long target)
 {
@@ -573,13 +596,47 @@
 	}
 }
 
+static int __cpuinit balloon_cpu_notify(struct notifier_block *self,
+				    unsigned long action, void *hcpu)
+{
+	int cpu = (long)hcpu;
+	switch (action) {
+	case CPU_UP_PREPARE:
+		if (per_cpu(balloon_scratch_page, cpu) != NULL)
+			break;
+		per_cpu(balloon_scratch_page, cpu) = alloc_page(GFP_KERNEL);
+		if (per_cpu(balloon_scratch_page, cpu) == NULL) {
+			pr_warn("Failed to allocate balloon_scratch_page for cpu %d\n", cpu);
+			return NOTIFY_BAD;
+		}
+		break;
+	default:
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block balloon_cpu_notifier __cpuinitdata = {
+	.notifier_call	= balloon_cpu_notify,
+};
+
 static int __init balloon_init(void)
 {
-	int i;
+	int i, cpu;
 
 	if (!xen_domain())
 		return -ENODEV;
 
+	for_each_online_cpu(cpu) {
+		per_cpu(balloon_scratch_page, cpu) = alloc_page(GFP_KERNEL);
+		if (per_cpu(balloon_scratch_page, cpu) == NULL) {
+			pr_warn("Failed to allocate balloon_scratch_page for cpu %d\n", cpu);
+			return -ENOMEM;
+		}
+	}
+	register_cpu_notifier(&balloon_cpu_notifier);
+
 	pr_info("Initialising balloon driver\n");
 
 	balloon_stats.current_pages = xen_pv_domain()
@@ -616,4 +673,15 @@
 
 subsys_initcall(balloon_init);
 
+static int __init balloon_clear(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		per_cpu(balloon_scratch_page, cpu) = NULL;
+
+	return 0;
+}
+early_initcall(balloon_clear);
+
 MODULE_LICENSE("GPL");
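
Ballooned-out PV pages no longer leave a hole behind: both the kernel
mapping and the p2m entry are redirected at a per-CPU scratch page, so a
stray read hits harmless read-only memory instead of faulting. A toy model
of the p2m side, with a flat array standing in for the phys-to-machine
table; the names and frame numbers are made up for illustration:

	#include <stdio.h>

	#define NR_PFNS     8
	#define SCRATCH_MFN 0x1000UL    /* stand-in machine frame number */

	static unsigned long p2m[NR_PFNS];

	/* Mirrors __set_phys_to_machine(pfn, pfn_to_mfn(scratch_pfn)). */
	static void balloon_out(unsigned long pfn)
	{
		p2m[pfn] = SCRATCH_MFN;
	}

	int main(void)
	{
		unsigned long pfn;

		for (pfn = 0; pfn < NR_PFNS; pfn++)
			p2m[pfn] = 0x2000UL + pfn;      /* pretend backing frames */

		balloon_out(3);
		balloon_out(5);

		for (pfn = 0; pfn < NR_PFNS; pfn++)
			printf("pfn %lu -> mfn %#lx%s\n", pfn, p2m[pfn],
			       p2m[pfn] == SCRATCH_MFN ? " (scratch)" : "");
		return 0;
	}
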
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 5e8be46..4035e83 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -56,6 +56,7 @@
 #include <xen/interface/hvm/params.h>
 #include <xen/interface/physdev.h>
 #include <xen/interface/sched.h>
+#include <xen/interface/vcpu.h>
 #include <asm/hw_irq.h>
 
 /*
@@ -1212,7 +1213,17 @@
 
 void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
 {
-	int irq = per_cpu(ipi_to_irq, cpu)[vector];
+	int irq;
+
+#ifdef CONFIG_X86
+	if (unlikely(vector == XEN_NMI_VECTOR)) {
+		int rc = HYPERVISOR_vcpu_op(VCPUOP_send_nmi, cpu, NULL);
+		if (rc < 0)
+			printk(KERN_WARNING "Sending NMI to CPU%d failed (rc:%d)\n", cpu, rc);
+		return;
+	}
+#endif
+	irq = per_cpu(ipi_to_irq, cpu)[vector];
 	BUG_ON(irq < 0);
 	notify_remote_via_irq(irq);
 }
@@ -1379,14 +1390,21 @@
 
 			pending_bits = active_evtchns(cpu, s, word_idx);
 			bit_idx = 0; /* usually scan entire word from start */
+			/*
+			 * We scan the starting word in two parts.
+			 *
+			 * 1st time: start in the middle, scanning the
+			 * upper bits.
+			 *
+			 * 2nd time: scan the whole word (not just the
+			 * parts skipped in the first pass) -- if an
+			 * event in the previously scanned bits is
+			 * pending again it would just be scanned on
+			 * the next loop anyway.
+			 */
 			if (word_idx == start_word_idx) {
-				/* We scan the starting word in two parts */
 				if (i == 0)
-					/* 1st time: start in the middle */
 					bit_idx = start_bit_idx;
-				else
-					/* 2nd time: mask bits done already */
-					bit_idx &= (1UL << start_bit_idx) - 1;
 			}
 
 			do {
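
The reworked comment above spells out the fairness trick in the
event-channel scan: processing resumes mid-word where the previous pass
stopped, and the wrap-around pass may safely rescan the whole word because
handled events are cleared. A stand-alone sketch of scanning one pending
word from an arbitrary start bit; the word value and start bit are made up:

	#include <stdio.h>

	static void scan_word(unsigned long pending, unsigned start_bit)
	{
		unsigned pass, i;

		for (pass = 0; pass < 2; pass++) {
			/* 1st pass: upper bits only; 2nd pass: whole word. */
			unsigned first = (pass == 0) ? start_bit : 0;

			for (i = first; i < 8 * sizeof(pending); i++)
				if (pending & (1UL << i)) {
					printf("pass %u: event %u\n", pass, i);
					pending &= ~(1UL << i); /* consumed */
				}
		}
	}

	int main(void)
	{
		/* events 0, 3 and 31 pending; previous scan stopped at bit 16 */
		scan_word((1UL << 0) | (1UL << 3) | (1UL << 31), 16);
		return 0;
	}
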
diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
index b6165e0..8b3a69a 100644
--- a/drivers/xen/evtchn.c
+++ b/drivers/xen/evtchn.c
@@ -57,6 +57,7 @@
 
 struct per_user_data {
 	struct mutex bind_mutex; /* serialize bind/unbind operations */
+	struct rb_root evtchns;
 
 	/* Notification ring, accessed via /dev/xen/evtchn. */
 #define EVTCHN_RING_SIZE     (PAGE_SIZE / sizeof(evtchn_port_t))
@@ -64,6 +65,7 @@
 	evtchn_port_t *ring;
 	unsigned int ring_cons, ring_prod, ring_overflow;
 	struct mutex ring_cons_mutex; /* protect against concurrent readers */
+	spinlock_t ring_prod_lock; /* protect against concurrent interrupts */
 
 	/* Processes wait on this queue when ring is empty. */
 	wait_queue_head_t evtchn_wait;
@@ -71,54 +73,79 @@
 	const char *name;
 };
 
-/*
- * Who's bound to each port?  This is logically an array of struct
- * per_user_data *, but we encode the current enabled-state in bit 0.
- */
-static unsigned long *port_user;
-static DEFINE_SPINLOCK(port_user_lock); /* protects port_user[] and ring_prod */
+struct user_evtchn {
+	struct rb_node node;
+	struct per_user_data *user;
+	unsigned port;
+	bool enabled;
+};
 
-static inline struct per_user_data *get_port_user(unsigned port)
+static int add_evtchn(struct per_user_data *u, struct user_evtchn *evtchn)
 {
-	return (struct per_user_data *)(port_user[port] & ~1);
+	struct rb_node **new = &(u->evtchns.rb_node), *parent = NULL;
+
+	while (*new) {
+		struct user_evtchn *this;
+
+		this = container_of(*new, struct user_evtchn, node);
+
+		parent = *new;
+		if (this->port < evtchn->port)
+			new = &((*new)->rb_left);
+		else if (this->port > evtchn->port)
+			new = &((*new)->rb_right);
+		else
+			return -EEXIST;
+	}
+
+	/* Add new node and rebalance tree. */
+	rb_link_node(&evtchn->node, parent, new);
+	rb_insert_color(&evtchn->node, &u->evtchns);
+
+	return 0;
 }
 
-static inline void set_port_user(unsigned port, struct per_user_data *u)
+static void del_evtchn(struct per_user_data *u, struct user_evtchn *evtchn)
 {
-	port_user[port] = (unsigned long)u;
+	rb_erase(&evtchn->node, &u->evtchns);
+	kfree(evtchn);
 }
 
-static inline bool get_port_enabled(unsigned port)
+static struct user_evtchn *find_evtchn(struct per_user_data *u, unsigned port)
 {
-	return port_user[port] & 1;
-}
+	struct rb_node *node = u->evtchns.rb_node;
 
-static inline void set_port_enabled(unsigned port, bool enabled)
-{
-	if (enabled)
-		port_user[port] |= 1;
-	else
-		port_user[port] &= ~1;
+	while (node) {
+		struct user_evtchn *evtchn;
+
+		evtchn = container_of(node, struct user_evtchn, node);
+
+		if (evtchn->port < port)
+			node = node->rb_left;
+		else if (evtchn->port > port)
+			node = node->rb_right;
+		else
+			return evtchn;
+	}
+	return NULL;
 }
 
 static irqreturn_t evtchn_interrupt(int irq, void *data)
 {
-	unsigned int port = (unsigned long)data;
-	struct per_user_data *u;
+	struct user_evtchn *evtchn = data;
+	struct per_user_data *u = evtchn->user;
 
-	spin_lock(&port_user_lock);
-
-	u = get_port_user(port);
-
-	WARN(!get_port_enabled(port),
+	WARN(!evtchn->enabled,
 	     "Interrupt for port %d, but apparently not enabled; per-user %p\n",
-	     port, u);
+	     evtchn->port, u);
 
 	disable_irq_nosync(irq);
-	set_port_enabled(port, false);
+	evtchn->enabled = false;
+
+	spin_lock(&u->ring_prod_lock);
 
 	if ((u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE) {
-		u->ring[EVTCHN_RING_MASK(u->ring_prod)] = port;
+		u->ring[EVTCHN_RING_MASK(u->ring_prod)] = evtchn->port;
 		wmb(); /* Ensure ring contents visible */
 		if (u->ring_cons == u->ring_prod++) {
 			wake_up_interruptible(&u->evtchn_wait);
@@ -128,7 +155,7 @@
 	} else
 		u->ring_overflow = 1;
 
-	spin_unlock(&port_user_lock);
+	spin_unlock(&u->ring_prod_lock);
 
 	return IRQ_HANDLED;
 }
@@ -229,20 +256,20 @@
 	if (copy_from_user(kbuf, buf, count) != 0)
 		goto out;
 
-	spin_lock_irq(&port_user_lock);
+	mutex_lock(&u->bind_mutex);
 
 	for (i = 0; i < (count/sizeof(evtchn_port_t)); i++) {
 		unsigned port = kbuf[i];
+		struct user_evtchn *evtchn;
 
-		if (port < NR_EVENT_CHANNELS &&
-		    get_port_user(port) == u &&
-		    !get_port_enabled(port)) {
-			set_port_enabled(port, true);
+		evtchn = find_evtchn(u, port);
+		if (evtchn && !evtchn->enabled) {
+			evtchn->enabled = true;
 			enable_irq(irq_from_evtchn(port));
 		}
 	}
 
-	spin_unlock_irq(&port_user_lock);
+	mutex_unlock(&u->bind_mutex);
 
 	rc = count;
 
@@ -253,6 +280,8 @@
 
 static int evtchn_bind_to_user(struct per_user_data *u, int port)
 {
+	struct user_evtchn *evtchn;
+	struct evtchn_close close;
 	int rc = 0;
 
 	/*
@@ -263,35 +292,46 @@
 	 * interrupt handler yet, and our caller has already
 	 * serialized bind operations.)
 	 */
-	BUG_ON(get_port_user(port) != NULL);
-	set_port_user(port, u);
-	set_port_enabled(port, true); /* start enabled */
+
+	evtchn = kzalloc(sizeof(*evtchn), GFP_KERNEL);
+	if (!evtchn)
+		return -ENOMEM;
+
+	evtchn->user = u;
+	evtchn->port = port;
+	evtchn->enabled = true; /* start enabled */
+
+	rc = add_evtchn(u, evtchn);
+	if (rc < 0)
+		goto err;
 
 	rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, IRQF_DISABLED,
-				       u->name, (void *)(unsigned long)port);
-	if (rc >= 0)
-		rc = evtchn_make_refcounted(port);
-	else {
-		/* bind failed, should close the port now */
-		struct evtchn_close close;
-		close.port = port;
-		if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
-			BUG();
-		set_port_user(port, NULL);
-	}
+				       u->name, evtchn);
+	if (rc < 0)
+		goto err;
 
+	rc = evtchn_make_refcounted(port);
+	return rc;
+
+err:
+	/* bind failed, should close the port now */
+	close.port = port;
+	if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
+		BUG();
+	del_evtchn(u, evtchn);
 	return rc;
 }
 
-static void evtchn_unbind_from_user(struct per_user_data *u, int port)
+static void evtchn_unbind_from_user(struct per_user_data *u,
+				    struct user_evtchn *evtchn)
 {
-	int irq = irq_from_evtchn(port);
+	int irq = irq_from_evtchn(evtchn->port);
 
 	BUG_ON(irq < 0);
 
-	unbind_from_irqhandler(irq, (void *)(unsigned long)port);
+	unbind_from_irqhandler(irq, evtchn);
 
-	set_port_user(port, NULL);
+	del_evtchn(u, evtchn);
 }
 
 static long evtchn_ioctl(struct file *file,
@@ -370,6 +410,7 @@
 
 	case IOCTL_EVTCHN_UNBIND: {
 		struct ioctl_evtchn_unbind unbind;
+		struct user_evtchn *evtchn;
 
 		rc = -EFAULT;
 		if (copy_from_user(&unbind, uarg, sizeof(unbind)))
@@ -380,29 +421,27 @@
 			break;
 
 		rc = -ENOTCONN;
-		if (get_port_user(unbind.port) != u)
+		evtchn = find_evtchn(u, unbind.port);
+		if (!evtchn)
 			break;
 
 		disable_irq(irq_from_evtchn(unbind.port));
-
-		evtchn_unbind_from_user(u, unbind.port);
-
+		evtchn_unbind_from_user(u, evtchn);
 		rc = 0;
 		break;
 	}
 
 	case IOCTL_EVTCHN_NOTIFY: {
 		struct ioctl_evtchn_notify notify;
+		struct user_evtchn *evtchn;
 
 		rc = -EFAULT;
 		if (copy_from_user(&notify, uarg, sizeof(notify)))
 			break;
 
-		if (notify.port >= NR_EVENT_CHANNELS) {
-			rc = -EINVAL;
-		} else if (get_port_user(notify.port) != u) {
-			rc = -ENOTCONN;
-		} else {
+		rc = -ENOTCONN;
+		evtchn = find_evtchn(u, notify.port);
+		if (evtchn) {
 			notify_remote_via_evtchn(notify.port);
 			rc = 0;
 		}
@@ -412,9 +451,9 @@
 	case IOCTL_EVTCHN_RESET: {
 		/* Initialise the ring to empty. Clear errors. */
 		mutex_lock(&u->ring_cons_mutex);
-		spin_lock_irq(&port_user_lock);
+		spin_lock_irq(&u->ring_prod_lock);
 		u->ring_cons = u->ring_prod = u->ring_overflow = 0;
-		spin_unlock_irq(&port_user_lock);
+		spin_unlock_irq(&u->ring_prod_lock);
 		mutex_unlock(&u->ring_cons_mutex);
 		rc = 0;
 		break;
@@ -473,6 +512,7 @@
 
 	mutex_init(&u->bind_mutex);
 	mutex_init(&u->ring_cons_mutex);
+	spin_lock_init(&u->ring_prod_lock);
 
 	filp->private_data = u;
 
@@ -481,15 +521,15 @@
 
 static int evtchn_release(struct inode *inode, struct file *filp)
 {
-	int i;
 	struct per_user_data *u = filp->private_data;
+	struct rb_node *node;
 
-	for (i = 0; i < NR_EVENT_CHANNELS; i++) {
-		if (get_port_user(i) != u)
-			continue;
+	while ((node = u->evtchns.rb_node)) {
+		struct user_evtchn *evtchn;
 
-		disable_irq(irq_from_evtchn(i));
-		evtchn_unbind_from_user(get_port_user(i), i);
+		evtchn = rb_entry(node, struct user_evtchn, node);
+		disable_irq(irq_from_evtchn(evtchn->port));
+		evtchn_unbind_from_user(u, evtchn);
 	}
 
 	free_page((unsigned long)u->ring);
@@ -523,12 +563,6 @@
 	if (!xen_domain())
 		return -ENODEV;
 
-	port_user = kcalloc(NR_EVENT_CHANNELS, sizeof(*port_user), GFP_KERNEL);
-	if (port_user == NULL)
-		return -ENOMEM;
-
-	spin_lock_init(&port_user_lock);
-
 	/* Create '/dev/xen/evtchn'. */
 	err = misc_register(&evtchn_miscdev);
 	if (err != 0) {
@@ -543,9 +577,6 @@
 
 static void __exit evtchn_cleanup(void)
 {
-	kfree(port_user);
-	port_user = NULL;
-
 	misc_deregister(&evtchn_miscdev);
 }
 
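
Per-user ports are now kept in an rb-tree instead of a global array;
add_evtchn() and find_evtchn() above walk it with the same comparison in
both directions, and rb_link_node()/rb_insert_color() handle the
rebalancing. A user-space sketch of that add/find discipline using a plain
(unbalanced) binary search tree, since the kernel rb-tree library is not
available outside the tree; struct node and main() are illustrative:

	#include <stdio.h>
	#include <stdlib.h>

	struct node {
		unsigned port;
		struct node *left, *right;
	};

	static int add(struct node **link, struct node *n)
	{
		while (*link) {
			if ((*link)->port < n->port)    /* same test as add_evtchn */
				link = &(*link)->left;
			else if ((*link)->port > n->port)
				link = &(*link)->right;
			else
				return -1;              /* -EEXIST */
		}
		*link = n;                              /* rb_link_node() analogue */
		return 0;
	}

	static struct node *find(struct node *root, unsigned port)
	{
		while (root) {
			if (root->port < port)
				root = root->left;
			else if (root->port > port)
				root = root->right;
			else
				return root;
		}
		return NULL;
	}

	int main(void)
	{
		struct node *root = NULL;
		unsigned ports[] = { 5, 2, 9 };
		unsigned i;

		for (i = 0; i < 3; i++) {
			struct node *n = calloc(1, sizeof(*n));
			n->port = ports[i];
			add(&root, n);
		}
		printf("port 9 %sfound, port 7 %sfound\n",
		       find(root, 9) ? "" : "not ", find(root, 7) ? "" : "not ");
		return 0;
	}
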
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index eab5427..e41c79c 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -272,19 +272,12 @@
 		 * with find_grant_ptes.
 		 */
 		for (i = 0; i < map->count; i++) {
-			unsigned level;
 			unsigned long address = (unsigned long)
 				pfn_to_kaddr(page_to_pfn(map->pages[i]));
-			pte_t *ptep;
-			u64 pte_maddr = 0;
 			BUG_ON(PageHighMem(map->pages[i]));
 
-			ptep = lookup_address(address, &level);
-			pte_maddr = arbitrary_virt_to_machine(ptep).maddr;
-			gnttab_set_map_op(&map->kmap_ops[i], pte_maddr,
-				map->flags |
-				GNTMAP_host_map |
-				GNTMAP_contains_pte,
+			gnttab_set_map_op(&map->kmap_ops[i], address,
+				map->flags | GNTMAP_host_map,
 				map->grants[i].ref,
 				map->grants[i].domid);
 		}
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 04cdeb8..c4d2298 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -730,9 +730,18 @@
 				  void (*fn)(void *), void *arg, u16 count)
 {
 	unsigned long flags;
+	struct gnttab_free_callback *cb;
+
 	spin_lock_irqsave(&gnttab_list_lock, flags);
-	if (callback->next)
-		goto out;
+
+	/* Check if the callback is already on the list */
+	cb = gnttab_free_callback_list;
+	while (cb) {
+		if (cb == callback)
+			goto out;
+		cb = cb->next;
+	}
+
 	callback->fn = fn;
 	callback->arg = arg;
 	callback->count = count;
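
The old guard tested callback->next, which reads as "already queued" but
misses a callback sitting at the tail of the list, where next is
legitimately NULL; re-adding it would corrupt the list. The fix walks the
list for real membership. A stand-alone model of the failure the walk
prevents; the names are illustrative:

	#include <stdbool.h>
	#include <stdio.h>

	struct cb {
		struct cb *next;
	};

	static struct cb *list;

	static bool queued(struct cb *callback)
	{
		struct cb *cb;

		for (cb = list; cb; cb = cb->next)
			if (cb == callback)
				return true;
		return false;
	}

	static void request(struct cb *callback)
	{
		if (queued(callback))
			return;         /* re-adding would corrupt the list */
		callback->next = list;
		list = callback;
	}

	int main(void)
	{
		struct cb a = { 0 }, b = { 0 };

		request(&a);    /* a becomes the tail: a.next == NULL */
		request(&b);
		request(&a);    /* correctly ignored despite a.next == NULL */
		printf("head: %p\n", (void *)list);
		return 0;
	}
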
diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
index f8e5dd70..8e74590 100644
--- a/drivers/xen/privcmd.c
+++ b/drivers/xen/privcmd.c
@@ -43,9 +43,10 @@
 
 #define PRIV_VMA_LOCKED ((void *)1)
 
-#ifndef HAVE_ARCH_PRIVCMD_MMAP
-static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma);
-#endif
+static int privcmd_vma_range_is_mapped(
+               struct vm_area_struct *vma,
+               unsigned long addr,
+               unsigned long nr_pages);
 
 static long privcmd_ioctl_hypercall(void __user *udata)
 {
@@ -225,9 +226,9 @@
 		vma = find_vma(mm, msg->va);
 		rc = -EINVAL;
 
-		if (!vma || (msg->va != vma->vm_start) ||
-		    !privcmd_enforce_singleshot_mapping(vma))
+		if (!vma || (msg->va != vma->vm_start) || vma->vm_private_data)
 			goto out_up;
+		vma->vm_private_data = PRIV_VMA_LOCKED;
 	}
 
 	state.va = vma->vm_start;
@@ -358,7 +359,7 @@
 		kfree(pages);
 		return -ENOMEM;
 	}
-	BUG_ON(vma->vm_private_data != PRIV_VMA_LOCKED);
+	BUG_ON(vma->vm_private_data != NULL);
 	vma->vm_private_data = pages;
 
 	return 0;
@@ -421,19 +422,43 @@
 
 	vma = find_vma(mm, m.addr);
 	if (!vma ||
-	    vma->vm_ops != &privcmd_vm_ops ||
-	    (m.addr != vma->vm_start) ||
-	    ((m.addr + (nr_pages << PAGE_SHIFT)) != vma->vm_end) ||
-	    !privcmd_enforce_singleshot_mapping(vma)) {
-		up_write(&mm->mmap_sem);
+	    vma->vm_ops != &privcmd_vm_ops) {
 		ret = -EINVAL;
-		goto out;
+		goto out_unlock;
 	}
-	if (xen_feature(XENFEAT_auto_translated_physmap)) {
-		ret = alloc_empty_pages(vma, m.num);
-		if (ret < 0) {
-			up_write(&mm->mmap_sem);
-			goto out;
+
+	/*
+	 * Caller must either:
+	 *
+	 * Map the whole VMA range, which will also allocate all the
+	 * pages required for the auto_translated_physmap case.
+	 *
+	 * Or
+	 *
+	 * Map unmapped holes left from a previous map attempt (e.g.,
+	 * because those foreign frames were previously paged out).
+	 */
+	if (vma->vm_private_data == NULL) {
+		if (m.addr != vma->vm_start ||
+		    m.addr + (nr_pages << PAGE_SHIFT) != vma->vm_end) {
+			ret = -EINVAL;
+			goto out_unlock;
+		}
+		if (xen_feature(XENFEAT_auto_translated_physmap)) {
+			ret = alloc_empty_pages(vma, m.num);
+			if (ret < 0)
+				goto out_unlock;
+		} else
+			vma->vm_private_data = PRIV_VMA_LOCKED;
+	} else {
+		if (m.addr < vma->vm_start ||
+		    m.addr + (nr_pages << PAGE_SHIFT) > vma->vm_end) {
+			ret = -EINVAL;
+			goto out_unlock;
+		}
+		if (privcmd_vma_range_is_mapped(vma, m.addr, nr_pages)) {
+			ret = -EINVAL;
+			goto out_unlock;
 		}
 	}
 
@@ -466,8 +491,11 @@
 
 out:
 	free_page_list(&pagelist);
-
 	return ret;
+
+out_unlock:
+	up_write(&mm->mmap_sem);
+	goto out;
 }
 
 static long privcmd_ioctl(struct file *file,
@@ -540,9 +568,24 @@
 	return 0;
 }
 
-static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma)
+/*
+ * For MMAPBATCH*. This allows asserting the singleshot mapping
+ * on a per pfn/pte basis. Mapping calls that fail with ENOENT
+ * can then be retried until success.
+ */
+static int is_mapped_fn(pte_t *pte, struct page *pmd_page,
+	                unsigned long addr, void *data)
 {
-	return !cmpxchg(&vma->vm_private_data, NULL, PRIV_VMA_LOCKED);
+	return pte_none(*pte) ? 0 : -EBUSY;
+}
+
+static int privcmd_vma_range_is_mapped(
+	           struct vm_area_struct *vma,
+	           unsigned long addr,
+	           unsigned long nr_pages)
+{
+	return apply_to_page_range(vma->vm_mm, addr, nr_pages << PAGE_SHIFT,
+				   is_mapped_fn, NULL) != 0;
 }
 
 const struct file_operations xen_privcmd_fops = {
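
privcmd_vma_range_is_mapped() replaces the single-shot VMA flag with a
per-pte probe: apply_to_page_range() runs is_mapped_fn() on every pte in
the range and fails fast with -EBUSY at the first present one, so a retry
after -ENOENT may only fill still-empty holes. A toy model with an array of
slots standing in for the page tables; sizes and names are illustrative:

	#include <stdbool.h>
	#include <stdio.h>

	#define NR_SLOTS 8

	static unsigned long ptes[NR_SLOTS];   /* 0 plays the role of pte_none */

	/* Mirrors the walk: fail fast at the first already-mapped slot. */
	static bool range_is_mapped(unsigned start, unsigned nr)
	{
		unsigned i;

		for (i = start; i < start + nr; i++)
			if (ptes[i] != 0)
				return true;    /* is_mapped_fn() -> -EBUSY */
		return false;
	}

	int main(void)
	{
		ptes[2] = 0xabc;        /* one frame mapped by an earlier attempt */

		printf("[0,2) mapped? %d\n", range_is_mapped(0, 2)); /* 0: hole, retry ok */
		printf("[1,4) mapped? %d\n", range_is_mapped(1, 3)); /* 1: rejected */
		return 0;
	}
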
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index aadffcf..1b2277c 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -506,13 +506,13 @@
 				   to do proper error handling. */
 				xen_swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir,
 							   attrs);
-				sgl[0].dma_length = 0;
+				sg_dma_len(sgl) = 0;
 				return DMA_ERROR_CODE;
 			}
 			sg->dma_address = xen_phys_to_bus(map);
 		} else
 			sg->dma_address = dev_addr;
-		sg->dma_length = sg->length;
+		sg_dma_len(sg) = sg->length;
 	}
 	return nelems;
 }
@@ -533,7 +533,7 @@
 	BUG_ON(dir == DMA_NONE);
 
 	for_each_sg(sgl, sg, nelems, i)
-		xen_unmap_single(hwdev, sg->dma_address, sg->dma_length, dir);
+		xen_unmap_single(hwdev, sg->dma_address, sg_dma_len(sg), dir);
 
 }
 EXPORT_SYMBOL_GPL(xen_swiotlb_unmap_sg_attrs);
@@ -555,7 +555,7 @@
 
 	for_each_sg(sgl, sg, nelems, i)
 		xen_swiotlb_sync_single(hwdev, sg->dma_address,
-					sg->dma_length, dir, target);
+					sg_dma_len(sg), dir, target);
 }
 
 void
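
sg->dma_length only exists on architectures that select
CONFIG_NEED_SG_DMA_LENGTH, so writing the field directly breaks the build
elsewhere; sg_dma_len() resolves to whichever field is present. A sketch of
that accessor pattern, with a stand-in macro instead of the real kernel
config symbol and struct definitions:

	#include <stdio.h>

	#define NEED_DMA_LENGTH 1   /* stand-in for CONFIG_NEED_SG_DMA_LENGTH */

	struct scatterlist {
		unsigned int length;            /* CPU-side length, always there */
	#if NEED_DMA_LENGTH
		unsigned int dma_length;        /* bus-side length, arch-dependent */
	#endif
	};

	/* One accessor, usable as an lvalue, whichever field the arch has. */
	#if NEED_DMA_LENGTH
	#define sg_dma_len(sg) ((sg)->dma_length)
	#else
	#define sg_dma_len(sg) ((sg)->length)
	#endif

	int main(void)
	{
		struct scatterlist sg = { .length = 4096 };

		sg_dma_len(&sg) = sg.length;    /* the idiom used in the patch */
		printf("dma len %u\n", sg_dma_len(&sg));
		return 0;
	}
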
diff --git a/drivers/xen/xen-selfballoon.c b/drivers/xen/xen-selfballoon.c
index 02817a8..21e18c1 100644
--- a/drivers/xen/xen-selfballoon.c
+++ b/drivers/xen/xen-selfballoon.c
@@ -265,8 +265,10 @@
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	err = strict_strtoul(buf, 10, &tmp);
-	if (err || ((tmp != 0) && (tmp != 1)))
+	err = kstrtoul(buf, 10, &tmp);
+	if (err)
+		return err;
+	if ((tmp != 0) && (tmp != 1))
 		return -EINVAL;
 
 	xen_selfballooning_enabled = !!tmp;
@@ -292,8 +294,10 @@
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
-	err = strict_strtoul(buf, 10, &val);
-	if (err || val == 0)
+	err = kstrtoul(buf, 10, &val);
+	if (err)
+		return err;
+	if (val == 0)
 		return -EINVAL;
 	selfballoon_interval = val;
 	return count;
@@ -314,8 +318,10 @@
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
-	err = strict_strtoul(buf, 10, &val);
-	if (err || val == 0)
+	err = kstrtoul(buf, 10, &val);
+	if (err)
+		return err;
+	if (val == 0)
 		return -EINVAL;
 	selfballoon_downhysteresis = val;
 	return count;
@@ -337,8 +343,10 @@
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
-	err = strict_strtoul(buf, 10, &val);
-	if (err || val == 0)
+	err = kstrtoul(buf, 10, &val);
+	if (err)
+		return err;
+	if (val == 0)
 		return -EINVAL;
 	selfballoon_uphysteresis = val;
 	return count;
@@ -360,8 +368,10 @@
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
-	err = strict_strtoul(buf, 10, &val);
-	if (err || val == 0)
+	err = kstrtoul(buf, 10, &val);
+	if (err)
+		return err;
+	if (val == 0)
 		return -EINVAL;
 	selfballoon_min_usable_mb = val;
 	return count;
@@ -384,8 +394,10 @@
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
-	err = strict_strtoul(buf, 10, &val);
-	if (err || val == 0)
+	err = kstrtoul(buf, 10, &val);
+	if (err)
+		return err;
+	if (val == 0)
 		return -EINVAL;
 	selfballoon_reserved_mb = val;
 	return count;
@@ -410,8 +422,10 @@
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
-	err = strict_strtoul(buf, 10, &tmp);
-	if (err || ((tmp != 0) && (tmp != 1)))
+	err = kstrtoul(buf, 10, &tmp);
+	if (err)
+		return err;
+	if ((tmp != 0) && (tmp != 1))
 		return -EINVAL;
 	frontswap_selfshrinking = !!tmp;
 	if (!was_enabled && !xen_selfballooning_enabled &&
@@ -437,8 +451,10 @@
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
-	err = strict_strtoul(buf, 10, &val);
-	if (err || val == 0)
+	err = kstrtoul(buf, 10, &val);
+	if (err)
+		return err;
+	if (val == 0)
 		return -EINVAL;
 	frontswap_inertia = val;
 	frontswap_inertia_counter = val;
@@ -460,8 +476,10 @@
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
-	err = strict_strtoul(buf, 10, &val);
-	if (err || val == 0)
+	err = kstrtoul(buf, 10, &val);
+	if (err)
+		return err;
+	if (val == 0)
 		return -EINVAL;
 	frontswap_hysteresis = val;
 	return count;
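
strict_strtoul() folded parse errors and the handlers' own range checks
into one branch; the kstrtoul() conversion above propagates the parse error
as-is and keeps -EINVAL for out-of-range values. A user-space model of the
resulting store-handler shape, with strtoul() standing in for kstrtoul();
the function names are illustrative:

	#include <errno.h>
	#include <stdio.h>
	#include <stdlib.h>

	/* strtoul with kstrtoul-like strictness: whole string must parse. */
	static int parse_ul(const char *buf, unsigned long *out)
	{
		char *end;

		errno = 0;
		*out = strtoul(buf, &end, 10);
		if (errno || end == buf || *end != '\0')
			return -EINVAL;
		return 0;
	}

	static int store_interval(const char *buf)
	{
		unsigned long val;
		int err = parse_ul(buf, &val);

		if (err)
			return err;             /* propagate the parse error */
		if (val == 0)
			return -EINVAL;         /* range check stays separate */
		printf("interval set to %lu\n", val);
		return 0;
	}

	int main(void)
	{
		store_interval("30");
		printf("bogus -> %d\n", store_interval("3x"));
		printf("zero  -> %d\n", store_interval("0"));
		return 0;
	}
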
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 0976fc4..a507907 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -48,7 +48,6 @@
 
 #include <linux/types.h>
 #include <linux/compiler.h>
-#include <linux/workqueue.h>
 
 #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL)
 
@@ -61,12 +60,6 @@
 #endif
 };
 
-struct static_key_deferred {
-	struct static_key key;
-	unsigned long timeout;
-	struct delayed_work work;
-};
-
 # include <asm/jump_label.h>
 # define HAVE_JUMP_LABEL
 #endif	/* CC_HAVE_ASM_GOTO && CONFIG_JUMP_LABEL */
@@ -78,6 +71,7 @@
 
 struct module;
 
+#include <linux/atomic.h>
 #ifdef HAVE_JUMP_LABEL
 
 #define JUMP_LABEL_TRUE_BRANCH 1UL
@@ -119,10 +113,7 @@
 extern int jump_label_text_reserved(void *start, void *end);
 extern void static_key_slow_inc(struct static_key *key);
 extern void static_key_slow_dec(struct static_key *key);
-extern void static_key_slow_dec_deferred(struct static_key_deferred *key);
 extern void jump_label_apply_nops(struct module *mod);
-extern void
-jump_label_rate_limit(struct static_key_deferred *key, unsigned long rl);
 
 #define STATIC_KEY_INIT_TRUE ((struct static_key) \
 	{ .enabled = ATOMIC_INIT(1), .entries = (void *)1 })
@@ -131,8 +122,6 @@
 
 #else  /* !HAVE_JUMP_LABEL */
 
-#include <linux/atomic.h>
-
 struct static_key {
 	atomic_t enabled;
 };
@@ -141,10 +130,6 @@
 {
 }
 
-struct static_key_deferred {
-	struct static_key  key;
-};
-
 static __always_inline bool static_key_false(struct static_key *key)
 {
 	if (unlikely(atomic_read(&key->enabled)) > 0)
@@ -169,11 +154,6 @@
 	atomic_dec(&key->enabled);
 }
 
-static inline void static_key_slow_dec_deferred(struct static_key_deferred *key)
-{
-	static_key_slow_dec(&key->key);
-}
-
 static inline int jump_label_text_reserved(void *start, void *end)
 {
 	return 0;
@@ -187,12 +167,6 @@
 	return 0;
 }
 
-static inline void
-jump_label_rate_limit(struct static_key_deferred *key,
-		unsigned long rl)
-{
-}
-
 #define STATIC_KEY_INIT_TRUE ((struct static_key) \
 		{ .enabled = ATOMIC_INIT(1) })
 #define STATIC_KEY_INIT_FALSE ((struct static_key) \
diff --git a/include/linux/jump_label_ratelimit.h b/include/linux/jump_label_ratelimit.h
new file mode 100644
index 0000000..1137883
--- /dev/null
+++ b/include/linux/jump_label_ratelimit.h
@@ -0,0 +1,34 @@
+#ifndef _LINUX_JUMP_LABEL_RATELIMIT_H
+#define _LINUX_JUMP_LABEL_RATELIMIT_H
+
+#include <linux/jump_label.h>
+#include <linux/workqueue.h>
+
+#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL)
+struct static_key_deferred {
+	struct static_key key;
+	unsigned long timeout;
+	struct delayed_work work;
+};
+#endif
+
+#ifdef HAVE_JUMP_LABEL
+extern void static_key_slow_dec_deferred(struct static_key_deferred *key);
+extern void
+jump_label_rate_limit(struct static_key_deferred *key, unsigned long rl);
+
+#else	/* !HAVE_JUMP_LABEL */
+struct static_key_deferred {
+	struct static_key  key;
+};
+static inline void static_key_slow_dec_deferred(struct static_key_deferred *key)
+{
+	static_key_slow_dec(&key->key);
+}
+static inline void
+jump_label_rate_limit(struct static_key_deferred *key,
+		unsigned long rl)
+{
+}
+#endif	/* HAVE_JUMP_LABEL */
+#endif	/* _LINUX_JUMP_LABEL_RATELIMIT_H */
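
Splitting static_key_deferred into its own header breaks jump_label.h's
dependency on workqueue.h; users of the deferred API now include
jump_label_ratelimit.h explicitly, as perf_event.h does below. The
!HAVE_JUMP_LABEL branch also documents the contract: with nothing to patch,
a deferred decrement is just an immediate one. A user-space model of that
fallback, with C11 atomics standing in for atomic_t; names illustrative:

	#include <stdatomic.h>
	#include <stdio.h>

	struct static_key { atomic_int enabled; };
	struct static_key_deferred { struct static_key key; };

	static void key_slow_dec(struct static_key *key)
	{
		atomic_fetch_sub(&key->enabled, 1);
	}

	/* No asm goto: nothing to patch, so "deferred" degenerates to now. */
	static void key_slow_dec_deferred(struct static_key_deferred *key)
	{
		key_slow_dec(&key->key);
	}

	int main(void)
	{
		struct static_key_deferred k;

		atomic_init(&k.key.enabled, 1);
		key_slow_dec_deferred(&k);
		printf("enabled: %d\n", atomic_load(&k.key.enabled));
		return 0;
	}
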
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index c43f6ea..226be8d 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -48,6 +48,7 @@
 #include <linux/cpu.h>
 #include <linux/irq_work.h>
 #include <linux/static_key.h>
+#include <linux/jump_label_ratelimit.h>
 #include <linux/atomic.h>
 #include <linux/sysfs.h>
 #include <linux/perf_regs.h>
diff --git a/include/uapi/linux/kvm_para.h b/include/uapi/linux/kvm_para.h
index cea2c5c..2841f86 100644
--- a/include/uapi/linux/kvm_para.h
+++ b/include/uapi/linux/kvm_para.h
@@ -19,6 +19,7 @@
 #define KVM_HC_MMU_OP			2
 #define KVM_HC_FEATURES			3
 #define KVM_HC_PPC_MAP_MAGIC_PAGE	4
+#define KVM_HC_KICK_CPU			5
 
 /*
  * hypercalls use architecture specific
diff --git a/include/xen/balloon.h b/include/xen/balloon.h
index cc2e1a7..a4c1c6a 100644
--- a/include/xen/balloon.h
+++ b/include/xen/balloon.h
@@ -29,6 +29,9 @@
 		bool highmem);
 void free_xenballooned_pages(int nr_pages, struct page **pages);
 
+struct page *get_balloon_scratch_page(void);
+void put_balloon_scratch_page(void);
+
 struct device;
 #ifdef CONFIG_XEN_SELFBALLOONING
 extern int register_xen_selfballooning(struct device *dev);
diff --git a/include/xen/interface/io/tpmif.h b/include/xen/interface/io/tpmif.h
new file mode 100644
index 0000000..28e7dcd
--- /dev/null
+++ b/include/xen/interface/io/tpmif.h
@@ -0,0 +1,52 @@
+/******************************************************************************
+ * tpmif.h
+ *
+ * TPM I/O interface for Xen guest OSes, v2
+ *
+ * This file is in the public domain.
+ *
+ */
+
+#ifndef __XEN_PUBLIC_IO_TPMIF_H__
+#define __XEN_PUBLIC_IO_TPMIF_H__
+
+/*
+ * Xenbus state machine
+ *
+ * Device open:
+ *   1. Both ends start in XenbusStateInitialising
+ *   2. Backend transitions to InitWait (frontend does not wait on this step)
+ *   3. Frontend populates ring-ref, event-channel, feature-protocol-v2
+ *   4. Frontend transitions to Initialised
+ *   5. Backend maps grant and event channel, verifies feature-protocol-v2
+ *   6. Backend transitions to Connected
+ *   7. Frontend verifies feature-protocol-v2, transitions to Connected
+ *
+ * Device close:
+ *   1. State is changed to XenbusStateClosing
+ *   2. Frontend transitions to Closed
+ *   3. Backend unmaps grant and event, changes state to InitWait
+ */
+
+enum vtpm_shared_page_state {
+	VTPM_STATE_IDLE,         /* no contents / vTPM idle / cancel complete */
+	VTPM_STATE_SUBMIT,       /* request ready / vTPM working */
+	VTPM_STATE_FINISH,       /* response ready / vTPM idle */
+	VTPM_STATE_CANCEL,       /* cancel requested / vTPM working */
+};
+/* The backend should only change state to IDLE or FINISH, while the
+ * frontend should only change to SUBMIT or CANCEL. */
+
+
+struct vtpm_shared_page {
+	uint32_t length;         /* request/response length in bytes */
+
+	uint8_t state;           /* enum vtpm_shared_page_state */
+	uint8_t locality;        /* for the current request */
+	uint8_t pad;
+
+	uint8_t nr_extra_pages;  /* extra pages for long packets; may be zero */
+	uint32_t extra_pages[0]; /* grant IDs; length in nr_extra_pages */
+};
+
+#endif
diff --git a/include/xen/interface/vcpu.h b/include/xen/interface/vcpu.h
index 87e6f8a..b05288c 100644
--- a/include/xen/interface/vcpu.h
+++ b/include/xen/interface/vcpu.h
@@ -170,4 +170,6 @@
 };
 DEFINE_GUEST_HANDLE_STRUCT(vcpu_register_vcpu_info);
 
+/* Send an NMI to the specified VCPU. @extra_arg == NULL. */
+#define VCPUOP_send_nmi             11
 #endif /* __XEN_PUBLIC_VCPU_H__ */
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 60f48fa..297a924 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -13,6 +13,7 @@
 #include <linux/sort.h>
 #include <linux/err.h>
 #include <linux/static_key.h>
+#include <linux/jump_label_ratelimit.h>
 
 #ifdef HAVE_JUMP_LABEL
 
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index d23762e..4e8686c 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -870,13 +870,13 @@
 				swiotlb_full(hwdev, sg->length, dir, 0);
 				swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir,
 						       attrs);
-				sgl[0].dma_length = 0;
+				sg_dma_len(sgl) = 0;
 				return 0;
 			}
 			sg->dma_address = phys_to_dma(hwdev, map);
 		} else
 			sg->dma_address = dev_addr;
-		sg->dma_length = sg->length;
+		sg_dma_len(sg) = sg->length;
 	}
 	return nelems;
 }
@@ -904,7 +904,7 @@
 	BUG_ON(dir == DMA_NONE);
 
 	for_each_sg(sgl, sg, nelems, i)
-		unmap_single(hwdev, sg->dma_address, sg->dma_length, dir);
+		unmap_single(hwdev, sg->dma_address, sg_dma_len(sg), dir);
 
 }
 EXPORT_SYMBOL(swiotlb_unmap_sg_attrs);
@@ -934,7 +934,7 @@
 
 	for_each_sg(sgl, sg, nelems, i)
 		swiotlb_sync_single(hwdev, sg->dma_address,
-				    sg->dma_length, dir, target);
+				    sg_dma_len(sg), dir, target);
 }
 
 void