Merge branches 'upstream/core', 'upstream/xenfs' and 'upstream/evtchn' into upstream/for-linus
* upstream/core:
xen/events: Use PIRQ instead of GSI value when unmapping MSI/MSI-X irqs.
xen: set IO permission early (before early_cpu_init())
xen: re-enable boot-time ballooning
xen/balloon: make sure we only include remaining extra ram
xen/balloon: the balloon_lock is useless
xen: add extra pages to balloon
xen/events: use locked set|clear_bit() for cpu_evtchn_mask
xen/evtchn: clear secondary CPUs' cpu_evtchn_mask[] after restore
xen: implement XENMEM_machphys_mapping
* upstream/xenfs:
Revert "xen/privcmd: create address space to allow writable mmaps"
xen/xenfs: update xenfs_mount for new prototype
xen: fix header export to userspace
xen: set vma flag VM_PFNMAP in the privcmd mmap file_op
xen: xenfs: privcmd: check put_user() return code
* upstream/evtchn:
xen: make evtchn's name less generic
xen/evtchn: the evtchn device is non-seekable
xen/evtchn: add missing static
xen/evtchn: Fix name of Xen event-channel device
xen/evtchn: don't do unbind_from_irqhandler under spinlock
xen/evtchn: remove spurious barrier
xen/evtchn: ports start enabled
xen/evtchn: dynamically allocate port_user array
xen/evtchn: track enabled state for each port
diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h
index e8506c1..1c10c88 100644
--- a/arch/x86/include/asm/xen/interface.h
+++ b/arch/x86/include/asm/xen/interface.h
@@ -61,9 +61,9 @@
#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START)
#endif
-#ifndef machine_to_phys_mapping
-#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START)
-#endif
+#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START)
+#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END)
+#define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>__MACH2PHYS_SHIFT)
/* Maximum number of virtual CPUs in multi-processor guests. */
#define MAX_VIRT_CPUS 32
diff --git a/arch/x86/include/asm/xen/interface_32.h b/arch/x86/include/asm/xen/interface_32.h
index 42a7e00..8413688 100644
--- a/arch/x86/include/asm/xen/interface_32.h
+++ b/arch/x86/include/asm/xen/interface_32.h
@@ -32,6 +32,11 @@
/* And the trap vector is... */
#define TRAP_INSTR "int $0x82"
+#define __MACH2PHYS_VIRT_START 0xF5800000
+#define __MACH2PHYS_VIRT_END 0xF6800000
+
+#define __MACH2PHYS_SHIFT 2
+
/*
* Virtual addresses beyond this are not modifiable by guest OSes. The
* machine->physical mapping table starts at this address, read-only.
diff --git a/arch/x86/include/asm/xen/interface_64.h b/arch/x86/include/asm/xen/interface_64.h
index 100d266..839a481 100644
--- a/arch/x86/include/asm/xen/interface_64.h
+++ b/arch/x86/include/asm/xen/interface_64.h
@@ -39,18 +39,7 @@
#define __HYPERVISOR_VIRT_END 0xFFFF880000000000
#define __MACH2PHYS_VIRT_START 0xFFFF800000000000
#define __MACH2PHYS_VIRT_END 0xFFFF804000000000
-
-#ifndef HYPERVISOR_VIRT_START
-#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START)
-#define HYPERVISOR_VIRT_END mk_unsigned_long(__HYPERVISOR_VIRT_END)
-#endif
-
-#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START)
-#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END)
-#define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>3)
-#ifndef machine_to_phys_mapping
-#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START)
-#endif
+#define __MACH2PHYS_SHIFT 3
/*
* int HYPERVISOR_set_segment_base(unsigned int which, unsigned long base)
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index dd8c141..8760cc6 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -5,6 +5,7 @@
#include <linux/types.h>
#include <linux/spinlock.h>
#include <linux/pfn.h>
+#include <linux/mm.h>
#include <asm/uaccess.h>
#include <asm/page.h>
@@ -35,6 +36,8 @@
#define MAX_DOMAIN_PAGES \
((unsigned long)((u64)CONFIG_XEN_MAX_DOMAIN_MEMORY * 1024 * 1024 * 1024 / PAGE_SIZE))
+extern unsigned long *machine_to_phys_mapping;
+extern unsigned int machine_to_phys_order;
extern unsigned long get_phys_to_machine(unsigned long pfn);
extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn);
@@ -69,10 +72,8 @@
if (xen_feature(XENFEAT_auto_translated_physmap))
return mfn;
-#if 0
if (unlikely((mfn >> machine_to_phys_order) != 0))
- return max_mapnr;
-#endif
+ return ~0;
pfn = 0;
/*
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 235c0f4..7250bef 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -75,6 +75,11 @@
enum xen_domain_type xen_domain_type = XEN_NATIVE;
EXPORT_SYMBOL_GPL(xen_domain_type);
+unsigned long *machine_to_phys_mapping = (void *)MACH2PHYS_VIRT_START;
+EXPORT_SYMBOL(machine_to_phys_mapping);
+unsigned int machine_to_phys_order;
+EXPORT_SYMBOL(machine_to_phys_order);
+
struct start_info *xen_start_info;
EXPORT_SYMBOL_GPL(xen_start_info);
@@ -1090,6 +1095,8 @@
/* First C function to be called on Xen boot */
asmlinkage void __init xen_start_kernel(void)
{
+ struct physdev_set_iopl set_iopl;
+ int rc;
pgd_t *pgd;
if (!xen_start_info)
@@ -1097,6 +1104,8 @@
xen_domain_type = XEN_PV_DOMAIN;
+ xen_setup_machphys_mapping();
+
/* Install Xen paravirt ops */
pv_info = xen_info;
pv_init_ops = xen_init_ops;
@@ -1202,10 +1211,18 @@
#else
pv_info.kernel_rpl = 0;
#endif
-
/* set the limit of our address space */
xen_reserve_top();
+ /* We used to do this in xen_arch_setup, but that is too late on AMD
+ * were early_cpu_init (run before ->arch_setup()) calls early_amd_init
+ * which pokes 0xcf8 port.
+ */
+ set_iopl.iopl = 1;
+ rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
+ if (rc != 0)
+ xen_raw_printk("physdev_op failed %d\n", rc);
+
#ifdef CONFIG_X86_32
/* set up basic CPUID stuff */
cpu_detect(&new_cpu_data);
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 276c67b..790af90 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -2034,6 +2034,20 @@
set_page_prot(pmd, PAGE_KERNEL_RO);
}
+void __init xen_setup_machphys_mapping(void)
+{
+ struct xen_machphys_mapping mapping;
+ unsigned long machine_to_phys_nr_ents;
+
+ if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) {
+ machine_to_phys_mapping = (unsigned long *)mapping.v_start;
+ machine_to_phys_nr_ents = mapping.max_mfn + 1;
+ } else {
+ machine_to_phys_nr_ents = MACH2PHYS_NR_ENTRIES;
+ }
+ machine_to_phys_order = fls(machine_to_phys_nr_ents - 1);
+}
+
#ifdef CONFIG_X86_64
static void convert_pfn_mfn(void *v)
{
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 769c4b0..38fdffa 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -248,8 +248,7 @@
else
extra_pages = 0;
- if (!xen_initial_domain())
- xen_add_extra_mem(extra_pages);
+ xen_add_extra_mem(extra_pages);
return "Xen";
}
@@ -337,9 +336,6 @@
void __init xen_arch_setup(void)
{
- struct physdev_set_iopl set_iopl;
- int rc;
-
xen_panic_handler_init();
HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
@@ -356,11 +352,6 @@
xen_enable_sysenter();
xen_enable_syscall();
- set_iopl.iopl = 1;
- rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
- if (rc != 0)
- printk(KERN_INFO "physdev_op failed %d\n", rc);
-
#ifdef CONFIG_ACPI
if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
printk(KERN_INFO "ACPI in unprivileged domain disabled\n");
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index eb8a78d..533a199 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -8,9 +8,12 @@
obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
obj-$(CONFIG_XEN_XENCOMM) += xencomm.o
obj-$(CONFIG_XEN_BALLOON) += balloon.o
-obj-$(CONFIG_XEN_DEV_EVTCHN) += evtchn.o
+obj-$(CONFIG_XEN_DEV_EVTCHN) += xen-evtchn.o
obj-$(CONFIG_XENFS) += xenfs/
obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o
obj-$(CONFIG_XEN_PLATFORM_PCI) += platform-pci.o
obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o
obj-$(CONFIG_XEN_DOM0) += pci.o
+
+xen-evtchn-y := evtchn.o
+
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index 500290b..2b17ad5 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -50,6 +50,7 @@
#include <asm/pgtable.h>
#include <asm/uaccess.h>
#include <asm/tlb.h>
+#include <asm/e820.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>
@@ -119,7 +120,7 @@
}
/* balloon_append: add the given page to the balloon. */
-static void balloon_append(struct page *page)
+static void __balloon_append(struct page *page)
{
/* Lowmem is re-populated first, so highmem pages go at list tail. */
if (PageHighMem(page)) {
@@ -130,7 +131,11 @@
list_add(&page->lru, &ballooned_pages);
balloon_stats.balloon_low++;
}
+}
+static void balloon_append(struct page *page)
+{
+ __balloon_append(page);
totalram_pages--;
}
@@ -191,7 +196,7 @@
static int increase_reservation(unsigned long nr_pages)
{
- unsigned long pfn, i, flags;
+ unsigned long pfn, i;
struct page *page;
long rc;
struct xen_memory_reservation reservation = {
@@ -203,8 +208,6 @@
if (nr_pages > ARRAY_SIZE(frame_list))
nr_pages = ARRAY_SIZE(frame_list);
- spin_lock_irqsave(&xen_reservation_lock, flags);
-
page = balloon_first_page();
for (i = 0; i < nr_pages; i++) {
BUG_ON(page == NULL);
@@ -247,14 +250,12 @@
balloon_stats.current_pages += rc;
out:
- spin_unlock_irqrestore(&xen_reservation_lock, flags);
-
return rc < 0 ? rc : rc != nr_pages;
}
static int decrease_reservation(unsigned long nr_pages)
{
- unsigned long pfn, i, flags;
+ unsigned long pfn, i;
struct page *page;
int need_sleep = 0;
int ret;
@@ -292,8 +293,6 @@
kmap_flush_unused();
flush_tlb_all();
- spin_lock_irqsave(&xen_reservation_lock, flags);
-
/* No more mappings: invalidate P2M and add to balloon. */
for (i = 0; i < nr_pages; i++) {
pfn = mfn_to_pfn(frame_list[i]);
@@ -308,8 +307,6 @@
balloon_stats.current_pages -= nr_pages;
- spin_unlock_irqrestore(&xen_reservation_lock, flags);
-
return need_sleep;
}
@@ -395,7 +392,7 @@
static int __init balloon_init(void)
{
- unsigned long pfn;
+ unsigned long pfn, extra_pfn_end;
struct page *page;
if (!xen_pv_domain())
@@ -416,10 +413,15 @@
register_balloon(&balloon_sysdev);
/* Initialise the balloon with excess memory space. */
- for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
+ extra_pfn_end = min(e820_end_of_ram_pfn(),
+ (unsigned long)PFN_DOWN(xen_extra_mem_start + xen_extra_mem_size));
+ for (pfn = PFN_UP(xen_extra_mem_start);
+ pfn < extra_pfn_end;
+ pfn++) {
page = pfn_to_page(pfn);
- if (!PageReserved(page))
- balloon_append(page);
+ /* totalram_pages doesn't include the boot-time
+ balloon extension, so don't subtract from it. */
+ __balloon_append(page);
}
target_watch.callback = watch_target;
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 321a0c8..2811bb9 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -278,17 +278,17 @@
cpumask_copy(irq_to_desc(irq)->affinity, cpumask_of(cpu));
#endif
- __clear_bit(chn, cpu_evtchn_mask(cpu_from_irq(irq)));
- __set_bit(chn, cpu_evtchn_mask(cpu));
+ clear_bit(chn, cpu_evtchn_mask(cpu_from_irq(irq)));
+ set_bit(chn, cpu_evtchn_mask(cpu));
irq_info[irq].cpu = cpu;
}
static void init_evtchn_cpu_bindings(void)
{
+ int i;
#ifdef CONFIG_SMP
struct irq_desc *desc;
- int i;
/* By default all event channels notify CPU#0. */
for_each_irq_desc(i, desc) {
@@ -296,7 +296,10 @@
}
#endif
- memset(cpu_evtchn_mask(0), ~0, sizeof(struct cpu_evtchn_s));
+ for_each_possible_cpu(i)
+ memset(cpu_evtchn_mask(i),
+ (i == 0) ? ~0 : 0, sizeof(struct cpu_evtchn_s));
+
}
static inline void clear_evtchn(int port)
@@ -752,7 +755,7 @@
goto out;
if (xen_initial_domain()) {
- unmap_irq.pirq = info->u.pirq.gsi;
+ unmap_irq.pirq = info->u.pirq.pirq;
unmap_irq.domid = DOMID_SELF;
rc = HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap_irq);
if (rc) {
diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
index fec6ba3..ef11daf 100644
--- a/drivers/xen/evtchn.c
+++ b/drivers/xen/evtchn.c
@@ -69,20 +69,51 @@
const char *name;
};
-/* Who's bound to each port? */
-static struct per_user_data *port_user[NR_EVENT_CHANNELS];
+/*
+ * Who's bound to each port? This is logically an array of struct
+ * per_user_data *, but we encode the current enabled-state in bit 0.
+ */
+static unsigned long *port_user;
static DEFINE_SPINLOCK(port_user_lock); /* protects port_user[] and ring_prod */
-irqreturn_t evtchn_interrupt(int irq, void *data)
+static inline struct per_user_data *get_port_user(unsigned port)
+{
+ return (struct per_user_data *)(port_user[port] & ~1);
+}
+
+static inline void set_port_user(unsigned port, struct per_user_data *u)
+{
+ port_user[port] = (unsigned long)u;
+}
+
+static inline bool get_port_enabled(unsigned port)
+{
+ return port_user[port] & 1;
+}
+
+static inline void set_port_enabled(unsigned port, bool enabled)
+{
+ if (enabled)
+ port_user[port] |= 1;
+ else
+ port_user[port] &= ~1;
+}
+
+static irqreturn_t evtchn_interrupt(int irq, void *data)
{
unsigned int port = (unsigned long)data;
struct per_user_data *u;
spin_lock(&port_user_lock);
- u = port_user[port];
+ u = get_port_user(port);
+
+ WARN(!get_port_enabled(port),
+ "Interrupt for port %d, but apparently not enabled; per-user %p\n",
+ port, u);
disable_irq_nosync(irq);
+ set_port_enabled(port, false);
if ((u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE) {
u->ring[EVTCHN_RING_MASK(u->ring_prod)] = port;
@@ -92,9 +123,8 @@
kill_fasync(&u->evtchn_async_queue,
SIGIO, POLL_IN);
}
- } else {
+ } else
u->ring_overflow = 1;
- }
spin_unlock(&port_user_lock);
@@ -198,9 +228,18 @@
goto out;
spin_lock_irq(&port_user_lock);
- for (i = 0; i < (count/sizeof(evtchn_port_t)); i++)
- if ((kbuf[i] < NR_EVENT_CHANNELS) && (port_user[kbuf[i]] == u))
- enable_irq(irq_from_evtchn(kbuf[i]));
+
+ for (i = 0; i < (count/sizeof(evtchn_port_t)); i++) {
+ unsigned port = kbuf[i];
+
+ if (port < NR_EVENT_CHANNELS &&
+ get_port_user(port) == u &&
+ !get_port_enabled(port)) {
+ set_port_enabled(port, true);
+ enable_irq(irq_from_evtchn(port));
+ }
+ }
+
spin_unlock_irq(&port_user_lock);
rc = count;
@@ -222,8 +261,9 @@
* interrupt handler yet, and our caller has already
* serialized bind operations.)
*/
- BUG_ON(port_user[port] != NULL);
- port_user[port] = u;
+ BUG_ON(get_port_user(port) != NULL);
+ set_port_user(port, u);
+ set_port_enabled(port, true); /* start enabled */
rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, IRQF_DISABLED,
u->name, (void *)(unsigned long)port);
@@ -239,10 +279,7 @@
unbind_from_irqhandler(irq, (void *)(unsigned long)port);
- /* make sure we unbind the irq handler before clearing the port */
- barrier();
-
- port_user[port] = NULL;
+ set_port_user(port, NULL);
}
static long evtchn_ioctl(struct file *file,
@@ -333,15 +370,17 @@
spin_lock_irq(&port_user_lock);
rc = -ENOTCONN;
- if (port_user[unbind.port] != u) {
+ if (get_port_user(unbind.port) != u) {
spin_unlock_irq(&port_user_lock);
break;
}
- evtchn_unbind_from_user(u, unbind.port);
+ disable_irq(irq_from_evtchn(unbind.port));
spin_unlock_irq(&port_user_lock);
+ evtchn_unbind_from_user(u, unbind.port);
+
rc = 0;
break;
}
@@ -355,7 +394,7 @@
if (notify.port >= NR_EVENT_CHANNELS) {
rc = -EINVAL;
- } else if (port_user[notify.port] != u) {
+ } else if (get_port_user(notify.port) != u) {
rc = -ENOTCONN;
} else {
notify_remote_via_evtchn(notify.port);
@@ -431,7 +470,7 @@
filp->private_data = u;
- return 0;
+ return nonseekable_open(inode, filp);;
}
static int evtchn_release(struct inode *inode, struct file *filp)
@@ -444,14 +483,21 @@
free_page((unsigned long)u->ring);
for (i = 0; i < NR_EVENT_CHANNELS; i++) {
- if (port_user[i] != u)
+ if (get_port_user(i) != u)
continue;
- evtchn_unbind_from_user(port_user[i], i);
+ disable_irq(irq_from_evtchn(i));
}
spin_unlock_irq(&port_user_lock);
+ for (i = 0; i < NR_EVENT_CHANNELS; i++) {
+ if (get_port_user(i) != u)
+ continue;
+
+ evtchn_unbind_from_user(get_port_user(i), i);
+ }
+
kfree(u->name);
kfree(u);
@@ -467,12 +513,12 @@
.fasync = evtchn_fasync,
.open = evtchn_open,
.release = evtchn_release,
- .llseek = noop_llseek,
+ .llseek = no_llseek,
};
static struct miscdevice evtchn_miscdev = {
.minor = MISC_DYNAMIC_MINOR,
- .name = "evtchn",
+ .name = "xen/evtchn",
.fops = &evtchn_fops,
};
static int __init evtchn_init(void)
@@ -482,8 +528,11 @@
if (!xen_domain())
return -ENODEV;
+ port_user = kcalloc(NR_EVENT_CHANNELS, sizeof(*port_user), GFP_KERNEL);
+ if (port_user == NULL)
+ return -ENOMEM;
+
spin_lock_init(&port_user_lock);
- memset(port_user, 0, sizeof(port_user));
/* Create '/dev/misc/evtchn'. */
err = misc_register(&evtchn_miscdev);
@@ -499,6 +548,9 @@
static void __exit evtchn_cleanup(void)
{
+ kfree(port_user);
+ port_user = NULL;
+
misc_deregister(&evtchn_miscdev);
}
diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h
index d7a6c13..eac3ce1 100644
--- a/include/xen/interface/memory.h
+++ b/include/xen/interface/memory.h
@@ -141,6 +141,19 @@
DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mfn_list);
/*
+ * Returns the location in virtual address space of the machine_to_phys
+ * mapping table. Architectures which do not have a m2p table, or which do not
+ * map it by default into guest address space, do not implement this command.
+ * arg == addr of xen_machphys_mapping_t.
+ */
+#define XENMEM_machphys_mapping 12
+struct xen_machphys_mapping {
+ unsigned long v_start, v_end; /* Start and end virtual addresses. */
+ unsigned long max_mfn; /* Maximum MFN that can be looked up. */
+};
+DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mapping_t);
+
+/*
* Sets the GPFN at which a particular page appears in the specified guest's
* pseudophysical address space.
* arg == addr of xen_add_to_physmap_t.
diff --git a/include/xen/page.h b/include/xen/page.h
index eaf85fa..0be36b9 100644
--- a/include/xen/page.h
+++ b/include/xen/page.h
@@ -1 +1,8 @@
+#ifndef _XEN_PAGE_H
+#define _XEN_PAGE_H
+
#include <asm/xen/page.h>
+
+extern phys_addr_t xen_extra_mem_start, xen_extra_mem_size;
+
+#endif /* _XEN_PAGE_H */