Merge branch 'x86-platform-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 platform updates from Ingo Molnar:
"The main changes include various Hyper-V optimizations such as faster
hypercalls and faster/better TLB flushes - and there's also some
Intel-MID cleanups"
* 'x86-platform-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
tracing/hyper-v: Trace hyperv_mmu_flush_tlb_others()
x86/hyper-v: Support extended CPU ranges for TLB flush hypercalls
x86/platform/intel-mid: Make several arrays static, to make code smaller
MAINTAINERS: Add missed file for Hyper-V
x86/hyper-v: Use hypercall for remote TLB flush
hyper-v: Globalize vp_index
x86/hyper-v: Implement rep hypercalls
hyper-v: Use fast hypercall for HVCALL_SIGNAL_EVENT
x86/hyper-v: Introduce fast hypercall implementation
x86/hyper-v: Make hv_do_hypercall() inline
x86/hyper-v: Include hyperv/ only when CONFIG_HYPERV is set
x86/platform/intel-mid: Make 'bt_sfi_data' const
x86/platform/intel-mid: Make IRQ allocation a bit more flexible
x86/platform/intel-mid: Group timers callbacks together
diff --git a/MAINTAINERS b/MAINTAINERS
index 11dde28..fb63e53 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6312,6 +6312,7 @@
S: Maintained
F: Documentation/networking/netvsc.txt
F: arch/x86/include/asm/mshyperv.h
+F: arch/x86/include/asm/trace/hyperv.h
F: arch/x86/include/uapi/asm/hyperv.h
F: arch/x86/kernel/cpu/mshyperv.c
F: arch/x86/hyperv
@@ -6325,6 +6326,7 @@
F: drivers/video/fbdev/hyperv_fb.c
F: net/vmw_vsock/hyperv_transport.c
F: include/linux/hyperv.h
+F: include/uapi/linux/hyperv.h
F: tools/hv/
F: Documentation/ABI/stable/sysfs-bus-vmbus
diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild
index f65a804..0038a2d 100644
--- a/arch/x86/Kbuild
+++ b/arch/x86/Kbuild
@@ -8,7 +8,7 @@
obj-$(CONFIG_XEN) += xen/
# Hyper-V paravirtualization support
-obj-$(CONFIG_HYPERVISOR_GUEST) += hyperv/
+obj-$(subst m,y,$(CONFIG_HYPERV)) += hyperv/
obj-y += realmode/
obj-y += kernel/
diff --git a/arch/x86/hyperv/Makefile b/arch/x86/hyperv/Makefile
index 171ae09..367a820 100644
--- a/arch/x86/hyperv/Makefile
+++ b/arch/x86/hyperv/Makefile
@@ -1 +1 @@
-obj-y := hv_init.o
+obj-y := hv_init.o mmu.o
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 5b882cc..1a8eb55 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -26,6 +26,8 @@
#include <linux/mm.h>
#include <linux/clockchips.h>
#include <linux/hyperv.h>
+#include <linux/slab.h>
+#include <linux/cpuhotplug.h>
#ifdef CONFIG_HYPERV_TSCPAGE
@@ -75,10 +77,25 @@
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
-static void *hypercall_pg;
+void *hv_hypercall_pg;
+EXPORT_SYMBOL_GPL(hv_hypercall_pg);
struct clocksource *hyperv_cs;
EXPORT_SYMBOL_GPL(hyperv_cs);
+u32 *hv_vp_index;
+EXPORT_SYMBOL_GPL(hv_vp_index);
+
+static int hv_cpu_init(unsigned int cpu)
+{
+ u64 msr_vp_index;
+
+ hv_get_vp_index(msr_vp_index);
+
+ hv_vp_index[smp_processor_id()] = msr_vp_index;
+
+ return 0;
+}
+
/*
* This function is to be invoked early in the boot sequence after the
* hypervisor has been detected.
@@ -94,6 +111,16 @@
if (x86_hyper != &x86_hyper_ms_hyperv)
return;
+ /* Allocate percpu VP index */
+ hv_vp_index = kmalloc_array(num_possible_cpus(), sizeof(*hv_vp_index),
+ GFP_KERNEL);
+ if (!hv_vp_index)
+ return;
+
+ if (cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/hyperv_init:online",
+ hv_cpu_init, NULL) < 0)
+ goto free_vp_index;
+
/*
* Setup the hypercall page and enable hypercalls.
* 1. Register the guest ID
@@ -102,17 +129,19 @@
guest_id = generate_guest_id(0, LINUX_VERSION_CODE, 0);
wrmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id);
- hypercall_pg = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL_RX);
- if (hypercall_pg == NULL) {
+ hv_hypercall_pg = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL_RX);
+ if (hv_hypercall_pg == NULL) {
wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0);
- return;
+ goto free_vp_index;
}
rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
hypercall_msr.enable = 1;
- hypercall_msr.guest_physical_address = vmalloc_to_pfn(hypercall_pg);
+ hypercall_msr.guest_physical_address = vmalloc_to_pfn(hv_hypercall_pg);
wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
+ hyper_alloc_mmu();
+
/*
* Register Hyper-V specific clocksource.
*/
@@ -148,6 +177,12 @@
hyperv_cs = &hyperv_cs_msr;
if (ms_hyperv.features & HV_X64_MSR_TIME_REF_COUNT_AVAILABLE)
clocksource_register_hz(&hyperv_cs_msr, NSEC_PER_SEC/100);
+
+ return;
+
+free_vp_index:
+ kfree(hv_vp_index);
+ hv_vp_index = NULL;
}
/*
@@ -170,51 +205,6 @@
}
EXPORT_SYMBOL_GPL(hyperv_cleanup);
-/*
- * hv_do_hypercall- Invoke the specified hypercall
- */
-u64 hv_do_hypercall(u64 control, void *input, void *output)
-{
- u64 input_address = (input) ? virt_to_phys(input) : 0;
- u64 output_address = (output) ? virt_to_phys(output) : 0;
-#ifdef CONFIG_X86_64
- u64 hv_status = 0;
-
- if (!hypercall_pg)
- return (u64)ULLONG_MAX;
-
- __asm__ __volatile__("mov %0, %%r8" : : "r" (output_address) : "r8");
- __asm__ __volatile__("call *%3" : "=a" (hv_status) :
- "c" (control), "d" (input_address),
- "m" (hypercall_pg));
-
- return hv_status;
-
-#else
-
- u32 control_hi = control >> 32;
- u32 control_lo = control & 0xFFFFFFFF;
- u32 hv_status_hi = 1;
- u32 hv_status_lo = 1;
- u32 input_address_hi = input_address >> 32;
- u32 input_address_lo = input_address & 0xFFFFFFFF;
- u32 output_address_hi = output_address >> 32;
- u32 output_address_lo = output_address & 0xFFFFFFFF;
-
- if (!hypercall_pg)
- return (u64)ULLONG_MAX;
-
- __asm__ __volatile__ ("call *%8" : "=d"(hv_status_hi),
- "=a"(hv_status_lo) : "d" (control_hi),
- "a" (control_lo), "b" (input_address_hi),
- "c" (input_address_lo), "D"(output_address_hi),
- "S"(output_address_lo), "m" (hypercall_pg));
-
- return hv_status_lo | ((u64)hv_status_hi << 32);
-#endif /* !x86_64 */
-}
-EXPORT_SYMBOL_GPL(hv_do_hypercall);
-
void hyperv_report_panic(struct pt_regs *regs)
{
static bool panic_reported;
diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c
new file mode 100644
index 0000000..39e7f6e
--- /dev/null
+++ b/arch/x86/hyperv/mmu.c
@@ -0,0 +1,272 @@
+#define pr_fmt(fmt) "Hyper-V: " fmt
+
+#include <linux/hyperv.h>
+#include <linux/log2.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+
+#include <asm/fpu/api.h>
+#include <asm/mshyperv.h>
+#include <asm/msr.h>
+#include <asm/tlbflush.h>
+
+#define CREATE_TRACE_POINTS
+#include <asm/trace/hyperv.h>
+
+/* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */
+struct hv_flush_pcpu {
+ u64 address_space;
+ u64 flags;
+ u64 processor_mask;
+ u64 gva_list[];
+};
+
+/* HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressListEx hypercalls */
+struct hv_flush_pcpu_ex {
+ u64 address_space;
+ u64 flags;
+ struct {
+ u64 format;
+ u64 valid_bank_mask;
+ u64 bank_contents[];
+ } hv_vp_set;
+ u64 gva_list[];
+};
+
+/* Each gva in gva_list encodes up to 4096 pages to flush */
+#define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE)
+
+static struct hv_flush_pcpu __percpu *pcpu_flush;
+
+static struct hv_flush_pcpu_ex __percpu *pcpu_flush_ex;
+
+/*
+ * Fills in gva_list starting from offset. Returns the number of items added.
+ */
+static inline int fill_gva_list(u64 gva_list[], int offset,
+ unsigned long start, unsigned long end)
+{
+ int gva_n = offset;
+ unsigned long cur = start, diff;
+
+ do {
+ diff = end > cur ? end - cur : 0;
+
+ gva_list[gva_n] = cur & PAGE_MASK;
+ /*
+ * Lower 12 bits encode the number of additional
+ * pages to flush (in addition to the 'cur' page).
+ */
+ if (diff >= HV_TLB_FLUSH_UNIT)
+ gva_list[gva_n] |= ~PAGE_MASK;
+ else if (diff)
+ gva_list[gva_n] |= (diff - 1) >> PAGE_SHIFT;
+
+ cur += HV_TLB_FLUSH_UNIT;
+ gva_n++;
+
+ } while (cur < end);
+
+ return gva_n - offset;
+}
+
+/* Return the number of banks in the resulting vp_set */
+static inline int cpumask_to_vp_set(struct hv_flush_pcpu_ex *flush,
+ const struct cpumask *cpus)
+{
+ int cpu, vcpu, vcpu_bank, vcpu_offset, nr_bank = 1;
+
+ /*
+ * Some banks may end up being empty but this is acceptable.
+ */
+ for_each_cpu(cpu, cpus) {
+ vcpu = hv_cpu_number_to_vp_number(cpu);
+ vcpu_bank = vcpu / 64;
+ vcpu_offset = vcpu % 64;
+
+ /* valid_bank_mask can represent up to 64 banks */
+ if (vcpu_bank >= 64)
+ return 0;
+
+ __set_bit(vcpu_offset, (unsigned long *)
+ &flush->hv_vp_set.bank_contents[vcpu_bank]);
+ if (vcpu_bank >= nr_bank)
+ nr_bank = vcpu_bank + 1;
+ }
+ flush->hv_vp_set.valid_bank_mask = GENMASK_ULL(nr_bank - 1, 0);
+
+ return nr_bank;
+}
+
+static void hyperv_flush_tlb_others(const struct cpumask *cpus,
+ const struct flush_tlb_info *info)
+{
+ int cpu, vcpu, gva_n, max_gvas;
+ struct hv_flush_pcpu *flush;
+ u64 status = U64_MAX;
+ unsigned long flags;
+
+ trace_hyperv_mmu_flush_tlb_others(cpus, info);
+
+ if (!pcpu_flush || !hv_hypercall_pg)
+ goto do_native;
+
+ if (cpumask_empty(cpus))
+ return;
+
+ local_irq_save(flags);
+
+ flush = this_cpu_ptr(pcpu_flush);
+
+ if (info->mm) {
+ flush->address_space = virt_to_phys(info->mm->pgd);
+ flush->flags = 0;
+ } else {
+ flush->address_space = 0;
+ flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+ }
+
+ flush->processor_mask = 0;
+ if (cpumask_equal(cpus, cpu_present_mask)) {
+ flush->flags |= HV_FLUSH_ALL_PROCESSORS;
+ } else {
+ for_each_cpu(cpu, cpus) {
+ vcpu = hv_cpu_number_to_vp_number(cpu);
+ if (vcpu >= 64)
+ goto do_native;
+
+ __set_bit(vcpu, (unsigned long *)
+ &flush->processor_mask);
+ }
+ }
+
+ /*
+ * We can flush not more than max_gvas with one hypercall. Flush the
+ * whole address space if we were asked to do more.
+ */
+ max_gvas = (PAGE_SIZE - sizeof(*flush)) / sizeof(flush->gva_list[0]);
+
+ if (info->end == TLB_FLUSH_ALL) {
+ flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
+ status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
+ flush, NULL);
+ } else if (info->end &&
+ ((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) {
+ status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
+ flush, NULL);
+ } else {
+ gva_n = fill_gva_list(flush->gva_list, 0,
+ info->start, info->end);
+ status = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST,
+ gva_n, 0, flush, NULL);
+ }
+
+ local_irq_restore(flags);
+
+ if (!(status & HV_HYPERCALL_RESULT_MASK))
+ return;
+do_native:
+ native_flush_tlb_others(cpus, info);
+}
+
+static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
+ const struct flush_tlb_info *info)
+{
+ int nr_bank = 0, max_gvas, gva_n;
+ struct hv_flush_pcpu_ex *flush;
+ u64 status = U64_MAX;
+ unsigned long flags;
+
+ trace_hyperv_mmu_flush_tlb_others(cpus, info);
+
+ if (!pcpu_flush_ex || !hv_hypercall_pg)
+ goto do_native;
+
+ if (cpumask_empty(cpus))
+ return;
+
+ local_irq_save(flags);
+
+ flush = this_cpu_ptr(pcpu_flush_ex);
+
+ if (info->mm) {
+ flush->address_space = virt_to_phys(info->mm->pgd);
+ flush->flags = 0;
+ } else {
+ flush->address_space = 0;
+ flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+ }
+
+ flush->hv_vp_set.valid_bank_mask = 0;
+
+ if (!cpumask_equal(cpus, cpu_present_mask)) {
+ flush->hv_vp_set.format = HV_GENERIC_SET_SPARCE_4K;
+ nr_bank = cpumask_to_vp_set(flush, cpus);
+ }
+
+ if (!nr_bank) {
+ flush->hv_vp_set.format = HV_GENERIC_SET_ALL;
+ flush->flags |= HV_FLUSH_ALL_PROCESSORS;
+ }
+
+ /*
+ * We can flush not more than max_gvas with one hypercall. Flush the
+ * whole address space if we were asked to do more.
+ */
+ max_gvas =
+ (PAGE_SIZE - sizeof(*flush) - nr_bank *
+ sizeof(flush->hv_vp_set.bank_contents[0])) /
+ sizeof(flush->gva_list[0]);
+
+ if (info->end == TLB_FLUSH_ALL) {
+ flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
+ status = hv_do_rep_hypercall(
+ HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
+ 0, nr_bank + 2, flush, NULL);
+ } else if (info->end &&
+ ((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) {
+ status = hv_do_rep_hypercall(
+ HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
+ 0, nr_bank + 2, flush, NULL);
+ } else {
+ gva_n = fill_gva_list(flush->gva_list, nr_bank,
+ info->start, info->end);
+ status = hv_do_rep_hypercall(
+ HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX,
+ gva_n, nr_bank + 2, flush, NULL);
+ }
+
+ local_irq_restore(flags);
+
+ if (!(status & HV_HYPERCALL_RESULT_MASK))
+ return;
+do_native:
+ native_flush_tlb_others(cpus, info);
+}
+
+void hyperv_setup_mmu_ops(void)
+{
+ if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED))
+ return;
+
+ setup_clear_cpu_cap(X86_FEATURE_PCID);
+
+ if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED)) {
+ pr_info("Using hypercall for remote TLB flush\n");
+ pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others;
+ } else {
+ pr_info("Using ext hypercall for remote TLB flush\n");
+ pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others_ex;
+ }
+}
+
+void hyper_alloc_mmu(void)
+{
+ if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED))
+ return;
+
+ if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
+ pcpu_flush = __alloc_percpu(PAGE_SIZE, PAGE_SIZE);
+ else
+ pcpu_flush_ex = __alloc_percpu(PAGE_SIZE, PAGE_SIZE);
+}
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 58b9291..63cc96f 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -3,6 +3,8 @@
#include <linux/types.h>
#include <linux/atomic.h>
+#include <linux/nmi.h>
+#include <asm/io.h>
#include <asm/hyperv.h>
/*
@@ -170,12 +172,155 @@
#if IS_ENABLED(CONFIG_HYPERV)
extern struct clocksource *hyperv_cs;
+extern void *hv_hypercall_pg;
+
+static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
+{
+ u64 input_address = input ? virt_to_phys(input) : 0;
+ u64 output_address = output ? virt_to_phys(output) : 0;
+ u64 hv_status;
+ register void *__sp asm(_ASM_SP);
+
+#ifdef CONFIG_X86_64
+ if (!hv_hypercall_pg)
+ return U64_MAX;
+
+ __asm__ __volatile__("mov %4, %%r8\n"
+ "call *%5"
+ : "=a" (hv_status), "+r" (__sp),
+ "+c" (control), "+d" (input_address)
+ : "r" (output_address), "m" (hv_hypercall_pg)
+ : "cc", "memory", "r8", "r9", "r10", "r11");
+#else
+ u32 input_address_hi = upper_32_bits(input_address);
+ u32 input_address_lo = lower_32_bits(input_address);
+ u32 output_address_hi = upper_32_bits(output_address);
+ u32 output_address_lo = lower_32_bits(output_address);
+
+ if (!hv_hypercall_pg)
+ return U64_MAX;
+
+ __asm__ __volatile__("call *%7"
+ : "=A" (hv_status),
+ "+c" (input_address_lo), "+r" (__sp)
+ : "A" (control),
+ "b" (input_address_hi),
+ "D"(output_address_hi), "S"(output_address_lo),
+ "m" (hv_hypercall_pg)
+ : "cc", "memory");
+#endif /* !x86_64 */
+ return hv_status;
+}
+
+#define HV_HYPERCALL_RESULT_MASK GENMASK_ULL(15, 0)
+#define HV_HYPERCALL_FAST_BIT BIT(16)
+#define HV_HYPERCALL_VARHEAD_OFFSET 17
+#define HV_HYPERCALL_REP_COMP_OFFSET 32
+#define HV_HYPERCALL_REP_COMP_MASK GENMASK_ULL(43, 32)
+#define HV_HYPERCALL_REP_START_OFFSET 48
+#define HV_HYPERCALL_REP_START_MASK GENMASK_ULL(59, 48)
+
+/* Fast hypercall with 8 bytes of input and no output */
+static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
+{
+ u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
+ register void *__sp asm(_ASM_SP);
+
+#ifdef CONFIG_X86_64
+ {
+ __asm__ __volatile__("call *%4"
+ : "=a" (hv_status), "+r" (__sp),
+ "+c" (control), "+d" (input1)
+ : "m" (hv_hypercall_pg)
+ : "cc", "r8", "r9", "r10", "r11");
+ }
+#else
+ {
+ u32 input1_hi = upper_32_bits(input1);
+ u32 input1_lo = lower_32_bits(input1);
+
+ __asm__ __volatile__ ("call *%5"
+ : "=A"(hv_status),
+ "+c"(input1_lo),
+ "+r"(__sp)
+ : "A" (control),
+ "b" (input1_hi),
+ "m" (hv_hypercall_pg)
+ : "cc", "edi", "esi");
+ }
+#endif
+ return hv_status;
+}
+
+/*
+ * Rep hypercalls. Callers of this functions are supposed to ensure that
+ * rep_count and varhead_size comply with Hyper-V hypercall definition.
+ */
+static inline u64 hv_do_rep_hypercall(u16 code, u16 rep_count, u16 varhead_size,
+ void *input, void *output)
+{
+ u64 control = code;
+ u64 status;
+ u16 rep_comp;
+
+ control |= (u64)varhead_size << HV_HYPERCALL_VARHEAD_OFFSET;
+ control |= (u64)rep_count << HV_HYPERCALL_REP_COMP_OFFSET;
+
+ do {
+ status = hv_do_hypercall(control, input, output);
+ if ((status & HV_HYPERCALL_RESULT_MASK) != HV_STATUS_SUCCESS)
+ return status;
+
+ /* Bits 32-43 of status have 'Reps completed' data. */
+ rep_comp = (status & HV_HYPERCALL_REP_COMP_MASK) >>
+ HV_HYPERCALL_REP_COMP_OFFSET;
+
+ control &= ~HV_HYPERCALL_REP_START_MASK;
+ control |= (u64)rep_comp << HV_HYPERCALL_REP_START_OFFSET;
+
+ touch_nmi_watchdog();
+ } while (rep_comp < rep_count);
+
+ return status;
+}
+
+/*
+ * Hypervisor's notion of virtual processor ID is different from
+ * Linux' notion of CPU ID. This information can only be retrieved
+ * in the context of the calling CPU. Setup a map for easy access
+ * to this information.
+ */
+extern u32 *hv_vp_index;
+
+/**
+ * hv_cpu_number_to_vp_number() - Map CPU to VP.
+ * @cpu_number: CPU number in Linux terms
+ *
+ * This function returns the mapping between the Linux processor
+ * number and the hypervisor's virtual processor number, useful
+ * in making hypercalls and such that talk about specific
+ * processors.
+ *
+ * Return: Virtual processor number in Hyper-V terms
+ */
+static inline int hv_cpu_number_to_vp_number(int cpu_number)
+{
+ return hv_vp_index[cpu_number];
+}
void hyperv_init(void);
+void hyperv_setup_mmu_ops(void);
+void hyper_alloc_mmu(void);
void hyperv_report_panic(struct pt_regs *regs);
bool hv_is_hypercall_page_setup(void);
void hyperv_cleanup(void);
-#endif
+#else /* CONFIG_HYPERV */
+static inline void hyperv_init(void) {}
+static inline bool hv_is_hypercall_page_setup(void) { return false; }
+static inline void hyperv_cleanup(void) {}
+static inline void hyperv_setup_mmu_ops(void) {}
+#endif /* CONFIG_HYPERV */
+
#ifdef CONFIG_HYPERV_TSCPAGE
struct ms_hyperv_tsc_page *hv_get_tsc_page(void);
static inline u64 hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg)
diff --git a/arch/x86/include/asm/trace/hyperv.h b/arch/x86/include/asm/trace/hyperv.h
new file mode 100644
index 0000000..4253bca
--- /dev/null
+++ b/arch/x86/include/asm/trace/hyperv.h
@@ -0,0 +1,40 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM hyperv
+
+#if !defined(_TRACE_HYPERV_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_HYPERV_H
+
+#include <linux/tracepoint.h>
+
+#if IS_ENABLED(CONFIG_HYPERV)
+
+TRACE_EVENT(hyperv_mmu_flush_tlb_others,
+ TP_PROTO(const struct cpumask *cpus,
+ const struct flush_tlb_info *info),
+ TP_ARGS(cpus, info),
+ TP_STRUCT__entry(
+ __field(unsigned int, ncpus)
+ __field(struct mm_struct *, mm)
+ __field(unsigned long, addr)
+ __field(unsigned long, end)
+ ),
+ TP_fast_assign(__entry->ncpus = cpumask_weight(cpus);
+ __entry->mm = info->mm;
+ __entry->addr = info->start;
+ __entry->end = info->end;
+ ),
+ TP_printk("ncpus %d mm %p addr %lx, end %lx",
+ __entry->ncpus, __entry->mm,
+ __entry->addr, __entry->end)
+ );
+
+#endif /* CONFIG_HYPERV */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH asm/trace/
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE hyperv
+#endif /* _TRACE_HYPERV_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
index 127ddad..7032f4d 100644
--- a/arch/x86/include/uapi/asm/hyperv.h
+++ b/arch/x86/include/uapi/asm/hyperv.h
@@ -149,6 +149,9 @@
*/
#define HV_X64_DEPRECATING_AEOI_RECOMMENDED (1 << 9)
+/* Recommend using the newer ExProcessorMasks interface */
+#define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED (1 << 11)
+
/*
* HV_VP_SET available
*/
@@ -242,7 +245,11 @@
(~((1ull << HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT) - 1))
/* Declare the various hypercall operations. */
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE 0x0002
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST 0x0003
#define HVCALL_NOTIFY_LONG_SPIN_WAIT 0x0008
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX 0x0013
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX 0x0014
#define HVCALL_POST_MESSAGE 0x005c
#define HVCALL_SIGNAL_EVENT 0x005d
@@ -259,6 +266,16 @@
#define HV_PROCESSOR_POWER_STATE_C2 2
#define HV_PROCESSOR_POWER_STATE_C3 3
+#define HV_FLUSH_ALL_PROCESSORS BIT(0)
+#define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES BIT(1)
+#define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY BIT(2)
+#define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT BIT(3)
+
+enum HV_GENERIC_SET_FORMAT {
+ HV_GENERIC_SET_SPARCE_4K,
+ HV_GENERIC_SET_ALL,
+};
+
/* hypercall status code */
#define HV_STATUS_SUCCESS 0
#define HV_STATUS_INVALID_HYPERCALL_CODE 2
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index fbafd24..3b3f713 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -250,6 +250,7 @@
* Setup the hook to get control post apic initialization.
*/
x86_platform.apic_post_init = hyperv_init;
+ hyperv_setup_mmu_ops();
#endif
}
diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c
index 5a18aed..b901ece 100644
--- a/arch/x86/pci/intel_mid_pci.c
+++ b/arch/x86/pci/intel_mid_pci.c
@@ -215,16 +215,23 @@
struct irq_alloc_info info;
int polarity;
int ret;
+ u8 gsi;
if (dev->irq_managed && dev->irq > 0)
return 0;
+ ret = pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &gsi);
+ if (ret < 0) {
+ dev_warn(&dev->dev, "Failed to read interrupt line: %d\n", ret);
+ return ret;
+ }
+
switch (intel_mid_identify_cpu()) {
case INTEL_MID_CPU_CHIP_TANGIER:
polarity = IOAPIC_POL_HIGH;
/* Special treatment for IRQ0 */
- if (dev->irq == 0) {
+ if (gsi == 0) {
/*
* Skip HS UART common registers device since it has
* IRQ0 assigned and not used by the kernel.
@@ -253,10 +260,11 @@
* MRST only have IOAPIC, the PCI irq lines are 1:1 mapped to
* IOAPIC RTE entries, so we just enable RTE for the device.
*/
- ret = mp_map_gsi_to_irq(dev->irq, IOAPIC_MAP_ALLOC, &info);
+ ret = mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC, &info);
if (ret < 0)
return ret;
+ dev->irq = ret;
dev->irq_managed = 1;
return 0;
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_bt.c b/arch/x86/platform/intel-mid/device_libs/platform_bt.c
index 5a0483e..dc036e5 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_bt.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_bt.c
@@ -60,7 +60,7 @@
return 0;
}
-static struct bt_sfi_data tng_bt_sfi_data __initdata = {
+static const struct bt_sfi_data tng_bt_sfi_data __initdata = {
.setup = tng_bt_sfi_setup,
};
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c
index 9e304e2..4f5fa65 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c
@@ -30,13 +30,13 @@
{
struct irq_alloc_info info;
struct intel_mid_wdt_pdata *pdata = pdev->dev.platform_data;
- int gsi, irq;
+ int gsi = TANGIER_EXT_TIMER0_MSI;
+ int irq;
if (!pdata)
return -EINVAL;
/* IOAPIC builds identity mapping between GSI and IRQ on MID */
- gsi = pdata->irq;
ioapic_set_alloc_attr(&info, cpu_to_node(0), 1, 0);
irq = mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC, &info);
if (irq < 0) {
@@ -44,11 +44,11 @@
return irq;
}
+ pdata->irq = irq;
return 0;
}
static struct intel_mid_wdt_pdata tangier_pdata = {
- .irq = TANGIER_EXT_TIMER0_MSI,
.probe = tangier_probe,
};
diff --git a/arch/x86/platform/intel-mid/intel-mid.c b/arch/x86/platform/intel-mid/intel-mid.c
index 12a2725..86676ce 100644
--- a/arch/x86/platform/intel-mid/intel-mid.c
+++ b/arch/x86/platform/intel-mid/intel-mid.c
@@ -183,6 +183,7 @@
x86_init.timers.timer_init = intel_mid_time_init;
x86_init.timers.setup_percpu_clockev = x86_init_noop;
+ x86_init.timers.wallclock_init = intel_mid_rtc_init;
x86_init.irqs.pre_vector_init = x86_init_noop;
@@ -191,7 +192,6 @@
x86_cpuinit.setup_percpu_clockev = apbt_setup_secondary_clock;
x86_platform.calibrate_tsc = intel_mid_calibrate_tsc;
- x86_init.timers.wallclock_init = intel_mid_rtc_init;
x86_platform.get_nmi_reason = intel_mid_get_nmi_reason;
x86_init.pci.init = intel_mid_pci_init;
diff --git a/arch/x86/platform/intel-mid/pwr.c b/arch/x86/platform/intel-mid/pwr.c
index ef03852..49ec5b9 100644
--- a/arch/x86/platform/intel-mid/pwr.c
+++ b/arch/x86/platform/intel-mid/pwr.c
@@ -444,7 +444,7 @@
static int pnw_set_initial_state(struct mid_pwr *pwr)
{
/* On Penwell SRAM must stay powered on */
- const u32 states[] = {
+ static const u32 states[] = {
0xf00fffff, /* PM_SSC(0) */
0xffffffff, /* PM_SSC(1) */
0xffffffff, /* PM_SSC(2) */
@@ -455,7 +455,7 @@
static int tng_set_initial_state(struct mid_pwr *pwr)
{
- const u32 states[] = {
+ static const u32 states[] = {
0xffffffff, /* PM_SSC(0) */
0xffffffff, /* PM_SSC(1) */
0xffffffff, /* PM_SSC(2) */
diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig
index c29cd53..50b89ea 100644
--- a/drivers/hv/Kconfig
+++ b/drivers/hv/Kconfig
@@ -3,6 +3,7 @@
config HYPERV
tristate "Microsoft Hyper-V client drivers"
depends on X86 && ACPI && PCI && X86_LOCAL_APIC && HYPERVISOR_GUEST
+ select PARAVIRT
help
Select this option to run Linux as a Hyper-V client operating
system.
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index 968af17..060df71 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -603,7 +603,7 @@
*/
channel->numa_node = 0;
channel->target_cpu = 0;
- channel->target_vp = hv_context.vp_index[0];
+ channel->target_vp = hv_cpu_number_to_vp_number(0);
return;
}
@@ -687,7 +687,7 @@
}
channel->target_cpu = cur_cpu;
- channel->target_vp = hv_context.vp_index[cur_cpu];
+ channel->target_vp = hv_cpu_number_to_vp_number(cur_cpu);
}
static void vmbus_wait_for_unload(void)
@@ -809,21 +809,12 @@
/*
* Setup state for signalling the host.
*/
- newchannel->sig_event = (struct hv_input_signal_event *)
- (ALIGN((unsigned long)
- &newchannel->sig_buf,
- HV_HYPERCALL_PARAM_ALIGN));
-
- newchannel->sig_event->connectionid.asu32 = 0;
- newchannel->sig_event->connectionid.u.id = VMBUS_EVENT_CONNECTION_ID;
- newchannel->sig_event->flag_number = 0;
- newchannel->sig_event->rsvdz = 0;
+ newchannel->sig_event = VMBUS_EVENT_CONNECTION_ID;
if (vmbus_proto_version != VERSION_WS2008) {
newchannel->is_dedicated_interrupt =
(offer->is_dedicated_interrupt != 0);
- newchannel->sig_event->connectionid.u.id =
- offer->connection_id;
+ newchannel->sig_event = offer->connection_id;
}
memcpy(&newchannel->offermsg, offer,
@@ -1251,8 +1242,7 @@
return outgoing_channel;
}
- cur_cpu = hv_context.vp_index[get_cpu()];
- put_cpu();
+ cur_cpu = hv_cpu_number_to_vp_number(smp_processor_id());
list_for_each_safe(cur, tmp, &primary->sc_list) {
cur_channel = list_entry(cur, struct vmbus_channel, sc_list);
if (cur_channel->state != CHANNEL_OPENED_STATE)
diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index 59c11ff..f41901f 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -32,6 +32,8 @@
#include <linux/hyperv.h>
#include <linux/export.h>
#include <asm/hyperv.h>
+#include <asm/mshyperv.h>
+
#include "hyperv_vmbus.h"
@@ -94,7 +96,8 @@
* the CPU attempting to connect may not be CPU 0.
*/
if (version >= VERSION_WIN8_1) {
- msg->target_vcpu = hv_context.vp_index[smp_processor_id()];
+ msg->target_vcpu =
+ hv_cpu_number_to_vp_number(smp_processor_id());
vmbus_connection.connect_cpu = smp_processor_id();
} else {
msg->target_vcpu = 0;
@@ -406,6 +409,6 @@
if (!channel->is_dedicated_interrupt)
vmbus_send_interrupt(child_relid);
- hv_do_hypercall(HVCALL_SIGNAL_EVENT, channel->sig_event, NULL);
+ hv_do_fast_hypercall8(HVCALL_SIGNAL_EVENT, channel->sig_event);
}
EXPORT_SYMBOL_GPL(vmbus_set_event);
diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index 2ea1220..8267439 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -234,7 +234,6 @@
union hv_synic_siefp siefp;
union hv_synic_sint shared_sint;
union hv_synic_scontrol sctrl;
- u64 vp_index;
/* Setup the Synic's message page */
hv_get_simp(simp.as_uint64);
@@ -276,14 +275,6 @@
hv_context.synic_initialized = true;
/*
- * Setup the mapping between Hyper-V's notion
- * of cpuid and Linux' notion of cpuid.
- * This array will be indexed using Linux cpuid.
- */
- hv_get_vp_index(vp_index);
- hv_context.vp_index[cpu] = (u32)vp_index;
-
- /*
* Register the per-cpu clockevent source.
*/
if (ms_hyperv.features & HV_X64_MSR_SYNTIMER_AVAILABLE)
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index 1b6a5e0..49569f8 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -229,17 +229,6 @@
struct hv_per_cpu_context __percpu *cpu_context;
/*
- * Hypervisor's notion of virtual processor ID is different from
- * Linux' notion of CPU ID. This information can only be retrieved
- * in the context of the calling CPU. Setup a map for easy access
- * to this information:
- *
- * vp_index[a] is the Hyper-V's processor ID corresponding to
- * Linux cpuid 'a'.
- */
- u32 vp_index[NR_CPUS];
-
- /*
* To manage allocations in a NUMA node.
* Array indexed by numa node ID.
*/
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 43160a2..a9d49f6 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -1454,23 +1454,6 @@
}
EXPORT_SYMBOL_GPL(vmbus_free_mmio);
-/**
- * vmbus_cpu_number_to_vp_number() - Map CPU to VP.
- * @cpu_number: CPU number in Linux terms
- *
- * This function returns the mapping between the Linux processor
- * number and the hypervisor's virtual processor number, useful
- * in making hypercalls and such that talk about specific
- * processors.
- *
- * Return: Virtual processor number in Hyper-V terms
- */
-int vmbus_cpu_number_to_vp_number(int cpu_number)
-{
- return hv_context.vp_index[cpu_number];
-}
-EXPORT_SYMBOL_GPL(vmbus_cpu_number_to_vp_number);
-
static int vmbus_acpi_add(struct acpi_device *device)
{
acpi_status result;
diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c
index 415dcc6..aba0414 100644
--- a/drivers/pci/host/pci-hyperv.c
+++ b/drivers/pci/host/pci-hyperv.c
@@ -562,52 +562,6 @@
static void get_hvpcibus(struct hv_pcibus_device *hv_pcibus);
static void put_hvpcibus(struct hv_pcibus_device *hv_pcibus);
-
-/*
- * Temporary CPU to vCPU mapping to address transitioning
- * vmbus_cpu_number_to_vp_number() being migrated to
- * hv_cpu_number_to_vp_number() in a separate patch. Once that patch
- * has been picked up in the main line, remove this code here and use
- * the official code.
- */
-static struct hv_tmpcpumap
-{
- bool initialized;
- u32 vp_index[NR_CPUS];
-} hv_tmpcpumap;
-
-static void hv_tmpcpumap_init_cpu(void *_unused)
-{
- int cpu = smp_processor_id();
- u64 vp_index;
-
- hv_get_vp_index(vp_index);
-
- hv_tmpcpumap.vp_index[cpu] = vp_index;
-}
-
-static void hv_tmpcpumap_init(void)
-{
- if (hv_tmpcpumap.initialized)
- return;
-
- memset(hv_tmpcpumap.vp_index, -1, sizeof(hv_tmpcpumap.vp_index));
- on_each_cpu(hv_tmpcpumap_init_cpu, NULL, true);
- hv_tmpcpumap.initialized = true;
-}
-
-/**
- * hv_tmp_cpu_nr_to_vp_nr() - Convert Linux CPU nr to Hyper-V vCPU nr
- *
- * Remove once vmbus_cpu_number_to_vp_number() has been converted to
- * hv_cpu_number_to_vp_number() and replace callers appropriately.
- */
-static u32 hv_tmp_cpu_nr_to_vp_nr(int cpu)
-{
- return hv_tmpcpumap.vp_index[cpu];
-}
-
-
/**
* devfn_to_wslot() - Convert from Linux PCI slot to Windows
* @devfn: The Linux representation of PCI slot
@@ -971,7 +925,7 @@
var_size = 1 + HV_VP_SET_BANK_COUNT_MAX;
for_each_cpu_and(cpu, dest, cpu_online_mask) {
- cpu_vmbus = hv_tmp_cpu_nr_to_vp_nr(cpu);
+ cpu_vmbus = hv_cpu_number_to_vp_number(cpu);
if (cpu_vmbus >= HV_VP_SET_BANK_COUNT_MAX * 64) {
dev_err(&hbus->hdev->device,
@@ -986,7 +940,7 @@
} else {
for_each_cpu_and(cpu, dest, cpu_online_mask) {
params->int_target.vp_mask |=
- (1ULL << hv_tmp_cpu_nr_to_vp_nr(cpu));
+ (1ULL << hv_cpu_number_to_vp_number(cpu));
}
}
@@ -1063,7 +1017,7 @@
*/
cpu = cpumask_first_and(affinity, cpu_online_mask);
int_pkt->int_desc.processor_array[0] =
- hv_tmp_cpu_nr_to_vp_nr(cpu);
+ hv_cpu_number_to_vp_number(cpu);
int_pkt->int_desc.processor_count = 1;
return sizeof(*int_pkt);
@@ -2490,8 +2444,6 @@
return -ENOMEM;
hbus->state = hv_pcibus_init;
- hv_tmpcpumap_init();
-
/*
* The PCI bus "domain" is what is called "segment" in ACPI and
* other specs. Pull it from the instance ID, to get something
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index e4bbf7d..c458d7b 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -661,18 +661,6 @@
} u;
};
-/* Definition of the hv_signal_event hypercall input structure. */
-struct hv_input_signal_event {
- union hv_connection_id connectionid;
- u16 flag_number;
- u16 rsvdz;
-};
-
-struct hv_input_signal_event_buffer {
- u64 align8;
- struct hv_input_signal_event event;
-};
-
enum hv_numa_policy {
HV_BALANCED = 0,
HV_LOCALIZED,
@@ -754,8 +742,7 @@
} callback_mode;
bool is_dedicated_interrupt;
- struct hv_input_signal_event_buffer sig_buf;
- struct hv_input_signal_event *sig_event;
+ u64 sig_event;
/*
* Starting with win8, this field will be used to specify
@@ -1151,8 +1138,6 @@
resource_size_t size, resource_size_t align,
bool fb_overlap_ok);
void vmbus_free_mmio(resource_size_t start, resource_size_t size);
-int vmbus_cpu_number_to_vp_number(int cpu_number);
-u64 hv_do_hypercall(u64 control, void *input, void *output);
/*
* GUID definitions of various offer types - services offered to the guest.