perf, x86: Detect broken BIOSes that corrupt the PMU

Some BIOSes use PMU resources, which can cause various bugs:

 - Non-working or erratic PMU based statistics - the PMU can end up
   counting the wrong thing, resulting in misleading statistics

 - Profiling can stop working or it can profile the wrong thing

 - A non-working or erratic NMI watchdog that cannot be relied on

 - The kernel may disturb whatever thing the BIOS tries to use the
   PMU for - possibly causing hardware malfunction in extreme cases.

 - ... and other forms of potential misbehavior

Various forms of such misbehavior has been observed in practice - there are
BIOSes that just corrupt the PMU state, consequences be damned.

The PMU is a CPU resource that is handled by the kernel and the BIOS
stealing+corrupting it is not acceptable nor robust, so we detect it,
warn about it and further refuse to touch the PMU ourselves.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Jason Wessel <jason.wessel@windriver.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 817d2b1..ce27c54 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -375,15 +375,53 @@
 static bool check_hw_exists(void)
 {
 	u64 val, val_new = 0;
-	int ret = 0;
+	int i, reg, ret = 0;
 
+	/*
+	 * Check to see if the BIOS enabled any of the counters, if so
+	 * complain and bail.
+	 */
+	for (i = 0; i < x86_pmu.num_counters; i++) {
+		reg = x86_pmu.eventsel + i;
+		ret = rdmsrl_safe(reg, &val);
+		if (ret)
+			goto msr_fail;
+		if (val & ARCH_PERFMON_EVENTSEL_ENABLE)
+			goto bios_fail;
+	}
+
+	if (x86_pmu.num_counters_fixed) {
+		reg = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
+		ret = rdmsrl_safe(reg, &val);
+		if (ret)
+			goto msr_fail;
+		for (i = 0; i < x86_pmu.num_counters_fixed; i++) {
+			if (val & (0x03 << i*4))
+				goto bios_fail;
+		}
+	}
+
+	/*
+	 * Now write a value and read it back to see if it matches,
+	 * this is needed to detect certain hardware emulators (qemu/kvm)
+	 * that don't trap on the MSR access and always return 0s.
+	 */
 	val = 0xabcdUL;
-	ret |= checking_wrmsrl(x86_pmu.perfctr, val);
+	ret = checking_wrmsrl(x86_pmu.perfctr, val);
 	ret |= rdmsrl_safe(x86_pmu.perfctr, &val_new);
 	if (ret || val != val_new)
-		return false;
+		goto msr_fail;
 
 	return true;
+
+bios_fail:
+	printk(KERN_CONT "Broken BIOS detected, using software events only.\n");
+	printk(KERN_ERR FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n", reg, val);
+	return false;
+
+msr_fail:
+	printk(KERN_CONT "Broken PMU hardware detected, using software events only.\n");
+	return false;
 }
 
 static void reserve_ds_buffers(void);
@@ -1378,10 +1416,8 @@
 	pmu_check_apic();
 
 	/* sanity check that the hardware exists or is emulated */
-	if (!check_hw_exists()) {
-		pr_cont("Broken PMU hardware detected, software events only.\n");
+	if (!check_hw_exists())
 		return 0;
-	}
 
 	pr_cont("%s PMU driver.\n", x86_pmu.name);