Merge commit 'v2.6.27-rc5' into tip/oprofile

Conflicts:
	arch/x86/oprofile/nmi_int.c
diff --git a/arch/Kconfig b/arch/Kconfig
index 364c6da..0267bab 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -13,6 +13,20 @@
 
 	  If unsure, say N.
 
+config OPROFILE_IBS
+	bool "OProfile AMD IBS support (EXPERIMENTAL)"
+	default n
+	depends on OPROFILE && SMP && X86
+	help
+          Instruction-Based Sampling (IBS) is a new profiling
+          technique that provides rich, precise program performance
+          information. IBS was introduced with AMD Family 10h processors
+          (the quad-core AMD Opteron, “Barcelona”) to overcome
+          the limitations of conventional performance counter
+          sampling.
+
+	  If unsure, say N.
+
 config HAVE_OPROFILE
 	def_bool n
 
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index f88bd0d..0ff576d 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -646,6 +646,9 @@
  *
  * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and
  * MCE interrupts are supported. Thus MCE offset must be set to 0.
+ *
+ * If mask=1, the LVT entry does not generate interrupts, while mask=0
+ * enables the vector. See also the BKDGs.
  */
 
 #define APIC_EILVT_LVTOFF_MCE 0
@@ -669,6 +672,7 @@
 	setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask);
 	return APIC_EILVT_LVTOFF_IBS;
 }
+EXPORT_SYMBOL_GPL(setup_APIC_eilvt_ibs);
 
 /*
  * Local APIC start and shutdown
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index 446c062..57744f4 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -204,6 +204,9 @@
  *
  * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and
  * MCE interrupts are supported. Thus MCE offset must be set to 0.
+ *
+ * If mask=1, the LVT entry does not generate interrupts, while mask=0
+ * enables the vector. See also the BKDGs.
  */
 
 #define APIC_EILVT_LVTOFF_MCE 0
@@ -228,6 +231,7 @@
 	setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask);
 	return APIC_EILVT_LVTOFF_IBS;
 }
+EXPORT_SYMBOL_GPL(setup_APIC_eilvt_ibs);
 
 /*
  * Program the next event, relative to now
diff --git a/arch/x86/oprofile/Makefile b/arch/x86/oprofile/Makefile
index 30f3eb3..446902b 100644
--- a/arch/x86/oprofile/Makefile
+++ b/arch/x86/oprofile/Makefile
@@ -7,6 +7,6 @@
 		timer_int.o )
 
 oprofile-y				:= $(DRIVER_OBJS) init.o backtrace.o
-oprofile-$(CONFIG_X86_LOCAL_APIC) 	+= nmi_int.o op_model_athlon.o \
+oprofile-$(CONFIG_X86_LOCAL_APIC) 	+= nmi_int.o op_model_amd.o \
 					   op_model_ppro.o op_model_p4.o
 oprofile-$(CONFIG_X86_IO_APIC)		+= nmi_timer_int.o
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 0227694..d988574 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -1,10 +1,11 @@
 /**
  * @file nmi_int.c
  *
- * @remark Copyright 2002 OProfile authors
+ * @remark Copyright 2002-2008 OProfile authors
  * @remark Read the file COPYING
  *
  * @author John Levon <levon@movementarian.org>
+ * @author Robert Richter <robert.richter@amd.com>
  */
 
 #include <linux/init.h>
@@ -437,6 +438,7 @@
 	__u8 vendor = boot_cpu_data.x86_vendor;
 	__u8 family = boot_cpu_data.x86;
 	char *cpu_type;
+	int ret = 0;
 
 	if (!cpu_has_apic)
 		return -ENODEV;
@@ -449,19 +451,23 @@
 		default:
 			return -ENODEV;
 		case 6:
-			model = &op_athlon_spec;
+			model = &op_amd_spec;
 			cpu_type = "i386/athlon";
 			break;
 		case 0xf:
-			model = &op_athlon_spec;
+			model = &op_amd_spec;
 			/* Actually it could be i386/hammer too, but give
 			 user space a consistent name. */
 			cpu_type = "x86-64/hammer";
 			break;
 		case 0x10:
-			model = &op_athlon_spec;
+			model = &op_amd_spec;
 			cpu_type = "x86-64/family10";
 			break;
+		case 0x11:
+			model = &op_amd_spec;
+			cpu_type = "x86-64/family11h";
+			break;
 		}
 		break;
 
@@ -488,17 +494,24 @@
 		return -ENODEV;
 	}
 
-	init_sysfs();
 #ifdef CONFIG_SMP
 	register_cpu_notifier(&oprofile_cpu_nb);
 #endif
-	using_nmi = 1;
+	/* default values, can be overwritten by model */
 	ops->create_files = nmi_create_files;
 	ops->setup = nmi_setup;
 	ops->shutdown = nmi_shutdown;
 	ops->start = nmi_start;
 	ops->stop = nmi_stop;
 	ops->cpu_type = cpu_type;
+
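+	/* give the model a chance to override the defaults (e.g. for IBS) */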
+	if (model->init)
+		ret = model->init(ops);
+	if (ret)
+		return ret;
+
+	init_sysfs();
+	using_nmi = 1;
 	printk(KERN_INFO "oprofile: using NMI interrupt.\n");
 	return 0;
 }
@@ -511,4 +524,6 @@
 		unregister_cpu_notifier(&oprofile_cpu_nb);
 #endif
 	}
+	if (model->exit)
+		model->exit();
 }
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
new file mode 100644
index 0000000..d9faf60
--- /dev/null
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -0,0 +1,543 @@
+/*
+ * @file op_model_amd.c
+ * athlon / K7 / K8 / Family 10h model-specific MSR operations
+ *
+ * @remark Copyright 2002-2008 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon
+ * @author Philippe Elie
+ * @author Graydon Hoare
+ * @author Robert Richter <robert.richter@amd.com>
+ * @author Barry Kasindorf
+*/
+
+#include <linux/oprofile.h>
+#include <linux/device.h>
+#include <linux/pci.h>
+
+#include <asm/ptrace.h>
+#include <asm/msr.h>
+#include <asm/nmi.h>
+
+#include "op_x86_model.h"
+#include "op_counter.h"
+
+#define NUM_COUNTERS 4
+#define NUM_CONTROLS 4
+
+#define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0)
+#define CTR_READ(l, h, msrs, c) do {rdmsr(msrs->counters[(c)].addr, (l), (h)); } while (0)
+#define CTR_WRITE(l, msrs, c) do {wrmsr(msrs->counters[(c)].addr, -(unsigned int)(l), -1); } while (0)
+#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
+
+#define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0)
+#define CTRL_READ(l, h, msrs, c) do {rdmsr(msrs->controls[(c)].addr, (l), (h)); } while (0)
+#define CTRL_WRITE(l, h, msrs, c) do {wrmsr(msrs->controls[(c)].addr, (l), (h)); } while (0)
+#define CTRL_SET_ACTIVE(n) (n |= (1<<22))
+#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22))
+#define CTRL_CLEAR_LO(x) (x &= (1<<21))
+#define CTRL_CLEAR_HI(x) (x &= 0xfffffcf0)
+#define CTRL_SET_ENABLE(val) (val |= 1<<20)
+#define CTRL_SET_USR(val, u) (val |= ((u & 1) << 16))
+#define CTRL_SET_KERN(val, k) (val |= ((k & 1) << 17))
+#define CTRL_SET_UM(val, m) (val |= (m << 8))
+#define CTRL_SET_EVENT_LOW(val, e) (val |= (e & 0xff))
+#define CTRL_SET_EVENT_HIGH(val, e) (val |= ((e >> 8) & 0xf))
+#define CTRL_SET_HOST_ONLY(val, h) (val |= ((h & 1) << 9))
+#define CTRL_SET_GUEST_ONLY(val, h) (val |= ((h & 1) << 8))
+
+static unsigned long reset_value[NUM_COUNTERS];
+
+#ifdef CONFIG_OPROFILE_IBS
+
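+/*
+ * The IBS control registers are 64-bit MSRs accessed as 32-bit low/high
+ * halves via rdmsr/wrmsr, so e.g. bit 49 of IbsFetchCtl appears here as
+ * bit 17 of the high word.
+ */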
+/* IbsFetchCtl bits/masks */
+#define IBS_FETCH_HIGH_VALID_BIT	(1UL << 17)	/* bit 49 */
+#define IBS_FETCH_HIGH_ENABLE		(1UL << 16)	/* bit 48 */
+#define IBS_FETCH_LOW_MAX_CNT_MASK	0x0000FFFFUL	/* MaxCnt mask */
+
+/* IbsOpCtl bits */
+#define IBS_OP_LOW_VALID_BIT		(1ULL<<18)	/* bit 18 */
+#define IBS_OP_LOW_ENABLE		(1ULL<<17)	/* bit 17 */
+
+/* Codes used in cpu_buffer.c */
+/* This produces duplicate code, needs to be fixed */
+#define IBS_FETCH_BEGIN 3
+#define IBS_OP_BEGIN    4
+
+/* The function interface needs to be fixed, something like an "add
+   data" call. It should then be added to linux/oprofile.h. */
+extern void oprofile_add_ibs_sample(struct pt_regs *const regs,
+				    unsigned int * const ibs_sample, u8 code);
+
+struct ibs_fetch_sample {
+	/* MSRC001_1031 IBS Fetch Linear Address Register */
+	unsigned int ibs_fetch_lin_addr_low;
+	unsigned int ibs_fetch_lin_addr_high;
+	/* MSRC001_1030 IBS Fetch Control Register */
+	unsigned int ibs_fetch_ctl_low;
+	unsigned int ibs_fetch_ctl_high;
+	/* MSRC001_1032 IBS Fetch Physical Address Register */
+	unsigned int ibs_fetch_phys_addr_low;
+	unsigned int ibs_fetch_phys_addr_high;
+};
+
+struct ibs_op_sample {
+	/* MSRC001_1034 IBS Op Logical Address Register (IbsRIP) */
+	unsigned int ibs_op_rip_low;
+	unsigned int ibs_op_rip_high;
+	/* MSRC001_1035 IBS Op Data Register */
+	unsigned int ibs_op_data1_low;
+	unsigned int ibs_op_data1_high;
+	/* MSRC001_1036 IBS Op Data 2 Register */
+	unsigned int ibs_op_data2_low;
+	unsigned int ibs_op_data2_high;
+	/* MSRC001_1037 IBS Op Data 3 Register */
+	unsigned int ibs_op_data3_low;
+	unsigned int ibs_op_data3_high;
+	/* MSRC001_1038 IBS DC Linear Address Register (IbsDcLinAd) */
+	unsigned int ibs_dc_linear_low;
+	unsigned int ibs_dc_linear_high;
+	/* MSRC001_1039 IBS DC Physical Address Register (IbsDcPhysAd) */
+	unsigned int ibs_dc_phys_low;
+	unsigned int ibs_dc_phys_high;
+};
+
+/*
+ * uninitialize the APIC for the IBS interrupts if needed on AMD Family10h+
+ */
+static void clear_ibs_nmi(void);
+
+static int ibs_allowed;	/* AMD Family10h and later */
+
+struct op_ibs_config {
+	unsigned long op_enabled;
+	unsigned long fetch_enabled;
+	unsigned long max_cnt_fetch;
+	unsigned long max_cnt_op;
+	unsigned long rand_en;
+	unsigned long dispatched_ops;
+};
+
+static struct op_ibs_config ibs_config;
+
+#endif
+
+/* functions for op_amd_spec */
+
+static void op_amd_fill_in_addresses(struct op_msrs * const msrs)
+{
+	int i;
+
+	for (i = 0; i < NUM_COUNTERS; i++) {
+		if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i))
+			msrs->counters[i].addr = MSR_K7_PERFCTR0 + i;
+		else
+			msrs->counters[i].addr = 0;
+	}
+
+	for (i = 0; i < NUM_CONTROLS; i++) {
+		if (reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i))
+			msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i;
+		else
+			msrs->controls[i].addr = 0;
+	}
+}
+
+
+static void op_amd_setup_ctrs(struct op_msrs const * const msrs)
+{
+	unsigned int low, high;
+	int i;
+
+	/* clear all counters */
+	for (i = 0 ; i < NUM_CONTROLS; ++i) {
+		if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
+			continue;
+		CTRL_READ(low, high, msrs, i);
+		CTRL_CLEAR_LO(low);
+		CTRL_CLEAR_HI(high);
+		CTRL_WRITE(low, high, msrs, i);
+	}
+
+	/* avoid a false detection of ctr overflows in NMI handler */
+	for (i = 0; i < NUM_COUNTERS; ++i) {
+		if (unlikely(!CTR_IS_RESERVED(msrs, i)))
+			continue;
+		CTR_WRITE(1, msrs, i);
+	}
+
+	/* enable active counters */
+	for (i = 0; i < NUM_COUNTERS; ++i) {
+		if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) {
+			reset_value[i] = counter_config[i].count;
+
+			CTR_WRITE(counter_config[i].count, msrs, i);
+
+			CTRL_READ(low, high, msrs, i);
+			CTRL_CLEAR_LO(low);
+			CTRL_CLEAR_HI(high);
+			CTRL_SET_ENABLE(low);
+			CTRL_SET_USR(low, counter_config[i].user);
+			CTRL_SET_KERN(low, counter_config[i].kernel);
+			CTRL_SET_UM(low, counter_config[i].unit_mask);
+			CTRL_SET_EVENT_LOW(low, counter_config[i].event);
+			CTRL_SET_EVENT_HIGH(high, counter_config[i].event);
+			CTRL_SET_HOST_ONLY(high, 0);
+			CTRL_SET_GUEST_ONLY(high, 0);
+
+			CTRL_WRITE(low, high, msrs, i);
+		} else {
+			reset_value[i] = 0;
+		}
+	}
+}
+
+#ifdef CONFIG_OPROFILE_IBS
+
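+/*
+ * Check the IBS fetch and op units for a completed sample. If the valid
+ * bit is set, the sample MSRs are copied out and handed to the CPU
+ * buffer, and the unit is re-armed by clearing the valid bit and setting
+ * the enable bit again.
+ */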
+static inline int
+op_amd_handle_ibs(struct pt_regs * const regs,
+		  struct op_msrs const * const msrs)
+{
+	unsigned int low, high;
+	struct ibs_fetch_sample ibs_fetch;
+	struct ibs_op_sample ibs_op;
+
+	if (!ibs_allowed)
+		return 1;
+
+	if (ibs_config.fetch_enabled) {
+		rdmsr(MSR_AMD64_IBSFETCHCTL, low, high);
+		if (high & IBS_FETCH_HIGH_VALID_BIT) {
+			ibs_fetch.ibs_fetch_ctl_high = high;
+			ibs_fetch.ibs_fetch_ctl_low = low;
+			rdmsr(MSR_AMD64_IBSFETCHLINAD, low, high);
+			ibs_fetch.ibs_fetch_lin_addr_high = high;
+			ibs_fetch.ibs_fetch_lin_addr_low = low;
+			rdmsr(MSR_AMD64_IBSFETCHPHYSAD, low, high);
+			ibs_fetch.ibs_fetch_phys_addr_high = high;
+			ibs_fetch.ibs_fetch_phys_addr_low = low;
+
+			oprofile_add_ibs_sample(regs,
+						(unsigned int *)&ibs_fetch,
+						IBS_FETCH_BEGIN);
+
+			/* reenable the IRQ */
+			rdmsr(MSR_AMD64_IBSFETCHCTL, low, high);
+			high &= ~IBS_FETCH_HIGH_VALID_BIT;
+			high |= IBS_FETCH_HIGH_ENABLE;
+			low &= IBS_FETCH_LOW_MAX_CNT_MASK;
+			wrmsr(MSR_AMD64_IBSFETCHCTL, low, high);
+		}
+	}
+
+	if (ibs_config.op_enabled) {
+		rdmsr(MSR_AMD64_IBSOPCTL, low, high);
+		if (low & IBS_OP_LOW_VALID_BIT) {
+			rdmsr(MSR_AMD64_IBSOPRIP, low, high);
+			ibs_op.ibs_op_rip_low = low;
+			ibs_op.ibs_op_rip_high = high;
+			rdmsr(MSR_AMD64_IBSOPDATA, low, high);
+			ibs_op.ibs_op_data1_low = low;
+			ibs_op.ibs_op_data1_high = high;
+			rdmsr(MSR_AMD64_IBSOPDATA2, low, high);
+			ibs_op.ibs_op_data2_low = low;
+			ibs_op.ibs_op_data2_high = high;
+			rdmsr(MSR_AMD64_IBSOPDATA3, low, high);
+			ibs_op.ibs_op_data3_low = low;
+			ibs_op.ibs_op_data3_high = high;
+			rdmsr(MSR_AMD64_IBSDCLINAD, low, high);
+			ibs_op.ibs_dc_linear_low = low;
+			ibs_op.ibs_dc_linear_high = high;
+			rdmsr(MSR_AMD64_IBSDCPHYSAD, low, high);
+			ibs_op.ibs_dc_phys_low = low;
+			ibs_op.ibs_dc_phys_high = high;
+
+			oprofile_add_ibs_sample(regs,
+						(unsigned int *)&ibs_op,
+						IBS_OP_BEGIN);
+			/* reenable the IRQ */
+			rdmsr(MSR_AMD64_IBSOPCTL, low, high);
+			high = 0;
+			low &= ~IBS_OP_LOW_VALID_BIT;
+			low |= IBS_OP_LOW_ENABLE;
+			wrmsr(MSR_AMD64_IBSOPCTL, low, high);
+		}
+	}
+
+	return 1;
+}
+
+#endif
+
+static int op_amd_check_ctrs(struct pt_regs * const regs,
+			     struct op_msrs const * const msrs)
+{
+	unsigned int low, high;
+	int i;
+
+	for (i = 0 ; i < NUM_COUNTERS; ++i) {
+		if (!reset_value[i])
+			continue;
+		CTR_READ(low, high, msrs, i);
+		if (CTR_OVERFLOWED(low)) {
+			oprofile_add_sample(regs, i);
+			CTR_WRITE(reset_value[i], msrs, i);
+		}
+	}
+
+#ifdef CONFIG_OPROFILE_IBS
+	op_amd_handle_ibs(regs, msrs);
+#endif
+
+	/* See op_model_ppro.c */
+	return 1;
+}
+
+static void op_amd_start(struct op_msrs const * const msrs)
+{
+	unsigned int low, high;
+	int i;
+	for (i = 0 ; i < NUM_COUNTERS ; ++i) {
+		if (reset_value[i]) {
+			CTRL_READ(low, high, msrs, i);
+			CTRL_SET_ACTIVE(low);
+			CTRL_WRITE(low, high, msrs, i);
+		}
+	}
+
+#ifdef CONFIG_OPROFILE_IBS
+	if (ibs_allowed && ibs_config.fetch_enabled) {
+		low = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF;
+		high = IBS_FETCH_HIGH_ENABLE;
+		wrmsr(MSR_AMD64_IBSFETCHCTL, low, high);
+	}
+
+	if (ibs_allowed && ibs_config.op_enabled) {
+		low = ((ibs_config.max_cnt_op >> 4) & 0xFFFF) + IBS_OP_LOW_ENABLE;
+		high = 0;
+		wrmsr(MSR_AMD64_IBSOPCTL, low, high);
+	}
+#endif
+}
+
+
+static void op_amd_stop(struct op_msrs const * const msrs)
+{
+	unsigned int low, high;
+	int i;
+
+	/* Subtle: stop on all counters to avoid race with
+	 * setting our pm callback */
+	for (i = 0 ; i < NUM_COUNTERS ; ++i) {
+		if (!reset_value[i])
+			continue;
+		CTRL_READ(low, high, msrs, i);
+		CTRL_SET_INACTIVE(low);
+		CTRL_WRITE(low, high, msrs, i);
+	}
+
+#ifdef CONFIG_OPROFILE_IBS
+	if (ibs_allowed && ibs_config.fetch_enabled) {
+		low = 0;		/* clear max count and enable */
+		high = 0;
+		wrmsr(MSR_AMD64_IBSFETCHCTL, low, high);
+	}
+
+	if (ibs_allowed && ibs_config.op_enabled) {
+		low = 0;		/* clear max count and enable */
+		high = 0;
+		wrmsr(MSR_AMD64_IBSOPCTL, low, high);
+	}
+#endif
+}
+
+static void op_amd_shutdown(struct op_msrs const * const msrs)
+{
+	int i;
+
+	for (i = 0 ; i < NUM_COUNTERS ; ++i) {
+		if (CTR_IS_RESERVED(msrs, i))
+			release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
+	}
+	for (i = 0 ; i < NUM_CONTROLS ; ++i) {
+		if (CTRL_IS_RESERVED(msrs, i))
+			release_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
+	}
+}
+
+#ifndef CONFIG_OPROFILE_IBS
+
+/* no IBS support */
+
+static int op_amd_init(struct oprofile_operations *ops)
+{
+	return 0;
+}
+
+static void op_amd_exit(void) {}
+
+#else
+
+static u8 ibs_eilvt_off;
+
+static inline void apic_init_ibs_nmi_per_cpu(void *arg)
+{
+	ibs_eilvt_off = setup_APIC_eilvt_ibs(0, APIC_EILVT_MSG_NMI, 0);
+}
+
+static inline void apic_clear_ibs_nmi_per_cpu(void *arg)
+{
+	setup_APIC_eilvt_ibs(0, APIC_EILVT_MSG_FIX, 1);
+}
+
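+/*
+ * Set up the IBS extended LVT entry on each CPU and publish the chosen
+ * LVT offset in the IBS control register (IBSCTL, PCI config offset
+ * 0x1cc) of every node's northbridge, verifying each write.
+ */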
+static int pfm_amd64_setup_eilvt(void)
+{
+#define IBSCTL_LVTOFFSETVAL		(1 << 8)
+#define IBSCTL				0x1cc
+	struct pci_dev *cpu_cfg;
+	int nodes;
+	u32 value = 0;
+
+	/* per CPU setup */
+	on_each_cpu(apic_init_ibs_nmi_per_cpu, NULL, 1);
+
+	nodes = 0;
+	cpu_cfg = NULL;
+	do {
+		cpu_cfg = pci_get_device(PCI_VENDOR_ID_AMD,
+					 PCI_DEVICE_ID_AMD_10H_NB_MISC,
+					 cpu_cfg);
+		if (!cpu_cfg)
+			break;
+		++nodes;
+		pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off
+				       | IBSCTL_LVTOFFSETVAL);
+		pci_read_config_dword(cpu_cfg, IBSCTL, &value);
+		if (value != (ibs_eilvt_off | IBSCTL_LVTOFFSETVAL)) {
+			printk(KERN_DEBUG "Failed to set up IBS LVT offset, "
+				"IBSCTL = 0x%08x\n", value);
+			return 1;
+		}
+	} while (1);
+
+	if (!nodes) {
+		printk(KERN_DEBUG "No CPU node configured for IBS\n");
+		return 1;
+	}
+
+#ifdef CONFIG_NUMA
+	/* Sanity check */
+	/* Works only for 64bit with proper numa implementation. */
+	if (nodes != num_possible_nodes()) {
+		printk(KERN_DEBUG "Failed to set up CPU node(s) for IBS, "
+			"found: %d, expected %d\n",
+			nodes, num_possible_nodes());
+		return 1;
+	}
+#endif
+	return 0;
+}
+
+/*
+ * initialize the APIC for the IBS interrupts
+ * if available (AMD Family10h rev B0 and later)
+ */
+static void setup_ibs(void)
+{
+	ibs_allowed = boot_cpu_has(X86_FEATURE_IBS);
+
+	if (!ibs_allowed)
+		return;
+
+	if (pfm_amd64_setup_eilvt()) {
+		ibs_allowed = 0;
+		return;
+	}
+
+	printk(KERN_INFO "oprofile: AMD IBS detected\n");
+}
+
+
+/*
+ * uninitialize the APIC for the IBS interrupts if needed on AMD Family10h
+ * rev B0 and later */
+static void clear_ibs_nmi(void)
+{
+	if (ibs_allowed)
+		on_each_cpu(apic_clear_ibs_nmi_per_cpu, NULL, 1);
+}
+
+static int (*create_arch_files)(struct super_block *sb, struct dentry *root);
+
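+/*
+ * Add the model specific oprofilefs control files on top of the
+ * architecture's default ones: an ibs_fetch/ and an ibs_uops/ directory
+ * with enable, max_count and related entries.
+ */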
+static int setup_ibs_files(struct super_block *sb, struct dentry *root)
+{
+	char buf[12];
+	struct dentry *dir;
+	int ret = 0;
+
+	/* architecture specific files */
+	if (create_arch_files)
+		ret = create_arch_files(sb, root);
+
+	if (ret)
+		return ret;
+
+	if (!ibs_allowed)
+		return ret;
+
+	/* model specific files */
+
+	/* setup some reasonable defaults */
+	ibs_config.max_cnt_fetch = 250000;
+	ibs_config.fetch_enabled = 0;
+	ibs_config.max_cnt_op = 250000;
+	ibs_config.op_enabled = 0;
+	ibs_config.dispatched_ops = 1;
+	snprintf(buf, sizeof(buf), "ibs_fetch");
+	dir = oprofilefs_mkdir(sb, root, buf);
+	oprofilefs_create_ulong(sb, dir, "rand_enable",
+				&ibs_config.rand_en);
+	oprofilefs_create_ulong(sb, dir, "enable",
+		&ibs_config.fetch_enabled);
+	oprofilefs_create_ulong(sb, dir, "max_count",
+		&ibs_config.max_cnt_fetch);
+	snprintf(buf, sizeof(buf), "ibs_uops");
+	dir = oprofilefs_mkdir(sb, root, buf);
+	oprofilefs_create_ulong(sb, dir, "enable",
+		&ibs_config.op_enabled);
+	oprofilefs_create_ulong(sb, dir, "max_count",
+		&ibs_config.max_cnt_op);
+	oprofilefs_create_ulong(sb, dir, "dispatched_ops",
+		&ibs_config.dispatched_ops);
+
+	return 0;
+}
+
+static int op_amd_init(struct oprofile_operations *ops)
+{
+	setup_ibs();
+	create_arch_files = ops->create_files;
+	ops->create_files = setup_ibs_files;
+	return 0;
+}
+
+static void op_amd_exit(void)
+{
+	clear_ibs_nmi();
+}
+
+#endif
+
+struct op_x86_model_spec const op_amd_spec = {
+	.init = op_amd_init,
+	.exit = op_amd_exit,
+	.num_counters = NUM_COUNTERS,
+	.num_controls = NUM_CONTROLS,
+	.fill_in_addresses = &op_amd_fill_in_addresses,
+	.setup_ctrs = &op_amd_setup_ctrs,
+	.check_ctrs = &op_amd_check_ctrs,
+	.start = &op_amd_start,
+	.stop = &op_amd_stop,
+	.shutdown = &op_amd_shutdown
+};
diff --git a/arch/x86/oprofile/op_model_athlon.c b/arch/x86/oprofile/op_model_athlon.c
deleted file mode 100644
index 3d53487..0000000
--- a/arch/x86/oprofile/op_model_athlon.c
+++ /dev/null
@@ -1,190 +0,0 @@
-/*
- * @file op_model_athlon.h
- * athlon / K7 / K8 / Family 10h model-specific MSR operations
- *
- * @remark Copyright 2002 OProfile authors
- * @remark Read the file COPYING
- *
- * @author John Levon
- * @author Philippe Elie
- * @author Graydon Hoare
- */
-
-#include <linux/oprofile.h>
-#include <asm/ptrace.h>
-#include <asm/msr.h>
-#include <asm/nmi.h>
-
-#include "op_x86_model.h"
-#include "op_counter.h"
-
-#define NUM_COUNTERS 4
-#define NUM_CONTROLS 4
-
-#define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0)
-#define CTR_READ(l, h, msrs, c) do {rdmsr(msrs->counters[(c)].addr, (l), (h)); } while (0)
-#define CTR_WRITE(l, msrs, c) do {wrmsr(msrs->counters[(c)].addr, -(unsigned int)(l), -1); } while (0)
-#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
-
-#define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0)
-#define CTRL_READ(l, h, msrs, c) do {rdmsr(msrs->controls[(c)].addr, (l), (h)); } while (0)
-#define CTRL_WRITE(l, h, msrs, c) do {wrmsr(msrs->controls[(c)].addr, (l), (h)); } while (0)
-#define CTRL_SET_ACTIVE(n) (n |= (1<<22))
-#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22))
-#define CTRL_CLEAR_LO(x) (x &= (1<<21))
-#define CTRL_CLEAR_HI(x) (x &= 0xfffffcf0)
-#define CTRL_SET_ENABLE(val) (val |= 1<<20)
-#define CTRL_SET_USR(val, u) (val |= ((u & 1) << 16))
-#define CTRL_SET_KERN(val, k) (val |= ((k & 1) << 17))
-#define CTRL_SET_UM(val, m) (val |= (m << 8))
-#define CTRL_SET_EVENT_LOW(val, e) (val |= (e & 0xff))
-#define CTRL_SET_EVENT_HIGH(val, e) (val |= ((e >> 8) & 0xf))
-#define CTRL_SET_HOST_ONLY(val, h) (val |= ((h & 1) << 9))
-#define CTRL_SET_GUEST_ONLY(val, h) (val |= ((h & 1) << 8))
-
-static unsigned long reset_value[NUM_COUNTERS];
-
-static void athlon_fill_in_addresses(struct op_msrs * const msrs)
-{
-	int i;
-
-	for (i = 0; i < NUM_COUNTERS; i++) {
-		if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i))
-			msrs->counters[i].addr = MSR_K7_PERFCTR0 + i;
-		else
-			msrs->counters[i].addr = 0;
-	}
-
-	for (i = 0; i < NUM_CONTROLS; i++) {
-		if (reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i))
-			msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i;
-		else
-			msrs->controls[i].addr = 0;
-	}
-}
-
-
-static void athlon_setup_ctrs(struct op_msrs const * const msrs)
-{
-	unsigned int low, high;
-	int i;
-
-	/* clear all counters */
-	for (i = 0 ; i < NUM_CONTROLS; ++i) {
-		if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
-			continue;
-		CTRL_READ(low, high, msrs, i);
-		CTRL_CLEAR_LO(low);
-		CTRL_CLEAR_HI(high);
-		CTRL_WRITE(low, high, msrs, i);
-	}
-
-	/* avoid a false detection of ctr overflows in NMI handler */
-	for (i = 0; i < NUM_COUNTERS; ++i) {
-		if (unlikely(!CTR_IS_RESERVED(msrs, i)))
-			continue;
-		CTR_WRITE(1, msrs, i);
-	}
-
-	/* enable active counters */
-	for (i = 0; i < NUM_COUNTERS; ++i) {
-		if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) {
-			reset_value[i] = counter_config[i].count;
-
-			CTR_WRITE(counter_config[i].count, msrs, i);
-
-			CTRL_READ(low, high, msrs, i);
-			CTRL_CLEAR_LO(low);
-			CTRL_CLEAR_HI(high);
-			CTRL_SET_ENABLE(low);
-			CTRL_SET_USR(low, counter_config[i].user);
-			CTRL_SET_KERN(low, counter_config[i].kernel);
-			CTRL_SET_UM(low, counter_config[i].unit_mask);
-			CTRL_SET_EVENT_LOW(low, counter_config[i].event);
-			CTRL_SET_EVENT_HIGH(high, counter_config[i].event);
-			CTRL_SET_HOST_ONLY(high, 0);
-			CTRL_SET_GUEST_ONLY(high, 0);
-
-			CTRL_WRITE(low, high, msrs, i);
-		} else {
-			reset_value[i] = 0;
-		}
-	}
-}
-
-
-static int athlon_check_ctrs(struct pt_regs * const regs,
-			     struct op_msrs const * const msrs)
-{
-	unsigned int low, high;
-	int i;
-
-	for (i = 0 ; i < NUM_COUNTERS; ++i) {
-		if (!reset_value[i])
-			continue;
-		CTR_READ(low, high, msrs, i);
-		if (CTR_OVERFLOWED(low)) {
-			oprofile_add_sample(regs, i);
-			CTR_WRITE(reset_value[i], msrs, i);
-		}
-	}
-
-	/* See op_model_ppro.c */
-	return 1;
-}
-
-
-static void athlon_start(struct op_msrs const * const msrs)
-{
-	unsigned int low, high;
-	int i;
-	for (i = 0 ; i < NUM_COUNTERS ; ++i) {
-		if (reset_value[i]) {
-			CTRL_READ(low, high, msrs, i);
-			CTRL_SET_ACTIVE(low);
-			CTRL_WRITE(low, high, msrs, i);
-		}
-	}
-}
-
-
-static void athlon_stop(struct op_msrs const * const msrs)
-{
-	unsigned int low, high;
-	int i;
-
-	/* Subtle: stop on all counters to avoid race with
-	 * setting our pm callback */
-	for (i = 0 ; i < NUM_COUNTERS ; ++i) {
-		if (!reset_value[i])
-			continue;
-		CTRL_READ(low, high, msrs, i);
-		CTRL_SET_INACTIVE(low);
-		CTRL_WRITE(low, high, msrs, i);
-	}
-}
-
-static void athlon_shutdown(struct op_msrs const * const msrs)
-{
-	int i;
-
-	for (i = 0 ; i < NUM_COUNTERS ; ++i) {
-		if (CTR_IS_RESERVED(msrs, i))
-			release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
-	}
-	for (i = 0 ; i < NUM_CONTROLS ; ++i) {
-		if (CTRL_IS_RESERVED(msrs, i))
-			release_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
-	}
-}
-
-struct op_x86_model_spec const op_athlon_spec = {
-	.num_counters = NUM_COUNTERS,
-	.num_controls = NUM_CONTROLS,
-	.fill_in_addresses = &athlon_fill_in_addresses,
-	.setup_ctrs = &athlon_setup_ctrs,
-	.check_ctrs = &athlon_check_ctrs,
-	.start = &athlon_start,
-	.stop = &athlon_stop,
-	.shutdown = &athlon_shutdown
-};
diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c
index 56b4757..43ac5af 100644
--- a/arch/x86/oprofile/op_model_p4.c
+++ b/arch/x86/oprofile/op_model_p4.c
@@ -10,11 +10,12 @@
 
 #include <linux/oprofile.h>
 #include <linux/smp.h>
+#include <linux/ptrace.h>
+#include <linux/nmi.h>
 #include <asm/msr.h>
-#include <asm/ptrace.h>
 #include <asm/fixmap.h>
 #include <asm/apic.h>
-#include <asm/nmi.h>
+
 
 #include "op_x86_model.h"
 #include "op_counter.h"
@@ -40,7 +41,7 @@
 static inline void setup_num_counters(void)
 {
 #ifdef CONFIG_SMP
-	if (smp_num_siblings == 2){
+	if (smp_num_siblings == 2) {
 		num_counters = NUM_COUNTERS_HT2;
 		num_controls = NUM_CONTROLS_HT2;
 	}
@@ -86,7 +87,7 @@
 #define CTR_FLAME_2    (1 << 6)
 #define CTR_IQ_5       (1 << 7)
 
-static struct p4_counter_binding p4_counters [NUM_COUNTERS_NON_HT] = {
+static struct p4_counter_binding p4_counters[NUM_COUNTERS_NON_HT] = {
 	{ CTR_BPU_0,   MSR_P4_BPU_PERFCTR0,   MSR_P4_BPU_CCCR0 },
 	{ CTR_MS_0,    MSR_P4_MS_PERFCTR0,    MSR_P4_MS_CCCR0 },
 	{ CTR_FLAME_0, MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_CCCR0 },
@@ -97,32 +98,32 @@
 	{ CTR_IQ_5,    MSR_P4_IQ_PERFCTR5,    MSR_P4_IQ_CCCR5 }
 };
 
-#define NUM_UNUSED_CCCRS	NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT
+#define NUM_UNUSED_CCCRS (NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT)
 
 /* p4 event codes in libop/op_event.h are indices into this table. */
 
 static struct p4_event_binding p4_events[NUM_EVENTS] = {
-	
+
 	{ /* BRANCH_RETIRED */
-		0x05, 0x06, 
+		0x05, 0x06,
 		{ {CTR_IQ_4, MSR_P4_CRU_ESCR2},
 		  {CTR_IQ_5, MSR_P4_CRU_ESCR3} }
 	},
-	
+
 	{ /* MISPRED_BRANCH_RETIRED */
-		0x04, 0x03, 
+		0x04, 0x03,
 		{ { CTR_IQ_4, MSR_P4_CRU_ESCR0},
 		  { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
 	},
-	
+
 	{ /* TC_DELIVER_MODE */
 		0x01, 0x01,
-		{ { CTR_MS_0, MSR_P4_TC_ESCR0},  
+		{ { CTR_MS_0, MSR_P4_TC_ESCR0},
 		  { CTR_MS_2, MSR_P4_TC_ESCR1} }
 	},
-	
+
 	{ /* BPU_FETCH_REQUEST */
-		0x00, 0x03, 
+		0x00, 0x03,
 		{ { CTR_BPU_0, MSR_P4_BPU_ESCR0},
 		  { CTR_BPU_2, MSR_P4_BPU_ESCR1} }
 	},
@@ -146,7 +147,7 @@
 	},
 
 	{ /* LOAD_PORT_REPLAY */
-		0x02, 0x04, 
+		0x02, 0x04,
 		{ { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
 		  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
 	},
@@ -170,43 +171,43 @@
 	},
 
 	{ /* BSQ_CACHE_REFERENCE */
-		0x07, 0x0c, 
+		0x07, 0x0c,
 		{ { CTR_BPU_0, MSR_P4_BSU_ESCR0},
 		  { CTR_BPU_2, MSR_P4_BSU_ESCR1} }
 	},
 
 	{ /* IOQ_ALLOCATION */
-		0x06, 0x03, 
+		0x06, 0x03,
 		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0},
 		  { 0, 0 } }
 	},
 
 	{ /* IOQ_ACTIVE_ENTRIES */
-		0x06, 0x1a, 
+		0x06, 0x1a,
 		{ { CTR_BPU_2, MSR_P4_FSB_ESCR1},
 		  { 0, 0 } }
 	},
 
 	{ /* FSB_DATA_ACTIVITY */
-		0x06, 0x17, 
+		0x06, 0x17,
 		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0},
 		  { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
 	},
 
 	{ /* BSQ_ALLOCATION */
-		0x07, 0x05, 
+		0x07, 0x05,
 		{ { CTR_BPU_0, MSR_P4_BSU_ESCR0},
 		  { 0, 0 } }
 	},
 
 	{ /* BSQ_ACTIVE_ENTRIES */
 		0x07, 0x06,
-		{ { CTR_BPU_2, MSR_P4_BSU_ESCR1 /* guess */},  
+		{ { CTR_BPU_2, MSR_P4_BSU_ESCR1 /* guess */},
 		  { 0, 0 } }
 	},
 
 	{ /* X87_ASSIST */
-		0x05, 0x03, 
+		0x05, 0x03,
 		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
 		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
 	},
@@ -216,21 +217,21 @@
 		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
 		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
 	},
-  
+
 	{ /* PACKED_SP_UOP */
-		0x01, 0x08, 
+		0x01, 0x08,
 		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
 		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
 	},
-  
+
 	{ /* PACKED_DP_UOP */
-		0x01, 0x0c, 
+		0x01, 0x0c,
 		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
 		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
 	},
 
 	{ /* SCALAR_SP_UOP */
-		0x01, 0x0a, 
+		0x01, 0x0a,
 		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
 		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
 	},
@@ -242,31 +243,31 @@
 	},
 
 	{ /* 64BIT_MMX_UOP */
-		0x01, 0x02, 
+		0x01, 0x02,
 		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
 		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
 	},
-  
+
 	{ /* 128BIT_MMX_UOP */
-		0x01, 0x1a, 
+		0x01, 0x1a,
 		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
 		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
 	},
 
 	{ /* X87_FP_UOP */
-		0x01, 0x04, 
+		0x01, 0x04,
 		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
 		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
 	},
-  
+
 	{ /* X87_SIMD_MOVES_UOP */
-		0x01, 0x2e, 
+		0x01, 0x2e,
 		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
 		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
 	},
-  
+
 	{ /* MACHINE_CLEAR */
-		0x05, 0x02, 
+		0x05, 0x02,
 		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
 		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
 	},
@@ -276,9 +277,9 @@
 		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0},
 		  { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
 	},
-  
+
 	{ /* TC_MS_XFER */
-		0x00, 0x05, 
+		0x00, 0x05,
 		{ { CTR_MS_0, MSR_P4_MS_ESCR0},
 		  { CTR_MS_2, MSR_P4_MS_ESCR1} }
 	},
@@ -308,7 +309,7 @@
 	},
 
 	{ /* INSTR_RETIRED */
-		0x04, 0x02, 
+		0x04, 0x02,
 		{ { CTR_IQ_4, MSR_P4_CRU_ESCR0},
 		  { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
 	},
@@ -319,14 +320,14 @@
 		  { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
 	},
 
-	{ /* UOP_TYPE */    
-		0x02, 0x02, 
+	{ /* UOP_TYPE */
+		0x02, 0x02,
 		{ { CTR_IQ_4, MSR_P4_RAT_ESCR0},
 		  { CTR_IQ_5, MSR_P4_RAT_ESCR1} }
 	},
 
 	{ /* RETIRED_MISPRED_BRANCH_TYPE */
-		0x02, 0x05, 
+		0x02, 0x05,
 		{ { CTR_MS_0, MSR_P4_TBPU_ESCR0},
 		  { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
 	},
@@ -349,8 +350,8 @@
 #define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1))
 #define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25))
 #define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9))
-#define ESCR_READ(escr,high,ev,i) do {rdmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0)
-#define ESCR_WRITE(escr,high,ev,i) do {wrmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0)
+#define ESCR_READ(escr, high, ev, i) do {rdmsr(ev->bindings[(i)].escr_address, (escr), (high)); } while (0)
+#define ESCR_WRITE(escr, high, ev, i) do {wrmsr(ev->bindings[(i)].escr_address, (escr), (high)); } while (0)
 
 #define CCCR_RESERVED_BITS 0x38030FFF
 #define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS)
@@ -360,15 +361,15 @@
 #define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27))
 #define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12))
 #define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12))
-#define CCCR_READ(low, high, i) do {rdmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0)
-#define CCCR_WRITE(low, high, i) do {wrmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0)
+#define CCCR_READ(low, high, i) do {rdmsr(p4_counters[(i)].cccr_address, (low), (high)); } while (0)
+#define CCCR_WRITE(low, high, i) do {wrmsr(p4_counters[(i)].cccr_address, (low), (high)); } while (0)
 #define CCCR_OVF_P(cccr) ((cccr) & (1U<<31))
 #define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31)))
 
-#define CTRL_IS_RESERVED(msrs,c) (msrs->controls[(c)].addr ? 1 : 0)
-#define CTR_IS_RESERVED(msrs,c) (msrs->counters[(c)].addr ? 1 : 0)
-#define CTR_READ(l,h,i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h));} while (0)
-#define CTR_WRITE(l,i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1);} while (0)
+#define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0)
+#define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0)
+#define CTR_READ(l, h, i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h)); } while (0)
+#define CTR_WRITE(l, i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1); } while (0)
 #define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000))
 
 
@@ -380,7 +381,7 @@
 #ifdef CONFIG_SMP
 	int cpu = smp_processor_id();
 	return (cpu != first_cpu(per_cpu(cpu_sibling_map, cpu)));
-#endif	
+#endif
 	return 0;
 }
 
@@ -395,25 +396,23 @@
 
 static void p4_fill_in_addresses(struct op_msrs * const msrs)
 {
-	unsigned int i; 
+	unsigned int i;
 	unsigned int addr, cccraddr, stag;
 
 	setup_num_counters();
 	stag = get_stagger();
 
 	/* initialize some registers */
-	for (i = 0; i < num_counters; ++i) {
+	for (i = 0; i < num_counters; ++i)
 		msrs->counters[i].addr = 0;
-	}
-	for (i = 0; i < num_controls; ++i) {
+	for (i = 0; i < num_controls; ++i)
 		msrs->controls[i].addr = 0;
-	}
-	
+
 	/* the counter & cccr registers we pay attention to */
 	for (i = 0; i < num_counters; ++i) {
 		addr = p4_counters[VIRT_CTR(stag, i)].counter_address;
 		cccraddr = p4_counters[VIRT_CTR(stag, i)].cccr_address;
-		if (reserve_perfctr_nmi(addr)){
+		if (reserve_perfctr_nmi(addr)) {
 			msrs->counters[i].addr = addr;
 			msrs->controls[i].addr = cccraddr;
 		}
@@ -447,22 +446,22 @@
 		if (reserve_evntsel_nmi(addr))
 			msrs->controls[i].addr = addr;
 	}
-	
+
 	for (addr = MSR_P4_MS_ESCR0 + stag;
-	     addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) { 
+	     addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) {
 		if (reserve_evntsel_nmi(addr))
 			msrs->controls[i].addr = addr;
 	}
-	
+
 	for (addr = MSR_P4_IX_ESCR0 + stag;
-	     addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) { 
+	     addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) {
 		if (reserve_evntsel_nmi(addr))
 			msrs->controls[i].addr = addr;
 	}
 
 	/* there are 2 remaining non-contiguously located ESCRs */
 
-	if (num_counters == NUM_COUNTERS_NON_HT) {		
+	if (num_counters == NUM_COUNTERS_NON_HT) {
 		/* standard non-HT CPUs handle both remaining ESCRs*/
 		if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5))
 			msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
@@ -498,20 +497,20 @@
 	unsigned int stag;
 
 	stag = get_stagger();
-	
+
 	/* convert from counter *number* to counter *bit* */
 	counter_bit = 1 << VIRT_CTR(stag, ctr);
-	
+
 	/* find our event binding structure. */
 	if (counter_config[ctr].event <= 0 || counter_config[ctr].event > NUM_EVENTS) {
-		printk(KERN_ERR 
-		       "oprofile: P4 event code 0x%lx out of range\n", 
+		printk(KERN_ERR
+		       "oprofile: P4 event code 0x%lx out of range\n",
 		       counter_config[ctr].event);
 		return;
 	}
-	
+
 	ev = &(p4_events[counter_config[ctr].event - 1]);
-	
+
 	for (i = 0; i < maxbind; i++) {
 		if (ev->bindings[i].virt_counter & counter_bit) {
 
@@ -526,25 +525,24 @@
 				ESCR_SET_OS_1(escr, counter_config[ctr].kernel);
 			}
 			ESCR_SET_EVENT_SELECT(escr, ev->event_select);
-			ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask);			
+			ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask);
 			ESCR_WRITE(escr, high, ev, i);
-		       
+
 			/* modify CCCR */
 			CCCR_READ(cccr, high, VIRT_CTR(stag, ctr));
 			CCCR_CLEAR(cccr);
 			CCCR_SET_REQUIRED_BITS(cccr);
 			CCCR_SET_ESCR_SELECT(cccr, ev->escr_select);
-			if (stag == 0) {
+			if (stag == 0)
 				CCCR_SET_PMI_OVF_0(cccr);
-			} else {
+			else
 				CCCR_SET_PMI_OVF_1(cccr);
-			}
 			CCCR_WRITE(cccr, high, VIRT_CTR(stag, ctr));
 			return;
 		}
 	}
 
-	printk(KERN_ERR 
+	printk(KERN_ERR
 	       "oprofile: P4 event code 0x%lx no binding, stag %d ctr %d\n",
 	       counter_config[ctr].event, stag, ctr);
 }
@@ -559,14 +557,14 @@
 	stag = get_stagger();
 
 	rdmsr(MSR_IA32_MISC_ENABLE, low, high);
-	if (! MISC_PMC_ENABLED_P(low)) {
+	if (!MISC_PMC_ENABLED_P(low)) {
 		printk(KERN_ERR "oprofile: P4 PMC not available\n");
 		return;
 	}
 
 	/* clear the cccrs we will use */
 	for (i = 0 ; i < num_counters ; i++) {
-		if (unlikely(!CTRL_IS_RESERVED(msrs,i)))
+		if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
 			continue;
 		rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
 		CCCR_CLEAR(low);
@@ -576,14 +574,14 @@
 
 	/* clear all escrs (including those outside our concern) */
 	for (i = num_counters; i < num_controls; i++) {
-		if (unlikely(!CTRL_IS_RESERVED(msrs,i)))
+		if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
 			continue;
 		wrmsr(msrs->controls[i].addr, 0, 0);
 	}
 
 	/* setup all counters */
 	for (i = 0 ; i < num_counters ; ++i) {
-		if ((counter_config[i].enabled) && (CTRL_IS_RESERVED(msrs,i))) {
+		if ((counter_config[i].enabled) && (CTRL_IS_RESERVED(msrs, i))) {
 			reset_value[i] = counter_config[i].count;
 			pmc_setup_one_p4_counter(i);
 			CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i));
@@ -603,11 +601,11 @@
 	stag = get_stagger();
 
 	for (i = 0; i < num_counters; ++i) {
-		
-		if (!reset_value[i]) 
+
+		if (!reset_value[i])
 			continue;
 
-		/* 
+		/*
 		 * there is some eccentricity in the hardware which
 		 * requires that we perform 2 extra corrections:
 		 *
@@ -616,24 +614,24 @@
 		 *
 		 * - write the counter back twice to ensure it gets
 		 *   updated properly.
-		 * 
+		 *
 		 * the former seems to be related to extra NMIs happening
 		 * during the current NMI; the latter is reported as errata
 		 * N15 in intel doc 249199-029, pentium 4 specification
 		 * update, though their suggested work-around does not
 		 * appear to solve the problem.
 		 */
-		
+
 		real = VIRT_CTR(stag, i);
 
 		CCCR_READ(low, high, real);
- 		CTR_READ(ctr, high, real);
+		CTR_READ(ctr, high, real);
 		if (CCCR_OVF_P(low) || CTR_OVERFLOW_P(ctr)) {
 			oprofile_add_sample(regs, i);
- 			CTR_WRITE(reset_value[i], real);
+			CTR_WRITE(reset_value[i], real);
 			CCCR_CLEAR_OVF(low);
 			CCCR_WRITE(low, high, real);
- 			CTR_WRITE(reset_value[i], real);
+			CTR_WRITE(reset_value[i], real);
 		}
 	}
 
@@ -683,15 +681,16 @@
 	int i;
 
 	for (i = 0 ; i < num_counters ; ++i) {
-		if (CTR_IS_RESERVED(msrs,i))
+		if (CTR_IS_RESERVED(msrs, i))
 			release_perfctr_nmi(msrs->counters[i].addr);
 	}
-	/* some of the control registers are specially reserved in
+	/*
+	 * some of the control registers are specially reserved in
 	 * conjunction with the counter registers (hence the starting offset).
 	 * This saves a few bits.
 	 */
 	for (i = num_counters ; i < num_controls ; ++i) {
-		if (CTRL_IS_RESERVED(msrs,i))
+		if (CTRL_IS_RESERVED(msrs, i))
 			release_evntsel_nmi(msrs->controls[i].addr);
 	}
 }
diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h
index 45b605f..05a0261 100644
--- a/arch/x86/oprofile/op_x86_model.h
+++ b/arch/x86/oprofile/op_x86_model.h
@@ -32,6 +32,8 @@
  * various x86 CPU models' perfctr support.
  */
 struct op_x86_model_spec {
+	int (*init)(struct oprofile_operations *ops);
+	void (*exit)(void);
 	unsigned int const num_counters;
 	unsigned int const num_controls;
 	void (*fill_in_addresses)(struct op_msrs * const msrs);
@@ -46,6 +48,6 @@
 extern struct op_x86_model_spec const op_ppro_spec;
 extern struct op_x86_model_spec const op_p4_spec;
 extern struct op_x86_model_spec const op_p4_ht2_spec;
-extern struct op_x86_model_spec const op_athlon_spec;
+extern struct op_x86_model_spec const op_amd_spec;
 
 #endif /* OP_X86_MODEL_H */
diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c
index 9304c45..ed98227 100644
--- a/drivers/oprofile/buffer_sync.c
+++ b/drivers/oprofile/buffer_sync.c
@@ -5,6 +5,7 @@
  * @remark Read the file COPYING
  *
  * @author John Levon <levon@movementarian.org>
+ * @author Barry Kasindorf
  *
  * This is the core of the buffer management. Each
  * CPU buffer is processed and entered into the
@@ -33,7 +34,7 @@
 #include "event_buffer.h"
 #include "cpu_buffer.h"
 #include "buffer_sync.h"
- 
+
 static LIST_HEAD(dying_tasks);
 static LIST_HEAD(dead_tasks);
 static cpumask_t marked_cpus = CPU_MASK_NONE;
@@ -48,10 +49,11 @@
  * Can be invoked from softirq via RCU callback due to
  * call_rcu() of the task struct, hence the _irqsave.
  */
-static int task_free_notify(struct notifier_block * self, unsigned long val, void * data)
+static int
+task_free_notify(struct notifier_block *self, unsigned long val, void *data)
 {
 	unsigned long flags;
-	struct task_struct * task = data;
+	struct task_struct *task = data;
 	spin_lock_irqsave(&task_mortuary, flags);
 	list_add(&task->tasks, &dying_tasks);
 	spin_unlock_irqrestore(&task_mortuary, flags);
@@ -62,13 +64,14 @@
 /* The task is on its way out. A sync of the buffer means we can catch
  * any remaining samples for this task.
  */
-static int task_exit_notify(struct notifier_block * self, unsigned long val, void * data)
+static int
+task_exit_notify(struct notifier_block *self, unsigned long val, void *data)
 {
 	/* To avoid latency problems, we only process the current CPU,
 	 * hoping that most samples for the task are on this CPU
 	 */
 	sync_buffer(raw_smp_processor_id());
-  	return 0;
+	return 0;
 }
 
 
@@ -77,11 +80,12 @@
  * we don't lose any. This does not have to be exact, it's a QoI issue
  * only.
  */
-static int munmap_notify(struct notifier_block * self, unsigned long val, void * data)
+static int
+munmap_notify(struct notifier_block *self, unsigned long val, void *data)
 {
 	unsigned long addr = (unsigned long)data;
-	struct mm_struct * mm = current->mm;
-	struct vm_area_struct * mpnt;
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *mpnt;
 
 	down_read(&mm->mmap_sem);
 
@@ -99,11 +103,12 @@
 	return 0;
 }
 
- 
+
 /* We need to be told about new modules so we don't attribute to a previously
  * loaded module, or drop the samples on the floor.
  */
-static int module_load_notify(struct notifier_block * self, unsigned long val, void * data)
+static int
+module_load_notify(struct notifier_block *self, unsigned long val, void *data)
 {
 #ifdef CONFIG_MODULES
 	if (val != MODULE_STATE_COMING)
@@ -118,7 +123,7 @@
 	return 0;
 }
 
- 
+
 static struct notifier_block task_free_nb = {
 	.notifier_call	= task_free_notify,
 };
@@ -135,7 +140,7 @@
 	.notifier_call = module_load_notify,
 };
 
- 
+
 static void end_sync(void)
 {
 	end_cpu_work();
@@ -208,14 +213,14 @@
  * not strictly necessary but allows oprofile to associate
  * shared-library samples with particular applications
  */
-static unsigned long get_exec_dcookie(struct mm_struct * mm)
+static unsigned long get_exec_dcookie(struct mm_struct *mm)
 {
 	unsigned long cookie = NO_COOKIE;
-	struct vm_area_struct * vma;
- 
+	struct vm_area_struct *vma;
+
 	if (!mm)
 		goto out;
- 
+
 	for (vma = mm->mmap; vma; vma = vma->vm_next) {
 		if (!vma->vm_file)
 			continue;
@@ -235,13 +240,14 @@
  * sure to do this lookup before a mm->mmap modification happens so
  * we don't lose track.
  */
-static unsigned long lookup_dcookie(struct mm_struct * mm, unsigned long addr, off_t * offset)
+static unsigned long
+lookup_dcookie(struct mm_struct *mm, unsigned long addr, off_t *offset)
 {
 	unsigned long cookie = NO_COOKIE;
-	struct vm_area_struct * vma;
+	struct vm_area_struct *vma;
 
 	for (vma = find_vma(mm, addr); vma; vma = vma->vm_next) {
- 
+
 		if (addr < vma->vm_start || addr >= vma->vm_end)
 			continue;
 
@@ -263,9 +269,20 @@
 	return cookie;
 }
 
+static void increment_tail(struct oprofile_cpu_buffer *b)
+{
+	unsigned long new_tail = b->tail_pos + 1;
+
+	rmb();	/* be sure fifo pointers are synchronized */
+
+	if (new_tail < b->buffer_size)
+		b->tail_pos = new_tail;
+	else
+		b->tail_pos = 0;
+}
 
 static unsigned long last_cookie = INVALID_COOKIE;
- 
+
 static void add_cpu_switch(int i)
 {
 	add_event_entry(ESCAPE_CODE);
@@ -278,16 +295,16 @@
 {
 	add_event_entry(ESCAPE_CODE);
 	if (in_kernel)
-		add_event_entry(KERNEL_ENTER_SWITCH_CODE); 
+		add_event_entry(KERNEL_ENTER_SWITCH_CODE);
 	else
-		add_event_entry(KERNEL_EXIT_SWITCH_CODE); 
+		add_event_entry(KERNEL_EXIT_SWITCH_CODE);
 }
- 
+
 static void
-add_user_ctx_switch(struct task_struct const * task, unsigned long cookie)
+add_user_ctx_switch(struct task_struct const *task, unsigned long cookie)
 {
 	add_event_entry(ESCAPE_CODE);
-	add_event_entry(CTX_SWITCH_CODE); 
+	add_event_entry(CTX_SWITCH_CODE);
 	add_event_entry(task->pid);
 	add_event_entry(cookie);
 	/* Another code for daemon back-compat */
@@ -296,7 +313,7 @@
 	add_event_entry(task->tgid);
 }
 
- 
+
 static void add_cookie_switch(unsigned long cookie)
 {
 	add_event_entry(ESCAPE_CODE);
@@ -304,13 +321,78 @@
 	add_event_entry(cookie);
 }
 
- 
+
 static void add_trace_begin(void)
 {
 	add_event_entry(ESCAPE_CODE);
 	add_event_entry(TRACE_BEGIN_CODE);
 }
 
+#ifdef CONFIG_OPROFILE_IBS
+
+#define IBS_FETCH_CODE_SIZE	2
+#define IBS_OP_CODE_SIZE	5
+#define IBS_EIP(offset)				\
+	(((struct op_sample *)&cpu_buf->buffer[(offset)])->eip)
+#define IBS_EVENT(offset)				\
+	(((struct op_sample *)&cpu_buf->buffer[(offset)])->event)
+
+/*
+ * Add IBS fetch and op entries to event buffer
+ */
+static void add_ibs_begin(struct oprofile_cpu_buffer *cpu_buf, int code,
+	int in_kernel, struct mm_struct *mm)
+{
+	unsigned long rip;
+	int i, count;
+	unsigned long ibs_cookie = 0;
+	off_t offset;
+
+	increment_tail(cpu_buf);	/* move to RIP entry */
+
+	rip = IBS_EIP(cpu_buf->tail_pos);
+
+#ifdef __LP64__
+	rip += IBS_EVENT(cpu_buf->tail_pos) << 32;
+#endif
+
+	if (mm) {
+		ibs_cookie = lookup_dcookie(mm, rip, &offset);
+
+		if (ibs_cookie == NO_COOKIE)
+			offset = rip;
+		if (ibs_cookie == INVALID_COOKIE) {
+			atomic_inc(&oprofile_stats.sample_lost_no_mapping);
+			offset = rip;
+		}
+		if (ibs_cookie != last_cookie) {
+			add_cookie_switch(ibs_cookie);
+			last_cookie = ibs_cookie;
+		}
+	} else
+		offset = rip;
+
+	add_event_entry(ESCAPE_CODE);
+	add_event_entry(code);
+	add_event_entry(offset);	/* Offset from Dcookie */
+
+	/* we send the Dcookie offset, but also send the raw linear address */
+	add_event_entry(IBS_EIP(cpu_buf->tail_pos));
+	add_event_entry(IBS_EVENT(cpu_buf->tail_pos));
+
+	if (code == IBS_FETCH_CODE)
+		count = IBS_FETCH_CODE_SIZE;	/* IBS FETCH is 2 int64s */
+	else
+		count = IBS_OP_CODE_SIZE;	/* IBS OP is 5 int64s */
+
+	for (i = 0; i < count; i++) {
+		increment_tail(cpu_buf);
+		add_event_entry(IBS_EIP(cpu_buf->tail_pos));
+		add_event_entry(IBS_EVENT(cpu_buf->tail_pos));
+	}
+}
+
+#endif
 
 static void add_sample_entry(unsigned long offset, unsigned long event)
 {
@@ -319,13 +401,13 @@
 }
 
 
-static int add_us_sample(struct mm_struct * mm, struct op_sample * s)
+static int add_us_sample(struct mm_struct *mm, struct op_sample *s)
 {
 	unsigned long cookie;
 	off_t offset;
- 
- 	cookie = lookup_dcookie(mm, s->eip, &offset);
- 
+
+	cookie = lookup_dcookie(mm, s->eip, &offset);
+
 	if (cookie == INVALID_COOKIE) {
 		atomic_inc(&oprofile_stats.sample_lost_no_mapping);
 		return 0;
@@ -341,13 +423,13 @@
 	return 1;
 }
 
- 
+
 /* Add a sample to the global event buffer. If possible the
  * sample is converted into a persistent dentry/offset pair
  * for later lookup from userspace.
  */
 static int
-add_sample(struct mm_struct * mm, struct op_sample * s, int in_kernel)
+add_sample(struct mm_struct *mm, struct op_sample *s, int in_kernel)
 {
 	if (in_kernel) {
 		add_sample_entry(s->eip, s->event);
@@ -359,9 +441,9 @@
 	}
 	return 0;
 }
- 
 
-static void release_mm(struct mm_struct * mm)
+
+static void release_mm(struct mm_struct *mm)
 {
 	if (!mm)
 		return;
@@ -370,9 +452,9 @@
 }
 
 
-static struct mm_struct * take_tasks_mm(struct task_struct * task)
+static struct mm_struct *take_tasks_mm(struct task_struct *task)
 {
-	struct mm_struct * mm = get_task_mm(task);
+	struct mm_struct *mm = get_task_mm(task);
 	if (mm)
 		down_read(&mm->mmap_sem);
 	return mm;
@@ -383,10 +465,10 @@
 {
 	return val == ESCAPE_CODE;
 }
- 
+
 
 /* "acquire" as many cpu buffer slots as we can */
-static unsigned long get_slots(struct oprofile_cpu_buffer * b)
+static unsigned long get_slots(struct oprofile_cpu_buffer *b)
 {
 	unsigned long head = b->head_pos;
 	unsigned long tail = b->tail_pos;
@@ -412,19 +494,6 @@
 }
 
 
-static void increment_tail(struct oprofile_cpu_buffer * b)
-{
-	unsigned long new_tail = b->tail_pos + 1;
-
-	rmb();
-
-	if (new_tail < b->buffer_size)
-		b->tail_pos = new_tail;
-	else
-		b->tail_pos = 0;
-}
-
-
 /* Move tasks along towards death. Any tasks on dead_tasks
  * will definitely have no remaining references in any
  * CPU buffers at this point, because we use two lists,
@@ -435,8 +504,8 @@
 {
 	unsigned long flags;
 	LIST_HEAD(local_dead_tasks);
-	struct task_struct * task;
-	struct task_struct * ttask;
+	struct task_struct *task;
+	struct task_struct *ttask;
 
 	spin_lock_irqsave(&task_mortuary, flags);
 
@@ -493,7 +562,7 @@
 {
 	struct oprofile_cpu_buffer *cpu_buf = &per_cpu(cpu_buffer, cpu);
 	struct mm_struct *mm = NULL;
-	struct task_struct * new;
+	struct task_struct *new;
 	unsigned long cookie = 0;
 	int in_kernel = 1;
 	unsigned int i;
@@ -501,7 +570,7 @@
 	unsigned long available;
 
 	mutex_lock(&buffer_mutex);
- 
+
 	add_cpu_switch(cpu);
 
 	/* Remember, only we can modify tail_pos */
@@ -509,8 +578,8 @@
 	available = get_slots(cpu_buf);
 
 	for (i = 0; i < available; ++i) {
-		struct op_sample * s = &cpu_buf->buffer[cpu_buf->tail_pos];
- 
+		struct op_sample *s = &cpu_buf->buffer[cpu_buf->tail_pos];
+
 		if (is_code(s->eip)) {
 			if (s->event <= CPU_IS_KERNEL) {
 				/* kernel/userspace switch */
@@ -521,8 +590,18 @@
 			} else if (s->event == CPU_TRACE_BEGIN) {
 				state = sb_bt_start;
 				add_trace_begin();
+#ifdef CONFIG_OPROFILE_IBS
+			} else if (s->event == IBS_FETCH_BEGIN) {
+				state = sb_bt_start;
+				add_ibs_begin(cpu_buf,
+					IBS_FETCH_CODE, in_kernel, mm);
+			} else if (s->event == IBS_OP_BEGIN) {
+				state = sb_bt_start;
+				add_ibs_begin(cpu_buf,
+					IBS_OP_CODE, in_kernel, mm);
+#endif
 			} else {
-				struct mm_struct * oldmm = mm;
+				struct mm_struct *oldmm = mm;
 
 				/* userspace context switch */
 				new = (struct task_struct *)s->event;
@@ -533,13 +612,11 @@
 					cookie = get_exec_dcookie(mm);
 				add_user_ctx_switch(new, cookie);
 			}
-		} else {
-			if (state >= sb_bt_start &&
-			    !add_sample(mm, s, in_kernel)) {
-				if (state == sb_bt_start) {
-					state = sb_bt_ignore;
-					atomic_inc(&oprofile_stats.bt_lost_no_mapping);
-				}
+		} else if (state >= sb_bt_start &&
+			   !add_sample(mm, s, in_kernel)) {
+			if (state == sb_bt_start) {
+				state = sb_bt_ignore;
+				atomic_inc(&oprofile_stats.bt_lost_no_mapping);
 			}
 		}
 
diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c
index 7ba78e6..e1bd5a9 100644
--- a/drivers/oprofile/cpu_buffer.c
+++ b/drivers/oprofile/cpu_buffer.c
@@ -5,6 +5,7 @@
  * @remark Read the file COPYING
  *
  * @author John Levon <levon@movementarian.org>
+ * @author Barry Kasindorf <barry.kasindorf@amd.com>
  *
  * Each CPU has a local buffer that stores PC value/event
  * pairs. We also log context switches when we notice them.
@@ -209,7 +210,7 @@
 	return 1;
 }
 
-static int oprofile_begin_trace(struct oprofile_cpu_buffer * cpu_buf)
+static int oprofile_begin_trace(struct oprofile_cpu_buffer *cpu_buf)
 {
 	if (nr_available_slots(cpu_buf) < 4) {
 		cpu_buf->sample_lost_overflow++;
@@ -254,6 +255,75 @@
 	oprofile_add_ext_sample(pc, regs, event, is_kernel);
 }
 
+#ifdef CONFIG_OPROFILE_IBS
+
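+/*
+ * An IBS sample is logged into the CPU buffer as a code entry followed
+ * by the raw MSR contents: three low/high pairs for a fetch sample, six
+ * for an op sample, preceded by the usual kernel/user and task switch
+ * codes when those change.
+ */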
+#define MAX_IBS_SAMPLE_SIZE	14
+static int log_ibs_sample(struct oprofile_cpu_buffer *cpu_buf,
+	unsigned long pc, int is_kernel, unsigned int *ibs, int ibs_code)
+{
+	struct task_struct *task;
+
+	cpu_buf->sample_received++;
+
+	if (nr_available_slots(cpu_buf) < MAX_IBS_SAMPLE_SIZE) {
+		cpu_buf->sample_lost_overflow++;
+		return 0;
+	}
+
+	is_kernel = !!is_kernel;
+
+	/* notice a switch from user->kernel or vice versa */
+	if (cpu_buf->last_is_kernel != is_kernel) {
+		cpu_buf->last_is_kernel = is_kernel;
+		add_code(cpu_buf, is_kernel);
+	}
+
+	/* notice a task switch */
+	if (!is_kernel) {
+		task = current;
+
+		if (cpu_buf->last_task != task) {
+			cpu_buf->last_task = task;
+			add_code(cpu_buf, (unsigned long)task);
+		}
+	}
+
+	add_code(cpu_buf, ibs_code);
+	add_sample(cpu_buf, ibs[0], ibs[1]);
+	add_sample(cpu_buf, ibs[2], ibs[3]);
+	add_sample(cpu_buf, ibs[4], ibs[5]);
+
+	if (ibs_code == IBS_OP_BEGIN) {
+		add_sample(cpu_buf, ibs[6], ibs[7]);
+		add_sample(cpu_buf, ibs[8], ibs[9]);
+		add_sample(cpu_buf, ibs[10], ibs[11]);
+	}
+
+	return 1;
+}
+
+void oprofile_add_ibs_sample(struct pt_regs *const regs,
+				unsigned int * const ibs_sample, u8 code)
+{
+	int is_kernel = !user_mode(regs);
+	unsigned long pc = profile_pc(regs);
+
+	struct oprofile_cpu_buffer *cpu_buf =
+			 &per_cpu(cpu_buffer, smp_processor_id());
+
+	if (!backtrace_depth) {
+		log_ibs_sample(cpu_buf, pc, is_kernel, ibs_sample, code);
+		return;
+	}
+
+	/* if log_ibs_sample() fails we can't backtrace since we lost the
+	 * source of this event */
+	if (log_ibs_sample(cpu_buf, pc, is_kernel, ibs_sample, code))
+		oprofile_ops.backtrace(regs, backtrace_depth);
+}
+
+#endif
+
 void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event)
 {
 	struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer);
@@ -296,7 +366,7 @@
 	struct oprofile_cpu_buffer * b =
 		container_of(work, struct oprofile_cpu_buffer, work.work);
 	if (b->cpu != smp_processor_id()) {
-		printk("WQ on CPU%d, prefer CPU%d\n",
+		printk(KERN_DEBUG "WQ on CPU%d, prefer CPU%d\n",
 		       smp_processor_id(), b->cpu);
 	}
 	sync_buffer(b->cpu);
diff --git a/drivers/oprofile/cpu_buffer.h b/drivers/oprofile/cpu_buffer.h
index c3e366b..9c44d00 100644
--- a/drivers/oprofile/cpu_buffer.h
+++ b/drivers/oprofile/cpu_buffer.h
@@ -55,5 +55,7 @@
 /* transient events for the CPU buffer -> event buffer */
 #define CPU_IS_KERNEL 1
 #define CPU_TRACE_BEGIN 2
+#define IBS_FETCH_BEGIN 3
+#define IBS_OP_BEGIN    4
 
 #endif /* OPROFILE_CPU_BUFFER_H */
diff --git a/include/linux/oprofile.h b/include/linux/oprofile.h
index 041bb31..bcb8f72 100644
--- a/include/linux/oprofile.h
+++ b/include/linux/oprofile.h
@@ -36,6 +36,8 @@
 #define XEN_ENTER_SWITCH_CODE		10
 #define SPU_PROFILING_CODE		11
 #define SPU_CTX_SWITCH_CODE		12
+#define IBS_FETCH_CODE			13
+#define IBS_OP_CODE			14
 
 struct super_block;
 struct dentry;
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index f1624b3..f719d91 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -497,6 +497,11 @@
 #define PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP	0x1101
 #define PCI_DEVICE_ID_AMD_K8_NB_MEMCTL	0x1102
 #define PCI_DEVICE_ID_AMD_K8_NB_MISC	0x1103
+#define PCI_DEVICE_ID_AMD_10H_NB_HT	0x1200
+#define PCI_DEVICE_ID_AMD_10H_NB_MAP	0x1201
+#define PCI_DEVICE_ID_AMD_10H_NB_DRAM	0x1202
+#define PCI_DEVICE_ID_AMD_10H_NB_MISC	0x1203
+#define PCI_DEVICE_ID_AMD_10H_NB_LINK	0x1204
 #define PCI_DEVICE_ID_AMD_LANCE		0x2000
 #define PCI_DEVICE_ID_AMD_LANCE_HOME	0x2001
 #define PCI_DEVICE_ID_AMD_SCSI		0x2020