[ACPI] enable C2 and C3 idle power states on SMP
http://bugzilla.kernel.org/show_bug.cgi?id=4401

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
diff --git a/arch/i386/kernel/acpi/Makefile b/arch/i386/kernel/acpi/Makefile
index ee75cb2..5e291a2 100644
--- a/arch/i386/kernel/acpi/Makefile
+++ b/arch/i386/kernel/acpi/Makefile
@@ -2,3 +2,7 @@
 obj-$(CONFIG_X86_IO_APIC)	+= earlyquirk.o
 obj-$(CONFIG_ACPI_SLEEP)	+= sleep.o wakeup.o
 
+ifneq ($(CONFIG_ACPI_PROCESSOR),)
+obj-y				+= cstate.o
+endif
+
diff --git a/arch/i386/kernel/acpi/cstate.c b/arch/i386/kernel/acpi/cstate.c
new file mode 100644
index 0000000..4c3036b
--- /dev/null
+++ b/arch/i386/kernel/acpi/cstate.c
@@ -0,0 +1,103 @@
+/*
+ * arch/i386/kernel/acpi/cstate.c
+ *
+ * Copyright (C) 2005 Intel Corporation
+ * 	Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
+ * 	- Added _PDC for SMP C-states on Intel CPUs
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/acpi.h>
+
+#include <acpi/processor.h>
+#include <asm/acpi.h>
+
+static void acpi_processor_power_init_intel_pdc(struct acpi_processor_power
+						*pow)
+{
+	struct acpi_object_list *obj_list;
+	union acpi_object *obj;
+	u32 *buf;
+
+	/* allocate and initialize pdc. It will be used later. */
+	obj_list = kmalloc(sizeof(struct acpi_object_list), GFP_KERNEL);
+	if (!obj_list) {
+		printk(KERN_ERR "Memory allocation error\n");
+		return;
+	}
+
+	obj = kmalloc(sizeof(union acpi_object), GFP_KERNEL);
+	if (!obj) {
+		printk(KERN_ERR "Memory allocation error\n");
+		kfree(obj_list);
+		return;
+	}
+
+	buf = kmalloc(12, GFP_KERNEL);
+	if (!buf) {
+		printk(KERN_ERR "Memory allocation error\n");
+		kfree(obj);
+		kfree(obj_list);
+		return;
+	}
+
+	buf[0] = ACPI_PDC_REVISION_ID;
+	buf[1] = 1;
+	buf[2] = ACPI_PDC_C_CAPABILITY_SMP;
+
+	obj->type = ACPI_TYPE_BUFFER;
+	obj->buffer.length = 12;
+	obj->buffer.pointer = (u8 *) buf;
+	obj_list->count = 1;
+	obj_list->pointer = obj;
+	pow->pdc = obj_list;
+
+	return;
+}
+
+/* Initialize _PDC data based on the CPU vendor */
+void acpi_processor_power_init_pdc(struct acpi_processor_power *pow,
+				   unsigned int cpu)
+{
+	struct cpuinfo_x86 *c = cpu_data + cpu;
+
+	pow->pdc = NULL;
+	if (c->x86_vendor == X86_VENDOR_INTEL)
+		acpi_processor_power_init_intel_pdc(pow);
+
+	return;
+}
+
+EXPORT_SYMBOL(acpi_processor_power_init_pdc);
+
+/*
+ * Initialize bm_flags based on the CPU cache properties
+ * On SMP it depends on cache configuration
+ * - When cache is not shared among all CPUs, we flush cache
+ *   before entering C3.
+ * - When cache is shared among all CPUs, we use bm_check
+ *   mechanism as in UP case
+ *
+ * This routine is called only after all the CPUs are online
+ */
+void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags,
+					unsigned int cpu)
+{
+	struct cpuinfo_x86 *c = cpu_data + cpu;
+
+	flags->bm_check = 0;
+	if (num_online_cpus() == 1)
+		flags->bm_check = 1;
+	else if (c->x86_vendor == X86_VENDOR_INTEL) {
+		/*
+		 * Today all CPUs that support C3 share cache.
+		 * TBD: This needs to look at cache shared map, once
+		 * multi-core detection patch makes to the base.
+		 */
+		flags->bm_check = 1;
+	}
+}
+
+EXPORT_SYMBOL(acpi_processor_power_init_bm_check);
diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c
index 7dcbf70..327a55d 100644
--- a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c
+++ b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c
@@ -375,7 +375,7 @@
 	arg0.buffer.pointer = (u8 *) arg0_buf;
 	arg0_buf[0] = ACPI_PDC_REVISION_ID;
 	arg0_buf[1] = 1;
-	arg0_buf[2] = ACPI_PDC_EST_CAPABILITY_SMP | ACPI_PDC_EST_CAPABILITY_MSR;
+	arg0_buf[2] = ACPI_PDC_EST_CAPABILITY_SMP_MSR;
 
 	p.pdc = &arg_list;
 
diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c
index f477874..e421842 100644
--- a/drivers/acpi/processor_core.c
+++ b/drivers/acpi/processor_core.c
@@ -256,6 +256,43 @@
 
 
 /* --------------------------------------------------------------------------
+                              Common ACPI processor fucntions
+   -------------------------------------------------------------------------- */
+
+/*
+ * _PDC is required for a BIOS-OS handshake for most of the newer
+ * ACPI processor features.
+ */
+
+int acpi_processor_set_pdc(struct acpi_processor *pr,
+				struct acpi_object_list *pdc_in)
+{
+	acpi_status		status = AE_OK;
+	u32			arg0_buf[3];
+	union acpi_object	arg0 = {ACPI_TYPE_BUFFER};
+	struct acpi_object_list	no_object = {1, &arg0};
+	struct acpi_object_list	*pdc;
+
+	ACPI_FUNCTION_TRACE("acpi_processor_set_pdc");
+
+	arg0.buffer.length = 12;
+	arg0.buffer.pointer = (u8 *) arg0_buf;
+	arg0_buf[0] = ACPI_PDC_REVISION_ID;
+	arg0_buf[1] = 0;
+	arg0_buf[2] = 0;
+
+	pdc = (pdc_in) ? pdc_in : &no_object;
+
+	status = acpi_evaluate_object(pr->handle, "_PDC", pdc, NULL);
+
+	if ((ACPI_FAILURE(status)) && (pdc_in))
+		ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Error evaluating _PDC, using legacy perf. control...\n"));
+
+	return_VALUE(status);
+}
+
+
+/* --------------------------------------------------------------------------
                               FS Interface (/proc)
    -------------------------------------------------------------------------- */
 
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index 79d5ca3..8f038cd 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -6,6 +6,8 @@
  *  Copyright (C) 2004       Dominik Brodowski <linux@brodo.de>
  *  Copyright (C) 2004  Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
  *  			- Added processor hotplug support
+ *  Copyright (C) 2005  Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
+ *  			- Added support for C3 on SMP
  *
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  *
@@ -142,7 +144,7 @@
 		switch (old->type) {
 		case ACPI_STATE_C3:
 			/* Disable bus master reload */
-			if (new->type != ACPI_STATE_C3)
+			if (new->type != ACPI_STATE_C3 && pr->flags.bm_check)
 				acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0, ACPI_MTX_DO_NOT_LOCK);
 			break;
 		}
@@ -152,7 +154,7 @@
 	switch (new->type) {
 	case ACPI_STATE_C3:
 		/* Enable bus master reload */
-		if (old->type != ACPI_STATE_C3)
+		if (old->type != ACPI_STATE_C3 && pr->flags.bm_check)
 			acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 1, ACPI_MTX_DO_NOT_LOCK);
 		break;
 	}
@@ -163,6 +165,9 @@
 }
 
 
+static atomic_t 	c3_cpu_count;
+
+
 static void acpi_processor_idle (void)
 {
 	struct acpi_processor	*pr = NULL;
@@ -297,8 +302,22 @@
 		break;
 
 	case ACPI_STATE_C3:
-		/* Disable bus master arbitration */
-		acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1, ACPI_MTX_DO_NOT_LOCK);
+		
+		if (pr->flags.bm_check) {
+			if (atomic_inc_return(&c3_cpu_count) ==
+					num_online_cpus()) {
+				/*
+				 * All CPUs are trying to go to C3
+				 * Disable bus master arbitration
+				 */
+				acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1,
+					ACPI_MTX_DO_NOT_LOCK);
+			}
+		} else {
+			/* SMP with no shared cache... Invalidate cache  */
+			ACPI_FLUSH_CPU_CACHE();
+		}
+		
 		/* Get start time (ticks) */
 		t1 = inl(acpi_fadt.xpm_tmr_blk.address);
 		/* Invoke C3 */
@@ -307,8 +326,12 @@
 		t2 = inl(acpi_fadt.xpm_tmr_blk.address);
 		/* Get end time (ticks) */
 		t2 = inl(acpi_fadt.xpm_tmr_blk.address);
-		/* Enable bus master arbitration */
-		acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0, ACPI_MTX_DO_NOT_LOCK);
+		if (pr->flags.bm_check) {
+			/* Enable bus master arbitration */
+			atomic_dec(&c3_cpu_count);
+			acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0, ACPI_MTX_DO_NOT_LOCK);
+		}
+
 		/* Re-enable interrupts */
 		local_irq_enable();
 		/* Compute time (ticks) that we were actually asleep */
@@ -552,9 +575,6 @@
 
 	ACPI_FUNCTION_TRACE("acpi_processor_get_power_info_cst");
 
-	if (errata.smp)
-		return_VALUE(-ENODEV);
-
 	if (nocst)
 		return_VALUE(-ENODEV);
 
@@ -687,13 +707,6 @@
 		return_VOID;
 	}
 
-	/* We're (currently) only supporting C2 on UP */
-	else if (errata.smp) {
-		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
-				  "C2 not supported in SMP mode\n"));
-		return_VOID;
-	}
-
 	/*
 	 * Otherwise we've met all of our C2 requirements.
 	 * Normalize the C2 latency to expidite policy
@@ -709,6 +722,8 @@
 	struct acpi_processor *pr,
 	struct acpi_processor_cx *cx)
 {
+	static int bm_check_flag;
+
 	ACPI_FUNCTION_TRACE("acpi_processor_get_power_verify_c3");
 
 	if (!cx->address)
@@ -725,20 +740,6 @@
 		return_VOID;
 	}
 
-	/* bus mastering control is necessary */
-	else if (!pr->flags.bm_control) {
-		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
-				  "C3 support requires bus mastering control\n"));
-		return_VOID;
-	}
-
-	/* We're (currently) only supporting C2 on UP */
-	else if (errata.smp) {
-		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
-				  "C3 not supported in SMP mode\n"));
-		return_VOID;
-	}
-
 	/*
 	 * PIIX4 Erratum #18: We don't support C3 when Type-F (fast)
 	 * DMA transfers are used by any ISA device to avoid livelock.
@@ -752,6 +753,39 @@
 		return_VOID;
 	}
 
+	/* All the logic here assumes flags.bm_check is same across all CPUs */
+	if (!bm_check_flag) {
+		/* Determine whether bm_check is needed based on CPU  */
+		acpi_processor_power_init_bm_check(&(pr->flags), pr->id);
+		bm_check_flag = pr->flags.bm_check;
+	} else {
+		pr->flags.bm_check = bm_check_flag;
+	}
+
+	if (pr->flags.bm_check) {
+		printk("Disabling BM access before entering C3\n");
+		/* bus mastering control is necessary */
+		if (!pr->flags.bm_control) {
+			ACPI_DEBUG_PRINT((ACPI_DB_INFO,
+			  "C3 support requires bus mastering control\n"));
+			return_VOID;
+		}
+	} else {
+		printk("Invalidating cache before entering C3\n");
+		/*
+		 * WBINVD should be set in fadt, for C3 state to be
+		 * supported on when bm_check is not required.
+		 */
+		if (acpi_fadt.wb_invd != 1) {
+			ACPI_DEBUG_PRINT((ACPI_DB_INFO,
+			  "Cache invalidation should work properly"
+			  " for C3 to be enabled on SMP systems\n"));
+			return_VOID;
+		}
+		acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD,
+				0, ACPI_MTX_DO_NOT_LOCK);
+	}
+
 	/*
 	 * Otherwise we've met all of our C3 requirements.
 	 * Normalize the C3 latency to expidite policy.  Enable
@@ -760,7 +794,6 @@
 	 */
 	cx->valid = 1;
 	cx->latency_ticks = US_TO_PM_TIMER_TICKS(cx->latency);
-	pr->flags.bm_check = 1;
 
 	return_VOID;
 }
@@ -848,7 +881,7 @@
 	if (!pr)
  		return_VALUE(-EINVAL);
 
-	if (errata.smp || nocst) {
+	if ( nocst) {
 		return_VALUE(-ENODEV);
 	}
 
@@ -948,7 +981,6 @@
 	.release	= single_release,
 };
 
-
 int acpi_processor_power_init(struct acpi_processor *pr, struct acpi_device *device)
 {
 	acpi_status		status = 0;
@@ -965,7 +997,10 @@
 		first_run++;
 	}
 
-	if (!errata.smp && (pr->id == 0) && acpi_fadt.cst_cnt && !nocst) {
+	if (!pr)
+		return_VALUE(-EINVAL);
+
+	if (acpi_fadt.cst_cnt && !nocst) {
 		status = acpi_os_write_port(acpi_fadt.smi_cmd, acpi_fadt.cst_cnt, 8);
 		if (ACPI_FAILURE(status)) {
 			ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
@@ -973,6 +1008,8 @@
 		}
 	}
 
+	acpi_processor_power_init_pdc(&(pr->power), pr->id);
+	acpi_processor_set_pdc(pr, pr->power.pdc);
 	acpi_processor_get_power_info(pr);
 
 	/*
diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c
index a9a1a8f..1f0d625 100644
--- a/drivers/acpi/processor_perflib.c
+++ b/drivers/acpi/processor_perflib.c
@@ -165,37 +165,6 @@
 	acpi_processor_ppc_status &= ~PPC_REGISTERED;
 }
 
-/*
- * when registering a cpufreq driver with this ACPI processor driver, the
- * _PCT and _PSS structures are read out and written into struct
- * acpi_processor_performance.
- */
-static int acpi_processor_set_pdc (struct acpi_processor *pr)
-{
-	acpi_status             status = AE_OK;
-	u32			arg0_buf[3];
-	union acpi_object	arg0 = {ACPI_TYPE_BUFFER};
-	struct acpi_object_list no_object = {1, &arg0};
-	struct acpi_object_list *pdc;
-
-	ACPI_FUNCTION_TRACE("acpi_processor_set_pdc");
-
-	arg0.buffer.length = 12;
-	arg0.buffer.pointer = (u8 *) arg0_buf;
-	arg0_buf[0] = ACPI_PDC_REVISION_ID;
-	arg0_buf[1] = 0;
-	arg0_buf[2] = 0;
-
-	pdc = (pr->performance->pdc) ? pr->performance->pdc : &no_object;
-
-	status = acpi_evaluate_object(pr->handle, "_PDC", pdc, NULL);
-
-	if ((ACPI_FAILURE(status)) && (pr->performance->pdc))
-		ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Error evaluating _PDC, using legacy perf. control...\n"));
-
-	return_VALUE(status);
-}
-
 
 static int
 acpi_processor_get_performance_control (
@@ -357,7 +326,7 @@
 	if (!pr || !pr->performance || !pr->handle)
 		return_VALUE(-EINVAL);
 
-	acpi_processor_set_pdc(pr);
+	acpi_processor_set_pdc(pr, pr->performance->pdc);
 
 	status = acpi_get_handle(pr->handle, "_PCT", &handle);
 	if (ACPI_FAILURE(status)) {
diff --git a/include/acpi/pdc_intel.h b/include/acpi/pdc_intel.h
new file mode 100644
index 0000000..fd6730e
--- /dev/null
+++ b/include/acpi/pdc_intel.h
@@ -0,0 +1,29 @@
+
+/* _PDC bit definition for Intel processors */
+
+#ifndef __PDC_INTEL_H__
+#define __PDC_INTEL_H__
+
+#define ACPI_PDC_P_FFH			(0x0001)
+#define ACPI_PDC_C_C1_HALT		(0x0002)
+#define ACPI_PDC_T_FFH			(0x0004)
+#define ACPI_PDC_SMP_C1PT		(0x0008)
+#define ACPI_PDC_SMP_C2C3		(0x0010)
+#define ACPI_PDC_SMP_P_SWCOORD		(0x0020)
+#define ACPI_PDC_SMP_C_SWCOORD		(0x0040)
+#define ACPI_PDC_SMP_T_SWCOORD		(0x0080)
+#define ACPI_PDC_C_C1_FFH		(0x0100)
+
+
+#define ACPI_PDC_EST_CAPABILITY_SMP	(ACPI_PDC_SMP_C1PT | \
+					 ACPI_PDC_C_C1_HALT)
+
+#define ACPI_PDC_EST_CAPABILITY_SMP_MSR	(ACPI_PDC_EST_CAPABILITY_SMP | \
+					 ACPI_PDC_P_FFH)
+
+#define ACPI_PDC_C_CAPABILITY_SMP	(ACPI_PDC_SMP_C2C3 | \
+					 ACPI_PDC_SMP_C1PT | \
+					 ACPI_PDC_C_C1_HALT)
+
+#endif /* __PDC_INTEL_H__ */
+
diff --git a/include/acpi/processor.h b/include/acpi/processor.h
index 2f50a5b..50cfea4 100644
--- a/include/acpi/processor.h
+++ b/include/acpi/processor.h
@@ -4,6 +4,8 @@
 #include <linux/kernel.h>
 #include <linux/config.h>
 
+#include <asm/acpi.h>
+
 #define ACPI_PROCESSOR_BUSY_METRIC	10
 
 #define ACPI_PROCESSOR_MAX_POWER	8
@@ -14,6 +16,8 @@
 #define ACPI_PROCESSOR_MAX_THROTTLE	250	/* 25% */
 #define ACPI_PROCESSOR_MAX_DUTY_WIDTH	4
 
+#define ACPI_PDC_REVISION_ID		0x1
+
 /* Power Management */
 
 struct acpi_processor_cx;
@@ -59,6 +63,9 @@
 	u32			bm_activity;
 	int			count;
 	struct acpi_processor_cx states[ACPI_PROCESSOR_MAX_POWER];
+
+	/* the _PDC objects passed by the driver, if any */
+	struct acpi_object_list *pdc;
 };
 
 /* Performance Management */
@@ -82,8 +89,6 @@
 	acpi_integer		status;			/* success indicator */
 };
 
-#define ACPI_PDC_REVISION_ID                   0x1
-
 struct acpi_processor_performance {
 	unsigned int		 state;
 	unsigned int		 platform_limit;
@@ -179,7 +184,32 @@
 extern struct acpi_processor	*processors[NR_CPUS];
 extern struct acpi_processor_errata errata;
 
+int acpi_processor_set_pdc(struct acpi_processor *pr,
+		struct acpi_object_list *pdc_in);
+
+#ifdef ARCH_HAS_POWER_PDC_INIT
+void acpi_processor_power_init_pdc(struct acpi_processor_power *pow,
+		unsigned int cpu);
+void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags,
+		unsigned int cpu);
+#else
+static inline void acpi_processor_power_init_pdc(
+		struct acpi_processor_power *pow, unsigned int cpu)
+{
+	pow->pdc = NULL;
+	return;
+}
+
+static inline void acpi_processor_power_init_bm_check(
+		struct acpi_processor_flags *flags, unsigned int cpu)
+{
+	flags->bm_check = 1;
+	return;
+}
+#endif
+
 /* in processor_perflib.c */
+
 #ifdef CONFIG_CPU_FREQ
 void acpi_processor_ppc_init(void);
 void acpi_processor_ppc_exit(void);
diff --git a/include/asm-i386/acpi.h b/include/asm-i386/acpi.h
index c976c1d..cf828ac 100644
--- a/include/asm-i386/acpi.h
+++ b/include/asm-i386/acpi.h
@@ -28,6 +28,8 @@
 
 #ifdef __KERNEL__
 
+#include <acpi/pdc_intel.h>
+
 #include <asm/system.h>		/* defines cmpxchg */
 
 #define COMPILER_DEPENDENT_INT64   long long
@@ -101,12 +103,6 @@
         :"=r"(n_hi), "=r"(n_lo)     \
         :"0"(n_hi), "1"(n_lo))
 
-/*
- * Refer Intel ACPI _PDC support document for bit definitions
- */
-#define ACPI_PDC_EST_CAPABILITY_SMP 	0xa
-#define ACPI_PDC_EST_CAPABILITY_MSR	0x1
-
 #ifdef CONFIG_ACPI_BOOT 
 extern int acpi_lapic;
 extern int acpi_ioapic;
@@ -185,6 +181,8 @@
 
 extern u8 x86_acpiid_to_apicid[];
 
+#define ARCH_HAS_POWER_PDC_INIT	1
+
 #endif /*__KERNEL__*/
 
 #endif /*_ASM_ACPI_H*/
diff --git a/include/asm-x86_64/acpi.h b/include/asm-x86_64/acpi.h
index a6b41b8..dc8c981 100644
--- a/include/asm-x86_64/acpi.h
+++ b/include/asm-x86_64/acpi.h
@@ -28,6 +28,8 @@
 
 #ifdef __KERNEL__
 
+#include <acpi/pdc_intel.h>
+
 #define COMPILER_DEPENDENT_INT64   long long
 #define COMPILER_DEPENDENT_UINT64  unsigned long long
 
@@ -99,12 +101,6 @@
         :"=r"(n_hi), "=r"(n_lo)     \
         :"0"(n_hi), "1"(n_lo))
 
-/*
- * Refer Intel ACPI _PDC support document for bit definitions
- */
-#define ACPI_PDC_EST_CAPABILITY_SMP 	0xa
-#define ACPI_PDC_EST_CAPABILITY_MSR	0x1
-
 #ifdef CONFIG_ACPI_BOOT
 extern int acpi_lapic;
 extern int acpi_ioapic;