x86: provide delay loop for x86_64.

This is for consistency with i386. We call use_tsc_delay()
at tsc initialization for x86_64, so we'll always be using it.

Signed-off-by: Glauber Costa <gcosta@redhat.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 3c36f92..4a775d0 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -513,6 +513,7 @@
 	 */
 	for_each_possible_cpu(cpu)
 		set_cyc2ns_scale(cpu_khz, cpu);
+	use_tsc_delay();
 
 	if (tsc_disabled > 0)
 		return;
diff --git a/arch/x86/lib/delay_64.c b/arch/x86/lib/delay_64.c
index 4c441be..d0326d0 100644
--- a/arch/x86/lib/delay_64.c
+++ b/arch/x86/lib/delay_64.c
@@ -22,13 +22,28 @@
 #include <asm/smp.h>
 #endif
 
-int __devinit read_current_timer(unsigned long *timer_value)
+/* simple loop based delay: */
+static void delay_loop(unsigned long loops)
 {
-	rdtscll(*timer_value);
-	return 0;
+	asm volatile(
+		"	test %0,%0	\n"
+		"	jz 3f		\n"
+		"	jmp 1f		\n"
+
+		".align 16		\n"
+		"1:	jmp 2f		\n"
+
+		".align 16		\n"
+		"2:	dec %0		\n"
+		"	jnz 2b		\n"
+		"3:	dec %0		\n"
+
+		: /* we don't need output */
+		:"a" (loops)
+	);
 }
 
-void __delay(unsigned long loops)
+static void delay_tsc(unsigned long loops)
 {
 	unsigned bclock, now;
 	int cpu;
@@ -63,6 +78,27 @@
 	}
 	preempt_enable();
 }
+
+static void (*delay_fn)(unsigned long) = delay_loop;
+
+void use_tsc_delay(void)
+{
+	delay_fn = delay_tsc;
+}
+
+int __devinit read_current_timer(unsigned long *timer_value)
+{
+	if (delay_fn == delay_tsc) {
+		rdtscll(*timer_value);
+		return 0;
+	}
+	return -1;
+}
+
+void __delay(unsigned long loops)
+{
+	delay_fn(loops);
+}
 EXPORT_SYMBOL(__delay);
 
 inline void __const_udelay(unsigned long xloops)