[PATCH] vmi: sched clock paravirt op fix

The custom_sched_clock hook is broken.  The result from sched_clock needs to
be in nanoseconds, not in CPU cycles.  The TSC is insufficient for this
purpose, because TSC is poorly defined in a virtual environment, and mostly
represents real world time instead of scheduled process time (which can be
interrupted without notice when a virtual machine is descheduled).

To make the scheduler consistent, we must expose a different nature of time,
that is scheduled time.  So deprecate this custom_sched_clock hack and turn it
into a paravirt-op, as it should have been all along.  This allows the tsc.c
code which converts cycles to nanoseconds to be shared by all paravirt-ops
backends.

It is unfortunate to add a new paravirt-op, but this is a very distinct
abstraction which is clearly different for all virtual machine
implementations, and it gets rid of an ugly indirect function which I
ashamedly admit I hacked in to try to get this to work earlier, and then even
got in the wrong units.

Signed-off-by: Zachary Amsden <zach@vmware.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h
index 6317e0a..a132302 100644
--- a/include/asm-i386/paravirt.h
+++ b/include/asm-i386/paravirt.h
@@ -94,6 +94,7 @@
 
 	u64 (*read_tsc)(void);
 	u64 (*read_pmc)(void);
+ 	u64 (*get_scheduled_cycles)(void);
 
 	void (*load_tr_desc)(void);
 	void (*load_gdt)(const struct Xgt_desc_struct *);
@@ -273,6 +274,8 @@
 
 #define rdtscll(val) (val = paravirt_ops.read_tsc())
 
+#define get_scheduled_cycles(val) (val = paravirt_ops.get_scheduled_cycles())
+
 #define write_tsc(val1,val2) wrmsr(0x10, val1, val2)
 
 #define rdpmc(counter,low,high) do {				\
diff --git a/include/asm-i386/time.h b/include/asm-i386/time.h
index 571b429..ea8065a 100644
--- a/include/asm-i386/time.h
+++ b/include/asm-i386/time.h
@@ -30,7 +30,6 @@
 
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
-extern unsigned long long native_sched_clock(void);
 #else /* !CONFIG_PARAVIRT */
 
 #define get_wallclock() native_get_wallclock()
diff --git a/include/asm-i386/timer.h b/include/asm-i386/timer.h
index 4752c3a..d1f7b4f 100644
--- a/include/asm-i386/timer.h
+++ b/include/asm-i386/timer.h
@@ -4,13 +4,19 @@
 #include <linux/pm.h>
 
 #define TICK_SIZE (tick_nsec / 1000)
+
 void setup_pit_timer(void);
+unsigned long long native_sched_clock(void);
+
 /* Modifiers for buggy PIT handling */
 extern int pit_latch_buggy;
 extern int timer_ack;
 extern int no_timer_check;
-extern unsigned long long (*custom_sched_clock)(void);
 extern int no_sync_cmos_clock;
 extern int recalibrate_cpu_khz(void);
 
+#ifndef CONFIG_PARAVIRT
+#define get_scheduled_cycles(val) rdtscll(val)
+#endif
+
 #endif
diff --git a/include/asm-i386/vmi_time.h b/include/asm-i386/vmi_time.h
index c129312..f59c35d 100644
--- a/include/asm-i386/vmi_time.h
+++ b/include/asm-i386/vmi_time.h
@@ -49,7 +49,7 @@
 extern void __init vmi_time_init(void);
 extern unsigned long vmi_get_wallclock(void);
 extern int vmi_set_wallclock(unsigned long now);
-extern unsigned long long vmi_sched_clock(void);
+extern unsigned long long vmi_get_sched_cycles(void);
 
 #ifdef CONFIG_X86_LOCAL_APIC
 extern void __init vmi_timer_setup_boot_alarm(void);