Add a sched_clock paravirt_op
The tsc-based get_scheduled_cycles interface is not a good match for
Xen's runstate accounting, which reports everything in nanoseconds.
This patch replaces this interface with a sched_clock interface, which
matches both Xen and VMI's requirements.
In order to do this, we:
1. replace get_scheduled_cycles with sched_clock
2. hoist cycles_2_ns into a common header
3. update vmi accordingly
One thing to note: because sched_clock is implemented as a weak
function in kernel/sched.c, we must define a real function in order to
override this weak binding. This means the usual paravirt_ops
technique of using an inline function won't work in this case.
Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: Zachary Amsden <zach@vmware.com>
Cc: Dan Hecht <dhecht@vmware.com>
Cc: john stultz <johnstul@us.ibm.com>
diff --git a/arch/i386/kernel/paravirt.c b/arch/i386/kernel/paravirt.c
index 60e08b9..53f07a8 100644
--- a/arch/i386/kernel/paravirt.c
+++ b/arch/i386/kernel/paravirt.c
@@ -302,7 +302,7 @@
.write_msr = native_write_msr_safe,
.read_tsc = native_read_tsc,
.read_pmc = native_read_pmc,
- .get_scheduled_cycles = native_read_tsc,
+ .sched_clock = native_sched_clock,
.get_cpu_khz = native_calculate_cpu_khz,
.load_tr_desc = native_load_tr_desc,
.set_ldt = native_set_ldt,
diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c
index ea63a30..252f901 100644
--- a/arch/i386/kernel/tsc.c
+++ b/arch/i386/kernel/tsc.c
@@ -84,7 +84,7 @@
*
* -johnstul@us.ibm.com "math is hard, lets go shopping!"
*/
-static unsigned long cyc2ns_scale __read_mostly;
+unsigned long cyc2ns_scale __read_mostly;
#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
@@ -93,15 +93,10 @@
cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz;
}
-static inline unsigned long long cycles_2_ns(unsigned long long cyc)
-{
- return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
-}
-
/*
* Scheduler clock - returns current time in nanosec units.
*/
-unsigned long long sched_clock(void)
+unsigned long long native_sched_clock(void)
{
unsigned long long this_offset;
@@ -118,12 +113,24 @@
return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
/* read the Time Stamp Counter: */
- get_scheduled_cycles(this_offset);
+ rdtscll(this_offset);
/* return the value in ns */
return cycles_2_ns(this_offset);
}
+/* We need to define a real function for sched_clock, to override the
+ weak default version */
+#ifdef CONFIG_PARAVIRT
+unsigned long long sched_clock(void)
+{
+ return paravirt_sched_clock();
+}
+#else
+unsigned long long sched_clock(void)
+ __attribute__((alias("native_sched_clock")));
+#endif
+
unsigned long native_calculate_cpu_khz(void)
{
unsigned long long start, end;
diff --git a/arch/i386/kernel/vmi.c b/arch/i386/kernel/vmi.c
index 234bd6f..72042bb 100644
--- a/arch/i386/kernel/vmi.c
+++ b/arch/i386/kernel/vmi.c
@@ -891,7 +891,7 @@
paravirt_ops.setup_boot_clock = vmi_time_bsp_init;
paravirt_ops.setup_secondary_clock = vmi_time_ap_init;
#endif
- paravirt_ops.get_scheduled_cycles = vmi_get_sched_cycles;
+ paravirt_ops.sched_clock = vmi_sched_clock;
paravirt_ops.get_cpu_khz = vmi_cpu_khz;
/* We have true wallclock functions; disable CMOS clock sync */
diff --git a/arch/i386/kernel/vmiclock.c b/arch/i386/kernel/vmiclock.c
index 26a37f8..f9b845f 100644
--- a/arch/i386/kernel/vmiclock.c
+++ b/arch/i386/kernel/vmiclock.c
@@ -64,10 +64,10 @@
return 0;
}
-/* paravirt_ops.get_scheduled_cycles = vmi_get_sched_cycles */
-unsigned long long vmi_get_sched_cycles(void)
+/* paravirt_ops.sched_clock = vmi_sched_clock */
+unsigned long long vmi_sched_clock(void)
{
- return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE);
+ return cycles_2_ns(vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE));
}
/* paravirt_ops.get_cpu_khz = vmi_cpu_khz */