cpuidle: menu: Lookup CPU runqueues less

The menu governer makes separate lookups of the CPU runqueue to get
load and number of IO waiters but it can be done with a single lookup.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index f55d826..2770274 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -134,12 +134,9 @@
 #define LOAD_INT(x) ((x) >> FSHIFT)
 #define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
 
-static int get_loadavg(void)
+static inline int get_loadavg(unsigned long load)
 {
-	unsigned long this = this_cpu_load();
-
-
-	return LOAD_INT(this) * 10 + LOAD_FRAC(this) / 10;
+	return LOAD_INT(load) * 10 + LOAD_FRAC(load) / 10;
 }
 
 static inline int which_bucket(unsigned int duration, unsigned long nr_iowaiters)
@@ -175,13 +172,13 @@
  * to be, the higher this multiplier, and thus the higher
  * the barrier to go to an expensive C state.
  */
-static inline int performance_multiplier(unsigned long nr_iowaiters)
+static inline int performance_multiplier(unsigned long nr_iowaiters, unsigned long load)
 {
 	int mult = 1;
 
 	/* for higher loadavg, we are more reluctant */
 
-	mult += 2 * get_loadavg();
+	mult += 2 * get_loadavg(load);
 
 	/* for IO wait tasks (per cpu!) we add 5x each */
 	mult += 10 * nr_iowaiters;
@@ -296,7 +293,7 @@
 	int latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY);
 	int i;
 	unsigned int interactivity_req;
-	unsigned long nr_iowaiters;
+	unsigned long nr_iowaiters, cpu_load;
 
 	if (data->needs_update) {
 		menu_update(drv, dev);
@@ -312,7 +309,7 @@
 	/* determine the expected residency time, round up */
 	data->next_timer_us = ktime_to_us(tick_nohz_get_sleep_length());
 
-	nr_iowaiters = nr_iowait_cpu(smp_processor_id());
+	get_iowait_load(&nr_iowaiters, &cpu_load);
 	data->bucket = which_bucket(data->next_timer_us, nr_iowaiters);
 
 	/*
@@ -331,7 +328,7 @@
 	 * duration / latency ratio. Adjust the latency limit if
 	 * necessary.
 	 */
-	interactivity_req = data->predicted_us / performance_multiplier(nr_iowaiters);
+	interactivity_req = data->predicted_us / performance_multiplier(nr_iowaiters, cpu_load);
 	if (latency_req > interactivity_req)
 		latency_req = interactivity_req;
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 306f4f0..641bd95 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -168,8 +168,7 @@
 extern unsigned long nr_running(void);
 extern unsigned long nr_iowait(void);
 extern unsigned long nr_iowait_cpu(int cpu);
-extern unsigned long this_cpu_load(void);
-
+extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load);
 
 extern void calc_global_load(unsigned long ticks);
 extern void update_cpu_load_nohz(void);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 3bdf01b..863ef1d 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2385,6 +2385,13 @@
 	return atomic_read(&this->nr_iowait);
 }
 
+void get_iowait_load(unsigned long *nr_waiters, unsigned long *load)
+{
+	struct rq *this = this_rq();
+	*nr_waiters = atomic_read(&this->nr_iowait);
+	*load = this->cpu_load[0];
+}
+
 #ifdef CONFIG_SMP
 
 /*
diff --git a/kernel/sched/proc.c b/kernel/sched/proc.c
index 16f5a30..8ecd552 100644
--- a/kernel/sched/proc.c
+++ b/kernel/sched/proc.c
@@ -8,13 +8,6 @@
 
 #include "sched.h"
 
-unsigned long this_cpu_load(void)
-{
-	struct rq *this = this_rq();
-	return this->cpu_load[0];
-}
-
-
 /*
  * Global load-average calculations
  *