/*
* Multi-purpose Load tracker
*
* Copyright (C) 2018 Samsung Electronics Co., Ltd
* Park Bumgyu <bumgyu.park@samsung.com>
*/
#include <linux/sched.h>
#include <linux/ems_service.h>
#include <linux/ems.h>
#include <trace/events/ems.h>
#include "../tune.h"
#include "../sched.h"
#include "ems.h"
extern long schedtune_margin(unsigned long capacity, unsigned long signal, long boost);
static inline int get_sse(struct sched_entity *se)
{
if (se->my_q)
return 0;
return task_of(se)->sse;
}
/*
* ml_task_runnable - task runnable
*
 * Time the task spends on the runqueue. This includes not only the task's
 * running time but also its waiting time in the runqueue. The calculation
 * is the same as for the task util.
*/
unsigned long ml_task_runnable(struct task_struct *p)
{
int boost = schedtune_task_boost(p);
unsigned long runnable_avg = READ_ONCE(p->se.avg.ml.runnable_avg);
unsigned long capacity;
if (boost == 0)
return runnable_avg;
capacity = capacity_orig_of_sse(task_cpu(p), p->sse);
return runnable_avg + schedtune_margin(capacity, runnable_avg, boost);
}
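/*
 * Illustrative example, assuming schedtune_margin() applies the usual
 * SchedTune proportional margin (margin = (capacity - signal) * boost / 100):
 * with capacity = 1024, runnable_avg = 300 and boost = 10, the boosted
 * runnable becomes 300 + (1024 - 300) * 10 / 100 = 372. The exact result
 * depends on the schedtune_margin() implementation.
 */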
/*
* ml_task_util - task util
*
 * Task utilization. The calculation is the same as the cfs task util, but
 * the applied capacity differs depending on whether the task is sse or uss;
 * therefore, an sse task has a different value from the cfs task util.
*/
unsigned long ml_task_util(struct task_struct *p)
{
return READ_ONCE(p->se.avg.ml.util_avg);
}
/*
* _ml_task_util_est/ml_task_util_est - task util with util-est
*
 * Task utilization with util-est. The calculation is the same as
 * task_util_est of cfs.
*/
static unsigned long _ml_task_util_est(struct task_struct *p)
{
struct util_est ue = READ_ONCE(p->se.avg.ml.util_est);
return schedtune_util_est_en(p) ? max(ue.ewma, ue.enqueued)
: ml_task_util(p);
}
unsigned long ml_task_util_est(struct task_struct *p)
{
return schedtune_util_est_en(p) ? max(READ_ONCE(p->se.avg.ml.util_avg), _ml_task_util_est(p))
: ml_task_util(p);
}
/*
* ml_boosted_task_util - task util with schedtune boost
*
 * Boosted task utilization; the same as boosted_task_util of cfs.
*/
unsigned long ml_boosted_task_util(struct task_struct *p)
{
int boost = schedtune_task_boost(p);
unsigned long util = ml_task_util(p);
unsigned long capacity;
if (boost == 0)
return util;
capacity = capacity_orig_of_sse(task_cpu(p), p->sse);
return util + schedtune_margin(capacity, util, boost);
}
/*
 * __ml_cpu_util - sse/uss utilization of a cpu
 *
 * Cpu utilization. This function returns the sse or uss utilization of
 * the cpu according to the "sse" parameter.
*/
unsigned long __ml_cpu_util(int cpu, int sse)
{
struct cfs_rq *cfs_rq = &cpu_rq(cpu)->cfs;
return sse ? READ_ONCE(cfs_rq->avg.ml.util_avg_s) :
READ_ONCE(cfs_rq->avg.ml.util_avg);
}
unsigned long __ml_cpu_util_est(int cpu, int sse)
{
struct cfs_rq *cfs_rq = &cpu_rq(cpu)->cfs;
return sse ? READ_ONCE(cfs_rq->avg.ml.util_est_s.enqueued) :
READ_ONCE(cfs_rq->avg.ml.util_est.enqueued);
}
/*
* __normalize_util - combine sse and uss utilization
*
 * Combine sse and uss utilization and normalize to sse or uss according to
 * the "sse" parameter.
*/
static inline unsigned long
__normalize_util(int cpu, unsigned int uss_util, unsigned int sse_util, int sse)
{
if (sse)
return sse_util + ((capacity_ratio(cpu, sse) * uss_util) >> SCHED_CAPACITY_SHIFT);
else
return uss_util + ((capacity_ratio(cpu, sse) * sse_util) >> SCHED_CAPACITY_SHIFT);
}
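/*
 * Illustrative sketch of __normalize_util(): the "other" class is rescaled
 * by capacity_ratio() (a 1024-based ratio) before being added. For example,
 * normalizing to uss with uss_util = 100, sse_util = 200 and a hypothetical
 * ratio of 512 gives 100 + ((512 * 200) >> 10) = 200.
 */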
/* uss is the default because cfs refers to uss */
#define USS 0
#define SSE 1
static unsigned long ml_cpu_util_est(int cpu)
{
return __normalize_util(cpu, __ml_cpu_util_est(cpu, USS),
__ml_cpu_util_est(cpu, SSE), USS);
}
/*
* _ml_cpu_util - sse/uss combined cpu utilization
*
 * Sse and uss combined cpu utilization. This function returns the combined
 * cpu utilization normalized to sse or uss according to the "sse" parameter.
*/
unsigned long _ml_cpu_util(int cpu, int sse)
{
unsigned long util;
util = __normalize_util(cpu, __ml_cpu_util(cpu, USS),
__ml_cpu_util(cpu, SSE), sse);
if (sched_feat(UTIL_EST))
util = max_t(unsigned long, util, ml_cpu_util_est(cpu));
return min_t(unsigned long, util, capacity_orig_of(cpu));
}
/*
 * ml_cpu_util - sse/uss combined cpu utilization
 *
 * Sse and uss combined, uss-normalized cpu utilization. The default policy
 * is to normalize to uss because cfs refers to uss.
*/
unsigned long ml_cpu_util(int cpu)
{
return _ml_cpu_util(cpu, USS);
}
/*
 * ml_cpu_util_ratio - cpu usage ratio
 *
 * Cpu usage ratio of sse or uss. The ratio is based on a maximum of 1024.
*/
unsigned long ml_cpu_util_ratio(int cpu, int sse)
{
return (__ml_cpu_util(cpu, sse) << SCHED_CAPACITY_SHIFT)
/ capacity_orig_of_sse(cpu, sse);
}
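/*
 * Example: with __ml_cpu_util() = 128 and capacity_orig_of_sse() = 512,
 * the ratio is (128 << 10) / 512 = 256, i.e. 25% of 1024.
 */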
#define UTIL_AVG_UNCHANGED 0x1
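/*
 * UTIL_AVG_UNCHANGED is the LSB of util_est.enqueued. It marks a util_est
 * contribution whose util_avg has not changed since enqueue time;
 * cfs_se_util_change_multi_load() clears it on the first util_avg update
 * and dequeue_multi_load() uses it to skip redundant util_est updates.
 * This follows the same convention as UTIL_AVG_UNCHANGED in cfs.
 */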
/*
* ml_cpu_util_wake - cpu utilization except waking task
*
* Cpu utilization with any contributions from the waking task p removed.
*/
unsigned long ml_cpu_util_wake(int cpu, struct task_struct *p)
{
unsigned long uss_util, sse_util;
if (cpu != task_cpu(p) || !READ_ONCE(p->se.avg.last_update_time))
return ml_cpu_util(cpu);
uss_util = __ml_cpu_util(cpu, USS);
sse_util = __ml_cpu_util(cpu, SSE);
if (p->sse)
sse_util -= min_t(unsigned long, sse_util, ml_task_util(p));
else
uss_util -= min_t(unsigned long, uss_util, ml_task_util(p));
if (sched_feat(UTIL_EST)) {
unsigned int uss_util_est = __ml_cpu_util_est(cpu, USS);
unsigned int sse_util_est = __ml_cpu_util_est(cpu, SSE);
if (unlikely(task_on_rq_queued(p) || current == p)) {
if (p->sse) {
sse_util_est -= min_t(unsigned int, sse_util_est,
(_ml_task_util_est(p) | UTIL_AVG_UNCHANGED));
} else {
uss_util_est -= min_t(unsigned int, uss_util_est,
(_ml_task_util_est(p) | UTIL_AVG_UNCHANGED));
}
}
uss_util = max_t(unsigned long, uss_util, uss_util_est);
sse_util = max_t(unsigned long, sse_util, sse_util_est);
}
uss_util = min_t(unsigned long, uss_util, capacity_orig_of_sse(cpu, USS));
sse_util = min_t(unsigned long, sse_util, capacity_orig_of_sse(cpu, SSE));
return __normalize_util(cpu, uss_util, sse_util, USS);
}
/*
* ml_task_attached_cpu_util - cpu utilization including waking task
*
 * Cpu utilization when the waking task is attached to this cpu. There is no
 * change in utilization for the cpu on which the task is running.
*/
unsigned long ml_task_attached_cpu_util(int cpu, struct task_struct *p)
{
unsigned long uss_util, sse_util;
uss_util = __ml_cpu_util(cpu, USS);
sse_util = __ml_cpu_util(cpu, SSE);
if (READ_ONCE(p->se.avg.last_update_time))
return ml_cpu_util(cpu);
if (cpu != task_cpu(p)) {
if (p->sse)
sse_util += ml_task_util(p);
else
uss_util += ml_task_util(p);
}
if (sched_feat(UTIL_EST)) {
unsigned int uss_util_est = __ml_cpu_util_est(cpu, USS);
unsigned int sse_util_est = __ml_cpu_util_est(cpu, SSE);
if (p->sse)
sse_util_est += ml_task_util_est(p);
else
uss_util_est += ml_task_util_est(p);
uss_util = max_t(unsigned long, uss_util, uss_util_est);
sse_util = max_t(unsigned long, sse_util, sse_util_est);
}
return __normalize_util(cpu, uss_util, sse_util, USS);
}
/*
 * ml_boosted_cpu_util - sse/uss combined cpu utilization with boost
*
* Sse and uss combined and uss normalized cpu utilization with schedtune.boost.
*/
extern DEFINE_PER_CPU(struct boost_groups, cpu_boost_groups);
unsigned long ml_boosted_cpu_util(int cpu)
{
int fv_boost = 0, boost = schedtune_cpu_boost(cpu);
struct boost_groups *bg = &per_cpu(cpu_boost_groups, cpu);
unsigned long util = ml_cpu_util(cpu);
unsigned long capacity;
if (boost == 0)
return util;
capacity = capacity_orig_of(cpu);
if (bg->group[STUNE_TOPAPP].tasks)
fv_boost = freqvar_st_boost_vector(cpu);
if (fv_boost > boost)
boost = fv_boost;
return util + schedtune_margin(capacity, util, boost);
}
static void update_next_balance(int cpu, struct multi_load *ml)
{
struct sched_avg *sa = container_of(ml, struct sched_avg, ml);
struct sched_entity *se = container_of(sa, struct sched_entity, avg);
if (se->my_q)
return;
if (!need_ontime_migration_trigger(cpu, task_of(se)))
return;
/*
	 * Update next_balance of the currently running cpu, because the tick
	 * is most likely to occur there first.
*/
cpu_rq(smp_processor_id())->next_balance = jiffies;
}
/* declare extern function from cfs */
extern u64 decay_load(u64 val, u64 n);
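/*
 * The accumulation mirrors the cfs PELT segments: d1 is the unfinished part
 * of the period in which the last update happened, d2 covers the p-1 fully
 * elapsed 1024us periods since then, and d3 is the still-open current period:
 *
 *           d1          d2           d3
 *           ^           ^            ^
 *           |           |            |
 *         |<->|<----------------->|<--->|
 * ... |---x---|------| ... |------|-----x (now)
 *
 *                           p-1
 *   contrib = d1 y^p + 1024 \Sum y^n + d3
 *                           n=1
 *
 * c1 below is d1 decayed over p periods, c2 is the closed form of the middle
 * sum (LOAD_AVG_MAX - decay_load(LOAD_AVG_MAX, p) - 1024) and c3 = d3. The
 * previously accumulated sum is decayed separately in the __update_*()
 * helpers below.
 */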
static u32 __accumulate_pelt_segments(u64 periods, u32 d1, u32 d3)
{
u32 c1, c2, c3 = d3;
c1 = decay_load((u64)d1, periods);
c2 = LOAD_AVG_MAX - decay_load(LOAD_AVG_MAX, periods) - 1024;
return c1 + c2 + c3;
}
/*
 * The three functions below share the same sequence to update load:
* - __update_task_runnable,
* - __update_task_util,
* - __update_cpu_util
*
* step 1: decay the load for the elapsed periods.
* step 2: accumulate load if the condition is met.
* - runnable : task weight is not 0
* - util : task or cpu is running
* step 3: update load avg with load sum
*/
static void
__update_task_runnable(struct multi_load *ml, u64 periods, u32 contrib,
unsigned long scale_cpu, int weight, int cpu)
{
if (periods)
ml->runnable_sum = decay_load((u64)(ml->runnable_sum), periods);
if (weight)
ml->runnable_sum += contrib * scale_cpu;
if (!periods)
return;
ml->runnable_avg = div_u64(ml->runnable_sum, LOAD_AVG_MAX - 1024 + ml->period_contrib);
update_next_balance(cpu, ml);
}
static void
__update_task_util(struct multi_load *ml, u64 periods, u32 contrib,
unsigned long scale_cpu, int running)
{
if (periods)
ml->util_sum = decay_load((u64)(ml->util_sum), periods);
if (running)
ml->util_sum += contrib * scale_cpu;
if (!periods)
return;
ml->util_avg = ml->util_sum / (LOAD_AVG_MAX - 1024 + ml->period_contrib);
}
static void
__update_cpu_util(struct multi_load *ml, u64 periods, u32 contrib,
unsigned long scale_cpu, int running, struct cfs_rq *cfs_rq)
{
if (periods) {
ml->util_sum = decay_load((u64)(ml->util_sum), periods);
ml->util_sum_s = decay_load((u64)(ml->util_sum_s), periods);
}
if (running) {
if (get_sse(cfs_rq->curr))
ml->util_sum_s += contrib * scale_cpu;
else
ml->util_sum += contrib * scale_cpu;
}
if (!periods)
return;
ml->util_avg = ml->util_sum / (LOAD_AVG_MAX - 1024 + ml->period_contrib);
ml->util_avg_s = ml->util_sum_s / (LOAD_AVG_MAX - 1024 + ml->period_contrib);
}
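/*
 * Note: the divisor LOAD_AVG_MAX - 1024 + period_contrib matches the one
 * used by cfs PELT, so an entity that runs continuously at maximum
 * frequency converges towards its scale_cpu, i.e. towards
 * capacity_orig_of_sse() of its class.
 */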
static void
trace_multi_load(struct multi_load *ml, struct cfs_rq *cfs_rq, struct sched_avg *avg)
{
struct rq *rq;
struct sched_entity *se;
if (cfs_rq) {
rq = cfs_rq->rq;
/* trace only cpu root cfs_rq */
if (&rq->cfs == cfs_rq)
trace_ems_multi_load_cpu(cpu_of(rq), ml->util_avg, ml->util_avg_s);
} else {
se = container_of(avg, struct sched_entity, avg);
if (!se->my_q)
trace_ems_multi_load_task(task_of(se), ml->runnable_avg, ml->util_avg);
}
}
void
update_multi_load(u64 delta, int cpu, struct sched_avg *sa,
unsigned long weight, int running, struct cfs_rq *cfs_rq)
{
struct multi_load *ml = &sa->ml;
struct sched_entity *se;
unsigned long scale_freq, scale_cpu;
u32 contrib = (u32)delta;
u64 periods;
/* Obtain scale freq */
scale_freq = arch_scale_freq_capacity(NULL, cpu);
/* Obtain scale cpu */
if (cfs_rq) {
scale_cpu = running ? capacity_orig_of_sse(cpu, get_sse(cfs_rq->curr)) : 0;
} else {
se = container_of(sa, struct sched_entity, avg);
scale_cpu = capacity_orig_of_sse(cpu, get_sse(se));
}
delta += ml->period_contrib;
periods = delta / 1024; /* A period is 1024us (~1ms) */
if (periods) {
delta %= 1024;
contrib = __accumulate_pelt_segments(periods,
1024 - ml->period_contrib, delta);
}
ml->period_contrib = delta;
contrib = (contrib * scale_freq) >> SCHED_CAPACITY_SHIFT;
if (cfs_rq)
__update_cpu_util(ml, periods, contrib, scale_cpu, running, cfs_rq);
else {
__update_task_util(ml, periods, contrib, scale_cpu, running);
__update_task_runnable(ml, periods, contrib, scale_cpu, weight, cpu);
}
if (periods)
trace_multi_load(ml, cfs_rq, sa);
}
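/*
 * A new entity inherits half of the forking task's (current's) runnable
 * signal and starts with zero utilization, which then builds up as the task
 * runs. period_contrib is primed to 1023 (just under one period) so the
 * first update after enqueue immediately rolls over into a new period.
 */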
void init_multi_load(struct sched_entity *se)
{
struct multi_load *ml = &se->avg.ml;
ml->period_contrib = 1023;
ml->runnable_sum = current->se.avg.ml.runnable_sum >> 1;
ml->runnable_avg = current->se.avg.ml.runnable_avg >> 1;
ml->util_sum = 0;
ml->util_avg = 0;
ml->util_sum_s = 0;
ml->util_avg_s = 0;
}
/**
* detach_entity_multi_load - detach this entity from its cfs_rq load avg
* @cfs_rq: cfs_rq to detach from
* @se: sched_entity to detach
*/
void detach_entity_multi_load(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
if (get_sse(se)) {
sub_positive(&cfs_rq->avg.ml.util_avg_s, se->avg.ml.util_avg);
sub_positive(&cfs_rq->avg.ml.util_sum_s, se->avg.ml.util_sum);
} else {
sub_positive(&cfs_rq->avg.ml.util_avg, se->avg.ml.util_avg);
sub_positive(&cfs_rq->avg.ml.util_sum, se->avg.ml.util_sum);
}
}
/**
* attach_entity_multi_load - attach this entity to its cfs_rq load avg
* @cfs_rq: cfs_rq to attach to
* @se: sched_entity to attach
*/
void attach_entity_multi_load(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
if (get_sse(se)) {
cfs_rq->avg.ml.util_avg_s += se->avg.ml.util_avg;
cfs_rq->avg.ml.util_sum_s += se->avg.ml.util_sum;
} else {
cfs_rq->avg.ml.util_avg += se->avg.ml.util_avg;
cfs_rq->avg.ml.util_sum += se->avg.ml.util_sum;
}
}
/**
 * remove_entity_multi_load - record a removed entity's util average for later removal from cfs_rq
* @cfs_rq : cfs_rq to remove from
* @se : sched_entity to remove
*/
void remove_entity_multi_load(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
if (get_sse(se))
atomic_long_add(se->avg.ml.util_avg, &cfs_rq->ml_q.removed_util_avg_s);
else
atomic_long_add(se->avg.ml.util_avg, &cfs_rq->ml_q.removed_util_avg);
}
/**
* apply_removed_multi_load - apply removed entity's util average to cfs_rq
* @cfs_rq : cfs_rq to update
*/
void apply_removed_multi_load(struct cfs_rq *cfs_rq)
{
struct multi_load *ml = &cfs_rq->avg.ml;
long r;
r = atomic_long_xchg(&cfs_rq->ml_q.removed_util_avg, 0);
sub_positive(&ml->util_avg, r);
sub_positive(&ml->util_sum, r * LOAD_AVG_MAX);
r = atomic_long_xchg(&cfs_rq->ml_q.removed_util_avg_s, 0);
sub_positive(&ml->util_avg_s, r);
sub_positive(&ml->util_sum_s, r * LOAD_AVG_MAX);
}
/* Take into account change of utilization of a child task group */
void update_tg_multi_load(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
struct cfs_rq *gcfs_rq = se->my_q;
	unsigned long *cfs_rq_util_avg, *gcfs_rq_util_avg;
	u32 *cfs_rq_util_sum;
	long delta;

	if (get_sse(se)) {
		gcfs_rq_util_avg = &gcfs_rq->avg.ml.util_avg_s;
		cfs_rq_util_avg = &cfs_rq->avg.ml.util_avg_s;
		cfs_rq_util_sum = &cfs_rq->avg.ml.util_sum_s;
	} else {
		gcfs_rq_util_avg = &gcfs_rq->avg.ml.util_avg;
		cfs_rq_util_avg = &cfs_rq->avg.ml.util_avg;
		cfs_rq_util_sum = &cfs_rq->avg.ml.util_sum;
	}
	delta = *gcfs_rq_util_avg - se->avg.ml.util_avg;

	/* Nothing to update */
	if (!delta)
		return;

	/* Set new sched_entity's utilization */
	se->avg.ml.util_avg = *gcfs_rq_util_avg;
se->avg.ml.util_sum = se->avg.ml.util_avg * LOAD_AVG_MAX;
/* Update parent cfs_rq utilization */
add_positive(cfs_rq_util_avg, delta);
*cfs_rq_util_sum = *cfs_rq_util_avg * LOAD_AVG_MAX;
}
/*
 * When a task is dequeued, its estimated utilization should not be updated
 * if its util_avg has not been updated at least once.
* This flag is used to synchronize util_avg updates with util_est updates.
* We map this information into the LSB bit of the utilization saved at
* dequeue time (i.e. util_est.dequeued).
*/
void cfs_se_util_change_multi_load(struct task_struct *p, struct sched_avg *avg)
{
unsigned int enqueued;
if (!sched_feat(UTIL_EST))
return;
if (!schedtune_util_est_en(p))
return;
/* Avoid store if the flag has been already set */
enqueued = avg->ml.util_est.enqueued;
if (!(enqueued & UTIL_AVG_UNCHANGED))
return;
/* Reset flag to report util_avg has been updated */
enqueued &= ~UTIL_AVG_UNCHANGED;
WRITE_ONCE(avg->ml.util_est.enqueued, enqueued);
}
static inline struct util_est* cfs_rq_util_est(struct cfs_rq *cfs_rq, int sse)
{
return sse ? &cfs_rq->avg.ml.util_est_s : &cfs_rq->avg.ml.util_est;
}
void enqueue_multi_load(struct cfs_rq *cfs_rq,
struct task_struct *p)
{
unsigned int enqueued;
struct util_est *cfs_rq_ue;
if (!sched_feat(UTIL_EST))
return;
cfs_rq_ue = cfs_rq_util_est(cfs_rq, p->sse);
/* Update root cfs_rq's estimated utilization */
enqueued = cfs_rq_ue->enqueued;
enqueued += (_ml_task_util_est(p) | UTIL_AVG_UNCHANGED);
WRITE_ONCE(cfs_rq_ue->enqueued, enqueued);
/* Update plots for Task and CPU estimated utilization */
trace_ems_util_est_task(p, &p->se.avg);
trace_ems_util_est_cpu(cpu_of(cfs_rq->rq), cfs_rq);
}
/*
* Check if a (signed) value is within a specified (unsigned) margin,
* based on the observation that:
* abs(x) < y := (unsigned)(x + y - 1) < (2 * y - 1)
*
 * NOTE: this only works when value + margin < INT_MAX.
*/
static inline bool within_margin(int value, int margin)
{
return ((unsigned int)(value + margin - 1) < (2 * margin - 1));
}
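/*
 * Example: value = 5, margin = 10 -> (unsigned)(5 + 10 - 1) = 14 < 19, so
 * the value is within the margin. With value = -12, margin = 10 the sum
 * wraps to a huge unsigned number and the check correctly fails.
 */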
void
dequeue_multi_load(struct cfs_rq *cfs_rq, struct task_struct *p, bool task_sleep)
{
long last_ewma_diff;
struct util_est ue, *cfs_rq_ue;
if (!sched_feat(UTIL_EST))
return;
cfs_rq_ue = cfs_rq_util_est(cfs_rq, p->sse);
/*
* Update root cfs_rq's estimated utilization
*
* If *p is the last task then the root cfs_rq's estimated utilization
* of a CPU is 0 by definition.
*/
ue.enqueued = 0;
if (cfs_rq->nr_running) {
ue.enqueued = cfs_rq_ue->enqueued;
ue.enqueued -= min_t(unsigned int, ue.enqueued,
(_ml_task_util_est(p) | UTIL_AVG_UNCHANGED));
}
WRITE_ONCE(cfs_rq_ue->enqueued, ue.enqueued);
/* Update plots for CPU's estimated utilization */
trace_ems_util_est_cpu(cpu_of(cfs_rq->rq), cfs_rq);
/*
* Skip update of task's estimated utilization when the task has not
* yet completed an activation, e.g. being migrated.
*/
if (!task_sleep)
return;
if (!schedtune_util_est_en(p))
return;
/*
* If the PELT values haven't changed since enqueue time,
* skip the util_est update.
*/
ue = p->se.avg.ml.util_est;
if (ue.enqueued & UTIL_AVG_UNCHANGED)
return;
/*
* Skip update of task's estimated utilization when its EWMA is
* already ~1% close to its last activation value.
*/
ue.enqueued = (ml_task_util(p) | UTIL_AVG_UNCHANGED);
last_ewma_diff = ue.enqueued - ue.ewma;
if (within_margin(last_ewma_diff, capacity_orig_of(task_cpu(p)) / 100))
return;
/*
* Update Task's estimated utilization
*
* When *p completes an activation we can consolidate another sample
* of the task size. This is done by storing the current PELT value
* as ue.enqueued and by using this value to update the Exponential
* Weighted Moving Average (EWMA):
*
* ewma(t) = w * task_util(p) + (1-w) * ewma(t-1)
* = w * task_util(p) + ewma(t-1) - w * ewma(t-1)
* = w * (task_util(p) - ewma(t-1)) + ewma(t-1)
* = w * ( last_ewma_diff ) + ewma(t-1)
* = w * (last_ewma_diff + ewma(t-1) / w)
*
* Where 'w' is the weight of new samples, which is configured to be
* 0.25, thus making w=1/4 ( >>= UTIL_EST_WEIGHT_SHIFT)
*/
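	/*
	 * Numeric sketch (with UTIL_EST_WEIGHT_SHIFT == 2, i.e. w = 1/4, and
	 * ignoring the UTIL_AVG_UNCHANGED bit): ewma(t-1) = 200 and a new
	 * sample of 280 give last_ewma_diff = 80 and
	 * ewma(t) = (200 * 4 + 80) >> 2 = 220.
	 */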
ue.ewma <<= UTIL_EST_WEIGHT_SHIFT;
ue.ewma += last_ewma_diff;
ue.ewma >>= UTIL_EST_WEIGHT_SHIFT;
WRITE_ONCE(p->se.avg.ml.util_est, ue);
/* Update plots for Task's estimated utilization */
trace_ems_util_est_task(p, &p->se.avg);
}
/****************************************************************/
/* Periodic Active Ratio Tracking */
/****************************************************************/
enum {
PART_POLICY_RECENT = 0,
PART_POLICY_MAX,
PART_POLICY_MAX_RECENT_MAX,
PART_POLICY_LAST,
PART_POLICY_MAX_RECENT_LAST,
PART_POLICY_MAX_RECENT_AVG,
PART_POLICY_INVALID,
};
char *part_policy_name[] = {
"RECENT",
"MAX",
"MAX_RECENT_MAX",
"LAST",
"MAX_RECENT_LAST",
"MAX_RECENT_AVG",
"INVALID"
};
static __read_mostly unsigned int part_policy_idx = PART_POLICY_MAX_RECENT_LAST;
static __read_mostly u64 period_size = 8 * NSEC_PER_MSEC;
static __read_mostly u64 period_hist_size = 10;
static __read_mostly int high_patten_thres = 700;
static __read_mostly int high_patten_stdev = 200;
static __read_mostly int low_patten_count = 3;
static __read_mostly int low_patten_thres = 1024;
static __read_mostly int low_patten_stdev = 200;
static __read_mostly u64 boost_interval = 16 * NSEC_PER_MSEC;
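/*
 * P.A.R.T tracks, per cpu, how much of each period_size (8ms) window the rq
 * was active and keeps the last period_hist_size (10) windows of history.
 * part_policy_idx selects how that history is turned into a demand value in
 * part_cpu_active_ratio(), and the high/low pattern thresholds classify the
 * history by its average and standard deviation.
 */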
/********************************************************/
/* Helper function */
/********************************************************/
static inline int inc_hist_idx(int idx)
{
return (idx + 1) % period_hist_size;
}
static inline void calc_active_ratio_hist(struct part *pa)
{
int idx;
int sum = 0, max = 0;
int p_avg = 0, p_stdev = 0, p_count = 0;
int patten, diff;
/* Calculate basic statistics of P.A.R.T */
for (idx = 0; idx < period_hist_size; idx++) {
sum += pa->hist[idx];
max = max(max, pa->hist[idx]);
}
pa->active_ratio_avg = sum / period_hist_size;
pa->active_ratio_max = max;
pa->active_ratio_est = 0;
pa->active_ratio_stdev = 0;
	/* Calculate stdev for pattern recognition */
for (idx = 0; idx < period_hist_size; idx += 2) {
patten = pa->hist[idx] + pa->hist[idx + 1];
if (patten == 0)
continue;
p_avg += patten;
p_count++;
}
if (p_count <= 1) {
p_avg = 0;
p_stdev = 0;
goto out;
}
p_avg /= p_count;
for (idx = 0; idx < period_hist_size; idx += 2) {
patten = pa->hist[idx] + pa->hist[idx + 1];
if (patten == 0)
continue;
diff = patten - p_avg;
p_stdev += diff * diff;
}
p_stdev /= p_count - 1;
p_stdev = int_sqrt(p_stdev);
out:
pa->active_ratio_stdev = p_stdev;
if (p_count >= low_patten_count &&
p_avg <= low_patten_thres &&
p_stdev <= low_patten_stdev)
pa->active_ratio_est = p_avg / 2;
trace_ems_cpu_active_ratio_patten(cpu_of(container_of(pa, struct rq, pa)),
p_count, p_avg, p_stdev);
}
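/*
 * Illustrative example: with a history of { 300, 100, 320, 80, 310, 90, 0,
 * 0, 0, 0 }, the three non-empty 2-period windows each sum to 400, so
 * p_avg = 400 and p_stdev = 0. Since p_count >= low_patten_count and both
 * low pattern thresholds are met (with the defaults above), active_ratio_est
 * becomes 400 / 2 = 200.
 */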
static void update_cpu_active_ratio_hist(struct part *pa, bool full, unsigned int count)
{
/*
* Reflect recent active ratio in the history.
*/
pa->hist_idx = inc_hist_idx(pa->hist_idx);
pa->hist[pa->hist_idx] = pa->active_ratio_recent;
/*
* If count is positive, there are empty/full periods.
* These will be reflected in the history.
*/
while (count--) {
pa->hist_idx = inc_hist_idx(pa->hist_idx);
pa->hist[pa->hist_idx] = full ? SCHED_CAPACITY_SCALE : 0;
}
/*
* Calculate avg/max active ratio through entire history.
*/
calc_active_ratio_hist(pa);
}
static void
__update_cpu_active_ratio(int cpu, struct part *pa, u64 now, int boost)
{
u64 elapsed = now - pa->period_start;
unsigned int period_count = 0;
if (boost) {
pa->last_boost_time = now;
return;
}
if (pa->last_boost_time &&
now > pa->last_boost_time + boost_interval)
pa->last_boost_time = 0;
if (pa->running) {
/*
		 * If 'pa->running' is true, it means that the rq has been
		 * active from last_updated until now.
*/
u64 contributer, remainder;
/*
		 * If now is in the recent period, the contribution runs from
		 * last_updated to now. Otherwise it runs from last_updated to
		 * the end of the period, and the remaining active time is
		 * reflected in the next step.
*/
contributer = min(now, pa->period_start + period_size);
pa->active_sum += contributer - pa->last_updated;
pa->active_ratio_recent =
div64_u64(pa->active_sum << SCHED_CAPACITY_SHIFT, period_size);
/*
		 * If now has passed the recent period, calculate the full
		 * periods and reflect them.
*/
period_count = div64_u64_rem(elapsed, period_size, &remainder);
if (period_count) {
update_cpu_active_ratio_hist(pa, true, period_count - 1);
pa->active_sum = remainder;
pa->active_ratio_recent =
div64_u64(pa->active_sum << SCHED_CAPACITY_SHIFT, period_size);
}
} else {
/*
		 * If 'pa->running' is false, it means that the rq has been
		 * idle from last_updated until now.
*/
/*
		 * If now has passed the recent period, calculate the empty
		 * periods and reflect them.
*/
period_count = div64_u64(elapsed, period_size);
if (period_count) {
update_cpu_active_ratio_hist(pa, false, period_count - 1);
pa->active_ratio_recent = 0;
pa->active_sum = 0;
}
}
pa->period_start += period_size * period_count;
pa->last_updated = now;
}
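/*
 * Illustrative timeline with period_size = 8ms: a period starts at t = 0,
 * last_updated = 2ms, the rq runs continuously and the next update happens
 * at now = 21ms. The current period is credited with the 6ms between
 * last_updated and the period boundary and pushed into the history, one
 * fully active 8ms period follows it, and the remaining 5ms start the new
 * recent period (active_ratio_recent = 5 * 1024 / 8 = 640).
 */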
/********************************************************/
/* External APIs */
/********************************************************/
void update_cpu_active_ratio(struct rq *rq, struct task_struct *p, int type)
{
struct part *pa = &rq->pa;
int cpu = cpu_of(rq);
u64 now = sched_clock_cpu(0);
if (unlikely(pa->period_start == 0))
return;
switch (type) {
/*
* 1) Enqueue
* This type is called when the rq is switched from idle to running.
	 * At this point, update the active ratio for the idle interval
	 * and change the state to running.
*/
case EMS_PART_ENQUEUE:
__update_cpu_active_ratio(cpu, pa, now, 0);
if (rq->nr_running == 0) {
pa->running = true;
trace_ems_cpu_active_ratio(cpu, pa, "enqueue");
}
break;
/*
* 2) Dequeue
* This type is called when the rq is switched from running to idle.
	 * At this point, update the active ratio for the running interval
	 * and change the state to not-running.
*/
case EMS_PART_DEQUEUE:
__update_cpu_active_ratio(cpu, pa, now, 0);
if (rq->nr_running == 1) {
pa->running = false;
trace_ems_cpu_active_ratio(cpu, pa, "dequeue");
}
break;
/*
* 3) Update
	 * This type is called to update the active ratio while the rq is running.
*/
case EMS_PART_UPDATE:
__update_cpu_active_ratio(cpu, pa, now, 0);
trace_ems_cpu_active_ratio(cpu, pa, "update");
break;
case EMS_PART_WAKEUP_NEW:
__update_cpu_active_ratio(cpu, pa, now, 1);
trace_ems_cpu_active_ratio(cpu, pa, "new task");
break;
}
}
void part_cpu_active_ratio(unsigned long *util, unsigned long *max, int cpu)
{
struct rq *rq = cpu_rq(cpu);
struct part *pa = &rq->pa;
unsigned long pelt_max = *max;
unsigned long pelt_util = *util;
int util_ratio = *util * SCHED_CAPACITY_SCALE / *max;
int demand = 0;
if (unlikely(pa->period_start == 0))
return;
if (pa->last_boost_time && util_ratio < pa->active_ratio_boost) {
*max = SCHED_CAPACITY_SCALE;
*util = pa->active_ratio_boost;
return;
}
if (util_ratio > pa->active_ratio_limit)
return;
if (!pa->running &&
(pa->active_ratio_avg < high_patten_thres ||
pa->active_ratio_stdev > high_patten_stdev)) {
*util = 0;
*max = SCHED_CAPACITY_SCALE;
return;
}
update_cpu_active_ratio(rq, NULL, EMS_PART_UPDATE);
switch (part_policy_idx) {
case PART_POLICY_RECENT:
demand = pa->active_ratio_recent;
break;
case PART_POLICY_MAX:
demand = pa->active_ratio_max;
break;
case PART_POLICY_MAX_RECENT_MAX:
demand = max(pa->active_ratio_recent, pa->active_ratio_max);
break;
case PART_POLICY_LAST:
demand = pa->hist[pa->hist_idx];
break;
case PART_POLICY_MAX_RECENT_LAST:
demand = max(pa->active_ratio_recent, pa->hist[pa->hist_idx]);
break;
case PART_POLICY_MAX_RECENT_AVG:
demand = max(pa->active_ratio_recent, pa->active_ratio_avg);
break;
}
*util = max(demand, pa->active_ratio_est);
*util = min_t(unsigned long, *util, (unsigned long)pa->active_ratio_limit);
*max = SCHED_CAPACITY_SCALE;
if (util_ratio > *util) {
*util = pelt_util;
*max = pelt_max;
}
trace_ems_cpu_active_ratio_util_stat(cpu, *util, (unsigned long)util_ratio);
}
void set_part_period_start(struct rq *rq)
{
struct part *pa = &rq->pa;
u64 now;
if (likely(pa->period_start))
return;
now = sched_clock_cpu(0);
pa->period_start = now;
pa->last_updated = now;
}
/********************************************************/
/* SYSFS */
/********************************************************/
static ssize_t show_part_policy(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
return sprintf(buf, "%u. %s\n", part_policy_idx,
part_policy_name[part_policy_idx]);
}
static ssize_t store_part_policy(struct kobject *kobj,
struct kobj_attribute *attr, const char *buf,
size_t count)
{
long input;
if (!sscanf(buf, "%ld", &input))
return -EINVAL;
if (input >= PART_POLICY_INVALID || input < 0)
return -EINVAL;
part_policy_idx = input;
return count;
}
static ssize_t show_part_policy_list(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
ssize_t len = 0;
int i;
for (i = 0; i < PART_POLICY_INVALID ; i++)
len += sprintf(buf + len, "%u. %s\n", i, part_policy_name[i]);
return len;
}
static ssize_t show_active_ratio_limit(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
struct part *pa;
int cpu, len = 0;
for_each_possible_cpu(cpu) {
pa = &cpu_rq(cpu)->pa;
len += sprintf(buf + len, "cpu%d ratio:%3d\n",
cpu, pa->active_ratio_limit);
}
return len;
}
static ssize_t store_active_ratio_limit(struct kobject *kobj,
struct kobj_attribute *attr, const char *buf,
size_t count)
{
struct part *pa;
int cpu, ratio, i;
if (sscanf(buf, "%d %d", &cpu, &ratio) != 2)
return -EINVAL;
/* Check cpu is possible */
if (!cpumask_test_cpu(cpu, cpu_possible_mask))
return -EINVAL;
	/* Check ratio isn't out of range */
if (ratio < 0 || ratio > SCHED_CAPACITY_SCALE)
return -EINVAL;
for_each_cpu(i, cpu_coregroup_mask(cpu)) {
pa = &cpu_rq(i)->pa;
pa->active_ratio_limit = ratio;
}
return count;
}
static ssize_t show_active_ratio_boost(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
struct part *pa;
int cpu, len = 0;
for_each_possible_cpu(cpu) {
pa = &cpu_rq(cpu)->pa;
len += sprintf(buf + len, "cpu%d ratio:%3d\n",
cpu, pa->active_ratio_boost);
}
return len;
}
static ssize_t store_active_ratio_boost(struct kobject *kobj,
struct kobj_attribute *attr, const char *buf,
size_t count)
{
struct part *pa;
int cpu, ratio, i;
if (sscanf(buf, "%d %d", &cpu, &ratio) != 2)
return -EINVAL;
/* Check cpu is possible */
if (!cpumask_test_cpu(cpu, cpu_possible_mask))
return -EINVAL;
	/* Check ratio isn't out of range */
if (ratio < 0 || ratio > SCHED_CAPACITY_SCALE)
return -EINVAL;
for_each_cpu(i, cpu_coregroup_mask(cpu)) {
pa = &cpu_rq(i)->pa;
pa->active_ratio_boost = ratio;
}
return count;
}
#define show_node_function(_name) \
static ssize_t show_##_name(struct kobject *kobj, \
struct kobj_attribute *attr, char *buf) \
{ \
return sprintf(buf, "%d\n", _name); \
}
#define store_node_function(_name, _max) \
static ssize_t store_##_name(struct kobject *kobj, \
struct kobj_attribute *attr, const char *buf, \
size_t count) \
{ \
unsigned int input; \
\
if (!sscanf(buf, "%u", &input)) \
return -EINVAL; \
\
if (input > _max) \
return -EINVAL; \
\
_name = input; \
\
return count; \
}
show_node_function(high_patten_thres);
store_node_function(high_patten_thres, SCHED_CAPACITY_SCALE);
show_node_function(high_patten_stdev);
store_node_function(high_patten_stdev, SCHED_CAPACITY_SCALE);
show_node_function(low_patten_count);
store_node_function(low_patten_count, (period_size / 2));
show_node_function(low_patten_thres);
store_node_function(low_patten_thres, (SCHED_CAPACITY_SCALE * 2));
show_node_function(low_patten_stdev);
store_node_function(low_patten_stdev, SCHED_CAPACITY_SCALE);
static struct kobj_attribute _policy =
__ATTR(policy, 0644, show_part_policy, store_part_policy);
static struct kobj_attribute _policy_list =
__ATTR(policy_list, 0444, show_part_policy_list, NULL);
static struct kobj_attribute _high_patten_thres =
__ATTR(high_patten_thres, 0644, show_high_patten_thres, store_high_patten_thres);
static struct kobj_attribute _high_patten_stdev =
__ATTR(high_patten_stdev, 0644, show_high_patten_stdev, store_high_patten_stdev);
static struct kobj_attribute _low_patten_count =
__ATTR(low_patten_count, 0644, show_low_patten_count, store_low_patten_count);
static struct kobj_attribute _low_patten_thres =
__ATTR(low_patten_thres, 0644, show_low_patten_thres, store_low_patten_thres);
static struct kobj_attribute _low_patten_stdev =
__ATTR(low_patten_stdev, 0644, show_low_patten_stdev, store_low_patten_stdev);
static struct kobj_attribute _active_ratio_limit =
__ATTR(active_ratio_limit, 0644, show_active_ratio_limit, store_active_ratio_limit);
static struct kobj_attribute _active_ratio_boost =
__ATTR(active_ratio_boost, 0644, show_active_ratio_boost, store_active_ratio_boost);
static struct attribute *attrs[] = {
&_policy.attr,
&_policy_list.attr,
&_high_patten_thres.attr,
&_high_patten_stdev.attr,
&_low_patten_count.attr,
&_low_patten_thres.attr,
&_low_patten_stdev.attr,
&_active_ratio_limit.attr,
&_active_ratio_boost.attr,
NULL,
};
static const struct attribute_group attr_group = {
.attrs = attrs,
};
static int __init init_part_sysfs(void)
{
struct kobject *kobj;
kobj = kobject_create_and_add("part", ems_kobj);
if (!kobj)
return -EINVAL;
if (sysfs_create_group(kobj, &attr_group))
return -EINVAL;
return 0;
}
late_initcall(init_part_sysfs);
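/*
 * The nodes above are created under the "part" kobject on ems_kobj
 * (typically something like /sys/kernel/ems/part, depending on where
 * ems_kobj is registered). Example usage, assuming that path:
 *
 *   echo 1 > /sys/kernel/ems/part/policy            # PART_POLICY_MAX
 *   echo "0 819" > /sys/kernel/ems/part/active_ratio_limit
 *
 * The second write applies the limit (~80% of 1024) to every cpu in cpu0's
 * coregroup.
 */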
static int __init parse_part(void)
{
struct device_node *dn, *coregroup;
char name[15];
int cpu, cnt = 0, limit = -1, boost = -1;
dn = of_find_node_by_path("/cpus/ems/part");
if (!dn)
return 0;
for_each_possible_cpu(cpu) {
struct part *pa = &cpu_rq(cpu)->pa;
if (cpu != cpumask_first(cpu_coregroup_mask(cpu)))
goto skip_parse;
limit = -1;
boost = -1;
snprintf(name, sizeof(name), "coregroup%d", cnt++);
coregroup = of_get_child_by_name(dn, name);
if (!coregroup)
continue;
of_property_read_s32(coregroup, "active-ratio-limit", &limit);
of_property_read_s32(coregroup, "active-ratio-boost", &boost);
skip_parse:
if (limit >= 0)
pa->active_ratio_limit = SCHED_CAPACITY_SCALE * limit / 100;
if (boost >= 0)
pa->active_ratio_boost = SCHED_CAPACITY_SCALE * boost / 100;
}
return 0;
}
core_initcall(parse_part);
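/*
 * Hypothetical device tree fragment consumed by parse_part(); the values
 * are percentages and are converted to the 1024 capacity scale:
 *
 *	cpus {
 *		ems {
 *			part {
 *				coregroup0 {
 *					active-ratio-limit = <75>;
 *					active-ratio-boost = <40>;
 *				};
 *				coregroup1 {
 *					active-ratio-limit = <60>;
 *				};
 *			};
 *		};
 *	};
 */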
void __init init_part(void)
{
int cpu, idx;
for_each_possible_cpu(cpu) {
struct part *pa = &cpu_rq(cpu)->pa;
		/* Set default values */
pa->running = false;
pa->active_sum = 0;
pa->active_ratio_recent = 0;
pa->hist_idx = 0;
for (idx = 0; idx < PART_HIST_SIZE_MAX; idx++)
pa->hist[idx] = 0;
pa->period_start = 0;
pa->last_updated = 0;
}
}