/*
* Copyright (C) 2016 MediaTek Inc.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See http://www.gnu.org/licenses/gpl-2.0.html for more details.
*/
#include <linux/stop_machine.h>
#ifdef OPLUS_FEATURE_SPECIALOPT
// caichen@TECH.Kernel.Sched, 2020/09/26, add for heavy load task
inline unsigned long task_util(struct task_struct *p);
#else
static inline unsigned long task_util(struct task_struct *p);
#endif
static int select_max_spare_capacity(struct task_struct *p, int target);
int cpu_eff_tp = 1024;
unsigned long long big_cpu_eff_tp = 1024;
#ifndef cpu_isolated
#define cpu_isolated(cpu) 0
#endif
#if defined(CONFIG_MACH_MT6763) || defined(CONFIG_MACH_MT6758)
/* cpu7 is L+ */
int l_plus_cpu = 7;
#else
int l_plus_cpu = -1;
#endif
#ifdef CONFIG_MTK_SCHED_EAS_POWER_SUPPORT
#if defined(CONFIG_MACH_MT6763) || defined(CONFIG_MACH_MT6758)
/* MT6763: 2 gears. Clusters 0 & 1 share a buck. */
static int share_buck[3] = {1, 0, 2};
#elif defined(CONFIG_MACH_MT6799)
/* MT6799: 3 gears. Clusters 0 & 2 share a buck. */
static int share_buck[3] = {2, 1, 0};
#elif defined(CONFIG_MACH_MT6765) || defined(CONFIG_MACH_MT6762)
static int share_buck[3] = {1, 0, 2};
#elif defined(CONFIG_MACH_MT6779)
static int share_buck[2] = {2, 1};
#define ARM_V8_2
#elif defined(CONFIG_MACH_MT6893) || \
(defined(CONFIG_MACH_MT6885) && defined(CONFIG_MTK_SCHED_MULTI_GEARS))
static int share_buck[3] = {0, 2, 1};
#else
/* no buck shared */
static int share_buck[3] = {0, 1, 2};
#endif
#endif
#define CCI_ID (arch_get_nr_clusters())
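/*
 * Re-evaluate the sched-domain over-utilization flag at load-balance time:
 * a group counts as over-utilized only when its summed utilization (isolated
 * CPUs excluded) eats into the capacity_margin headroom of the whole group,
 * not of a single CPU.
 */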
static void
update_system_overutilized(struct lb_env *env)
{
unsigned long group_util;
bool intra_overutil = false;
unsigned long min_capacity;
struct sched_group *group = env->sd->groups;
int this_cpu;
int min_cap_orig_cpu;
bool overutilized = sd_overutilized(env->sd);
int i;
if (!sched_feat(SCHED_MTK_EAS))
return;
this_cpu = smp_processor_id();
min_cap_orig_cpu = cpu_rq(this_cpu)->rd->min_cap_orig_cpu;
if (min_cap_orig_cpu > -1)
min_capacity = capacity_orig_of(min_cap_orig_cpu);
else
return;
do {
group_util = 0;
for_each_cpu_and(i, sched_group_span(group), env->cpus) {
if (cpu_isolated(i))
continue;
group_util += cpu_util(i);
/*if (cpu_overutilized(i)) {
if (capacity_orig_of(i) == min_capacity) {
intra_overutil = true;
break;
}
}*/
}
/*
 * A capacity-based hint for over-utilization.
 * Do not flag the system as over-utilized just because heavy tasks
 * sit on the big cluster; only when a whole cluster (not a single
 * CPU) has consumed its ~20% free room do we treat it as
 * system-wide over-utilization.
 */
if (group->group_weight > 1 && (group->sgc->capacity * 1024 <
group_util * capacity_margin)) {
intra_overutil = true;
break;
}
group = group->next;
} while (group != env->sd->groups && !intra_overutil);
if (overutilized != intra_overutil) {
if (intra_overutil == true)
set_sd_overutilized(env->sd);
else
clear_sd_overutilized(env->sd);
}
}
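/* Return true if @prev and @target sit in the same cluster. */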
bool is_intra_domain(int prev, int target)
{
#ifdef CONFIG_ARM64
return (cpu_topology[prev].cluster_id ==
cpu_topology[target].cluster_id);
#else
return (cpu_topology[prev].socket_id ==
cpu_topology[target].socket_id);
#endif
}
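/*
 * MTK-EAS wrapper around idle-sibling selection: prefer an idle CPU
 * (honouring schedtune prefer_idle when available), otherwise fall back to
 * the CPU with the largest spare capacity in the target's cluster.
 */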
static int
___select_idle_sibling(struct task_struct *p, int prev_cpu, int new_cpu)
{
if (sched_feat(SCHED_MTK_EAS)) {
#ifdef CONFIG_SCHED_TUNE
bool prefer_idle = schedtune_prefer_idle(p) > 0;
#else
bool prefer_idle = true;
#endif
int idle_cpu;
idle_cpu = find_best_idle_cpu(p, prefer_idle);
if (idle_cpu >= 0)
new_cpu = idle_cpu;
else
new_cpu = select_max_spare_capacity(p, new_cpu);
} else
new_cpu = select_idle_sibling(p, prev_cpu, new_cpu);
return new_cpu;
}
/* To find a CPU with max spare capacity in the same cluster with target */
static
int select_max_spare_capacity(struct task_struct *p, int target)
{
unsigned long int max_spare_capacity = 0;
int max_spare_cpu = -1;
struct cpumask cls_cpus;
int cid = arch_get_cluster_id(target); /* cid of target CPU */
int cpu = task_cpu(p);
struct cpumask *tsk_cpus_allow = &p->cpus_allowed;
/* If the previous CPU is cache-affine and idle, choose it first. */
if (cpu != l_plus_cpu && cpu != target &&
cpus_share_cache(cpu, target) &&
idle_cpu(cpu) && !cpu_isolated(cpu))
return cpu;
arch_get_cluster_cpus(&cls_cpus, cid);
/* Otherwise, find a CPU with max spare-capacity in cluster */
for_each_cpu_and(cpu, tsk_cpus_allow, &cls_cpus) {
unsigned long int new_usage;
unsigned long int spare_cap;
if (!cpu_online(cpu))
continue;
if (cpu_isolated(cpu))
continue;
#ifdef CONFIG_MTK_SCHED_INTEROP
if (cpu_rq(cpu)->rt.rt_nr_running &&
likely(!is_rt_throttle(cpu)))
continue;
#endif
#ifdef CONFIG_SCHED_WALT
if (walt_cpu_high_irqload(cpu))
continue;
#endif
if (idle_cpu(cpu))
return cpu;
new_usage = cpu_util(cpu) + task_util(p);
if (new_usage >= capacity_of(cpu))
spare_cap = 0;
else /* consider RT/IRQ capacity reduction */
spare_cap = (capacity_of(cpu) - new_usage);
/* update CPU with max spare capacity */
if ((long int)spare_cap > (long int)max_spare_capacity) {
max_spare_cpu = cpu;
max_spare_capacity = spare_cap;
}
}
/* if max_spare_cpu exist, choose it. */
if (max_spare_cpu > -1)
return max_spare_cpu;
else
return task_cpu(p);
}
/*
 * @p: the task to be placed.
 *
 * Return:
 *
 * cpu id, or
 * -1 if no suitable idle CPU is found
 */
int find_best_idle_cpu(struct task_struct *p, bool prefer_idle)
{
int i;
int best_idle_cpu = -1;
struct cpumask *tsk_cpus_allow = &p->cpus_allowed;
struct hmp_domain *domain;
int domain_order = 0;
int prefer_big = prefer_idle && (task_util(p) > stune_task_threshold);
for_each_hmp_domain_L_first(domain) {
for_each_cpu(i, &domain->possible_cpus) {
/* a task with prefer_idle looks for a bigger idle cpu */
if (!cpu_online(i) || cpu_isolated(i) ||
!cpumask_test_cpu(i, tsk_cpus_allow))
continue;
#ifdef CONFIG_MTK_SCHED_INTEROP
if (cpu_rq(i)->rt.rt_nr_running &&
likely(!is_rt_throttle(i)))
continue;
#endif
/* favoring tasks that prefer idle cpus
* to improve latency.
*/
if (idle_cpu(i)) {
best_idle_cpu = i;
if (!prefer_big) {
goto find_idle_cpu;
} else {
#ifdef CONFIG_MTK_SCHED_BL_FIRST
if (domain_order == 1)
goto find_idle_cpu;
#endif
}
}
}
domain_order++;
}
find_idle_cpu:
return best_idle_cpu;
}
static int init_cpu_info(void)
{
int i;
for (i = 0; i < nr_cpu_ids; i++) {
unsigned long capacity = SCHED_CAPACITY_SCALE;
if (cpu_core_energy(i)) {
int idx = cpu_core_energy(i)->nr_cap_states - 1;
capacity = cpu_core_energy(i)->cap_states[idx].cap;
}
}
return 0;
}
late_initcall_sync(init_cpu_info);
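/*
 * Cache, in cpu_eff_tp, the capacity at the upower turn-point OPP of the
 * first CPU of the last hmp domain; check_freq_turning() compares the
 * little cluster's current capacity against it.
 */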
#ifdef CONFIG_MTK_UNIFY_POWER
void set_sched_turn_point_cap(void)
{
int turn_point_idx;
struct hmp_domain *domain;
int cpu;
const struct sched_group_energy *sge_core;
domain = list_entry(hmp_domains.prev, struct hmp_domain, hmp_domains);
cpu = cpumask_first(&domain->possible_cpus);
sge_core = cpu_core_energy(cpu);
turn_point_idx = max(upower_get_turn_point() - 1, 0);
cpu_eff_tp = sge_core->cap_states[turn_point_idx].cap;
}
#else
void set_sched_turn_point_cap(void)
{
return;
}
#endif
#if defined(CONFIG_SCHED_HMP) || defined(CONFIG_MTK_IDLE_BALANCE_ENHANCEMENT)
/*
 * Heterogeneous Multi-Processor (HMP) Global Load Balance
 */
static DEFINE_SPINLOCK(hmp_force_migration);
/*
 * For debugging purposes, split the cpu_stop callback out so the
 * call stack stays readable.
 */
static int hmp_idle_pull_cpu_stop(void *data)
{
int ret;
struct task_struct *p = ((struct rq *)data)->migrate_task;
ret = active_load_balance_cpu_stop(data);
put_task_struct(p);
return ret;
}
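/*
 * Force-migrate the task currently running on @target to @this_cpu through
 * the active-balance cpu_stop machinery. Returns 1 if the stopper was
 * queued, 0 otherwise.
 */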
static int
migrate_running_task(int this_cpu, struct task_struct *p, struct rq *target)
{
unsigned long flags;
unsigned int force = 0;
/* now we have a candidate */
raw_spin_lock_irqsave(&target->lock, flags);
if (!target->active_balance &&
(task_rq(p) == target) && !cpu_park(cpu_of(target)) &&
p->state != TASK_DEAD) {
get_task_struct(p);
target->push_cpu = this_cpu;
target->migrate_task = p;
trace_sched_hmp_migrate(p, target->push_cpu, MIGR_IDLE_RUNNING);
#ifdef CONFIG_SCHED_HMP
hmp_next_up_delay(&p->se, target->push_cpu);
#endif
target->active_balance = MIGR_IDLE_RUNNING; /* idle pull */
force = 1;
}
raw_spin_unlock_irqrestore(&target->lock, flags);
if (force) {
if (!stop_one_cpu_nowait(cpu_of(target),
hmp_idle_pull_cpu_stop,
target, &target->active_balance_work)) {
put_task_struct(p); /* out of rq->lock */
raw_spin_lock_irqsave(&target->lock, flags);
target->active_balance = 0;
target->migrate_task = NULL;
force = 0;
raw_spin_unlock_irqrestore(&target->lock, flags);
}
}
return force;
}
#endif
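/* Return the largest capacity_of() among the first CPU of each hmp domain. */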
unsigned long cluster_max_capacity(void)
{
struct hmp_domain *domain;
unsigned int max_capacity = 0;
for_each_hmp_domain_L_first(domain) {
int cpu;
unsigned long capacity;
cpu = cpumask_first(&domain->possible_cpus);
capacity = capacity_of(cpu);
if (capacity > max_capacity)
max_capacity = capacity;
}
return max_capacity;
}
inline unsigned long task_uclamped_min_w_ceiling(struct task_struct *p)
{
unsigned long max_capacity = cluster_max_capacity();
return min_t(unsigned int, uclamp_task_effective_util(p, UCLAMP_MIN),
max_capacity);
}
/* Calculate util with DVFS margin */
inline unsigned int freq_util(unsigned long util)
{
return util * capacity_margin / SCHED_CAPACITY_SCALE;
}
#ifdef CONFIG_MTK_IDLE_BALANCE_ENHANCEMENT
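/*
 * Idle balance is worth enhancing for @p when the task prefers idle CPUs or
 * its uclamp minimum exceeds the capacity of @cpu.
 */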
bool idle_lb_enhance(struct task_struct *p, int cpu)
{
int target_capacity = capacity_orig_of(cpu);
if (schedtune_prefer_idle(p))
return 1;
if (uclamp_task_effective_util(p, UCLAMP_MIN) > target_capacity)
return 1;
return 0;
}
/* caller must hold the runqueue lock of the queue the se is currently on */
static const int idle_prefer_max_tasks = 5;
static struct sched_entity
*get_idle_prefer_task(int cpu, int target_cpu, int check_min_cap,
struct task_struct **backup_task, int *backup_cpu)
{
int num_tasks = idle_prefer_max_tasks;
const struct cpumask *hmp_target_mask = NULL;
int src_capacity;
unsigned int util_min;
struct cfs_rq *cfs_rq;
struct sched_entity *se;
if (target_cpu >= 0)
hmp_target_mask = cpumask_of(target_cpu);
else
return NULL;
/* Walk the cfs runqueue (the currently running task is not on it)
 * looking for:
 * a. idle-prefer tasks
 * b. tasks whose capacity request exceeds the CPU they run on
 */
src_capacity = capacity_orig_of(cpu);
cfs_rq = &cpu_rq(cpu)->cfs;
se = __pick_first_entity(cfs_rq);
while (num_tasks && se) {
if (entity_is_task(se) &&
cpumask_intersects(hmp_target_mask,
&(task_of(se)->cpus_allowed))) {
struct task_struct *p;
p = task_of(se);
util_min = uclamp_task_effective_util(p, UCLAMP_MIN);
#ifdef CONFIG_MTK_SCHED_BOOST
if (!task_prefer_match_on_cpu(p, cpu, target_cpu))
return se;
#endif
if (check_min_cap && util_min >= src_capacity)
return se;
if (schedtune_prefer_idle(task_of(se)) &&
cpu_rq(cpu)->nr_running > 1) {
if (!check_min_cap)
return se;
if (backup_task && !*backup_task) {
*backup_cpu = cpu;
/* get task and selection inside
* rq lock
*/
*backup_task = task_of(se);
get_task_struct(*backup_task);
}
}
}
se = __pick_next_entity(se);
num_tasks--;
}
return NULL;
}
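/*
 * Idle pull for a CPU of the slowest cluster: scan the other domains, fast
 * to slow, for a queued idle-prefer task to bring here, remembering one
 * backup candidate found under the rq lock.
 */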
static void
hmp_slowest_idle_prefer_pull(int this_cpu, struct task_struct **p,
struct rq **target)
{
int cpu, backup_cpu;
struct sched_entity *se = NULL;
struct task_struct *backup_task = NULL;
struct hmp_domain *domain;
struct list_head *pos;
int selected = 0;
struct rq *rq;
unsigned long flags;
int check_min_cap;
/* 1. select a runnable task
* idle prefer
*
* order: fast to slow hmp domain
*/
check_min_cap = 0;
list_for_each(pos, &hmp_domains) {
domain = list_entry(pos, struct hmp_domain, hmp_domains);
for_each_cpu(cpu, &domain->cpus) {
if (cpu == this_cpu)
continue;
rq = cpu_rq(cpu);
raw_spin_lock_irqsave(&rq->lock, flags);
se = get_idle_prefer_task(cpu, this_cpu,
check_min_cap, &backup_task, &backup_cpu);
if (se && entity_is_task(se) &&
cpumask_test_cpu(this_cpu,
&(task_of(se))->cpus_allowed)) {
selected = 1;
/* get task and selection inside rq lock */
*p = task_of(se);
get_task_struct(*p);
*target = rq;
}
raw_spin_unlock_irqrestore(&rq->lock, flags);
if (selected) {
/* To put task out of rq lock */
if (backup_task)
put_task_struct(backup_task);
return;
}
}
}
if (backup_task) {
*p = backup_task;
*target = cpu_rq(backup_cpu);
return;
}
}
DECLARE_PER_CPU(struct hmp_domain *, hmp_cpu_domain);
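/*
 * Idle pull for a CPU of a faster cluster: look first for queued tasks whose
 * uclamp minimum cannot be met by their current CPU, then for idle-prefer
 * tasks, and finally for a running task with an oversized uclamp minimum
 * (or, past the frequency turning point, any runnable task) on the slower
 * domains.
 */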
static void
hmp_fastest_idle_prefer_pull(int this_cpu, struct task_struct **p,
struct rq **target)
{
int cpu, backup_cpu;
struct sched_entity *se = NULL;
struct task_struct *backup_task = NULL;
struct hmp_domain *hmp_domain = NULL, *domain;
struct list_head *pos;
int selected = 0;
struct rq *rq;
unsigned long flags;
int target_capacity;
int check_min_cap;
int turning;
hmp_domain = per_cpu(hmp_cpu_domain, this_cpu);
/* 1. select a runnable task
*
* first candidate:
* capacity_min in slow domain
*
* order: target->next to slow hmp domain
*/
check_min_cap = 1;
list_for_each(pos, &hmp_domain->hmp_domains) {
domain = list_entry(pos, struct hmp_domain, hmp_domains);
for_each_cpu(cpu, &domain->cpus) {
if (cpu == this_cpu)
continue;
rq = cpu_rq(cpu);
raw_spin_lock_irqsave(&rq->lock, flags);
se = get_idle_prefer_task(cpu, this_cpu,
check_min_cap, &backup_task, &backup_cpu);
if (se && entity_is_task(se) &&
cpumask_test_cpu(this_cpu,
&(task_of(se))->cpus_allowed)) {
selected = 1;
/* get task and selection inside rq lock */
*p = task_of(se);
get_task_struct(*p);
*target = rq;
}
raw_spin_unlock_irqrestore(&rq->lock, flags);
if (selected) {
/* To put task out of rq lock */
if (backup_task)
put_task_struct(backup_task);
return;
}
}
if (list_is_last(pos, &hmp_domains))
break;
}
/* backup candidate:
* idle prefer
*
* order: fastest to target hmp domain
*/
check_min_cap = 0;
list_for_each(pos, &hmp_domains) {
domain = list_entry(pos, struct hmp_domain, hmp_domains);
for_each_cpu(cpu, &domain->cpus) {
if (cpu == this_cpu)
continue;
rq = cpu_rq(cpu);
raw_spin_lock_irqsave(&rq->lock, flags);
se = get_idle_prefer_task(cpu, this_cpu,
check_min_cap, &backup_task, &backup_cpu);
if (se && entity_is_task(se) &&
cpumask_test_cpu(this_cpu,
&(task_of(se)->cpus_allowed))) {
selected = 1;
/* get task and selection inside rq lock */
*p = task_of(se);
get_task_struct(*p);
*target = rq;
}
raw_spin_unlock_irqrestore(&rq->lock, flags);
if (selected) {
/* To put task out of rq lock */
if (backup_task)
put_task_struct(backup_task);
return;
}
}
if (cpumask_test_cpu(this_cpu, &domain->cpus))
break;
}
if (backup_task) {
*p = backup_task;
*target = cpu_rq(backup_cpu);
return;
}
/* 2. select a running task
* order: target->next to slow hmp domain
* 3. turning = true, pick a runnable task from slower domain
*/
turning = check_freq_turning();
list_for_each(pos, &hmp_domain->hmp_domains) {
domain = list_entry(pos, struct hmp_domain, hmp_domains);
for_each_cpu(cpu, &domain->cpus) {
if (cpu == this_cpu)
continue;
rq = cpu_rq(cpu);
raw_spin_lock_irqsave(&rq->lock, flags);
se = rq->cfs.curr;
if (!se) {
raw_spin_unlock_irqrestore(&rq->lock, flags);
continue;
}
if (!entity_is_task(se)) {
struct cfs_rq *cfs_rq;
cfs_rq = group_cfs_rq(se);
while (cfs_rq) {
se = cfs_rq->curr;
if (!entity_is_task(se))
cfs_rq = group_cfs_rq(se);
else
cfs_rq = NULL;
}
}
target_capacity = capacity_orig_of(cpu);
if (se && entity_is_task(se) &&
(uclamp_task_effective_util(task_of(se),
UCLAMP_MIN) >= target_capacity) &&
cpumask_test_cpu(this_cpu,
&((task_of(se))->cpus_allowed))) {
selected = 1;
/* get task and selection inside rq lock */
*p = task_of(se);
get_task_struct(*p);
*target = rq;
}
raw_spin_unlock_irqrestore(&rq->lock, flags);
if (selected) {
/* To put task out of rq lock */
if (backup_task)
put_task_struct(backup_task);
return;
}
if (turning && !backup_task) {
const struct cpumask *hmp_target_mask = NULL;
struct cfs_rq *cfs_rq;
struct sched_entity *se;
raw_spin_lock_irqsave(&rq->lock, flags);
hmp_target_mask = cpumask_of(this_cpu);
cfs_rq = &rq->cfs;
se = __pick_first_entity(cfs_rq);
if (se && entity_is_task(se) &&
cpumask_intersects(hmp_target_mask,
&(task_of(se)->cpus_allowed))) {
backup_cpu = cpu;
/* get task and selection inside
* rq lock
*/
backup_task = task_of(se);
get_task_struct(backup_task);
}
raw_spin_unlock_irqrestore(&rq->lock, flags);
}
}
if (list_is_last(pos, &hmp_domains))
break;
}
if (backup_task) {
*p = backup_task;
*target = cpu_rq(backup_cpu);
return;
}
}
/*
 * Migrate a runnable (not currently running) task @p from the source
 * @rq to @dst_cpu. Returns 1 if the task was moved.
 */
static int
migrate_runnable_task(struct task_struct *p, int dst_cpu,
struct rq *rq)
{
struct rq_flags rf;
int moved = 0;
int src_cpu = cpu_of(rq);
if (!raw_spin_trylock(&p->pi_lock))
return moved;
rq_lock(rq, &rf);
/* Are both target and busiest cpu online */
if (!cpu_online(src_cpu) || !cpu_online(dst_cpu) ||
cpu_isolated(src_cpu) || cpu_isolated(dst_cpu))
goto out_unlock;
/* Task has migrated meanwhile, abort forced migration */
/* can't migrate running task */
if (task_running(rq, p))
goto out_unlock;
/*
* If task_rq(p) != rq, it cannot be migrated here, because we're
* holding rq->lock, if p->on_rq == 0 it cannot get enqueued because
* we're holding p->pi_lock.
*/
if (task_rq(p) == rq) {
if (task_on_rq_queued(p)) {
rq = __migrate_task(rq, &rf, p, dst_cpu);
moved = 1;
}
}
out_unlock:
rq_unlock(rq, &rf);
raw_spin_unlock(&p->pi_lock);
return moved;
}
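/*
 * Aggressive idle pull entry point: depending on whether @this_cpu is in the
 * slowest cluster, pull an idle-prefer or min-capacity task, first as a
 * runnable migration and, on faster CPUs, falling back to a running-task
 * active balance. Returns whether a task was moved.
 */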
static unsigned int aggressive_idle_pull(int this_cpu)
{
int moved = 0;
struct rq *target = NULL;
struct task_struct *p = NULL;
if (!sched_smp_initialized)
return 0;
if (!spin_trylock(&hmp_force_migration))
return 0;
/*
* aggressive idle balance for min_cap/idle_prefer
*/
if (hmp_cpu_is_slowest(this_cpu)) {
hmp_slowest_idle_prefer_pull(this_cpu, &p, &target);
if (p) {
trace_sched_hmp_migrate(p, this_cpu, 0x10);
moved = migrate_runnable_task(p, this_cpu, target);
if (moved)
goto done;
}
} else {
hmp_fastest_idle_prefer_pull(this_cpu, &p, &target);
if (p) {
trace_sched_hmp_migrate(p, this_cpu, 0x10);
moved = migrate_runnable_task(p, this_cpu, target);
if (moved)
goto done;
moved = migrate_running_task(this_cpu, p, target);
}
}
done:
spin_unlock(&hmp_force_migration);
if (p)
put_task_struct(p);
return moved;
}
#else
bool idle_lb_enhance(struct task_struct *p, int cpu)
{
return 0;
}
static unsigned int aggressive_idle_pull(int this_cpu)
{
return 0;
}
#endif
#ifdef CONFIG_UCLAMP_TASK
static __always_inline
unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util,
struct task_struct *p)
{
unsigned long min_util = rq->uclamp.value[UCLAMP_MIN];
unsigned long max_util = rq->uclamp.value[UCLAMP_MAX];
if (p) {
min_util = max_t(unsigned long, min_util,
(unsigned long)uclamp_task_effective_util(p, UCLAMP_MIN));
max_util = max_t(unsigned long, max_util,
(unsigned long)uclamp_task_effective_util(p, UCLAMP_MAX));
}
/*
* Since CPU's {min,max}_util clamps are MAX aggregated considering
* RUNNABLE tasks with_different_ clamps, we can end up with an
* inversion. Fix it now when the clamps are applied.
*/
if (unlikely(min_util >= max_util))
return min_util;
return clamp(util, min_util, max_util);
}
#endif
#ifdef CONFIG_MTK_SCHED_EAS_POWER_SUPPORT
#define fits_capacity(cap, max) ((cap) * capacity_margin < (max) * 1024)
static unsigned long __cpu_norm_sumutil(unsigned long util,
unsigned long capacity)
{
return (util << SCHED_CAPACITY_SHIFT)/capacity;
}
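/*
 * Per-sched-group snapshot used by the enhanced energy estimation: the
 * selected capacity state (index, capacity, voltage) together with the
 * group's max and summed utilization.
 */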
struct sg_state {
int cid;
int cap_idx;
unsigned long cap;
unsigned long volt;
unsigned long max_util;
unsigned long sum_util;
};
/*
 * update_sg_util(): Predicts the utilization landscape of the CPUs in
 * @sg_mask if @p were placed on @dst_cpu, and records the resulting
 * sum/max utilization and the matching capacity state in @sg_env.
 */
static int
update_sg_util(struct task_struct *p, int dst_cpu,
const struct cpumask *sg_mask, struct sg_state *sg_env)
{
int cpu = cpumask_first(sg_mask);
struct sched_domain *sd;
const struct sched_group *sg;
const struct sched_group_energy *sge;
unsigned long new_util;
int idx, max_idx;
sg_env->sum_util = 0;
sg_env->max_util = 0;
sge = cpu_core_energy(cpu); /* for CPU */
/*
 * Walk only the online CPUs of @sg_mask: offline CPUs contribute neither
 * utilization nor a capacity request to the group estimate.
 */
for_each_cpu_and(cpu, sg_mask, cpu_online_mask) {
unsigned long cpu_util, cpu_boosted_util;
struct task_struct *tsk = cpu == dst_cpu ? p : NULL;
cpu_util = cpu_util_without(cpu, p);
cpu_boosted_util = uclamp_rq_util_with(cpu_rq(cpu), cpu_util, p);
if (tsk)
cpu_util += task_util_est(p);
sg_env->sum_util += cpu_util;
sg_env->max_util = max(sg_env->max_util, cpu_boosted_util);
}
/* default is max_cap if we don't find a match */
max_idx = sge->nr_cap_states - 1;
sg_env->cap_idx = max_idx;
sg_env->cap = sge->cap_states[max_idx].cap;
new_util = sg_env->max_util * capacity_margin >> SCHED_CAPACITY_SHIFT;
new_util = min_t(unsigned long, new_util,
(unsigned long) sge->cap_states[sge->nr_cap_states-1].cap);
for (idx = 0; idx < sge->nr_cap_states; idx++) {
if (sge->cap_states[idx].cap >= new_util) {
/* Keep track of SG's capacity */
sg_env->cap_idx = idx;
sg_env->cap = sge->cap_states[idx].cap;
sg_env->volt = sge->cap_states[idx].volt;
break;
}
}
mt_sched_printf(sched_eas_energy_calc,
"dst_cpu=%d mask=0x%lx sum_util=%lu max_util=%lu new_util=%lu (idx=%d cap=%ld volt=%ld)",
dst_cpu, sg_mask->bits[0], sg_env->sum_util, sg_env->max_util,
new_util, sg_env->cap_idx, sg_env->cap, sg_env->volt);
return 1;
}
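/*
 * With a shared buck, leakage must be read at the voltage the buck actually
 * settles at: return the first capacity state at or above @v_max, starting
 * the search from @cpu_idx.
 */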
unsigned int share_buck_lkg_idx(const struct sched_group_energy *_sge,
int cpu_idx, unsigned long v_max)
{
int co_buck_lkg_idx = _sge->nr_cap_states - 1;
int idx;
for (idx = cpu_idx; idx < _sge->nr_cap_states; idx++) {
if (_sge->cap_states[idx].volt >= v_max) {
co_buck_lkg_idx = idx;
break;
}
}
return co_buck_lkg_idx;
}
#define VOLT_SCALE 10
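/*
 * Split the power of one capacity state into dynamic and leakage parts,
 * scaling the dynamic part by (co_volt/volt)^2 and reading leakage at the
 * shared-buck voltage when a co-bucked cluster forces a higher voltage.
 */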
void calc_pwr(int sd_level, const struct sched_group_energy *_sge,
int cap_idx, unsigned long volt, unsigned long co_volt,
unsigned long *dyn_pwr, unsigned long *lkg_pwr)
{
unsigned long int volt_factor = 1;
if (co_volt > volt) {
/*
* calculated power with share-buck impact
*
* dynamic power = F*V^2
*
* dyn_pwr = current_power * (v_max/v_min)^2
* lkg_pwr = tlb[idx of v_max].leak;
*/
unsigned long v_max = co_volt;
unsigned long v_min = volt;
int lkg_idx = _sge->lkg_idx;
int co_buck_lkg_idx;
volt_factor = ((v_max*v_max) << VOLT_SCALE) /
(v_min*v_min);
*dyn_pwr = (_sge->cap_states[cap_idx].dyn_pwr *
volt_factor) >> VOLT_SCALE;
co_buck_lkg_idx = share_buck_lkg_idx(_sge, cap_idx, v_max);
*lkg_pwr = _sge->cap_states[co_buck_lkg_idx].lkg_pwr[lkg_idx];
trace_sched_busy_power(sd_level, cap_idx,
_sge->cap_states[cap_idx].dyn_pwr, volt_factor,
*dyn_pwr, co_buck_lkg_idx, *lkg_pwr,
*dyn_pwr + *lkg_pwr);
} else {
/* No share buck impact */
int lkg_idx = _sge->lkg_idx;
*dyn_pwr = _sge->cap_states[cap_idx].dyn_pwr;
*lkg_pwr = _sge->cap_states[cap_idx].lkg_pwr[lkg_idx];
trace_sched_busy_power(sd_level, cap_idx, *dyn_pwr,
volt_factor, *dyn_pwr, cap_idx,
*lkg_pwr, *dyn_pwr + *lkg_pwr);
}
}
/**
 * compute_energy_sg() - Estimates the energy consumed by the CPUs of a group
 * @sg_cpus   : CPUs of the sched_group to evaluate
 * @sg_env    : utilization and capacity state of the group
 * @share_env : state of the group sharing the buck, for voltage coupling
 *
 * Return: the sum of the energy consumed by the CPUs of the group assuming
 * a capacity state satisfying the max utilization of the group.
 */
static inline unsigned long compute_energy_sg(const struct cpumask *sg_cpus,
struct sg_state *sg_env, struct sg_state *share_env)
{
int cpu;
const struct sched_group_energy *_sge;
unsigned long dyn_pwr, lkg_pwr;
unsigned long dyn_egy, lkg_egy;
unsigned long total_energy;
unsigned long sg_util;
cpu = cpumask_first(sg_cpus);
_sge = cpu_core_energy(cpu); /* for CPU */
calc_pwr(0, _sge,
sg_env->cap_idx, sg_env->volt, share_env->volt,
&dyn_pwr, &lkg_pwr);
sg_util = __cpu_norm_sumutil(sg_env->sum_util, sg_env->cap);
dyn_egy = sg_util * dyn_pwr;
lkg_egy = SCHED_CAPACITY_SCALE * lkg_pwr;
total_energy = dyn_egy + lkg_egy;
mt_sched_printf(sched_eas_energy_calc,
"sg_util=%lu dyn_egy=%d lkg_egy=%d (cost=%d) mask=0x%lx",
sg_util,
(int)dyn_egy, (int)lkg_egy, (int)total_energy,
sg_cpus->bits[0]);
return total_energy;
}
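/* Return whether cluster @cid shares a buck and, if so, with which cluster. */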
bool is_share_buck(int cid, int *co_buck_cid)
{
bool ret = false;
if (share_buck[cid] != cid) {
*co_buck_cid = share_buck[cid];
ret = true;
}
return ret;
}
static long
compute_energy_enhanced(struct task_struct *p, int dst_cpu,
struct sched_group *sg)
{
int cid, share_cid, cpu;
struct sg_state sg_env, share_env;
const struct cpumask *sg_cpus;
struct cpumask share_cpus;
unsigned long total_energy = 0;
share_env.volt = 0;
sg_cpus = sched_group_span(sg);
cpu = cpumask_first(sg_cpus);
#ifdef CONFIG_ARM64
cid = cpu_topology[cpu].cluster_id;
#else
cid = cpu_topology[cpu].socket_id;
#endif
if (!update_sg_util(p, dst_cpu, sg_cpus, &sg_env))
return 0;
if (is_share_buck(cid, &share_cid)) {
arch_get_cluster_cpus(&share_cpus, share_cid);
if (!update_sg_util(p, dst_cpu, &share_cpus, &share_env))
return 0;
total_energy += compute_energy_sg(&share_cpus, &share_env,
&sg_env);
}
total_energy += compute_energy_sg(sg_cpus, &sg_env, &share_env);
return total_energy;
}
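/*
 * Enhanced wake-up CPU selection for platforms with more than two clusters:
 * honour the sync hint, then walk every sched group tracking the best idle
 * and max-spare-capacity candidates, and pick the CPU whose estimated energy
 * delta against the group's base energy is smallest, keeping prev_cpu when
 * nothing saves energy.
 */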
static int find_energy_efficient_cpu_enhanced(struct task_struct *p,
int this_cpu, int prev_cpu, int sync)
{
unsigned long prev_energy = 0;
unsigned long prev_delta = ULONG_MAX, best_delta = ULONG_MAX;
int max_spare_cap_cpu_ls = prev_cpu;
unsigned long max_spare_cap_ls = 0, target_cap;
unsigned long sys_max_spare_cap = 0;
unsigned long cpu_cap, util, wake_util;
bool boosted, prefer_idle = false;
unsigned int min_exit_lat = UINT_MAX;
int sys_max_spare_cap_cpu = -1;
int best_energy_cpu = prev_cpu;
struct cpuidle_state *idle;
struct sched_domain *sd;
struct sched_group *sg;
if (sysctl_sched_sync_hint_enable && sync) {
if (cpumask_test_cpu(this_cpu, &p->cpus_allowed) &&
!cpu_isolated(this_cpu)) {
return this_cpu;
}
}
sd = rcu_dereference(per_cpu(sd_ea, this_cpu));
if (!sd)
return -1;
if (!boosted_task_util(p))
return -1;
prefer_idle = schedtune_prefer_idle(p);
boosted = (schedtune_task_boost(p) > 0) || (uclamp_task_effective_util(p, UCLAMP_MIN) > 0);
target_cap = boosted ? 0 : ULONG_MAX;
sg = sd->groups;
do {
unsigned long cur_energy = 0, cur_delta = 0;
unsigned long spare_cap, max_spare_cap = 0;
unsigned long base_energy_sg;
int max_spare_cap_cpu = -1, best_idle_cpu = -1;
int cpu;
/* Compute the 'base' energy of the sg, without @p */
base_energy_sg = compute_energy_enhanced(p, -1, sg);
for_each_cpu_and(cpu, &p->cpus_allowed, sched_group_span(sg)) {
if (cpu_isolated(cpu))
continue;
#ifdef CONFIG_MTK_SCHED_INTEROP
if (cpu_rq(cpu)->rt.rt_nr_running &&
likely(!is_rt_throttle(cpu)))
continue;
#endif
/* Skip CPUs that will be overutilized. */
wake_util = cpu_util_without(cpu, p);
util = wake_util + task_util_est(p);
cpu_cap = capacity_of(cpu);
spare_cap = cpu_cap - util;
if (spare_cap > sys_max_spare_cap) {
sys_max_spare_cap = spare_cap;
sys_max_spare_cap_cpu = cpu;
}
/*
* Skip CPUs that cannot satisfy the capacity request.
* IOW, placing the task there would make the CPU
* overutilized. Take uclamp into account to see how
* much capacity we can get out of the CPU; this is
* aligned with schedutil_cpu_util().
*/
util = uclamp_rq_util_with(cpu_rq(cpu), util, p);
if (!fits_capacity(util, cpu_cap))
continue;
/* Always use prev_cpu as a candidate. */
if (cpu == prev_cpu &&
(!prefer_idle || (prefer_idle && idle_cpu(cpu)))) {
prev_energy = compute_energy_enhanced(p,
prev_cpu, sg);
prev_delta = prev_energy - base_energy_sg;
best_delta = min(best_delta, prev_delta);
}
/*
* Find the CPU with the maximum spare capacity in
* the performance domain
*/
spare_cap = cpu_cap - util;
if (spare_cap > max_spare_cap) {
max_spare_cap = spare_cap;
max_spare_cap_cpu = cpu;
}
if (!prefer_idle)
continue;
if (idle_cpu(cpu)) {
cpu_cap = capacity_orig_of(cpu);
if (!boosted && cpu_cap > target_cap)
continue;
idle = idle_get_state(cpu_rq(cpu));
if (idle && idle->exit_latency > min_exit_lat &&
cpu_cap == target_cap)
continue;
if (idle)
min_exit_lat = idle->exit_latency;
target_cap = cpu_cap;
best_idle_cpu = cpu;
} else if (spare_cap > max_spare_cap_ls) {
max_spare_cap_ls = spare_cap;
max_spare_cap_cpu_ls = cpu;
}
}
if (!prefer_idle && max_spare_cap_cpu >= 0 &&
max_spare_cap_cpu != prev_cpu) {
cur_energy = compute_energy_enhanced(p,
max_spare_cap_cpu, sg);
cur_delta = cur_energy - base_energy_sg;
if (cur_delta < best_delta) {
best_delta = cur_delta;
best_energy_cpu = max_spare_cap_cpu;
}
}
if (prefer_idle && best_idle_cpu >= 0 &&
best_idle_cpu != prev_cpu) {
cur_energy = compute_energy_enhanced(p,
best_idle_cpu, sg);
cur_delta = cur_energy - base_energy_sg;
if (cur_delta < best_delta) {
best_delta = cur_delta;
best_energy_cpu = best_idle_cpu;
}
}
mt_sched_printf(sched_eas_energy_calc,
"prev_cpu=%d base_energy=%lu prev_energy=%lu prev_delta=%d",
prev_cpu, base_energy_sg, prev_energy, (int)prev_delta);
mt_sched_printf(sched_eas_energy_calc,
"max_spare_cap_cpu=%d best_idle_cpu=%d cur_energy=%lu cur_delta=%d",
max_spare_cap_cpu, best_idle_cpu, cur_energy, (int)cur_delta);
} while (sg = sg->next, sg != sd->groups);
/*
 * Pick the best CPU if prev_cpu cannot be used, or if it saves energy
 * compared to prev_cpu.
 */
if (prev_delta == ULONG_MAX) {
/* All CPUs failed the fits_capacity() check; use sys_max_spare_cap_cpu */
if (best_energy_cpu == prev_cpu)
return sys_max_spare_cap_cpu;
else
return best_energy_cpu;
}
if ((prev_delta - best_delta) > 0)
return best_energy_cpu;
return prev_cpu;
}
static int __find_energy_efficient_cpu(struct sched_domain *sd,
struct task_struct *p,
int cpu, int prev_cpu,
int sync)
{
int num_cluster = arch_get_nr_clusters();
if (num_cluster <= 2)
return find_energy_efficient_cpu(sd, p, cpu, prev_cpu, sync);
else
return find_energy_efficient_cpu_enhanced(p, cpu, prev_cpu, sync);
}
/*
* group_norm_util() returns the approximated group util relative to its
* current capacity (busy ratio) in the range [0..SCHED_CAPACITY_SCALE] for use
* in energy calculations. Since task executions may or may not overlap in time
* in the group the true normalized util is between max(cpu_norm_util(i)) and
* sum(cpu_norm_util(i)) when iterating over all cpus in the group, i. The
* latter is used as the estimate as it leads to a more pessimistic energy
* estimate (more busy).
*/
static unsigned long
group_norm_util(struct energy_env *eenv, int cpu_idx)
{
struct sched_group *sg = eenv->sg;
int cpu_id = group_first_cpu(sg);
#ifdef CONFIG_ARM64
int cid = cpu_topology[cpu_id].cluster_id;
#else
int cid = cpu_topology[cpu_id].socket_id;
#endif
unsigned long capacity = eenv->cpu[cpu_idx].cap[cid];
unsigned long util, util_sum = 0;
int cpu;
for_each_cpu(cpu, sched_group_span(eenv->sg)) {
util = cpu_util_without(cpu, eenv->p);
/*
* If we are looking at the target CPU specified by the eenv,
* then we should add the (estimated) utilization of the task
* assuming we will wake it up on that CPU.
*/
if (unlikely(cpu == eenv->cpu[cpu_idx].cpu_id))
util += eenv->util_delta;
util_sum += __cpu_norm_util(util, capacity);
trace_group_norm_util(cpu_idx, cpu, cid, util_sum,
__cpu_norm_util(util, capacity), eenv->util_delta,
util, capacity);
}
if (util_sum > SCHED_CAPACITY_SCALE)
return SCHED_CAPACITY_SCALE;
return util_sum;
}
#endif
#ifdef CONFIG_MTK_SCHED_EAS_POWER_SUPPORT
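/*
 * Return the highest per-CPU utilization inside cluster @cid (adding
 * eenv->util_delta on the candidate destination CPU) and report the CPU
 * holding it through @max_cpu.
 */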
static unsigned long
mtk_cluster_max_usage(int cid, struct energy_env *eenv, int cpu_idx,
int *max_cpu)
{
unsigned long util, max_util = 0;
int cpu = -1;
struct cpumask cls_cpus;
*max_cpu = -1;
arch_get_cluster_cpus(&cls_cpus, cid);
for_each_cpu(cpu, &cls_cpus) {
if (!cpu_online(cpu))
continue;
util = cpu_util_without(cpu, eenv->p);
/*
* If we are looking at the target CPU specified by the eenv,
* then we should add the (estimated) utilization of the task
* assuming we will wake it up on that CPU.
*/
if (unlikely(cpu == eenv->cpu[cpu_idx].cpu_id))
util += eenv->util_delta;
if (util >= max_util) {
max_util = util;
*max_cpu = cpu;
}
}
return max_util;
}
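/*
 * Translate the cluster's max utilization (plus DVFS margin) into a capacity
 * state index and record it in eenv for later power estimation.
 */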
void mtk_cluster_capacity_idx(int cid, struct energy_env *eenv, int cpu_idx)
{
int cpu;
unsigned long util = mtk_cluster_max_usage(cid, eenv, cpu_idx, &cpu);
unsigned long new_capacity = util;
struct sched_domain *sd;
struct sched_group *sg;
const struct sched_group_energy *sge;
int idx, max_idx;
if (cpu == -1) { /* maybe no online CPU */
printk_deferred("sched: %s no online CPU", __func__);
return;
}
sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd);
if (sd) {
sg = sd->groups;
sge = sg->sge;
} else {
printk_deferred("sched: %s no sd", __func__);
return;
}
max_idx = sge->nr_cap_states - 1;
/* default is max_cap if we don't find a match */
eenv->cpu[cpu_idx].cap_idx[cid] = max_idx;
eenv->cpu[cpu_idx].cap[cid] = sge->cap_states[max_idx].cap;
/* OPP idx to refer capacity margin */
new_capacity = util * capacity_margin >> SCHED_CAPACITY_SHIFT;
new_capacity = min(new_capacity,
(unsigned long) sge->cap_states[sge->nr_cap_states-1].cap);
for (idx = 0; idx < sge->nr_cap_states; idx++) {
if (sge->cap_states[idx].cap >= new_capacity) {
/* Keep track of SG's capacity */
eenv->cpu[cpu_idx].cap_idx[cid] = idx;
eenv->cpu[cpu_idx].cap[cid] = sge->cap_states[idx].cap;
break;
}
}
mt_sched_printf(sched_eas_energy_calc,
"cpu_idx=%d dst_cpu=%d cid=%d max_cpu=%d (util=%ld new=%ld) max_opp=%d (cap=%d)",
cpu_idx, eenv->cpu[cpu_idx].cpu_id,
cid, cpu, util, new_capacity,
eenv->cpu[cpu_idx].cap_idx[cid],
eenv->cpu[cpu_idx].cap[cid]);
}
#if defined(ARM_V8_2) && defined(CONFIG_MTK_UNIFY_POWER)
struct sched_group_energy cci_tbl;
const struct sched_group_energy * const cci_energy(void)
{
struct sched_group_energy *sge = &cci_tbl;
struct upower_tbl_info **addr_ptr_tbl_info;
struct upower_tbl_info *ptr_tbl_info;
struct upower_tbl *ptr_tbl;
addr_ptr_tbl_info = upower_get_tbl();
ptr_tbl_info = *addr_ptr_tbl_info;
ptr_tbl = ptr_tbl_info[UPOWER_BANK_CCI].p_upower_tbl;
sge->nr_cap_states = ptr_tbl->row_num;
sge->cap_states = ptr_tbl->row;
sge->lkg_idx = ptr_tbl->lkg_idx;
return sge;
}
extern unsigned int mt_cpufreq_get_cur_cci_freq_idx(void);
void get_cci_volt(struct sg_state *cci)
{
const struct sched_group_energy *_sge;
static int CCI_nr_cap_stats;
_sge = cci_energy();
if (CCI_nr_cap_stats == 0) {
CCI_nr_cap_stats = _sge->nr_cap_states;
}
cci->cap_idx = CCI_nr_cap_stats - mt_cpufreq_get_cur_cci_freq_idx();
cci->volt = _sge->cap_states[cci->cap_idx].volt;
}
#else
void get_cci_volt(struct sg_state *cci)
{
}
#endif
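/*
 * If cluster @cid shares a buck, fetch into @co_buck the voltage its
 * co-bucked cluster (or the CCI/DSU on ARM_V8_2 parts) is expected to run at.
 */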
void share_buck_volt(struct energy_env *eenv, int cpu_idx, int cid,
struct sg_state *co_buck)
{
if (is_share_buck(cid, &(co_buck->cid))) {
int num_cluster = arch_get_nr_clusters();
int cap_idx = eenv->cpu[cpu_idx].cap_idx[cid];
if (co_buck->cid < num_cluster) {
struct cpumask cls_cpus;
const struct sched_group_energy *sge_core;
int cpu;
arch_get_cluster_cpus(&cls_cpus, co_buck->cid);
cpu = cpumask_first(&cls_cpus);
sge_core = cpu_core_energy(cpu);
co_buck->cap_idx =
eenv->cpu[cpu_idx].cap_idx[co_buck->cid];
co_buck->volt =
sge_core->cap_states[co_buck->cap_idx].volt;
#if defined(ARM_V8_2) && defined(CONFIG_MTK_UNIFY_POWER)
} else if (co_buck->cid == CCI_ID) { /* CCI + DSU */
get_cci_volt(co_buck);
#endif
}
trace_sched_share_buck(cpu_idx, cid, cap_idx, co_buck->cid,
co_buck->cap_idx, co_buck->volt);
}
}
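/*
 * Estimate the idle (leakage-only) power of @cpu at sched-domain level
 * @sd_level, honouring the shared-buck voltage and adding the CCI leakage at
 * cluster level where the buck is shared with the CCI.
 */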
int
mtk_idle_power(int cpu_idx, int idle_state, int cpu, void *argu, int sd_level)
{
struct energy_env *eenv = (struct energy_env *)argu;
const struct sched_group_energy *_sge, *sge_core, *sge_clus;
struct sched_domain *sd;
unsigned long volt;
int energy_cost = 0;
#ifdef CONFIG_ARM64
int cid = cpu_topology[cpu].cluster_id;
#else
int cid = cpu_topology[cpu].socket_id;
#endif
int cap_idx = eenv->cpu[cpu_idx].cap_idx[cid];
struct sg_state co_buck = {-1, -1, 0};
sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd);
/* [FIXME] racing with hotplug */
if (!sd)
return 0;
/* [FIXME] racing with hotplug */
if (cap_idx == -1)
return 0;
_sge = cpu_core_energy(cpu);
volt = _sge->cap_states[cap_idx].volt;
share_buck_volt(eenv, cpu_idx, cid, &co_buck);
if (co_buck.volt > volt)
cap_idx = share_buck_lkg_idx(_sge, cap_idx, co_buck.volt);
_sge = sge_core = sge_clus = NULL;
/* Handle the case where hotplug (HPS) left only one CPU in the cluster */
if (unlikely(!sd->child &&
(rcu_dereference(per_cpu(sd_scs, cpu)) == NULL))) {
struct upower_tbl_row *cpu_pwr_tbl, *clu_pwr_tbl;
sge_core = cpu_core_energy(cpu);
sge_clus = cpu_cluster_energy(cpu);
cpu_pwr_tbl = &sge_core->cap_states[cap_idx];
clu_pwr_tbl = &sge_clus->cap_states[cap_idx];
/* idle: core lkg_pwr + cluster lkg_pwr */
energy_cost = cpu_pwr_tbl->lkg_pwr[sge_core->lkg_idx] +
clu_pwr_tbl->lkg_pwr[sge_clus->lkg_idx];
mt_sched_printf(sched_eas_energy_calc,
"%s: %s lv=%d tlb_cpu[%d].leak=%d tlb_clu[%d].leak=%d total=%d",
__func__, "WFI", sd_level,
cap_idx,
cpu_pwr_tbl->lkg_pwr[sge_core->lkg_idx],
cap_idx,
clu_pwr_tbl->lkg_pwr[sge_clus->lkg_idx],
energy_cost);
} else {
struct upower_tbl_row *pwr_tbl;
unsigned long lkg_pwr;
if (sd_level == 0)
_sge = cpu_core_energy(cpu); /* for cpu */
else
_sge = cpu_cluster_energy(cpu); /* for cluster */
pwr_tbl = &_sge->cap_states[cap_idx];
lkg_pwr = pwr_tbl->lkg_pwr[_sge->lkg_idx];
energy_cost = lkg_pwr;
trace_sched_idle_power(sd_level, cap_idx, lkg_pwr, energy_cost);
}
idle_state = 0;
#if defined(ARM_V8_2) && defined(CONFIG_MTK_UNIFY_POWER)
if ((sd_level != 0) && (co_buck.cid == CCI_ID)) {
struct upower_tbl_row *CCI_pwr_tbl;
unsigned long lkg_pwr;
_sge = cci_energy();
CCI_pwr_tbl = &_sge->cap_states[cap_idx];
lkg_pwr = CCI_pwr_tbl->lkg_pwr[_sge->lkg_idx];
energy_cost += lkg_pwr;
trace_sched_idle_power(sd_level, cap_idx, lkg_pwr, energy_cost);
}
#endif
return energy_cost;
}
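/*
 * Busy power of one capacity state: dynamic plus leakage, evaluated at the
 * shared-buck voltage through calc_pwr().
 */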
int calc_busy_power(const struct sched_group_energy *_sge, int cap_idx,
unsigned long co_volt, int sd_level)
{
unsigned long dyn_pwr, lkg_pwr;
unsigned long volt;
volt = _sge->cap_states[cap_idx].volt;
calc_pwr(sd_level, _sge, cap_idx, volt, co_volt, &dyn_pwr, &lkg_pwr);
return dyn_pwr + lkg_pwr;
}
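/*
 * Estimate the busy power of @cpu at sched-domain level @sd_level; when
 * hotplug left a single CPU in the cluster, fold the cluster cost in as
 * well, and add the CCI/DSU cost where the buck is shared.
 */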
int mtk_busy_power(int cpu_idx, int cpu, void *argu, int sd_level)
{
struct energy_env *eenv = (struct energy_env *)argu;
const struct sched_group_energy *_sge;
struct sched_domain *sd;
int energy_cost = 0;
#ifdef CONFIG_ARM64
int cid = cpu_topology[cpu].cluster_id;
#else
int cid = cpu_topology[cpu].socket_id;
#endif
int cap_idx = eenv->cpu[cpu_idx].cap_idx[cid];
struct sg_state co_buck = {-1, -1, 0};
sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd);
/* [FIXME] racing with hotplug */
if (!sd)
return 0;
/* [FIXME] racing with hotplug */
if (cap_idx == -1)
return 0;
share_buck_volt(eenv, cpu_idx, cid, &co_buck);
/* Handle the case where hotplug (HPS) left only one CPU in the cluster */
if (unlikely(!sd->child &&
(rcu_dereference(per_cpu(sd_scs, cpu)) == NULL))) {
/* fix HPS defect: only one CPU left in this cluster */
_sge = cpu_core_energy(cpu); /* for CPU */
energy_cost = calc_busy_power(_sge, cap_idx, co_buck.volt,
0);
_sge = cpu_cluster_energy(cpu); /* for cluster */
energy_cost += calc_busy_power(_sge, cap_idx, co_buck.volt,
1);
} else {
if (sd_level == 0)
_sge = cpu_core_energy(cpu); /* for CPU */
else
_sge = cpu_cluster_energy(cpu); /* for cluster */
energy_cost = calc_busy_power(_sge, cap_idx, co_buck.volt,
sd_level);
}
#if defined(ARM_V8_2) && defined(CONFIG_MTK_UNIFY_POWER)
if ((sd_level != 0) && (co_buck.cid == CCI_ID)) {
/* CCI + DSU */
unsigned long volt;
_sge = cpu_core_energy(cpu); /* for CPU */
volt = _sge->cap_states[cap_idx].volt;
_sge = cci_energy();
energy_cost += calc_busy_power(_sge, co_buck.cap_idx, volt,
sd_level);
}
#endif
return energy_cost;
}
#endif
#ifdef CONFIG_MTK_SCHED_EAS_POWER_SUPPORT
void mtk_update_new_capacity(struct energy_env *eenv)
{
int i, cpu_idx;
/* Get the selected OPP index of every cluster for share-buck power
 * estimation.
 */
for (cpu_idx = EAS_CPU_PRV; cpu_idx < eenv->max_cpu_count ; ++cpu_idx) {
if (eenv->cpu[cpu_idx].cpu_id == -1)
continue;
for (i = 0; i < arch_get_nr_clusters(); i++)
mtk_cluster_capacity_idx(i, eenv, cpu_idx);
}
}
#else
void mtk_update_new_capacity(struct energy_env *eenv)
{
}
static int __find_energy_efficient_cpu(struct sched_domain *sd,
struct task_struct *p,
int cpu, int prev_cpu,
int sync)
{
return find_energy_efficient_cpu(sd, p, cpu, prev_cpu, sync);
}
#endif
#ifdef CONFIG_MTK_SCHED_BOOST
static void select_task_prefer_cpu_fair(struct task_struct *p, int *result)
{
int task_prefer;
int cpu, new_cpu;
task_prefer = cpu_prefer(p);
cpu = (*result & LB_CPU_MASK);
new_cpu = select_task_prefer_cpu(p, cpu);
if ((new_cpu >= 0) && (new_cpu != cpu)) {
if (task_prefer_match(p, cpu))
*result = new_cpu | LB_THERMAL;
else
*result = new_cpu | LB_HINT;
}
}
#else
static void select_task_prefer_cpu_fair(struct task_struct *p, int *result)
{
}
#endif
inline int
task_match_on_dst_cpu(struct task_struct *p, int src_cpu, int target_cpu)
{
struct task_struct *target_tsk;
struct rq *rq = cpu_rq(target_cpu);
#ifdef CONFIG_MTK_SCHED_BOOST
if (task_prefer_match(p, src_cpu))
return 0;
target_tsk = rq->curr;
if (task_prefer_fit(target_tsk, target_cpu))
return 0;
#endif
return 1;
}
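/*
 * "Frequency turning point": true when the little cluster already runs above
 * its efficiency turn-point capacity while the big cluster is still at or
 * below its own; the idle-pull path then also considers plain runnable tasks
 * on slower CPUs.
 */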
static int check_freq_turning(void)
{
struct root_domain *rd = cpu_rq(smp_processor_id())->rd;
unsigned long capacity_curr_little, capacity_curr_big;
if (rd->min_cap_orig_cpu < 0 || rd->max_cap_orig_cpu < 0)
return false;
capacity_curr_little = capacity_curr_of(rd->min_cap_orig_cpu);
capacity_curr_big = capacity_curr_of(rd->max_cap_orig_cpu);
if ((capacity_curr_little > cpu_eff_tp) &&
(capacity_curr_big <= big_cpu_eff_tp))
return true;
return false;
}
struct task_rotate_work {
struct work_struct w;
struct task_struct *src_task;
struct task_struct *dst_task;
int src_cpu;
int dst_cpu;
};
static DEFINE_PER_CPU(struct task_rotate_work, task_rotate_works);
struct task_rotate_reset_uclamp_work task_rotate_reset_uclamp_works;
unsigned int sysctl_sched_rotation_enable;
bool set_uclamp;
void set_sched_rotation_enable(bool enable)
{
sysctl_sched_rotation_enable = enable;
}
bool is_min_capacity_cpu(int cpu)
{
struct root_domain *rd = cpu_rq(smp_processor_id())->rd;
if (rd->min_cap_orig_cpu < 0)
return false;
if (capacity_orig_of(cpu) == capacity_orig_of(rd->min_cap_orig_cpu))
return true;
return false;
}
static void task_rotate_work_func(struct work_struct *work)
{
struct task_rotate_work *wr = container_of(work,
struct task_rotate_work, w);
int ret = -1;
struct rq *src_rq, *dst_rq;
ret = migrate_swap(wr->src_task, wr->dst_task);
if (ret == 0) {
update_eas_uclamp_min(EAS_UCLAMP_KIR_BIG_TASK, CGROUP_TA,
scale_to_percent(SCHED_CAPACITY_SCALE));
set_uclamp = true;
trace_sched_big_task_rotation(wr->src_cpu, wr->dst_cpu,
wr->src_task->pid,
wr->dst_task->pid,
true, set_uclamp);
}
put_task_struct(wr->src_task);
put_task_struct(wr->dst_task);
src_rq = cpu_rq(wr->src_cpu);
dst_rq = cpu_rq(wr->dst_cpu);
local_irq_disable();
double_rq_lock(src_rq, dst_rq);
src_rq->active_balance = 0;
dst_rq->active_balance = 0;
double_rq_unlock(src_rq, dst_rq);
local_irq_enable();
}
static void task_rotate_reset_uclamp_work_func(struct work_struct *work)
{
update_eas_uclamp_min(EAS_UCLAMP_KIR_BIG_TASK, CGROUP_TA, 0);
set_uclamp = false;
trace_sched_big_task_rotation_reset(set_uclamp);
}
void task_rotate_work_init(void)
{
int i;
for_each_possible_cpu(i) {
struct task_rotate_work *wr = &per_cpu(task_rotate_works, i);
INIT_WORK(&wr->w, task_rotate_work_func);
}
INIT_WORK(&task_rotate_reset_uclamp_works.w,
task_rotate_reset_uclamp_work_func);
}
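/*
 * Big-task rotation: once enough CPUs run tasks that do not fit their
 * capacity, swap the longest-waiting misfit task on a little CPU with a task
 * that has been running long enough on a bigger CPU, using the per-CPU
 * rotation work.
 */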
void task_check_for_rotation(struct rq *src_rq)
{
u64 wc, wait, max_wait = 0, run, max_run = 0;
int deserved_cpu = nr_cpu_ids, dst_cpu = nr_cpu_ids;
int i, src_cpu = cpu_of(src_rq);
struct rq *dst_rq;
struct task_rotate_work *wr = NULL;
int heavy_task = 0;
int force = 0;
if (!sysctl_sched_rotation_enable)
return;
if (is_max_capacity_cpu(src_cpu))
return;
for_each_possible_cpu(i) {
struct rq *rq = cpu_rq(i);
struct task_struct *curr_task = rq->curr;
if (curr_task &&
!task_fits_capacity(curr_task, capacity_of(i)))
heavy_task += 1;
}
if (heavy_task < HEAVY_TASK_NUM)
return;
wc = ktime_get_ns();
for_each_possible_cpu(i) {
struct rq *rq = cpu_rq(i);
if (!is_min_capacity_cpu(i))
continue;
if (is_reserved(i))
continue;
if (!rq->misfit_task_load || rq->curr->sched_class !=
&fair_sched_class)
continue;
wait = wc - rq->curr->last_enqueued_ts;
if (wait > max_wait) {
max_wait = wait;
deserved_cpu = i;
}
}
if (deserved_cpu != src_cpu)
return;
for_each_possible_cpu(i) {
struct rq *rq = cpu_rq(i);
if (capacity_orig_of(i) <= capacity_orig_of(src_cpu))
continue;
if (is_reserved(i))
continue;
if (rq->curr->sched_class != &fair_sched_class)
continue;
if (rq->nr_running > 1)
continue;
run = wc - rq->curr->last_enqueued_ts;
if (run < TASK_ROTATION_THRESHOLD_NS)
continue;
if (run > max_run) {
max_run = run;
dst_cpu = i;
}
}
if (dst_cpu == nr_cpu_ids)
return;
dst_rq = cpu_rq(dst_cpu);
double_rq_lock(src_rq, dst_rq);
if (dst_rq->curr->sched_class == &fair_sched_class) {
if (!cpumask_test_cpu(dst_cpu,
&(src_rq->curr)->cpus_allowed) ||
!cpumask_test_cpu(src_cpu,
&(dst_rq->curr)->cpus_allowed)) {
double_rq_unlock(src_rq, dst_rq);
return;
}
if (!src_rq->active_balance && !dst_rq->active_balance) {
src_rq->active_balance = MIGR_ROTATION;
dst_rq->active_balance = MIGR_ROTATION;
get_task_struct(src_rq->curr);
get_task_struct(dst_rq->curr);
wr = &per_cpu(task_rotate_works, src_cpu);
wr->src_task = src_rq->curr;
wr->dst_task = dst_rq->curr;
wr->src_cpu = src_rq->cpu;
wr->dst_cpu = dst_rq->cpu;
force = 1;
}
}
double_rq_unlock(src_rq, dst_rq);
if (force) {
queue_work_on(src_cpu, system_highpri_wq, &wr->w);
trace_sched_big_task_rotation(wr->src_cpu, wr->dst_cpu,
wr->src_task->pid, wr->dst_task->pid,
false, set_uclamp);
}
}