sched: rt-group: make rt groups scheduling configurable
Make the rt group scheduler compile time configurable.
Keep it experimental for now.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/kernel/sched.c b/kernel/sched.c
index 85a5fbf..5edc549 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -155,7 +155,7 @@
struct list_head queue[MAX_RT_PRIO];
};
-#ifdef CONFIG_FAIR_GROUP_SCHED
+#ifdef CONFIG_GROUP_SCHED
#include <linux/cgroup.h>
@@ -165,19 +165,16 @@
/* task group related information */
struct task_group {
-#ifdef CONFIG_FAIR_CGROUP_SCHED
+#ifdef CONFIG_CGROUP_SCHED
struct cgroup_subsys_state css;
#endif
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
/* schedulable entities of this group on each cpu */
struct sched_entity **se;
/* runqueue "owned" by this group on each cpu */
struct cfs_rq **cfs_rq;
- struct sched_rt_entity **rt_se;
- struct rt_rq **rt_rq;
-
- u64 rt_runtime;
-
/*
* shares assigned to a task group governs how much of cpu bandwidth
* is allocated to the group. The more shares a group has, the more is
@@ -213,24 +210,36 @@
*
*/
unsigned long shares;
+#endif
+
+#ifdef CONFIG_RT_GROUP_SCHED
+ struct sched_rt_entity **rt_se;
+ struct rt_rq **rt_rq;
+
+ u64 rt_runtime;
+#endif
struct rcu_head rcu;
struct list_head list;
};
+#ifdef CONFIG_FAIR_GROUP_SCHED
/* Default task group's sched entity on each cpu */
static DEFINE_PER_CPU(struct sched_entity, init_sched_entity);
/* Default task group's cfs_rq on each cpu */
static DEFINE_PER_CPU(struct cfs_rq, init_cfs_rq) ____cacheline_aligned_in_smp;
+static struct sched_entity *init_sched_entity_p[NR_CPUS];
+static struct cfs_rq *init_cfs_rq_p[NR_CPUS];
+#endif
+
+#ifdef CONFIG_RT_GROUP_SCHED
static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity);
static DEFINE_PER_CPU(struct rt_rq, init_rt_rq) ____cacheline_aligned_in_smp;
-static struct sched_entity *init_sched_entity_p[NR_CPUS];
-static struct cfs_rq *init_cfs_rq_p[NR_CPUS];
-
static struct sched_rt_entity *init_sched_rt_entity_p[NR_CPUS];
static struct rt_rq *init_rt_rq_p[NR_CPUS];
+#endif
/* task_group_lock serializes add/remove of task groups and also changes to
* a task group's cpu shares.
@@ -240,6 +249,7 @@
/* doms_cur_mutex serializes access to doms_cur[] array */
static DEFINE_MUTEX(doms_cur_mutex);
+#ifdef CONFIG_FAIR_GROUP_SCHED
#ifdef CONFIG_SMP
/* kernel thread that runs rebalance_shares() periodically */
static struct task_struct *lb_monitor_task;
@@ -248,18 +258,7 @@
static void set_se_shares(struct sched_entity *se, unsigned long shares);
-/* Default task group.
- * Every task in system belong to this group at bootup.
- */
-struct task_group init_task_group = {
- .se = init_sched_entity_p,
- .cfs_rq = init_cfs_rq_p,
-
- .rt_se = init_sched_rt_entity_p,
- .rt_rq = init_rt_rq_p,
-};
-
-#ifdef CONFIG_FAIR_USER_SCHED
+#ifdef CONFIG_USER_SCHED
# define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD)
#else
# define INIT_TASK_GROUP_LOAD NICE_0_LOAD
@@ -268,15 +267,31 @@
#define MIN_GROUP_SHARES 2
static int init_task_group_load = INIT_TASK_GROUP_LOAD;
+#endif
+
+/* Default task group.
+ * Every task in system belong to this group at bootup.
+ */
+struct task_group init_task_group = {
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ .se = init_sched_entity_p,
+ .cfs_rq = init_cfs_rq_p,
+#endif
+
+#ifdef CONFIG_RT_GROUP_SCHED
+ .rt_se = init_sched_rt_entity_p,
+ .rt_rq = init_rt_rq_p,
+#endif
+};
/* return group to which a task belongs */
static inline struct task_group *task_group(struct task_struct *p)
{
struct task_group *tg;
-#ifdef CONFIG_FAIR_USER_SCHED
+#ifdef CONFIG_USER_SCHED
tg = p->user->tg;
-#elif defined(CONFIG_FAIR_CGROUP_SCHED)
+#elif defined(CONFIG_CGROUP_SCHED)
tg = container_of(task_subsys_state(p, cpu_cgroup_subsys_id),
struct task_group, css);
#else
@@ -288,11 +303,15 @@
/* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
{
+#ifdef CONFIG_FAIR_GROUP_SCHED
p->se.cfs_rq = task_group(p)->cfs_rq[cpu];
p->se.parent = task_group(p)->se[cpu];
+#endif
+#ifdef CONFIG_RT_GROUP_SCHED
p->rt.rt_rq = task_group(p)->rt_rq[cpu];
p->rt.parent = task_group(p)->rt_se[cpu];
+#endif
}
static inline void lock_doms_cur(void)
@@ -311,7 +330,7 @@
static inline void lock_doms_cur(void) { }
static inline void unlock_doms_cur(void) { }
-#endif /* CONFIG_FAIR_GROUP_SCHED */
+#endif /* CONFIG_GROUP_SCHED */
/* CFS-related fields in a runqueue */
struct cfs_rq {
@@ -351,7 +370,7 @@
struct rt_rq {
struct rt_prio_array active;
unsigned long rt_nr_running;
-#if defined CONFIG_SMP || defined CONFIG_FAIR_GROUP_SCHED
+#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
int highest_prio; /* highest queued rt task prio */
#endif
#ifdef CONFIG_SMP
@@ -361,7 +380,7 @@
int rt_throttled;
u64 rt_time;
-#ifdef CONFIG_FAIR_GROUP_SCHED
+#ifdef CONFIG_RT_GROUP_SCHED
unsigned long rt_nr_boosted;
struct rq *rq;
@@ -437,6 +456,8 @@
#ifdef CONFIG_FAIR_GROUP_SCHED
/* list of leaf cfs_rq on this cpu: */
struct list_head leaf_cfs_rq_list;
+#endif
+#ifdef CONFIG_RT_GROUP_SCHED
struct list_head leaf_rt_rq_list;
#endif
@@ -7104,7 +7125,7 @@
/* delimiter for bitsearch: */
__set_bit(MAX_RT_PRIO, array->bitmap);
-#if defined CONFIG_SMP || defined CONFIG_FAIR_GROUP_SCHED
+#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
rt_rq->highest_prio = MAX_RT_PRIO;
#endif
#ifdef CONFIG_SMP
@@ -7115,7 +7136,7 @@
rt_rq->rt_time = 0;
rt_rq->rt_throttled = 0;
-#ifdef CONFIG_FAIR_GROUP_SCHED
+#ifdef CONFIG_RT_GROUP_SCHED
rt_rq->rt_nr_boosted = 0;
rt_rq->rq = rq;
#endif
@@ -7139,7 +7160,9 @@
se->load.inv_weight = div64_64(1ULL<<32, se->load.weight);
se->parent = NULL;
}
+#endif
+#ifdef CONFIG_RT_GROUP_SCHED
static void init_tg_rt_entry(struct rq *rq, struct task_group *tg,
struct rt_rq *rt_rq, struct sched_rt_entity *rt_se,
int cpu, int add)
@@ -7168,7 +7191,7 @@
init_defrootdomain();
#endif
-#ifdef CONFIG_FAIR_GROUP_SCHED
+#ifdef CONFIG_GROUP_SCHED
list_add(&init_task_group.list, &task_groups);
#endif
@@ -7189,6 +7212,8 @@
&per_cpu(init_cfs_rq, i),
&per_cpu(init_sched_entity, i), i, 1);
+#endif
+#ifdef CONFIG_RT_GROUP_SCHED
init_task_group.rt_runtime =
sysctl_sched_rt_runtime * NSEC_PER_USEC;
INIT_LIST_HEAD(&rq->leaf_rt_rq_list);
@@ -7381,9 +7406,9 @@
#endif
-#ifdef CONFIG_FAIR_GROUP_SCHED
+#ifdef CONFIG_GROUP_SCHED
-#ifdef CONFIG_SMP
+#if defined CONFIG_FAIR_GROUP_SCHED && defined CONFIG_SMP
/*
* distribute shares of all task groups among their schedulable entities,
* to reflect load distribution across cpus.
@@ -7539,20 +7564,28 @@
int i;
for_each_possible_cpu(i) {
+#ifdef CONFIG_FAIR_GROUP_SCHED
if (tg->cfs_rq)
kfree(tg->cfs_rq[i]);
if (tg->se)
kfree(tg->se[i]);
+#endif
+#ifdef CONFIG_RT_GROUP_SCHED
if (tg->rt_rq)
kfree(tg->rt_rq[i]);
if (tg->rt_se)
kfree(tg->rt_se[i]);
+#endif
}
+#ifdef CONFIG_FAIR_GROUP_SCHED
kfree(tg->cfs_rq);
kfree(tg->se);
+#endif
+#ifdef CONFIG_RT_GROUP_SCHED
kfree(tg->rt_rq);
kfree(tg->rt_se);
+#endif
kfree(tg);
}
@@ -7560,10 +7593,14 @@
struct task_group *sched_create_group(void)
{
struct task_group *tg;
+#ifdef CONFIG_FAIR_GROUP_SCHED
struct cfs_rq *cfs_rq;
struct sched_entity *se;
+#endif
+#ifdef CONFIG_RT_GROUP_SCHED
struct rt_rq *rt_rq;
struct sched_rt_entity *rt_se;
+#endif
struct rq *rq;
unsigned long flags;
int i;
@@ -7572,12 +7609,18 @@
if (!tg)
return ERR_PTR(-ENOMEM);
+#ifdef CONFIG_FAIR_GROUP_SCHED
tg->cfs_rq = kzalloc(sizeof(cfs_rq) * NR_CPUS, GFP_KERNEL);
if (!tg->cfs_rq)
goto err;
tg->se = kzalloc(sizeof(se) * NR_CPUS, GFP_KERNEL);
if (!tg->se)
goto err;
+
+ tg->shares = NICE_0_LOAD;
+#endif
+
+#ifdef CONFIG_RT_GROUP_SCHED
tg->rt_rq = kzalloc(sizeof(rt_rq) * NR_CPUS, GFP_KERNEL);
if (!tg->rt_rq)
goto err;
@@ -7585,12 +7628,13 @@
if (!tg->rt_se)
goto err;
- tg->shares = NICE_0_LOAD;
tg->rt_runtime = 0;
+#endif
for_each_possible_cpu(i) {
rq = cpu_rq(i);
+#ifdef CONFIG_FAIR_GROUP_SCHED
cfs_rq = kmalloc_node(sizeof(struct cfs_rq),
GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
if (!cfs_rq)
@@ -7601,6 +7645,10 @@
if (!se)
goto err;
+ init_tg_cfs_entry(rq, tg, cfs_rq, se, i, 0);
+#endif
+
+#ifdef CONFIG_RT_GROUP_SCHED
rt_rq = kmalloc_node(sizeof(struct rt_rq),
GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
if (!rt_rq)
@@ -7611,17 +7659,21 @@
if (!rt_se)
goto err;
- init_tg_cfs_entry(rq, tg, cfs_rq, se, i, 0);
init_tg_rt_entry(rq, tg, rt_rq, rt_se, i, 0);
+#endif
}
spin_lock_irqsave(&task_group_lock, flags);
for_each_possible_cpu(i) {
rq = cpu_rq(i);
+#ifdef CONFIG_FAIR_GROUP_SCHED
cfs_rq = tg->cfs_rq[i];
list_add_rcu(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list);
+#endif
+#ifdef CONFIG_RT_GROUP_SCHED
rt_rq = tg->rt_rq[i];
list_add_rcu(&rt_rq->leaf_rt_rq_list, &rq->leaf_rt_rq_list);
+#endif
}
list_add_rcu(&tg->list, &task_groups);
spin_unlock_irqrestore(&task_group_lock, flags);
@@ -7643,23 +7695,21 @@
/* Destroy runqueue etc associated with a task group */
void sched_destroy_group(struct task_group *tg)
{
- struct cfs_rq *cfs_rq = NULL;
- struct rt_rq *rt_rq = NULL;
unsigned long flags;
int i;
spin_lock_irqsave(&task_group_lock, flags);
for_each_possible_cpu(i) {
- cfs_rq = tg->cfs_rq[i];
- list_del_rcu(&cfs_rq->leaf_cfs_rq_list);
- rt_rq = tg->rt_rq[i];
- list_del_rcu(&rt_rq->leaf_rt_rq_list);
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ list_del_rcu(&tg->cfs_rq[i]->leaf_cfs_rq_list);
+#endif
+#ifdef CONFIG_RT_GROUP_SCHED
+ list_del_rcu(&tg->rt_rq[i]->leaf_rt_rq_list);
+#endif
}
list_del_rcu(&tg->list);
spin_unlock_irqrestore(&task_group_lock, flags);
- BUG_ON(!cfs_rq);
-
/* wait for possible concurrent references to cfs_rqs complete */
call_rcu(&tg->rcu, free_sched_group_rcu);
}
@@ -7699,6 +7749,7 @@
task_rq_unlock(rq, &flags);
}
+#ifdef CONFIG_FAIR_GROUP_SCHED
/* rq->lock to be locked by caller */
static void set_se_shares(struct sched_entity *se, unsigned long shares)
{
@@ -7786,7 +7837,9 @@
{
return tg->shares;
}
+#endif
+#ifdef CONFIG_RT_GROUP_SCHED
/*
* Ensure that the real time constraints are schedulable.
*/
@@ -7858,9 +7911,10 @@
do_div(rt_runtime_us, NSEC_PER_USEC);
return rt_runtime_us;
}
-#endif /* CONFIG_FAIR_GROUP_SCHED */
+#endif
+#endif /* CONFIG_GROUP_SCHED */
-#ifdef CONFIG_FAIR_CGROUP_SCHED
+#ifdef CONFIG_CGROUP_SCHED
/* return corresponding task_group object of a cgroup */
static inline struct task_group *cgroup_tg(struct cgroup *cgrp)
@@ -7920,6 +7974,7 @@
sched_move_task(tsk);
}
+#ifdef CONFIG_FAIR_GROUP_SCHED
static int cpu_shares_write_uint(struct cgroup *cgrp, struct cftype *cftype,
u64 shareval)
{
@@ -7932,7 +7987,9 @@
return (u64) tg->shares;
}
+#endif
+#ifdef CONFIG_RT_GROUP_SCHED
static int cpu_rt_runtime_write(struct cgroup *cgrp, struct cftype *cft,
struct file *file,
const char __user *userbuf,
@@ -7977,18 +8034,23 @@
return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
}
+#endif
static struct cftype cpu_files[] = {
+#ifdef CONFIG_FAIR_GROUP_SCHED
{
.name = "shares",
.read_uint = cpu_shares_read_uint,
.write_uint = cpu_shares_write_uint,
},
+#endif
+#ifdef CONFIG_RT_GROUP_SCHED
{
.name = "rt_runtime_us",
.read = cpu_rt_runtime_read,
.write = cpu_rt_runtime_write,
},
+#endif
};
static int cpu_cgroup_populate(struct cgroup_subsys *ss, struct cgroup *cont)
@@ -8007,7 +8069,7 @@
.early_init = 1,
};
-#endif /* CONFIG_FAIR_CGROUP_SCHED */
+#endif /* CONFIG_CGROUP_SCHED */
#ifdef CONFIG_CGROUP_CPUACCT
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 35825b2..f54792b 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -55,7 +55,7 @@
return !list_empty(&rt_se->run_list);
}
-#ifdef CONFIG_FAIR_GROUP_SCHED
+#ifdef CONFIG_RT_GROUP_SCHED
static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
{
@@ -177,7 +177,7 @@
static inline int rt_se_prio(struct sched_rt_entity *rt_se)
{
-#ifdef CONFIG_FAIR_GROUP_SCHED
+#ifdef CONFIG_RT_GROUP_SCHED
struct rt_rq *rt_rq = group_rt_rq(rt_se);
if (rt_rq)
@@ -269,7 +269,7 @@
{
WARN_ON(!rt_prio(rt_se_prio(rt_se)));
rt_rq->rt_nr_running++;
-#if defined CONFIG_SMP || defined CONFIG_FAIR_GROUP_SCHED
+#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
if (rt_se_prio(rt_se) < rt_rq->highest_prio)
rt_rq->highest_prio = rt_se_prio(rt_se);
#endif
@@ -281,7 +281,7 @@
update_rt_migration(rq_of_rt_rq(rt_rq));
#endif
-#ifdef CONFIG_FAIR_GROUP_SCHED
+#ifdef CONFIG_RT_GROUP_SCHED
if (rt_se_boosted(rt_se))
rt_rq->rt_nr_boosted++;
#endif
@@ -293,7 +293,7 @@
WARN_ON(!rt_prio(rt_se_prio(rt_se)));
WARN_ON(!rt_rq->rt_nr_running);
rt_rq->rt_nr_running--;
-#if defined CONFIG_SMP || defined CONFIG_FAIR_GROUP_SCHED
+#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
if (rt_rq->rt_nr_running) {
struct rt_prio_array *array;
@@ -315,7 +315,7 @@
update_rt_migration(rq_of_rt_rq(rt_rq));
#endif /* CONFIG_SMP */
-#ifdef CONFIG_FAIR_GROUP_SCHED
+#ifdef CONFIG_RT_GROUP_SCHED
if (rt_se_boosted(rt_se))
rt_rq->rt_nr_boosted--;
diff --git a/kernel/user.c b/kernel/user.c
index 9f6d471b..7132022 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -57,7 +57,7 @@
.uid_keyring = &root_user_keyring,
.session_keyring = &root_session_keyring,
#endif
-#ifdef CONFIG_FAIR_USER_SCHED
+#ifdef CONFIG_USER_SCHED
.tg = &init_task_group,
#endif
};
@@ -90,7 +90,7 @@
return NULL;
}
-#ifdef CONFIG_FAIR_USER_SCHED
+#ifdef CONFIG_USER_SCHED
static void sched_destroy_user(struct user_struct *up)
{
@@ -113,15 +113,15 @@
sched_move_task(p);
}
-#else /* CONFIG_FAIR_USER_SCHED */
+#else /* CONFIG_USER_SCHED */
static void sched_destroy_user(struct user_struct *up) { }
static int sched_create_user(struct user_struct *up) { return 0; }
static void sched_switch_user(struct task_struct *p) { }
-#endif /* CONFIG_FAIR_USER_SCHED */
+#endif /* CONFIG_USER_SCHED */
-#if defined(CONFIG_FAIR_USER_SCHED) && defined(CONFIG_SYSFS)
+#if defined(CONFIG_USER_SCHED) && defined(CONFIG_SYSFS)
static struct kset *uids_kset; /* represents the /sys/kernel/uids/ directory */
static DEFINE_MUTEX(uids_mutex);
@@ -137,6 +137,7 @@
}
/* uid directory attributes */
+#ifdef CONFIG_FAIR_GROUP_SCHED
static ssize_t cpu_shares_show(struct kobject *kobj,
struct kobj_attribute *attr,
char *buf)
@@ -163,7 +164,9 @@
static struct kobj_attribute cpu_share_attr =
__ATTR(cpu_share, 0644, cpu_shares_show, cpu_shares_store);
+#endif
+#ifdef CONFIG_RT_GROUP_SCHED
static ssize_t cpu_rt_runtime_show(struct kobject *kobj,
struct kobj_attribute *attr,
char *buf)
@@ -190,11 +193,16 @@
static struct kobj_attribute cpu_rt_runtime_attr =
__ATTR(cpu_rt_runtime, 0644, cpu_rt_runtime_show, cpu_rt_runtime_store);
+#endif
/* default attributes per uid directory */
static struct attribute *uids_attributes[] = {
+#ifdef CONFIG_FAIR_GROUP_SCHED
&cpu_share_attr.attr,
+#endif
+#ifdef CONFIG_RT_GROUP_SCHED
&cpu_rt_runtime_attr.attr,
+#endif
NULL
};
@@ -297,7 +305,7 @@
schedule_work(&up->work);
}
-#else /* CONFIG_FAIR_USER_SCHED && CONFIG_SYSFS */
+#else /* CONFIG_USER_SCHED && CONFIG_SYSFS */
int uids_sysfs_init(void) { return 0; }
static inline int uids_user_create(struct user_struct *up) { return 0; }
@@ -401,7 +409,7 @@
spin_lock_irq(&uidhash_lock);
up = uid_hash_find(uid, hashent);
if (up) {
- /* This case is not possible when CONFIG_FAIR_USER_SCHED
+ /* This case is not possible when CONFIG_USER_SCHED
* is defined, since we serialize alloc_uid() using
* uids_mutex. Hence no need to call
* sched_destroy_user() or remove_user_sysfs_dir().