sched: add RT-balance cpu-weight Some RT tasks (particularly kthreads) are bound to one specific CPU. It is fairly common for two or more bound tasks to get queued up at the same time. Consider, for instance, softirq_timer and softirq_sched. A timer goes off in an ISR which schedules softirq_thread to run at RT50. Then the timer handler determines that it's time to smp-rebalance the system so it schedules softirq_sched to run. So we are in a situation where we have two RT50 tasks queued, and the system will go into rt-overload condition to request other CPUs for help. This causes two problems in the current code: 1) If a high-priority bound task and a low-priority unbounded task queue up behind the running task, we will fail to ever relocate the unbounded task because we terminate the search on the first unmovable task. 2) We spend precious futile cycles in the fast-path trying to pull overloaded tasks over. It is therefore optimial to strive to avoid the overhead all together if we can cheaply detect the condition before overload even occurs. This patch tries to achieve this optimization by utilizing the hamming weight of the task->cpus_allowed mask. A weight of 1 indicates that the task cannot be migrated. We will then utilize this information to skip non-migratable tasks and to eliminate uncessary rebalance attempts. We introduce a per-rq variable to count the number of migratable tasks that are currently running. We only go into overload if we have more than one rt task, AND at least one of them is migratable. In addition, we introduce a per-task variable to cache the cpus_allowed weight, since the hamming calculation is probably relatively expensive. We only update the cached value when the mask is updated which should be relatively infrequent, especially compared to scheduling frequency in the fast path. Signed-off-by: Gregory Haskins <ghaskins@novell.com> Signed-off-by: Steven Rostedt <srostedt@redhat.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>

commit: 73fe6aae84400e2b475e2a1dc4e8592cd3ed6e69 [log] [tgz]
author: Gregory Haskins <ghaskins@novell.com> Fri Jan 25 21:08:07 2008 +0100
committer: Ingo Molnar <mingo@elte.hu> Fri Jan 25 21:08:07 2008 +0100
tree: 97c7d6a866d75563082c422491fc423b47aca9d7
parent: c7a1e46aa9782a947cf2ed506245d43396dbf991 [diff] [blame]
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index c492fd2..ae4995c 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c

@@ -33,6 +33,14 @@
 	atomic_dec(&rto_count);
 	cpu_clear(rq->cpu, rt_overload_mask);
 }
+
+static void update_rt_migration(struct rq *rq)
+{
+	if (rq->rt.rt_nr_migratory && (rq->rt.rt_nr_running > 1))
+		rt_set_overload(rq);
+	else
+		rt_clear_overload(rq);
+}
 #endif /* CONFIG_SMP */
 
 /*
@@ -65,8 +73,10 @@
 #ifdef CONFIG_SMP
 	if (p->prio < rq->rt.highest_prio)
 		rq->rt.highest_prio = p->prio;
-	if (rq->rt.rt_nr_running > 1)
-		rt_set_overload(rq);
+	if (p->nr_cpus_allowed > 1)
+		rq->rt.rt_nr_migratory++;
+
+	update_rt_migration(rq);
 #endif /* CONFIG_SMP */
 }
 
@@ -88,8 +98,10 @@
 		} /* otherwise leave rq->highest prio alone */
 	} else
 		rq->rt.highest_prio = MAX_RT_PRIO;
-	if (rq->rt.rt_nr_running < 2)
-		rt_clear_overload(rq);
+	if (p->nr_cpus_allowed > 1)
+		rq->rt.rt_nr_migratory--;
+
+	update_rt_migration(rq);
 #endif /* CONFIG_SMP */
 }
 
@@ -182,7 +194,8 @@
 static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
 {
 	if (!task_running(rq, p) &&
-	    (cpu < 0 || cpu_isset(cpu, p->cpus_allowed)))
+	    (cpu < 0 || cpu_isset(cpu, p->cpus_allowed)) &&
+	    (p->nr_cpus_allowed > 1))
 		return 1;
 	return 0;
 }
@@ -584,6 +597,32 @@
 	/* don't touch RT tasks */
 	return 0;
 }
+static void set_cpus_allowed_rt(struct task_struct *p, cpumask_t *new_mask)
+{
+	int weight = cpus_weight(*new_mask);
+
+	BUG_ON(!rt_task(p));
+
+	/*
+	 * Update the migration status of the RQ if we have an RT task
+	 * which is running AND changing its weight value.
+	 */
+	if (p->se.on_rq && (weight != p->nr_cpus_allowed)) {
+		struct rq *rq = task_rq(p);
+
+		if ((p->nr_cpus_allowed <= 1) && (weight > 1))
+			rq->rt.rt_nr_migratory++;
+		else if((p->nr_cpus_allowed > 1) && (weight <= 1)) {
+			BUG_ON(!rq->rt.rt_nr_migratory);
+			rq->rt.rt_nr_migratory--;
+		}
+
+		update_rt_migration(rq);
+	}
+
+	p->cpus_allowed    = *new_mask;
+	p->nr_cpus_allowed = weight;
+}
 #else /* CONFIG_SMP */
 # define schedule_tail_balance_rt(rq)	do { } while (0)
 # define schedule_balance_rt(rq, prev)	do { } while (0)
@@ -637,6 +676,7 @@
 #ifdef CONFIG_SMP
 	.load_balance		= load_balance_rt,
 	.move_one_task		= move_one_task_rt,
+	.set_cpus_allowed       = set_cpus_allowed_rt,
 #endif
 
 	.set_curr_task          = set_curr_task_rt,
commit	73fe6aae84400e2b475e2a1dc4e8592cd3ed6e69	[log] [tgz]
author	Gregory Haskins <ghaskins@novell.com>	Fri Jan 25 21:08:07 2008 +0100
committer	Ingo Molnar <mingo@elte.hu>	Fri Jan 25 21:08:07 2008 +0100
tree	97c7d6a866d75563082c422491fc423b47aca9d7
parent	c7a1e46aa9782a947cf2ed506245d43396dbf991 [diff] [blame]