Merge git://git.kernel.org/pub/scm/linux/kernel/git/mingo/linux-2.6-sched
* git://git.kernel.org/pub/scm/linux/kernel/git/mingo/linux-2.6-sched:
sched: fix new task startup crash
sched: fix !SYSFS build breakage
sched: fix improper load balance across sched domain
sched: more robust sd-sysctl entry freeing
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3de5aa2..c204ab0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -535,10 +535,12 @@
#ifdef CONFIG_FAIR_USER_SCHED
struct task_group *tg;
+#ifdef CONFIG_SYSFS
struct kset kset;
struct subsys_attribute user_attr;
struct work_struct work;
#endif
+#endif
};
#ifdef CONFIG_FAIR_USER_SCHED
diff --git a/kernel/sched.c b/kernel/sched.c
index c4889ab..92721d1 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1712,7 +1712,7 @@
p->prio = effective_prio(p);
- if (!p->sched_class->task_new || !current->se.on_rq || !rq->cfs.curr) {
+ if (!p->sched_class->task_new || !current->se.on_rq) {
activate_task(rq, p, 0);
} else {
/*
@@ -2336,7 +2336,7 @@
unsigned long max_pull;
unsigned long busiest_load_per_task, busiest_nr_running;
unsigned long this_load_per_task, this_nr_running;
- int load_idx;
+ int load_idx, group_imb = 0;
#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
int power_savings_balance = 1;
unsigned long leader_nr_running = 0, min_load_per_task = 0;
@@ -2355,9 +2355,10 @@
load_idx = sd->idle_idx;
do {
- unsigned long load, group_capacity;
+ unsigned long load, group_capacity, max_cpu_load, min_cpu_load;
int local_group;
int i;
+ int __group_imb = 0;
unsigned int balance_cpu = -1, first_idle_cpu = 0;
unsigned long sum_nr_running, sum_weighted_load;
@@ -2368,6 +2369,8 @@
/* Tally up the load of all CPUs in the group */
sum_weighted_load = sum_nr_running = avg_load = 0;
+ max_cpu_load = 0;
+ min_cpu_load = ~0UL;
for_each_cpu_mask(i, group->cpumask) {
struct rq *rq;
@@ -2388,8 +2391,13 @@
}
load = target_load(i, load_idx);
- } else
+ } else {
load = source_load(i, load_idx);
+ if (load > max_cpu_load)
+ max_cpu_load = load;
+ if (min_cpu_load > load)
+ min_cpu_load = load;
+ }
avg_load += load;
sum_nr_running += rq->nr_running;
@@ -2415,6 +2423,9 @@
avg_load = sg_div_cpu_power(group,
avg_load * SCHED_LOAD_SCALE);
+ if ((max_cpu_load - min_cpu_load) > SCHED_LOAD_SCALE)
+ __group_imb = 1;
+
group_capacity = group->__cpu_power / SCHED_LOAD_SCALE;
if (local_group) {
@@ -2423,11 +2434,12 @@
this_nr_running = sum_nr_running;
this_load_per_task = sum_weighted_load;
} else if (avg_load > max_load &&
- sum_nr_running > group_capacity) {
+ (sum_nr_running > group_capacity || __group_imb)) {
max_load = avg_load;
busiest = group;
busiest_nr_running = sum_nr_running;
busiest_load_per_task = sum_weighted_load;
+ group_imb = __group_imb;
}
#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
@@ -2499,6 +2511,9 @@
goto out_balanced;
busiest_load_per_task /= busiest_nr_running;
+ if (group_imb)
+ busiest_load_per_task = min(busiest_load_per_task, avg_load);
+
/*
* We're trying to get all the cpus to the average_load, so we don't
* want to push ourselves above the average load, nor do we wish to
@@ -5282,11 +5297,20 @@
static void sd_free_ctl_entry(struct ctl_table **tablep)
{
- struct ctl_table *entry = *tablep;
+ struct ctl_table *entry;
- for (entry = *tablep; entry->procname; entry++)
+ /*
+ * In the intermediate directories, both the child directory and
+ * procname are dynamically allocated, and either allocation could
+ * fail, but the mode will always be set. In the lowest directory
+ * the names are static strings and all have proc handlers.
+ */
+ for (entry = *tablep; entry->mode; entry++) {
if (entry->child)
sd_free_ctl_entry(&entry->child);
+ if (entry->proc_handler == NULL)
+ kfree(entry->procname);
+ }
kfree(*tablep);
*tablep = NULL;
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index a17b785..166ed6d 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1031,12 +1031,8 @@
swap(curr->vruntime, se->vruntime);
}
- update_stats_enqueue(cfs_rq, se);
- check_spread(cfs_rq, se);
- check_spread(cfs_rq, curr);
- __enqueue_entity(cfs_rq, se);
- account_entity_enqueue(cfs_rq, se);
se->peer_preempt = 0;
+ enqueue_task_fair(rq, p, 0);
resched_task(rq->curr);
}
diff --git a/kernel/user.c b/kernel/user.c
index 9cb6f64..e91331c 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -84,9 +84,6 @@
#ifdef CONFIG_FAIR_USER_SCHED
-static struct kobject uids_kobject; /* represents /sys/kernel/uids directory */
-static DEFINE_MUTEX(uids_mutex);
-
static void sched_destroy_user(struct user_struct *up)
{
sched_destroy_group(up->tg);
@@ -108,6 +105,19 @@
sched_move_task(p);
}
+#else /* CONFIG_FAIR_USER_SCHED */
+
+static void sched_destroy_user(struct user_struct *up) { }
+static int sched_create_user(struct user_struct *up) { return 0; }
+static void sched_switch_user(struct task_struct *p) { }
+
+#endif /* CONFIG_FAIR_USER_SCHED */
+
+#if defined(CONFIG_FAIR_USER_SCHED) && defined(CONFIG_SYSFS)
+
+static struct kobject uids_kobject; /* represents /sys/kernel/uids directory */
+static DEFINE_MUTEX(uids_mutex);
+
static inline void uids_mutex_lock(void)
{
mutex_lock(&uids_mutex);
@@ -254,11 +264,8 @@
schedule_work(&up->work);
}
-#else /* CONFIG_FAIR_USER_SCHED */
+#else /* CONFIG_FAIR_USER_SCHED && CONFIG_SYSFS */
-static void sched_destroy_user(struct user_struct *up) { }
-static int sched_create_user(struct user_struct *up) { return 0; }
-static void sched_switch_user(struct task_struct *p) { }
static inline int user_kobject_create(struct user_struct *up) { return 0; }
static inline void uids_mutex_lock(void) { }
static inline void uids_mutex_unlock(void) { }
@@ -277,7 +284,7 @@
kmem_cache_free(uid_cachep, up);
}
-#endif /* CONFIG_FAIR_USER_SCHED */
+#endif
/*
* Locate the user_struct for the passed UID. If found, take a ref on it. The