[PATCH] cond_resched() fix

Fix a bug identified by Zou Nan hai <nanhai.zou@intel.com>:

If the system is in state SYSTEM_BOOTING, and need_resched() is true,
cond_resched() returns true even though it didn't reschedule.  Consequently
need_resched() remains true and JBD locks up.

Fix that by teaching cond_resched() to only return true if it really did call
schedule().

cond_resched_lock() and cond_resched_softirq() have a problem too.  If we're
in SYSTEM_BOOTING state and need_resched() is true, these functions will drop
the lock and will then try to call schedule(), but the SYSTEM_BOOTING state
will prevent schedule() from being called.  So on return, need_resched() will
still be true, but cond_resched_lock() has to return 1 to tell the caller that
the lock was dropped.  The caller will probably lock up.

Bottom line: if these functions dropped the lock, they _must_ call schedule()
to clear need_resched().   Make it so.

Also, uninline __cond_resched().  It's largeish, and slowpath.

Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
diff --git a/kernel/sched.c b/kernel/sched.c
index 2629c17..d5e3707 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4386,7 +4386,16 @@
 	return 0;
 }
 
-static inline void __cond_resched(void)
+static inline int __resched_legal(void)
+{
+	if (unlikely(preempt_count()))
+		return 0;
+	if (unlikely(system_state != SYSTEM_RUNNING))
+		return 0;
+	return 1;
+}
+
+static void __cond_resched(void)
 {
 #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
 	__might_sleep(__FILE__, __LINE__);
@@ -4396,10 +4405,6 @@
 	 * PREEMPT_ACTIVE, which could trigger a second
 	 * cond_resched() call.
 	 */
-	if (unlikely(preempt_count()))
-		return;
-	if (unlikely(system_state != SYSTEM_RUNNING))
-		return;
 	do {
 		add_preempt_count(PREEMPT_ACTIVE);
 		schedule();
@@ -4409,13 +4414,12 @@
 
 int __sched cond_resched(void)
 {
-	if (need_resched()) {
+	if (need_resched() && __resched_legal()) {
 		__cond_resched();
 		return 1;
 	}
 	return 0;
 }
-
 EXPORT_SYMBOL(cond_resched);
 
 /*
@@ -4436,7 +4440,7 @@
 		ret = 1;
 		spin_lock(lock);
 	}
-	if (need_resched()) {
+	if (need_resched() && __resched_legal()) {
 		_raw_spin_unlock(lock);
 		preempt_enable_no_resched();
 		__cond_resched();
@@ -4445,14 +4449,13 @@
 	}
 	return ret;
 }
-
 EXPORT_SYMBOL(cond_resched_lock);
 
 int __sched cond_resched_softirq(void)
 {
 	BUG_ON(!in_softirq());
 
-	if (need_resched()) {
+	if (need_resched() && __resched_legal()) {
 		__local_bh_enable();
 		__cond_resched();
 		local_bh_disable();
@@ -4460,10 +4463,8 @@
 	}
 	return 0;
 }
-
 EXPORT_SYMBOL(cond_resched_softirq);
 
-
 /**
  * yield - yield the current processor to other threads.
  *