Merge branch 'core-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'core-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
locking: Make sparse work with inline spinlocks and rwlocks
x86/mce: Fix RCU lockdep splats
rcu: Increase RCU CPU stall timeouts if PROVE_RCU
ftrace: Replace read_barrier_depends() with rcu_dereference_raw()
rcu: Suppress RCU lockdep warnings during early boot
rcu, ftrace: Fix RCU lockdep splat in ftrace_perf_buf_prepare()
rcu: Suppress __mpol_dup() false positive from RCU lockdep
rcu: Make rcu_read_lock_sched_held() handle !PREEMPT
rcu: Add control variables to lockdep_rcu_dereference() diagnostics
rcu, cgroup: Relax the check in task_subsys_state() as early boot is now handled by lockdep-RCU
rcu: Use wrapper function instead of exporting tasklist_lock
sched, rcu: Fix rcu_dereference() for RCU-lockdep
rcu: Make task_subsys_state() RCU-lockdep checks handle boot-time use
rcu: Fix holdoff for accelerated GPs for last non-dynticked CPU
x86/gart: Unexport gart_iommu_aperture
Fix trivial conflicts in kernel/trace/ftrace.c
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index f147a95..3704997 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -31,7 +31,6 @@
#include <asm/x86_init.h>
int gart_iommu_aperture;
-EXPORT_SYMBOL_GPL(gart_iommu_aperture);
int gart_iommu_aperture_disabled __initdata;
int gart_iommu_aperture_allowed __initdata;
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 28cba46..bd58de4 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -46,6 +46,11 @@
#include "mce-internal.h"
+#define rcu_dereference_check_mce(p) \
+ rcu_dereference_check((p), \
+ rcu_read_lock_sched_held() || \
+ lockdep_is_held(&mce_read_mutex))
+
#define CREATE_TRACE_POINTS
#include <trace/events/mce.h>
@@ -158,7 +163,7 @@
mce->finished = 0;
wmb();
for (;;) {
- entry = rcu_dereference(mcelog.next);
+ entry = rcu_dereference_check_mce(mcelog.next);
for (;;) {
/*
* When the buffer fills up discard new entries.
@@ -1500,7 +1505,7 @@
return -ENOMEM;
mutex_lock(&mce_read_mutex);
- next = rcu_dereference(mcelog.next);
+ next = rcu_dereference_check_mce(mcelog.next);
/* Only supports full reads right now */
if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) {
@@ -1565,7 +1570,7 @@
static unsigned int mce_poll(struct file *file, poll_table *wait)
{
poll_wait(file, &mce_wait, wait);
- if (rcu_dereference(mcelog.next))
+ if (rcu_dereference_check_mce(mcelog.next))
return POLLIN | POLLRDNORM;
return 0;
}
diff --git a/include/linux/cred.h b/include/linux/cred.h
index 4db09f8..52507c3 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -280,7 +280,7 @@
* task or by holding tasklist_lock to prevent it from being unlinked.
*/
#define __task_cred(task) \
- ((const struct cred *)(rcu_dereference_check((task)->real_cred, rcu_read_lock_held() || lockdep_is_held(&tasklist_lock))))
+ ((const struct cred *)(rcu_dereference_check((task)->real_cred, rcu_read_lock_held() || lockdep_tasklist_lock_is_held())))
/**
* get_task_cred - Get another task's objective credentials
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index a005cac..3024050 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -101,6 +101,11 @@
# define rcu_read_release_sched() \
lock_release(&rcu_sched_lock_map, 1, _THIS_IP_)
+static inline int debug_lockdep_rcu_enabled(void)
+{
+ return likely(rcu_scheduler_active && debug_locks);
+}
+
/**
* rcu_read_lock_held - might we be in RCU read-side critical section?
*
@@ -108,12 +113,14 @@
* an RCU read-side critical section. In absence of CONFIG_PROVE_LOCKING,
* this assumes we are in an RCU read-side critical section unless it can
* prove otherwise.
+ *
+ * Check rcu_scheduler_active to prevent false positives during boot.
*/
static inline int rcu_read_lock_held(void)
{
- if (debug_locks)
- return lock_is_held(&rcu_lock_map);
- return 1;
+ if (!debug_lockdep_rcu_enabled())
+ return 1;
+ return lock_is_held(&rcu_lock_map);
}
/**
@@ -123,12 +130,14 @@
* an RCU-bh read-side critical section. In absence of CONFIG_PROVE_LOCKING,
* this assumes we are in an RCU-bh read-side critical section unless it can
* prove otherwise.
+ *
+ * Check rcu_scheduler_active to prevent false positives during boot.
*/
static inline int rcu_read_lock_bh_held(void)
{
- if (debug_locks)
- return lock_is_held(&rcu_bh_lock_map);
- return 1;
+ if (!debug_lockdep_rcu_enabled())
+ return 1;
+ return lock_is_held(&rcu_bh_lock_map);
}
/**
@@ -139,15 +148,26 @@
* this assumes we are in an RCU-sched read-side critical section unless it
* can prove otherwise. Note that disabling of preemption (including
* disabling irqs) counts as an RCU-sched read-side critical section.
+ *
+ * Check rcu_scheduler_active to prevent false positives during boot.
*/
+#ifdef CONFIG_PREEMPT
static inline int rcu_read_lock_sched_held(void)
{
int lockdep_opinion = 0;
+ if (!debug_lockdep_rcu_enabled())
+ return 1;
if (debug_locks)
lockdep_opinion = lock_is_held(&rcu_sched_lock_map);
- return lockdep_opinion || preempt_count() != 0 || !rcu_scheduler_active;
+ return lockdep_opinion || preempt_count() != 0;
}
+#else /* #ifdef CONFIG_PREEMPT */
+static inline int rcu_read_lock_sched_held(void)
+{
+ return 1;
+}
+#endif /* #else #ifdef CONFIG_PREEMPT */
#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
@@ -168,10 +188,17 @@
return 1;
}
+#ifdef CONFIG_PREEMPT
static inline int rcu_read_lock_sched_held(void)
{
- return preempt_count() != 0 || !rcu_scheduler_active;
+ return !rcu_scheduler_active || preempt_count() != 0;
}
+#else /* #ifdef CONFIG_PREEMPT */
+static inline int rcu_read_lock_sched_held(void)
+{
+ return 1;
+}
+#endif /* #else #ifdef CONFIG_PREEMPT */
#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
@@ -188,7 +215,7 @@
*/
#define rcu_dereference_check(p, c) \
({ \
- if (debug_locks && !(c)) \
+ if (debug_lockdep_rcu_enabled() && !(c)) \
lockdep_rcu_dereference(__FILE__, __LINE__); \
rcu_dereference_raw(p); \
})
diff --git a/include/linux/rwlock.h b/include/linux/rwlock.h
index 71e0b00..bc2994e 100644
--- a/include/linux/rwlock.h
+++ b/include/linux/rwlock.h
@@ -29,25 +29,25 @@
#endif
#ifdef CONFIG_DEBUG_SPINLOCK
- extern void do_raw_read_lock(rwlock_t *lock);
+ extern void do_raw_read_lock(rwlock_t *lock) __acquires(lock);
#define do_raw_read_lock_flags(lock, flags) do_raw_read_lock(lock)
extern int do_raw_read_trylock(rwlock_t *lock);
- extern void do_raw_read_unlock(rwlock_t *lock);
- extern void do_raw_write_lock(rwlock_t *lock);
+ extern void do_raw_read_unlock(rwlock_t *lock) __releases(lock);
+ extern void do_raw_write_lock(rwlock_t *lock) __acquires(lock);
#define do_raw_write_lock_flags(lock, flags) do_raw_write_lock(lock)
extern int do_raw_write_trylock(rwlock_t *lock);
- extern void do_raw_write_unlock(rwlock_t *lock);
+ extern void do_raw_write_unlock(rwlock_t *lock) __releases(lock);
#else
-# define do_raw_read_lock(rwlock) arch_read_lock(&(rwlock)->raw_lock)
+# define do_raw_read_lock(rwlock) do {__acquire(lock); arch_read_lock(&(rwlock)->raw_lock); } while (0)
# define do_raw_read_lock_flags(lock, flags) \
- arch_read_lock_flags(&(lock)->raw_lock, *(flags))
+ do {__acquire(lock); arch_read_lock_flags(&(lock)->raw_lock, *(flags)); } while (0)
# define do_raw_read_trylock(rwlock) arch_read_trylock(&(rwlock)->raw_lock)
-# define do_raw_read_unlock(rwlock) arch_read_unlock(&(rwlock)->raw_lock)
-# define do_raw_write_lock(rwlock) arch_write_lock(&(rwlock)->raw_lock)
+# define do_raw_read_unlock(rwlock) do {arch_read_unlock(&(rwlock)->raw_lock); __release(lock); } while (0)
+# define do_raw_write_lock(rwlock) do {__acquire(lock); arch_write_lock(&(rwlock)->raw_lock); } while (0)
# define do_raw_write_lock_flags(lock, flags) \
- arch_write_lock_flags(&(lock)->raw_lock, *(flags))
+ do {__acquire(lock); arch_write_lock_flags(&(lock)->raw_lock, *(flags)); } while (0)
# define do_raw_write_trylock(rwlock) arch_write_trylock(&(rwlock)->raw_lock)
-# define do_raw_write_unlock(rwlock) arch_write_unlock(&(rwlock)->raw_lock)
+# define do_raw_write_unlock(rwlock) do {arch_write_unlock(&(rwlock)->raw_lock); __release(lock); } while (0)
#endif
#define read_can_lock(rwlock) arch_read_can_lock(&(rwlock)->raw_lock)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8d70ff8..dad7f66 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -258,6 +258,10 @@
struct task_struct;
+#ifdef CONFIG_PROVE_RCU
+extern int lockdep_tasklist_lock_is_held(void);
+#endif /* #ifdef CONFIG_PROVE_RCU */
+
extern void sched_init(void);
extern void sched_init_smp(void);
extern asmlinkage void schedule_tail(struct task_struct *prev);
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index 8608821..89fac6a 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -128,19 +128,21 @@
#define raw_spin_unlock_wait(lock) arch_spin_unlock_wait(&(lock)->raw_lock)
#ifdef CONFIG_DEBUG_SPINLOCK
- extern void do_raw_spin_lock(raw_spinlock_t *lock);
+ extern void do_raw_spin_lock(raw_spinlock_t *lock) __acquires(lock);
#define do_raw_spin_lock_flags(lock, flags) do_raw_spin_lock(lock)
extern int do_raw_spin_trylock(raw_spinlock_t *lock);
- extern void do_raw_spin_unlock(raw_spinlock_t *lock);
+ extern void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock);
#else
-static inline void do_raw_spin_lock(raw_spinlock_t *lock)
+static inline void do_raw_spin_lock(raw_spinlock_t *lock) __acquires(lock)
{
+ __acquire(lock);
arch_spin_lock(&lock->raw_lock);
}
static inline void
-do_raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long *flags)
+do_raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long *flags) __acquires(lock)
{
+ __acquire(lock);
arch_spin_lock_flags(&lock->raw_lock, *flags);
}
@@ -149,9 +151,10 @@
return arch_spin_trylock(&(lock)->raw_lock);
}
-static inline void do_raw_spin_unlock(raw_spinlock_t *lock)
+static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock)
{
arch_spin_unlock(&lock->raw_lock);
+ __release(lock);
}
#endif
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 0804cd5..601ad77 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -699,9 +699,9 @@
* __cpu = smp_processor_id();
*
* if (in_nmi())
- * trace_buf = rcu_dereference(perf_trace_buf_nmi);
+ * trace_buf = rcu_dereference_sched(perf_trace_buf_nmi);
* else
- * trace_buf = rcu_dereference(perf_trace_buf);
+ * trace_buf = rcu_dereference_sched(perf_trace_buf);
*
* if (!trace_buf)
* goto end;
diff --git a/kernel/exit.c b/kernel/exit.c
index ce1e48c..cce59cb 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -87,7 +87,7 @@
sighand = rcu_dereference_check(tsk->sighand,
rcu_read_lock_held() ||
- lockdep_is_held(&tasklist_lock));
+ lockdep_tasklist_lock_is_held());
spin_lock(&sighand->siglock);
posix_cpu_timers_exit(tsk);
diff --git a/kernel/fork.c b/kernel/fork.c
index 1beb6c3..4799c5f 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -86,7 +86,14 @@
DEFINE_PER_CPU(unsigned long, process_counts) = 0;
__cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */
-EXPORT_SYMBOL_GPL(tasklist_lock);
+
+#ifdef CONFIG_PROVE_RCU
+int lockdep_tasklist_lock_is_held(void)
+{
+ return lockdep_is_held(&tasklist_lock);
+}
+EXPORT_SYMBOL_GPL(lockdep_tasklist_lock_is_held);
+#endif /* #ifdef CONFIG_PROVE_RCU */
int nr_processes(void)
{
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 0c30d04..681bc2e 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -3822,6 +3822,7 @@
printk("%s:%d invoked rcu_dereference_check() without protection!\n",
file, line);
printk("\nother info that might help us debug this:\n\n");
+ printk("\nrcu_scheduler_active = %d, debug_locks = %d\n", rcu_scheduler_active, debug_locks);
lockdep_print_held_locks(curr);
printk("\nstack backtrace:\n");
dump_stack();
diff --git a/kernel/pid.c b/kernel/pid.c
index 86b2969..aebb30d 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -367,7 +367,9 @@
struct task_struct *result = NULL;
if (pid) {
struct hlist_node *first;
- first = rcu_dereference_check(pid->tasks[type].first, rcu_read_lock_held() || lockdep_is_held(&tasklist_lock));
+ first = rcu_dereference_check(pid->tasks[type].first,
+ rcu_read_lock_held() ||
+ lockdep_tasklist_lock_is_held());
if (first)
result = hlist_entry(first, struct task_struct, pids[(type)].node);
}
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 1439eb5..4a525a3 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -246,12 +246,21 @@
#define RCU_JIFFIES_TILL_FORCE_QS 3 /* for rsp->jiffies_force_qs */
#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
-#define RCU_SECONDS_TILL_STALL_CHECK (10 * HZ) /* for rsp->jiffies_stall */
-#define RCU_SECONDS_TILL_STALL_RECHECK (30 * HZ) /* for rsp->jiffies_stall */
-#define RCU_STALL_RAT_DELAY 2 /* Allow other CPUs time */
- /* to take at least one */
- /* scheduling clock irq */
- /* before ratting on them. */
+
+#ifdef CONFIG_PROVE_RCU
+#define RCU_STALL_DELAY_DELTA (5 * HZ)
+#else
+#define RCU_STALL_DELAY_DELTA 0
+#endif
+
+#define RCU_SECONDS_TILL_STALL_CHECK (10 * HZ + RCU_STALL_DELAY_DELTA)
+ /* for rsp->jiffies_stall */
+#define RCU_SECONDS_TILL_STALL_RECHECK (30 * HZ + RCU_STALL_DELAY_DELTA)
+ /* for rsp->jiffies_stall */
+#define RCU_STALL_RAT_DELAY 2 /* Allow other CPUs time */
+ /* to take at least one */
+ /* scheduling clock irq */
+ /* before ratting on them. */
#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 464ad2c..79b53bd 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -1010,6 +1010,10 @@
int c = 0;
int thatcpu;
+ /* Check for being in the holdoff period. */
+ if (per_cpu(rcu_dyntick_holdoff, cpu) == jiffies)
+ return rcu_needs_cpu_quick_check(cpu);
+
/* Don't bother unless we are the last non-dyntick-idle CPU. */
for_each_cpu_not(thatcpu, nohz_cpu_mask)
if (thatcpu != cpu) {
@@ -1041,10 +1045,8 @@
}
/* If RCU callbacks are still pending, RCU still needs this CPU. */
- if (c) {
+ if (c)
raise_softirq(RCU_SOFTIRQ);
- per_cpu(rcu_dyntick_holdoff, cpu) = jiffies;
- }
return c;
}
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 3e1fd96..5a5ea2c 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -3476,7 +3476,7 @@
static inline int on_null_domain(int cpu)
{
- return !rcu_dereference(cpu_rq(cpu)->sd);
+ return !rcu_dereference_sched(cpu_rq(cpu)->sd);
}
/*
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index bb53edb..d9062f5 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -27,6 +27,7 @@
#include <linux/ctype.h>
#include <linux/list.h>
#include <linux/hash.h>
+#include <linux/rcupdate.h>
#include <trace/events/sched.h>
@@ -84,18 +85,22 @@
ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub;
ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub;
+/*
+ * Traverse the ftrace_list, invoking all entries. The reason that we
+ * can use rcu_dereference_raw() is that elements removed from this list
+ * are simply leaked, so there is no need to interact with a grace-period
+ * mechanism. The rcu_dereference_raw() calls are needed to handle
+ * concurrent insertions into the ftrace_list.
+ *
+ * Silly Alpha and silly pointer-speculation compiler optimizations!
+ */
static void ftrace_list_func(unsigned long ip, unsigned long parent_ip)
{
- struct ftrace_ops *op = ftrace_list;
-
- /* in case someone actually ports this to alpha! */
- read_barrier_depends();
+ struct ftrace_ops *op = rcu_dereference_raw(ftrace_list); /*see above*/
while (op != &ftrace_list_end) {
- /* silly alpha */
- read_barrier_depends();
op->func(ip, parent_ip);
- op = op->next;
+ op = rcu_dereference_raw(op->next); /*see above*/
};
}
@@ -150,8 +155,7 @@
* the ops->next pointer is valid before another CPU sees
* the ops pointer included into the ftrace_list.
*/
- smp_wmb();
- ftrace_list = ops;
+ rcu_assign_pointer(ftrace_list, ops);
if (ftrace_enabled) {
ftrace_func_t func;
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c
index f0d6930..c1cc3ab 100644
--- a/kernel/trace/trace_event_profile.c
+++ b/kernel/trace/trace_event_profile.c
@@ -138,9 +138,9 @@
cpu = smp_processor_id();
if (in_nmi())
- trace_buf = rcu_dereference(perf_trace_buf_nmi);
+ trace_buf = rcu_dereference_sched(perf_trace_buf_nmi);
else
- trace_buf = rcu_dereference(perf_trace_buf);
+ trace_buf = rcu_dereference_sched(perf_trace_buf);
if (!trace_buf)
goto err;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index bda230e..643f66e 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1756,10 +1756,12 @@
if (!new)
return ERR_PTR(-ENOMEM);
+ rcu_read_lock();
if (current_cpuset_is_being_rebound()) {
nodemask_t mems = cpuset_mems_allowed(current);
mpol_rebind_policy(old, &mems);
}
+ rcu_read_unlock();
*new = *old;
atomic_set(&new->refcnt, 1);
return new;