rcu: Move _rcu_barrier()'s rcu_head structures to rcu_data structures

In order for multiple flavors of RCU to each concurrently run one
rcu_barrier(), each flavor needs its own per-CPU set of rcu_head
structures.  This commit therefore moves _rcu_barrier()'s set of
per-CPU rcu_head structures from per-CPU variables to the existing
per-CPU and per-RCU-flavor rcu_data structures.

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 00c518f..1e55259 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -157,7 +157,6 @@
 
 /* State information for rcu_barrier() and friends. */
 
-static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL};
 static atomic_t rcu_barrier_cpu_count;
 static DEFINE_MUTEX(rcu_barrier_mutex);
 static struct completion rcu_barrier_completion;
@@ -2282,12 +2281,11 @@
  */
 static void rcu_barrier_func(void *type)
 {
-	int cpu = smp_processor_id();
-	struct rcu_head *head = &per_cpu(rcu_barrier_head, cpu);
 	struct rcu_state *rsp = type;
+	struct rcu_data *rdp = __this_cpu_ptr(rsp->rda);
 
 	atomic_inc(&rcu_barrier_cpu_count);
-	rsp->call(head, rcu_barrier_callback);
+	rsp->call(&rdp->barrier_head, rcu_barrier_callback);
 }
 
 /*
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 049896a..586d93c 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -314,6 +314,9 @@
 	unsigned long n_rp_need_fqs;
 	unsigned long n_rp_need_nothing;
 
+	/* 6) _rcu_barrier() callback. */
+	struct rcu_head barrier_head;
+
 	int cpu;
 	struct rcu_state *rsp;
 };