[PATCH] rcu: simplify/improve batch tuning

Kill the hard-to-calculate 'rsinterval' boot parameter and the per-cpu
rcu_data.last_rs_qlen.  Instead, add a flag rcu_ctrlblk.signaled, which
records the fact that one of the CPUs has sent a resched IPI since the
last rcu_start_batch().

Roughly speaking, we need two rcu_start_batch()s in order to move callbacks
from ->nxtlist to ->donelist.  This means that when ->qlen exceeds qhimark
and continues to grow, we should send a resched IPI, and then do it again
after we have gone through a quiescent state.

On the other hand, if an IPI has already been sent, we don't need to send
another one when a different CPU detects that the queue has overflowed.
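
A minimal sketch of the resulting flow, simplified from the diff below (the
hunk touching set_need_resched() is in force_quiescent_state()); locking,
the don't-IPI-self check and the IPI loop itself are left out:

	static void force_quiescent_state(struct rcu_data *rdp,
					  struct rcu_ctrlblk *rcp)
	{
		set_need_resched();
		if (unlikely(!rcp->signaled)) {
			/* first CPU to see the overflow since the last batch */
			rcp->signaled = 1;
			/* ... send resched IPIs to the other online CPUs ... */
		}
	}

	static void rcu_start_batch(struct rcu_ctrlblk *rcp)
	{
		/* ... start the next batch ... */
		rcp->signaled = 0;	/* re-arm: at most one IPI round per batch */
	}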

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Acked-by: Paul E. McKenney <paulmck@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 12b3b24..e1543a3 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1357,10 +1357,6 @@
 	rcu.qlowmark=	[KNL,BOOT] Set threshold of queued
 			RCU callbacks below which batch limiting is re-enabled.
 
-	rcu.rsinterval=	[KNL,BOOT,SMP] Set the number of additional
-			RCU callbacks to queued before forcing reschedule
-			on all cpus.
-
 	rdinit=		[KNL]
 			Format: <full_path>
 			Run specified binary instead of /init from the ramdisk,
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index b4ca73d..f6dd71b 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -66,6 +66,8 @@
 	long	completed;	/* Number of the last completed batch         */
 	int	next_pending;	/* Is the next batch already waiting?         */
 
+	int	signaled;	/* Resched IPI sent since last batch start?   */
+
 	spinlock_t	lock	____cacheline_internodealigned_in_smp;
 	cpumask_t	cpumask; /* CPUs that need to switch in order    */
 	                         /* for current batch to proceed.        */
@@ -106,9 +108,6 @@
 	long		blimit;		 /* Upper limit on a processed batch */
 	int cpu;
 	struct rcu_head barrier;
-#ifdef CONFIG_SMP
-	long		last_rs_qlen;	 /* qlen during the last resched */
-#endif
 };
 
 DECLARE_PER_CPU(struct rcu_data, rcu_data);
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 523e464..26bb5ff 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -71,9 +71,6 @@
 static int blimit = 10;
 static int qhimark = 10000;
 static int qlowmark = 100;
-#ifdef CONFIG_SMP
-static int rsinterval = 1000;
-#endif
 
 static atomic_t rcu_barrier_cpu_count;
 static DEFINE_MUTEX(rcu_barrier_mutex);
@@ -86,8 +83,8 @@
 	int cpu;
 	cpumask_t cpumask;
 	set_need_resched();
-	if (unlikely(rdp->qlen - rdp->last_rs_qlen > rsinterval)) {
-		rdp->last_rs_qlen = rdp->qlen;
+	if (unlikely(!rcp->signaled)) {
+		rcp->signaled = 1;
 		/*
 		 * Don't send IPI to itself. With irqs disabled,
 		 * rdp->cpu is the current cpu.
@@ -301,6 +298,7 @@
 		smp_mb();
 		cpus_andnot(rcp->cpumask, cpu_online_map, nohz_cpu_mask);
 
+		rcp->signaled = 0;
 	}
 }
 
@@ -628,9 +626,6 @@
 module_param(blimit, int, 0);
 module_param(qhimark, int, 0);
 module_param(qlowmark, int, 0);
-#ifdef CONFIG_SMP
-module_param(rsinterval, int, 0);
-#endif
 EXPORT_SYMBOL_GPL(rcu_batches_completed);
 EXPORT_SYMBOL_GPL(rcu_batches_completed_bh);
 EXPORT_SYMBOL_GPL(call_rcu);