percpu-rw-semaphores: use rcu_read_lock_sched

Use rcu_read_lock_sched / rcu_read_unlock_sched / synchronize_sched
instead of rcu_read_lock / rcu_read_unlock / synchronize_rcu.

This is an optimization. The RCU-protected region is very small, so
there will be no latency problems if we disable preemption in this
region.

So we use rcu_read_lock_sched / rcu_read_unlock_sched, which translate
to preempt_disable / preempt_enable. This pair is smaller (and
supposedly faster) than the preemptible rcu_read_lock /
rcu_read_unlock.
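
For illustration, on a kernel without preemptible RCU the sched
variants reduce to roughly the following (a simplified sketch, not the
exact kernel source, which also carries lockdep and debug
annotations):

	static inline void rcu_read_lock_sched(void)
	{
		preempt_disable();	/* reader section: no preemption, no sleeping */
	}

	static inline void rcu_read_unlock_sched(void)
	{
		preempt_enable();	/* end of reader section */
	}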

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h
index 18f35b5..250a4ac 100644
--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -17,16 +17,16 @@
 
 static inline void percpu_down_read(struct percpu_rw_semaphore *p)
 {
-	rcu_read_lock();
+	rcu_read_lock_sched();
 	if (unlikely(p->locked)) {
-		rcu_read_unlock();
+		rcu_read_unlock_sched();
 		mutex_lock(&p->mtx);
 		this_cpu_inc(*p->counters);
 		mutex_unlock(&p->mtx);
 		return;
 	}
 	this_cpu_inc(*p->counters);
-	rcu_read_unlock();
+	rcu_read_unlock_sched();
 	light_mb(); /* A, between read of p->locked and read of data, paired with D */
 }
 
@@ -51,7 +51,7 @@
 {
 	mutex_lock(&p->mtx);
 	p->locked = true;
-	synchronize_rcu();
+	synchronize_sched(); /* make sure that all readers exit the rcu_read_lock_sched region */
 	while (__percpu_count(p->counters))
 		msleep(1);
 	heavy_mb(); /* C, between read of p->counter and write to data, paired with B */
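
synchronize_sched() is sufficient here because it returns only after
every CPU has passed through a context switch, and a context switch
cannot happen inside a preempt-disabled (rcu_read_lock_sched) region.
Once it returns, every reader that saw p->locked == false has already
executed its this_cpu_inc() and left the RCU region, so the counter
sum the writer then drains is meaningful. A minimal usage sketch,
assuming a semaphore set up with percpu_init_rwsem(); the names
cfg_sem, shared_config, read_config and update_config are
illustrative, not part of this patch:

	#include <linux/percpu-rwsem.h>

	static struct percpu_rw_semaphore cfg_sem;	/* percpu_init_rwsem(&cfg_sem) at setup */
	static int shared_config;			/* data protected by cfg_sem */

	static int read_config(void)
	{
		int val;

		/* Fast path: a per-CPU counter increment inside a short
		 * preempt-disabled section; no shared cacheline is
		 * written, so readers scale across CPUs. */
		percpu_down_read(&cfg_sem);
		val = shared_config;
		percpu_up_read(&cfg_sem);
		return val;
	}

	static void update_config(int val)
	{
		/* Slow path: sets ->locked, waits via synchronize_sched()
		 * for readers to leave the rcu_read_lock_sched region,
		 * then spins until the per-CPU counters drain to zero. */
		percpu_down_write(&cfg_sem);
		shared_config = val;
		percpu_up_write(&cfg_sem);
	}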