Blame - kernel/rcupreempt.c - LeafOS-Devices/android_kernel_realme_mt6785

blob: 59236e8b9daa38e1e92a709e769fa75d857bb41e [file] [log] [blame]

Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	1	/*
				2	* Read-Copy Update mechanism for mutual exclusion, realtime implementation
				3	*
				4	* This program is free software; you can redistribute it and/or modify
				5	* it under the terms of the GNU General Public License as published by
				6	* the Free Software Foundation; either version 2 of the License, or
				7	* (at your option) any later version.
				8	*
				9	* This program is distributed in the hope that it will be useful,
				10	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				11	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				12	* GNU General Public License for more details.
				13	*
				14	* You should have received a copy of the GNU General Public License
				15	* along with this program; if not, write to the Free Software
				16	* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
				17	*
				18	* Copyright IBM Corporation, 2006
				19	*
				20	* Authors: Paul E. McKenney <paulmck@us.ibm.com>
				21	* With thanks to Esben Nielsen, Bill Huey, and Ingo Molnar
				22	* for pushing me away from locks and towards counters, and
				23	* to Suparna Bhattacharya for pushing me completely away
				24	* from atomic instructions on the read side.
				25	*
Steven Rostedt	2232c2d	2008-02-29 18:46:50 +0100	[diff] [blame]	26	* - Added handling of Dynamic Ticks
				27	* Copyright 2007 - Paul E. Mckenney <paulmck@us.ibm.com>
				28	* - Steven Rostedt <srostedt@redhat.com>
				29	*
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	30	* Papers: http://www.rdrop.com/users/paulmck/RCU
				31	*
				32	* Design Document: http://lwn.net/Articles/253651/
				33	*
				34	* For detailed explanation of Read-Copy Update mechanism see -
				35	* Documentation/RCU/ *.txt
				36	*
				37	*/
				38	#include <linux/types.h>
				39	#include <linux/kernel.h>
				40	#include <linux/init.h>
				41	#include <linux/spinlock.h>
				42	#include <linux/smp.h>
				43	#include <linux/rcupdate.h>
				44	#include <linux/interrupt.h>
				45	#include <linux/sched.h>
				46	#include <asm/atomic.h>
				47	#include <linux/bitops.h>
				48	#include <linux/module.h>
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	49	#include <linux/kthread.h>
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	50	#include <linux/completion.h>
				51	#include <linux/moduleparam.h>
				52	#include <linux/percpu.h>
				53	#include <linux/notifier.h>
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	54	#include <linux/cpu.h>
				55	#include <linux/random.h>
				56	#include <linux/delay.h>
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	57	#include <linux/cpumask.h>
				58	#include <linux/rcupreempt_trace.h>
Harvey Harrison	1a651a0	2008-10-18 20:28:37 -0700	[diff] [blame]	59	#include <asm/byteorder.h>
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	60
				61	/*
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	62	* PREEMPT_RCU data structures.
				63	*/
				64
				65	/*
				66	* GP_STAGES specifies the number of times the state machine has
				67	* to go through all of the rcu_try_flip_states (see below)
				68	* in a single Grace Period.
				69	*
				70	* GP in GP_STAGES stands for Grace Period ;)
				71	*/
				72	#define GP_STAGES 2
				73	struct rcu_data {
				74	spinlock_t lock; /* Protect rcu_data fields. */
				75	long completed; /* Number of last completed batch. */
				76	int waitlistcount; /* Non-empty wait stages, set by __rcu_advance_callbacks(). */
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	77	struct rcu_head *nextlist;
				78	struct rcu_head **nexttail;
				79	struct rcu_head *waitlist[GP_STAGES];
				80	struct rcu_head **waittail[GP_STAGES];
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	81	struct rcu_head *donelist; /* from waitlist & waitschedlist */
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	82	struct rcu_head **donetail;
				83	long rcu_flipctr[2]; /* Read-side counters, indexed by low bit of rcu_ctrlblk.completed. */
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	84	struct rcu_head *nextschedlist;
				85	struct rcu_head **nextschedtail;
				86	struct rcu_head *waitschedlist;
				87	struct rcu_head **waitschedtail;
				88	int rcu_sched_sleeping;
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	89	#ifdef CONFIG_RCU_TRACE
				90	struct rcupreempt_trace trace;
				91	#endif /* #ifdef CONFIG_RCU_TRACE */
				92	};
				93
				94	/*
				95	* States for rcu_try_flip() and friends.
				96	*/
				97
				98	enum rcu_try_flip_states {
				99
				100	/*
				101	* Stay here if nothing is happening. Flip the counter if something
				102	* starts happening. Denoted by "I"
				103	*/
				104	rcu_try_flip_idle_state,
				105
				106	/*
				107	* Wait here for all CPUs to notice that the counter has flipped. This
				108	* prevents the old set of counters from ever being incremented once
				109	* we leave this state, which in turn is necessary because we cannot
				110	* test any individual counter for zero -- we can only check the sum.
				111	* Denoted by "A".
				112	*/
				113	rcu_try_flip_waitack_state,
				114
				115	/*
				116	* Wait here for the sum of the old per-CPU counters to reach zero.
				117	* Denoted by "Z".
				118	*/
				119	rcu_try_flip_waitzero_state,
				120
				121	/*
				122	* Wait here for each of the other CPUs to execute a memory barrier.
				123	* This is necessary to ensure that these other CPUs really have
				124	* completed executing their RCU read-side critical sections, despite
				125	* their CPUs wildly reordering memory. Denoted by "M".
				126	*/
				127	rcu_try_flip_waitmb_state,
				128	};
				129
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	130	/*
				131	* States for rcu_ctrlblk.sched_sleep.
				132	*/
				133
				134	enum rcu_sched_sleep_states {
				135	rcu_sched_not_sleeping, /* Not sleeping, callbacks need GP. */
				136	rcu_sched_sleep_prep, /* Thinking of sleeping, rechecking. */
				137	rcu_sched_sleeping, /* Sleeping, awaken if GP needed. */
				138	};
				139
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	140	struct rcu_ctrlblk {
				141	spinlock_t fliplock; /* Protect state-machine transitions. */
				142	long completed; /* Number of last completed batch. */
				143	enum rcu_try_flip_states rcu_try_flip_state; /* The current state of
				144	the rcu state machine */
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	145	spinlock_t schedlock; /* Protect rcu_sched sleep state. */
				146	enum rcu_sched_sleep_states sched_sleep; /* rcu_sched state. */
				147	wait_queue_head_t sched_wq; /* Place for rcu_sched to sleep. */
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	148	};
				149
				150	static DEFINE_PER_CPU(struct rcu_data, rcu_data);
				151	static struct rcu_ctrlblk rcu_ctrlblk = {
				152	.fliplock = __SPIN_LOCK_UNLOCKED(rcu_ctrlblk.fliplock),
				153	.completed = 0,
				154	.rcu_try_flip_state = rcu_try_flip_idle_state,
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	155	.schedlock = __SPIN_LOCK_UNLOCKED(rcu_ctrlblk.schedlock),
				156	.sched_sleep = rcu_sched_not_sleeping,
				157	.sched_wq = __WAIT_QUEUE_HEAD_INITIALIZER(rcu_ctrlblk.sched_wq),
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	158	};
				159
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	160	static struct task_struct *rcu_sched_grace_period_task;
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	161
				162	#ifdef CONFIG_RCU_TRACE
				163	static char *rcu_try_flip_state_names[] =
				164	{ "idle", "waitack", "waitzero", "waitmb" };
				165	#endif /* #ifdef CONFIG_RCU_TRACE */
				166
Paul E. McKenney	eaf649e	2008-01-25 21:08:25 +0100	[diff] [blame]	167	static cpumask_t rcu_cpu_online_map __read_mostly = CPU_MASK_NONE;
				168
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	169	/*
				170	* Enum and per-CPU flag to determine when each CPU has seen
				171	* the most recent counter flip.
				172	*/
				173
				174	enum rcu_flip_flag_values {
				175	rcu_flip_seen, /* Steady/initial state, last flip seen. */
				176	/* Only GP detector can update. */
				177	rcu_flipped /* Flip just completed, need confirmation. */
				178	/* Only corresponding CPU can update. */
				179	};
				180	static DEFINE_PER_CPU_SHARED_ALIGNED(enum rcu_flip_flag_values, rcu_flip_flag)
				181	= rcu_flip_seen;
				182
				183	/*
				184	* Enum and per-CPU flag to determine when each CPU has executed the
				185	* needed memory barrier to fence in memory references from its last RCU
				186	* read-side critical section in the just-completed grace period.
				187	*/
				188
				189	enum rcu_mb_flag_values {
				190	rcu_mb_done, /* Steady/initial state, no mb()s required. */
				191	/* Only GP detector can update. */
				192	rcu_mb_needed /* Flip just completed, need an mb(). */
				193	/* Only corresponding CPU can update. */
				194	};
				195	static DEFINE_PER_CPU_SHARED_ALIGNED(enum rcu_mb_flag_values, rcu_mb_flag)
				196	= rcu_mb_done;
				197
				198	/*
				199	* RCU_DATA_ME: find the current CPU's rcu_data structure.
				200	* RCU_DATA_CPU: find the specified CPU's rcu_data structure.
				201	*/
				202	#define RCU_DATA_ME() (&__get_cpu_var(rcu_data))
				203	#define RCU_DATA_CPU(cpu) (&per_cpu(rcu_data, cpu))
				204
				205	/*
				206	* Helper macro for tracing when the appropriate rcu_data is not
				207	* cached in a local variable, but where the CPU number is so cached.
				208	*/
				209	#define RCU_TRACE_CPU(f, cpu) RCU_TRACE(f, &(RCU_DATA_CPU(cpu)->trace));
				210
				211	/*
				212	* Helper macro for tracing when the appropriate rcu_data is not
				213	* cached in a local variable.
				214	*/
				215	#define RCU_TRACE_ME(f) RCU_TRACE(f, &(RCU_DATA_ME()->trace));
				216
				217	/*
				218	* Helper macro for tracing when the appropriate rcu_data is pointed
				219	* to by a local variable.
				220	*/
				221	#define RCU_TRACE_RDP(f, rdp) RCU_TRACE(f, &((rdp)->trace));
				222
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	223	#define RCU_SCHED_BATCH_TIME (HZ / 50)
				224
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	225	/*
				226	* Return the number of RCU batches processed thus far. Useful
				227	* for debug and statistics.
				228	*/
				229	long rcu_batches_completed(void)
				230	{
				231	return rcu_ctrlblk.completed;
				232	}
				233	EXPORT_SYMBOL_GPL(rcu_batches_completed);
				234
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	235	void __rcu_read_lock(void)
				236	{
				237	int idx;
				238	struct task_struct *t = current;
				239	int nesting;
				240
				241	nesting = ACCESS_ONCE(t->rcu_read_lock_nesting);
				242	if (nesting != 0) {
				243
				244	/* An earlier rcu_read_lock() covers us, just count it. */
				245
				246	t->rcu_read_lock_nesting = nesting + 1;
				247
				248	} else {
				249	unsigned long flags;
				250
				251	/*
				252	* We disable interrupts for the following reasons:
				253	* - If we get scheduling clock interrupt here, and we
				254	* end up acking the counter flip, it's like a promise
				255	* that we will never increment the old counter again.
				256	* Thus we will break that promise if that
				257	* scheduling clock interrupt happens between the time
				258	* we pick the .completed field and the time that we
				259	* increment our counter.
				260	*
				261	* - We don't want to be preempted out here.
				262	*
				263	* NMIs can still occur, of course, and might themselves
				264	* contain rcu_read_lock().
				265	*/
				266
				267	local_irq_save(flags);
				268
				269	/*
				270	* Outermost nesting of rcu_read_lock(), so increment
				271	* the current counter for the current CPU. Use volatile
				272	* casts to prevent the compiler from reordering.
				273	*/
				274
				275	idx = ACCESS_ONCE(rcu_ctrlblk.completed) & 0x1;
				276	ACCESS_ONCE(RCU_DATA_ME()->rcu_flipctr[idx])++;
				277
				278	/*
				279	* Now that the per-CPU counter has been incremented, we
				280	* are protected from races with rcu_read_lock() invoked
				281	* from NMI handlers on this CPU. We can therefore safely
				282	* increment the nesting counter, relieving further NMIs
				283	* of the need to increment the per-CPU counter.
				284	*/
				285
				286	ACCESS_ONCE(t->rcu_read_lock_nesting) = nesting + 1;
				287
				288	/*
				289	* Now that we have prevented any NMIs from storing
				290	* to the ->rcu_flipctr_idx, we can safely use it to
				291	* remember which counter to decrement in the matching
				292	* rcu_read_unlock().
				293	*/
				294
				295	ACCESS_ONCE(t->rcu_flipctr_idx) = idx;
				296	local_irq_restore(flags);
				297	}
				298	}
				299	EXPORT_SYMBOL_GPL(__rcu_read_lock);
				300
				301	void __rcu_read_unlock(void)
				302	{
				303	int idx;
				304	struct task_struct *t = current;
				305	int nesting;
				306
				307	nesting = ACCESS_ONCE(t->rcu_read_lock_nesting);
				308	if (nesting > 1) {
				309
				310	/*
				311	* We are still protected by the enclosing rcu_read_lock(),
				312	* so simply decrement the counter.
				313	*/
				314
				315	t->rcu_read_lock_nesting = nesting - 1;
				316
				317	} else {
				318	unsigned long flags;
				319
				320	/*
				321	* Disable local interrupts to prevent the grace-period
				322	* detection state machine from seeing us half-done.
				323	* NMIs can still occur, of course, and might themselves
				324	* contain rcu_read_lock() and rcu_read_unlock().
				325	*/
				326
				327	local_irq_save(flags);
				328
				329	/*
				330	* Outermost nesting of rcu_read_unlock(), so we must
				331	* decrement the current counter for the current CPU.
				332	* This must be done carefully, because NMIs can
				333	* occur at any point in this code, and any rcu_read_lock()
				334	* and rcu_read_unlock() pairs in the NMI handlers
				335	* must interact non-destructively with this code.
				336	* Lots of volatile casts, and -very- careful ordering.
				337	*
				338	* Changes to this code, including this one, must be
				339	* inspected, validated, and tested extremely carefully!!!
				340	*/
				341
				342	/*
				343	* First, pick up the index.
				344	*/
				345
				346	idx = ACCESS_ONCE(t->rcu_flipctr_idx);
				347
				348	/*
				349	* Now that we have fetched the counter index, it is
				350	* safe to decrement the per-task RCU nesting counter.
				351	* After this, any interrupts or NMIs will increment and
				352	* decrement the per-CPU counters.
				353	*/
				354	ACCESS_ONCE(t->rcu_read_lock_nesting) = nesting - 1;
				355
				356	/*
				357	* The task's nesting count has now been decremented, so
				358	* NMIs that occur from here on will take the "else"
				359	* clause of __rcu_read_lock() and will thus increment
				360	* the per-CPU counter on their own. They will also
				361	* clobber ->rcu_flipctr_idx, but that is OK, since we
				362	* have already fetched it into idx.
				363	*/
				364
				365	ACCESS_ONCE(RCU_DATA_ME()->rcu_flipctr[idx])--;
				366	local_irq_restore(flags);
				367	}
				368	}
				369	EXPORT_SYMBOL_GPL(__rcu_read_unlock);
				370
				371	/*
				372	* If a global counter flip has occurred since the last time that we
				373	* advanced callbacks, advance them. Hardware interrupts must be
				374	* disabled when calling this function.
				375	*/
				376	static void __rcu_advance_callbacks(struct rcu_data *rdp)
				377	{
				378	int cpu;
				379	int i;
				380	int wlc = 0;
				381
				382	if (rdp->completed != rcu_ctrlblk.completed) {
				383	if (rdp->waitlist[GP_STAGES - 1] != NULL) {
				384	*rdp->donetail = rdp->waitlist[GP_STAGES - 1];
				385	rdp->donetail = rdp->waittail[GP_STAGES - 1];
				386	RCU_TRACE_RDP(rcupreempt_trace_move2done, rdp);
				387	}
				388	for (i = GP_STAGES - 2; i >= 0; i--) {
				389	if (rdp->waitlist[i] != NULL) {
				390	rdp->waitlist[i + 1] = rdp->waitlist[i];
				391	rdp->waittail[i + 1] = rdp->waittail[i];
				392	wlc++;
				393	} else {
				394	rdp->waitlist[i + 1] = NULL;
				395	rdp->waittail[i + 1] =
				396	&rdp->waitlist[i + 1];
				397	}
				398	}
				399	if (rdp->nextlist != NULL) {
				400	rdp->waitlist[0] = rdp->nextlist;
				401	rdp->waittail[0] = rdp->nexttail;
				402	wlc++;
				403	rdp->nextlist = NULL;
				404	rdp->nexttail = &rdp->nextlist;
				405	RCU_TRACE_RDP(rcupreempt_trace_move2wait, rdp);
				406	} else {
				407	rdp->waitlist[0] = NULL;
				408	rdp->waittail[0] = &rdp->waitlist[0];
				409	}
				410	rdp->waitlistcount = wlc;
				411	rdp->completed = rcu_ctrlblk.completed;
				412	}
				413
				414	/*
				415	* Check to see if this CPU needs to report that it has seen
				416	* the most recent counter flip, thereby declaring that all
				417	* subsequent rcu_read_lock() invocations will respect this flip.
				418	*/
				419
				420	cpu = raw_smp_processor_id();
				421	if (per_cpu(rcu_flip_flag, cpu) == rcu_flipped) {
				422	smp_mb(); /* Subsequent counter accesses must see new value */
				423	per_cpu(rcu_flip_flag, cpu) = rcu_flip_seen;
				424	smp_mb(); /* Subsequent RCU read-side critical sections */
				425	/* seen -after- acknowledgement. */
				426	}
				427	}
				428
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	429	DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_dyntick_sched, rcu_dyntick_sched) = {
				430	.dynticks = 1,
				431	};
Steven Rostedt	2232c2d	2008-02-29 18:46:50 +0100	[diff] [blame]	432
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	433	#ifdef CONFIG_NO_HZ
Steven Rostedt	2232c2d	2008-02-29 18:46:50 +0100	[diff] [blame]	434	static DEFINE_PER_CPU(int, rcu_update_flag);
				435
				436	/**
				437	* rcu_irq_enter - Called from Hard irq handlers and NMI/SMI.
				438	*
				439	* If the CPU was idle with dynamic ticks active, this updates the
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	440	* rcu_dyntick_sched.dynticks to let the RCU handling know that the
Steven Rostedt	2232c2d	2008-02-29 18:46:50 +0100	[diff] [blame]	441	* CPU is active.
				442	*/
				443	void rcu_irq_enter(void)
				444	{
				445	int cpu = smp_processor_id();
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	446	struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
Steven Rostedt	2232c2d	2008-02-29 18:46:50 +0100	[diff] [blame]	447
				448	if (per_cpu(rcu_update_flag, cpu))
				449	per_cpu(rcu_update_flag, cpu)++;
				450
				451	/*
				452	* Only update if we are coming from a stopped ticks mode
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	453	* (rcu_dyntick_sched.dynticks is even).
Steven Rostedt	2232c2d	2008-02-29 18:46:50 +0100	[diff] [blame]	454	*/
				455	if (!in_interrupt() &&
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	456	(rdssp->dynticks & 0x1) == 0) {
Steven Rostedt	2232c2d	2008-02-29 18:46:50 +0100	[diff] [blame]	457	/*
				458	* The following might seem like we could have a race
				459	* with NMI/SMIs. But this really isn't a problem.
				460	* Here we do a read/modify/write, and the race happens
				461	* when an NMI/SMI comes in after the read and before
				462	* the write. But NMI/SMIs will increment this counter
				463	* twice before returning, so the zero bit will not
				464	* be corrupted by the NMI/SMI which is the most important
				465	* part.
				466	*
				467	* The only thing is that we would bring back the counter
				468	* to a position that it was in during the NMI/SMI.
				469	* But the zero bit would be set, so the rest of the
				470	* counter would again be ignored.
				471	*
				472	* On return from the IRQ, the counter may have the zero
				473	* bit be 0 and the counter the same as the return from
				474	* the NMI/SMI. If the state machine was so unlucky to
				475	* see that, it still doesn't matter, since all
				476	* RCU read-side critical sections on this CPU would
				477	* have already completed.
				478	*/
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	479	rdssp->dynticks++;
Steven Rostedt	2232c2d	2008-02-29 18:46:50 +0100	[diff] [blame]	480	/*
				481	* The following memory barrier ensures that any
				482	* rcu_read_lock() primitives in the irq handler
				483	* are seen by other CPUs to follow the above
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	484	* increment to rcu_dyntick_sched.dynticks. This is
Steven Rostedt	2232c2d	2008-02-29 18:46:50 +0100	[diff] [blame]	485	* required in order for other CPUs to correctly
				486	* determine when it is safe to advance the RCU
				487	* grace-period state machine.
				488	*/
				489	smp_mb(); /* see above block comment. */
				490	/*
				491	* Since we can't determine the dynamic tick mode from
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	492	* the rcu_dyntick_sched.dynticks after this routine,
Steven Rostedt	2232c2d	2008-02-29 18:46:50 +0100	[diff] [blame]	493	* we use a second flag to acknowledge that we came
				494	* from an idle state with ticks stopped.
				495	*/
				496	per_cpu(rcu_update_flag, cpu)++;
				497	/*
				498	* If we take an NMI/SMI now, they will also increment
				499	* the rcu_update_flag, and will not update the
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	500	* rcu_dyntick_sched.dynticks on exit. That is for
Steven Rostedt	2232c2d	2008-02-29 18:46:50 +0100	[diff] [blame]	501	* this IRQ to do.
				502	*/
				503	}
				504	}
				505
				506	/**
				507	* rcu_irq_exit - Called from exiting Hard irq context.
				508	*
				509	* If the CPU was idle with dynamic ticks active, update the
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	510	* rcu_dyntick_sched.dynticks to let the RCU handling be
Steven Rostedt	2232c2d	2008-02-29 18:46:50 +0100	[diff] [blame]	511	* aware that the CPU is going back to idle with no ticks.
				512	*/
				513	void rcu_irq_exit(void)
				514	{
				515	int cpu = smp_processor_id();
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	516	struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
Steven Rostedt	2232c2d	2008-02-29 18:46:50 +0100	[diff] [blame]	517
				518	/*
				519	* rcu_update_flag is set if we interrupted the CPU
				520	* when it was idle with ticks stopped.
				521	* Once this occurs, we keep track of interrupt nesting
				522	* because a NMI/SMI could also come in, and we still
				523	* only want the IRQ that started the increment of the
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	524	* rcu_dyntick_sched.dynticks to be the one that modifies
Steven Rostedt	2232c2d	2008-02-29 18:46:50 +0100	[diff] [blame]	525	* it on exit.
				526	*/
				527	if (per_cpu(rcu_update_flag, cpu)) {
				528	if (--per_cpu(rcu_update_flag, cpu))
				529	return;
				530
				531	/* This must match the interrupt nesting */
				532	WARN_ON(in_interrupt());
				533
				534	/*
				535	* If an NMI/SMI happens now we are still
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	536	* protected by the rcu_dyntick_sched.dynticks being odd.
Steven Rostedt	2232c2d	2008-02-29 18:46:50 +0100	[diff] [blame]	537	*/
				538
				539	/*
				540	* The following memory barrier ensures that any
				541	* rcu_read_unlock() primitives in the irq handler
				542	* are seen by other CPUs to precede the following
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	543	* increment to rcu_dyntick_sched.dynticks. This
Steven Rostedt	2232c2d	2008-02-29 18:46:50 +0100	[diff] [blame]	544	* is required in order for other CPUs to determine
				545	* when it is safe to advance the RCU grace-period
				546	* state machine.
				547	*/
				548	smp_mb(); /* see above block comment. */
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	549	rdssp->dynticks++;
				550	WARN_ON(rdssp->dynticks & 0x1);
Steven Rostedt	2232c2d	2008-02-29 18:46:50 +0100	[diff] [blame]	551	}
				552	}
				553
				554	static void dyntick_save_progress_counter(int cpu)
				555	{
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	556	struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
				557
				558	rdssp->dynticks_snap = rdssp->dynticks;
Steven Rostedt	2232c2d	2008-02-29 18:46:50 +0100	[diff] [blame]	559	}
				560
				561	static inline int
				562	rcu_try_flip_waitack_needed(int cpu)
				563	{
				564	long curr;
				565	long snap;
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	566	struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
Steven Rostedt	2232c2d	2008-02-29 18:46:50 +0100	[diff] [blame]	567
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	568	curr = rdssp->dynticks;
				569	snap = rdssp->dynticks_snap;
Steven Rostedt	2232c2d	2008-02-29 18:46:50 +0100	[diff] [blame]	570	smp_mb(); /* force ordering with cpu entering/leaving dynticks. */
				571
				572	/*
				573	* If the CPU remained in dynticks mode for the entire time
				574	* and didn't take any interrupts, NMIs, SMIs, or whatever,
				575	* then it cannot be in the middle of an rcu_read_lock(), so
				576	* the next rcu_read_lock() it executes must use the new value
				577	* of the counter. So we can safely pretend that this CPU
				578	* already acknowledged the counter.
				579	*/
				580
				581	if ((curr == snap) && ((curr & 0x1) == 0))
				582	return 0;
				583
				584	/*
				585	* If the CPU passed through or entered a dynticks idle phase with
				586	* no active irq handlers, then, as above, we can safely pretend
				587	* that this CPU already acknowledged the counter.
				588	*/
				589
Paul E. McKenney	d7c0651	2008-05-12 21:21:06 +0200	[diff] [blame]	590	if ((curr - snap) > 2 || (curr & 0x1) == 0)
Steven Rostedt	2232c2d	2008-02-29 18:46:50 +0100	[diff] [blame]	591	return 0;
				592
				593	/* We need this CPU to explicitly acknowledge the counter flip. */
				594
				595	return 1;
				596	}
				597
				598	static inline int
				599	rcu_try_flip_waitmb_needed(int cpu)
				600	{
				601	long curr;
				602	long snap;
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	603	struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
Steven Rostedt	2232c2d	2008-02-29 18:46:50 +0100	[diff] [blame]	604
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	605	curr = rdssp->dynticks;
				606	snap = rdssp->dynticks_snap;
Steven Rostedt	2232c2d	2008-02-29 18:46:50 +0100	[diff] [blame]	607	smp_mb(); /* force ordering with cpu entering/leaving dynticks. */
				608
				609	/*
				610	* If the CPU remained in dynticks mode for the entire time
				611	* and didn't take any interrupts, NMIs, SMIs, or whatever,
				612	* then it cannot have executed an RCU read-side critical section
				613	* during that time, so there is no need for it to execute a
				614	* memory barrier.
				615	*/
				616
				617	if ((curr == snap) && ((curr & 0x1) == 0))
				618	return 0;
				619
				620	/*
				621	* If the CPU either entered or exited an outermost interrupt,
				622	* SMI, NMI, or whatever handler, then we know that it executed
				623	* a memory barrier when doing so. So we don't need another one.
				624	*/
				625	if (curr != snap)
				626	return 0;
				627
				628	/* We need the CPU to execute a memory barrier. */
				629
				630	return 1;
				631	}
				632
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	633	static void dyntick_save_progress_counter_sched(int cpu)
				634	{
				635	struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
				636
				637	rdssp->sched_dynticks_snap = rdssp->dynticks;
				638	}
				639
				640	static int rcu_qsctr_inc_needed_dyntick(int cpu)
				641	{
				642	long curr;
				643	long snap;
				644	struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
				645
				646	curr = rdssp->dynticks;
				647	snap = rdssp->sched_dynticks_snap;
				648	smp_mb(); /* force ordering with cpu entering/leaving dynticks. */
				649
				650	/*
				651	* If the CPU remained in dynticks mode for the entire time
				652	* and didn't take any interrupts, NMIs, SMIs, or whatever,
				653	* then it cannot be in the middle of an rcu_read_lock(), so
				654	* the next rcu_read_lock() it executes must use the new value
				655	* of the counter. Therefore, this CPU has been in a quiescent
				656	* state the entire time, and we don't need to wait for it.
				657	*/
				658
				659	if ((curr == snap) && ((curr & 0x1) == 0))
				660	return 0;
				661
				662	/*
				663	* If the CPU passed through or entered a dynticks idle phase with
				664	* no active irq handlers, then, as above, this CPU has already
				665	* passed through a quiescent state.
				666	*/
				667
				668	if ((curr - snap) > 2 || (snap & 0x1) == 0)
				669	return 0;
				670
				671	/* We need this CPU to go through a quiescent state. */
				672
				673	return 1;
				674	}
				675
Steven Rostedt	2232c2d	2008-02-29 18:46:50 +0100	[diff] [blame]	676	#else /* !CONFIG_NO_HZ */
				677
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	678	# define dyntick_save_progress_counter(cpu) do { } while (0)
				679	# define rcu_try_flip_waitack_needed(cpu) (1)
				680	# define rcu_try_flip_waitmb_needed(cpu) (1)
				681
				682	# define dyntick_save_progress_counter_sched(cpu) do { } while (0)
				683	# define rcu_qsctr_inc_needed_dyntick(cpu) (1)
Steven Rostedt	2232c2d	2008-02-29 18:46:50 +0100	[diff] [blame]	684
				685	#endif /* CONFIG_NO_HZ */
				686
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	687	static void save_qsctr_sched(int cpu)
				688	{
				689	struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
				690
				691	rdssp->sched_qs_snap = rdssp->sched_qs;
				692	}
				693
				694	static inline int rcu_qsctr_inc_needed(int cpu)
				695	{
				696	struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
				697
				698	/*
				699	* If there has been a quiescent state, no more need to wait
				700	* on this CPU.
				701	*/
				702
				703	if (rdssp->sched_qs != rdssp->sched_qs_snap) {
				704	smp_mb(); /* force ordering with cpu entering schedule(). */
				705	return 0;
				706	}
				707
				708	/* We need this CPU to go through a quiescent state. */
				709
				710	return 1;
				711	}
				712
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	713	/*
				714	* Get here when RCU is idle. Decide whether we need to
				715	* move out of idle state, and return non-zero if so.
				716	* "Straightforward" approach for the moment, might later
				717	* use callback-list lengths, grace-period duration, or
				718	* some such to determine when to exit idle state.
				719	* Might also need a pre-idle test that does not acquire
				720	* the lock, but let's get the simple case working first...
				721	*/
				722
				723	static int
				724	rcu_try_flip_idle(void)
				725	{
				726	int cpu;
				727
				728	RCU_TRACE_ME(rcupreempt_trace_try_flip_i1);
				729	if (!rcu_pending(smp_processor_id())) {
				730	RCU_TRACE_ME(rcupreempt_trace_try_flip_ie1);
				731	return 0;
				732	}
				733
				734	/*
				735	* Do the flip.
				736	*/
				737
				738	RCU_TRACE_ME(rcupreempt_trace_try_flip_g1);
				739	rcu_ctrlblk.completed++; /* stands in for rcu_try_flip_g2 */
				740
				741	/*
				742	* Need a memory barrier so that other CPUs see the new
				743	* counter value before they see the subsequent change of all
				744	* the rcu_flip_flag instances to rcu_flipped.
				745	*/
				746
				747	smp_mb(); /* see above block comment. */
				748
				749	/* Now ask each CPU for acknowledgement of the flip. */
				750
Mike Travis	363ab6f	2008-05-12 21:21:13 +0200	[diff] [blame]	751	for_each_cpu_mask_nr(cpu, rcu_cpu_online_map) {
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	752	per_cpu(rcu_flip_flag, cpu) = rcu_flipped;
Steven Rostedt	2232c2d	2008-02-29 18:46:50 +0100	[diff] [blame]	753	dyntick_save_progress_counter(cpu);
				754	}
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	755
				756	return 1;
				757	}
				758
				759	/*
				760	* Wait for CPUs to acknowledge the flip.
				761	*/
				762
				763	static int
				764	rcu_try_flip_waitack(void)
				765	{
				766	int cpu;
				767
				768	RCU_TRACE_ME(rcupreempt_trace_try_flip_a1);
Mike Travis	363ab6f	2008-05-12 21:21:13 +0200	[diff] [blame]	769	for_each_cpu_mask_nr(cpu, rcu_cpu_online_map)
Steven Rostedt	2232c2d	2008-02-29 18:46:50 +0100	[diff] [blame]	770	if (rcu_try_flip_waitack_needed(cpu) &&
				771	per_cpu(rcu_flip_flag, cpu) != rcu_flip_seen) {
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	772	RCU_TRACE_ME(rcupreempt_trace_try_flip_ae1);
				773	return 0;
				774	}
				775
				776	/*
				777	* Make sure our checks above don't bleed into subsequent
				778	* waiting for the sum of the counters to reach zero.
				779	*/
				780
				781	smp_mb(); /* see above block comment. */
				782	RCU_TRACE_ME(rcupreempt_trace_try_flip_a2);
				783	return 1;
				784	}
				785
				786	/*
				787	* Wait for collective ``last'' counter to reach zero,
				788	* then tell all CPUs to do an end-of-grace-period memory barrier.
				789	*/
				790
				791	static int
				792	rcu_try_flip_waitzero(void)
				793	{
				794	int cpu;
				795	int lastidx = !(rcu_ctrlblk.completed & 0x1);
				796	int sum = 0;
				797
				798	/* Check to see if the sum of the "last" counters is zero. */
				799
				800	RCU_TRACE_ME(rcupreempt_trace_try_flip_z1);
Mike Travis	363ab6f	2008-05-12 21:21:13 +0200	[diff] [blame]	801	for_each_cpu_mask_nr(cpu, rcu_cpu_online_map)
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	802	sum += RCU_DATA_CPU(cpu)->rcu_flipctr[lastidx];
				803	if (sum != 0) {
				804	RCU_TRACE_ME(rcupreempt_trace_try_flip_ze1);
				805	return 0;
				806	}
				807
				808	/*
				809	* This ensures that the other CPUs see the call for
				810	* memory barriers -after- the sum to zero has been
				811	* detected here
				812	*/
				813	smp_mb(); /* ^^^^^^^^^^^^ */
				814
				815	/* Call for a memory barrier from each CPU. */
Mike Travis	363ab6f	2008-05-12 21:21:13 +0200	[diff] [blame]	816	for_each_cpu_mask_nr(cpu, rcu_cpu_online_map) {
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	817	per_cpu(rcu_mb_flag, cpu) = rcu_mb_needed;
Steven Rostedt	2232c2d	2008-02-29 18:46:50 +0100	[diff] [blame]	818	dyntick_save_progress_counter(cpu);
				819	}
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	820
				821	RCU_TRACE_ME(rcupreempt_trace_try_flip_z2);
				822	return 1;
				823	}
				824
				825	/*
				826	* Wait for all CPUs to do their end-of-grace-period memory barrier.
				827	* Return 0 once all CPUs have done so.
				828	*/
				829
				830	static int
				831	rcu_try_flip_waitmb(void)
				832	{
				833	int cpu;
				834
				835	RCU_TRACE_ME(rcupreempt_trace_try_flip_m1);
Mike Travis	363ab6f	2008-05-12 21:21:13 +0200	[diff] [blame]	836	for_each_cpu_mask_nr(cpu, rcu_cpu_online_map)
Steven Rostedt	2232c2d	2008-02-29 18:46:50 +0100	[diff] [blame]	837	if (rcu_try_flip_waitmb_needed(cpu) &&
				838	per_cpu(rcu_mb_flag, cpu) != rcu_mb_done) {
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	839	RCU_TRACE_ME(rcupreempt_trace_try_flip_me1);
				840	return 0;
				841	}
				842
				843	smp_mb(); /* Ensure that the above checks precede any following flip. */
				844	RCU_TRACE_ME(rcupreempt_trace_try_flip_m2);
				845	return 1;
				846	}
				847
				848	/*
				849	* Attempt a single flip of the counters. Remember, a single flip does
				850	* -not- constitute a grace period. Instead, the interval between
				851	* at least GP_STAGES consecutive flips is a grace period.
				852	*
				853	* If anyone is nuts enough to run this CONFIG_PREEMPT_RCU implementation
				854	* on a large SMP, they might want to use a hierarchical organization of
				855	* the per-CPU-counter pairs.
				856	*/
				857	static void rcu_try_flip(void)
				858	{
				859	unsigned long flags;
				860
				861	RCU_TRACE_ME(rcupreempt_trace_try_flip_1);
				862	if (unlikely(!spin_trylock_irqsave(&rcu_ctrlblk.fliplock, flags))) {
				863	RCU_TRACE_ME(rcupreempt_trace_try_flip_e1);
				864	return;
				865	}
				866
				867	/*
				868	* Take the next transition(s) through the RCU grace-period
				869	* flip-counter state machine.
				870	*/
				871
				872	switch (rcu_ctrlblk.rcu_try_flip_state) {
				873	case rcu_try_flip_idle_state:
				874	if (rcu_try_flip_idle())
				875	rcu_ctrlblk.rcu_try_flip_state =
				876	rcu_try_flip_waitack_state;
				877	break;
				878	case rcu_try_flip_waitack_state:
				879	if (rcu_try_flip_waitack())
				880	rcu_ctrlblk.rcu_try_flip_state =
				881	rcu_try_flip_waitzero_state;
				882	break;
				883	case rcu_try_flip_waitzero_state:
				884	if (rcu_try_flip_waitzero())
				885	rcu_ctrlblk.rcu_try_flip_state =
				886	rcu_try_flip_waitmb_state;
				887	break;
				888	case rcu_try_flip_waitmb_state:
				889	if (rcu_try_flip_waitmb())
				890	rcu_ctrlblk.rcu_try_flip_state =
				891	rcu_try_flip_idle_state;
				892	}
				893	spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags);
				894	}
				895
				896	/*
				897	* Check to see if this CPU needs to do a memory barrier in order to
				898	* ensure that any prior RCU read-side critical sections have committed
				899	* their counter manipulations and critical-section memory references
				900	* before declaring the grace period to be completed.
				901	*/
				902	static void rcu_check_mb(int cpu)
				903	{
				904	if (per_cpu(rcu_mb_flag, cpu) == rcu_mb_needed) {
				905	smp_mb(); /* Ensure RCU read-side accesses are visible. */
				906	per_cpu(rcu_mb_flag, cpu) = rcu_mb_done;
				907	}
				908	}
				909
				910	void rcu_check_callbacks(int cpu, int user)
				911	{
				912	unsigned long flags;
				913	struct rcu_data *rdp = RCU_DATA_CPU(cpu);
				914
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	915	/*
				916	* If this CPU took its interrupt from user mode or from the
				917	* idle loop, and this is not a nested interrupt, then
				918	* this CPU has to have exited all prior preept-disable
				919	* sections of code. So increment the counter to note this.
				920	*
				921	* The memory barrier is needed to handle the case where
				922	* writes from a preempt-disable section of code get reordered
				923	* into schedule() by this CPU's write buffer. So the memory
				924	* barrier makes sure that the rcu_qsctr_inc() is seen by other
				925	* CPUs to happen after any such write.
				926	*/
				927
				928	if (user \|\|
				929	(idle_cpu(cpu) && !in_softirq() &&
				930	hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
				931	smp_mb(); /* Guard against aggressive schedule(). */
				932	rcu_qsctr_inc(cpu);
				933	}
				934
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	935	rcu_check_mb(cpu);
				936	if (rcu_ctrlblk.completed == rdp->completed)
				937	rcu_try_flip();
				938	spin_lock_irqsave(&rdp->lock, flags);
				939	RCU_TRACE_RDP(rcupreempt_trace_check_callbacks, rdp);
				940	__rcu_advance_callbacks(rdp);
				941	if (rdp->donelist == NULL) {
				942	spin_unlock_irqrestore(&rdp->lock, flags);
				943	} else {
				944	spin_unlock_irqrestore(&rdp->lock, flags);
				945	raise_softirq(RCU_SOFTIRQ);
				946	}
				947	}
				948
				949	/*
				950	* Needed by dynticks, to make sure all RCU processing has finished
				951	* when we go idle:
				952	*/
				953	void rcu_advance_callbacks(int cpu, int user)
				954	{
				955	unsigned long flags;
				956	struct rcu_data *rdp = RCU_DATA_CPU(cpu);
				957
				958	if (rcu_ctrlblk.completed == rdp->completed) {
				959	rcu_try_flip();
				960	if (rcu_ctrlblk.completed == rdp->completed)
				961	return;
				962	}
				963	spin_lock_irqsave(&rdp->lock, flags);
				964	RCU_TRACE_RDP(rcupreempt_trace_check_callbacks, rdp);
				965	__rcu_advance_callbacks(rdp);
				966	spin_unlock_irqrestore(&rdp->lock, flags);
				967	}
				968
Paul E. McKenney	eaf649e	2008-01-25 21:08:25 +0100	[diff] [blame]	969	#ifdef CONFIG_HOTPLUG_CPU
				970	#define rcu_offline_cpu_enqueue(srclist, srctail, dstlist, dsttail) do { \
				971	*dsttail = srclist; \
				972	if (srclist != NULL) { \
				973	dsttail = srctail; \
				974	srclist = NULL; \
				975	srctail = &srclist;\
				976	} \
				977	} while (0)
				978
				979	void rcu_offline_cpu(int cpu)
				980	{
				981	int i;
				982	struct rcu_head *list = NULL;
				983	unsigned long flags;
				984	struct rcu_data *rdp = RCU_DATA_CPU(cpu);
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	985	struct rcu_head *schedlist = NULL;
				986	struct rcu_head **schedtail = &schedlist;
Paul E. McKenney	eaf649e	2008-01-25 21:08:25 +0100	[diff] [blame]	987	struct rcu_head **tail = &list;
				988
				989	/*
				990	* Remove all callbacks from the newly dead CPU, retaining order.
				991	* Otherwise rcu_barrier() will fail
				992	*/
				993
				994	spin_lock_irqsave(&rdp->lock, flags);
				995	rcu_offline_cpu_enqueue(rdp->donelist, rdp->donetail, list, tail);
				996	for (i = GP_STAGES - 1; i >= 0; i--)
				997	rcu_offline_cpu_enqueue(rdp->waitlist[i], rdp->waittail[i],
				998	list, tail);
				999	rcu_offline_cpu_enqueue(rdp->nextlist, rdp->nexttail, list, tail);
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	1000	rcu_offline_cpu_enqueue(rdp->waitschedlist, rdp->waitschedtail,
				1001	schedlist, schedtail);
				1002	rcu_offline_cpu_enqueue(rdp->nextschedlist, rdp->nextschedtail,
				1003	schedlist, schedtail);
				1004	rdp->rcu_sched_sleeping = 0;
Paul E. McKenney	eaf649e	2008-01-25 21:08:25 +0100	[diff] [blame]	1005	spin_unlock_irqrestore(&rdp->lock, flags);
				1006	rdp->waitlistcount = 0;
				1007
				1008	/* Disengage the newly dead CPU from the grace-period computation. */
				1009
				1010	spin_lock_irqsave(&rcu_ctrlblk.fliplock, flags);
				1011	rcu_check_mb(cpu);
				1012	if (per_cpu(rcu_flip_flag, cpu) == rcu_flipped) {
				1013	smp_mb(); /* Subsequent counter accesses must see new value */
				1014	per_cpu(rcu_flip_flag, cpu) = rcu_flip_seen;
				1015	smp_mb(); /* Subsequent RCU read-side critical sections */
				1016	/* seen -after- acknowledgement. */
				1017	}
				1018
				1019	RCU_DATA_ME()->rcu_flipctr[0] += RCU_DATA_CPU(cpu)->rcu_flipctr[0];
				1020	RCU_DATA_ME()->rcu_flipctr[1] += RCU_DATA_CPU(cpu)->rcu_flipctr[1];
				1021
				1022	RCU_DATA_CPU(cpu)->rcu_flipctr[0] = 0;
				1023	RCU_DATA_CPU(cpu)->rcu_flipctr[1] = 0;
				1024
				1025	cpu_clear(cpu, rcu_cpu_online_map);
				1026
				1027	spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags);
				1028
				1029	/*
				1030	* Place the removed callbacks on the current CPU's queue.
				1031	* Make them all start a new grace period: simple approach,
				1032	* in theory could starve a given set of callbacks, but
				1033	* you would need to be doing some serious CPU hotplugging
				1034	* to make this happen. If this becomes a problem, adding
				1035	* a synchronize_rcu() to the hotplug path would be a simple
				1036	* fix.
				1037	*/
				1038
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	1039	local_irq_save(flags); /* disable preempt till we know what lock. */
Paul E. McKenney	eaf649e	2008-01-25 21:08:25 +0100	[diff] [blame]	1040	rdp = RCU_DATA_ME();
Paul E. McKenney	ae77886	2008-02-27 16:21:10 -0800	[diff] [blame]	1041	spin_lock(&rdp->lock);
Paul E. McKenney	eaf649e	2008-01-25 21:08:25 +0100	[diff] [blame]	1042	*rdp->nexttail = list;
				1043	if (list)
				1044	rdp->nexttail = tail;
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	1045	*rdp->nextschedtail = schedlist;
				1046	if (schedlist)
				1047	rdp->nextschedtail = schedtail;
Paul E. McKenney	eaf649e	2008-01-25 21:08:25 +0100	[diff] [blame]	1048	spin_unlock_irqrestore(&rdp->lock, flags);
				1049	}
				1050
Paul E. McKenney	eaf649e	2008-01-25 21:08:25 +0100	[diff] [blame]	1051	#else /* #ifdef CONFIG_HOTPLUG_CPU */
				1052
				1053	void rcu_offline_cpu(int cpu)
				1054	{
				1055	}
				1056
Paul E. McKenney	eaf649e	2008-01-25 21:08:25 +0100	[diff] [blame]	1057	#endif /* #else #ifdef CONFIG_HOTPLUG_CPU */
				1058
Nick Piggin	70ff055	2008-07-10 17:25:35 +1000	[diff] [blame]	1059	void __cpuinit rcu_online_cpu(int cpu)
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	1060	{
				1061	unsigned long flags;
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	1062	struct rcu_data *rdp;
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	1063
				1064	spin_lock_irqsave(&rcu_ctrlblk.fliplock, flags);
				1065	cpu_set(cpu, rcu_cpu_online_map);
				1066	spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags);
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	1067
				1068	/*
				1069	* The rcu_sched grace-period processing might have bypassed
				1070	* this CPU, given that it was not in the rcu_cpu_online_map
				1071	* when the grace-period scan started. This means that the
				1072	* grace-period task might sleep. So make sure that if this
				1073	* should happen, the first callback posted to this CPU will
				1074	* wake up the grace-period task if need be.
				1075	*/
				1076
				1077	rdp = RCU_DATA_CPU(cpu);
				1078	spin_lock_irqsave(&rdp->lock, flags);
				1079	rdp->rcu_sched_sleeping = 1;
				1080	spin_unlock_irqrestore(&rdp->lock, flags);
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	1081	}
				1082
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	1083	static void rcu_process_callbacks(struct softirq_action *unused)
				1084	{
				1085	unsigned long flags;
				1086	struct rcu_head next, list;
Paul E. McKenney	c9e7100	2008-02-28 11:51:07 -0800	[diff] [blame]	1087	struct rcu_data *rdp;
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	1088
Paul E. McKenney	c9e7100	2008-02-28 11:51:07 -0800	[diff] [blame]	1089	local_irq_save(flags);
				1090	rdp = RCU_DATA_ME();
				1091	spin_lock(&rdp->lock);
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	1092	list = rdp->donelist;
				1093	if (list == NULL) {
				1094	spin_unlock_irqrestore(&rdp->lock, flags);
				1095	return;
				1096	}
				1097	rdp->donelist = NULL;
				1098	rdp->donetail = &rdp->donelist;
				1099	RCU_TRACE_RDP(rcupreempt_trace_done_remove, rdp);
				1100	spin_unlock_irqrestore(&rdp->lock, flags);
				1101	while (list) {
				1102	next = list->next;
				1103	list->func(list);
				1104	list = next;
				1105	RCU_TRACE_ME(rcupreempt_trace_invoke);
				1106	}
				1107	}
				1108
				1109	void call_rcu(struct rcu_head head, void (func)(struct rcu_head *rcu))
				1110	{
				1111	unsigned long flags;
				1112	struct rcu_data *rdp;
				1113
				1114	head->func = func;
				1115	head->next = NULL;
				1116	local_irq_save(flags);
				1117	rdp = RCU_DATA_ME();
				1118	spin_lock(&rdp->lock);
				1119	__rcu_advance_callbacks(rdp);
				1120	*rdp->nexttail = head;
				1121	rdp->nexttail = &head->next;
				1122	RCU_TRACE_RDP(rcupreempt_trace_next_add, rdp);
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	1123	spin_unlock_irqrestore(&rdp->lock, flags);
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	1124	}
				1125	EXPORT_SYMBOL_GPL(call_rcu);
				1126
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	1127	void call_rcu_sched(struct rcu_head head, void (func)(struct rcu_head *rcu))
				1128	{
				1129	unsigned long flags;
				1130	struct rcu_data *rdp;
				1131	int wake_gp = 0;
				1132
				1133	head->func = func;
				1134	head->next = NULL;
				1135	local_irq_save(flags);
				1136	rdp = RCU_DATA_ME();
				1137	spin_lock(&rdp->lock);
				1138	*rdp->nextschedtail = head;
				1139	rdp->nextschedtail = &head->next;
				1140	if (rdp->rcu_sched_sleeping) {
				1141
				1142	/* Grace-period processing might be sleeping... */
				1143
				1144	rdp->rcu_sched_sleeping = 0;
				1145	wake_gp = 1;
				1146	}
				1147	spin_unlock_irqrestore(&rdp->lock, flags);
				1148	if (wake_gp) {
				1149
				1150	/* Wake up grace-period processing, unless someone beat us. */
				1151
				1152	spin_lock_irqsave(&rcu_ctrlblk.schedlock, flags);
				1153	if (rcu_ctrlblk.sched_sleep != rcu_sched_sleeping)
				1154	wake_gp = 0;
				1155	rcu_ctrlblk.sched_sleep = rcu_sched_not_sleeping;
				1156	spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags);
				1157	if (wake_gp)
				1158	wake_up_interruptible(&rcu_ctrlblk.sched_wq);
				1159	}
				1160	}
				1161	EXPORT_SYMBOL_GPL(call_rcu_sched);
				1162
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	1163	/*
				1164	* Wait until all currently running preempt_disable() code segments
				1165	* (including hardware-irq-disable segments) complete. Note that
				1166	* in -rt this does -not- necessarily result in all currently executing
				1167	* interrupt -handlers- having completed.
				1168	*/
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	1169	synchronize_rcu_xxx(__synchronize_sched, call_rcu_sched)
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	1170	EXPORT_SYMBOL_GPL(__synchronize_sched);
				1171
				1172	/*
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	1173	* kthread function that manages call_rcu_sched grace periods.
				1174	*/
				1175	static int rcu_sched_grace_period(void *arg)
				1176	{
				1177	int couldsleep; /* might sleep after current pass. */
				1178	int couldsleepnext = 0; /* might sleep after next pass. */
				1179	int cpu;
				1180	unsigned long flags;
				1181	struct rcu_data *rdp;
				1182	int ret;
				1183
				1184	/*
				1185	* Each pass through the following loop handles one
				1186	* rcu_sched grace period cycle.
				1187	*/
				1188	do {
				1189	/* Save each CPU's current state. */
				1190
				1191	for_each_online_cpu(cpu) {
				1192	dyntick_save_progress_counter_sched(cpu);
				1193	save_qsctr_sched(cpu);
				1194	}
				1195
				1196	/*
				1197	* Sleep for about an RCU grace-period's worth to
				1198	* allow better batching and to consume less CPU.
				1199	*/
				1200	schedule_timeout_interruptible(RCU_SCHED_BATCH_TIME);
				1201
				1202	/*
				1203	* If there was nothing to do last time, prepare to
				1204	* sleep at the end of the current grace period cycle.
				1205	*/
				1206	couldsleep = couldsleepnext;
				1207	couldsleepnext = 1;
				1208	if (couldsleep) {
				1209	spin_lock_irqsave(&rcu_ctrlblk.schedlock, flags);
				1210	rcu_ctrlblk.sched_sleep = rcu_sched_sleep_prep;
				1211	spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags);
				1212	}
				1213
				1214	/*
				1215	* Wait on each CPU in turn to have either visited
				1216	* a quiescent state or been in dynticks-idle mode.
				1217	*/
				1218	for_each_online_cpu(cpu) {
				1219	while (rcu_qsctr_inc_needed(cpu) &&
				1220	rcu_qsctr_inc_needed_dyntick(cpu)) {
				1221	/* resched_cpu(cpu); @@@ */
				1222	schedule_timeout_interruptible(1);
				1223	}
				1224	}
				1225
				1226	/* Advance callbacks for each CPU. */
				1227
				1228	for_each_online_cpu(cpu) {
				1229
				1230	rdp = RCU_DATA_CPU(cpu);
				1231	spin_lock_irqsave(&rdp->lock, flags);
				1232
				1233	/*
				1234	* We are running on this CPU irq-disabled, so no
				1235	* CPU can go offline until we re-enable irqs.
				1236	* The current CPU might have already gone
				1237	* offline (between the for_each_offline_cpu and
				1238	* the spin_lock_irqsave), but in that case all its
				1239	* callback lists will be empty, so no harm done.
				1240	*
				1241	* Advance the callbacks! We share normal RCU's
				1242	* donelist, since callbacks are invoked the
				1243	* same way in either case.
				1244	*/
				1245	if (rdp->waitschedlist != NULL) {
				1246	*rdp->donetail = rdp->waitschedlist;
				1247	rdp->donetail = rdp->waitschedtail;
				1248
				1249	/*
				1250	* Next rcu_check_callbacks() will
				1251	* do the required raise_softirq().
				1252	*/
				1253	}
				1254	if (rdp->nextschedlist != NULL) {
				1255	rdp->waitschedlist = rdp->nextschedlist;
				1256	rdp->waitschedtail = rdp->nextschedtail;
				1257	couldsleep = 0;
				1258	couldsleepnext = 0;
				1259	} else {
				1260	rdp->waitschedlist = NULL;
				1261	rdp->waitschedtail = &rdp->waitschedlist;
				1262	}
				1263	rdp->nextschedlist = NULL;
				1264	rdp->nextschedtail = &rdp->nextschedlist;
				1265
				1266	/* Mark sleep intention. */
				1267
				1268	rdp->rcu_sched_sleeping = couldsleep;
				1269
				1270	spin_unlock_irqrestore(&rdp->lock, flags);
				1271	}
				1272
				1273	/* If we saw callbacks on the last scan, go deal with them. */
				1274
				1275	if (!couldsleep)
				1276	continue;
				1277
				1278	/* Attempt to block... */
				1279
				1280	spin_lock_irqsave(&rcu_ctrlblk.schedlock, flags);
				1281	if (rcu_ctrlblk.sched_sleep != rcu_sched_sleep_prep) {
				1282
				1283	/*
				1284	* Someone posted a callback after we scanned.
				1285	* Go take care of it.
				1286	*/
				1287	spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags);
				1288	couldsleepnext = 0;
				1289	continue;
				1290	}
				1291
				1292	/* Block until the next person posts a callback. */
				1293
				1294	rcu_ctrlblk.sched_sleep = rcu_sched_sleeping;
				1295	spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags);
				1296	ret = 0;
				1297	__wait_event_interruptible(rcu_ctrlblk.sched_wq,
				1298	rcu_ctrlblk.sched_sleep != rcu_sched_sleeping,
				1299	ret);
				1300
				1301	/*
				1302	* Signals would prevent us from sleeping, and we cannot
				1303	* do much with them in any case. So flush them.
				1304	*/
				1305	if (ret)
				1306	flush_signals(current);
				1307	couldsleepnext = 0;
				1308
				1309	} while (!kthread_should_stop());
				1310
				1311	return (0);
				1312	}
				1313
				1314	/*
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	1315	* Check to see if any future RCU-related work will need to be done
				1316	* by the current CPU, even if none need be done immediately, returning
				1317	* 1 if so. Assumes that notifiers would take care of handling any
				1318	* outstanding requests from the RCU core.
				1319	*
				1320	* This function is part of the RCU implementation; it is -not-
				1321	* an exported member of the RCU API.
				1322	*/
				1323	int rcu_needs_cpu(int cpu)
				1324	{
				1325	struct rcu_data *rdp = RCU_DATA_CPU(cpu);
				1326
				1327	return (rdp->donelist != NULL \|\|
				1328	!!rdp->waitlistcount \|\|
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	1329	rdp->nextlist != NULL \|\|
				1330	rdp->nextschedlist != NULL \|\|
				1331	rdp->waitschedlist != NULL);
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	1332	}
				1333
				1334	int rcu_pending(int cpu)
				1335	{
				1336	struct rcu_data *rdp = RCU_DATA_CPU(cpu);
				1337
				1338	/* The CPU has at least one callback queued somewhere. */
				1339
				1340	if (rdp->donelist != NULL \|\|
				1341	!!rdp->waitlistcount \|\|
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	1342	rdp->nextlist != NULL \|\|
				1343	rdp->nextschedlist != NULL \|\|
				1344	rdp->waitschedlist != NULL)
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	1345	return 1;
				1346
				1347	/* The RCU core needs an acknowledgement from this CPU. */
				1348
				1349	if ((per_cpu(rcu_flip_flag, cpu) == rcu_flipped) \|\|
				1350	(per_cpu(rcu_mb_flag, cpu) == rcu_mb_needed))
				1351	return 1;
				1352
				1353	/* This CPU has fallen behind the global grace-period number. */
				1354
				1355	if (rdp->completed != rcu_ctrlblk.completed)
				1356	return 1;
				1357
				1358	/* Nothing needed from this CPU. */
				1359
				1360	return 0;
				1361	}
				1362
Paul E. McKenney	eaf649e	2008-01-25 21:08:25 +0100	[diff] [blame]	1363	static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
				1364	unsigned long action, void *hcpu)
				1365	{
				1366	long cpu = (long)hcpu;
				1367
				1368	switch (action) {
				1369	case CPU_UP_PREPARE:
				1370	case CPU_UP_PREPARE_FROZEN:
				1371	rcu_online_cpu(cpu);
				1372	break;
				1373	case CPU_UP_CANCELED:
				1374	case CPU_UP_CANCELED_FROZEN:
				1375	case CPU_DEAD:
				1376	case CPU_DEAD_FROZEN:
				1377	rcu_offline_cpu(cpu);
				1378	break;
				1379	default:
				1380	break;
				1381	}
				1382	return NOTIFY_OK;
				1383	}
				1384
				1385	static struct notifier_block __cpuinitdata rcu_nb = {
				1386	.notifier_call = rcu_cpu_notify,
				1387	};
				1388
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	1389	void __init __rcu_init(void)
				1390	{
				1391	int cpu;
				1392	int i;
				1393	struct rcu_data *rdp;
				1394
				1395	printk(KERN_NOTICE "Preemptible RCU implementation.\n");
				1396	for_each_possible_cpu(cpu) {
				1397	rdp = RCU_DATA_CPU(cpu);
				1398	spin_lock_init(&rdp->lock);
				1399	rdp->completed = 0;
				1400	rdp->waitlistcount = 0;
				1401	rdp->nextlist = NULL;
				1402	rdp->nexttail = &rdp->nextlist;
				1403	for (i = 0; i < GP_STAGES; i++) {
				1404	rdp->waitlist[i] = NULL;
				1405	rdp->waittail[i] = &rdp->waitlist[i];
				1406	}
				1407	rdp->donelist = NULL;
				1408	rdp->donetail = &rdp->donelist;
				1409	rdp->rcu_flipctr[0] = 0;
				1410	rdp->rcu_flipctr[1] = 0;
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	1411	rdp->nextschedlist = NULL;
				1412	rdp->nextschedtail = &rdp->nextschedlist;
				1413	rdp->waitschedlist = NULL;
				1414	rdp->waitschedtail = &rdp->waitschedlist;
				1415	rdp->rcu_sched_sleeping = 0;
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	1416	}
Paul E. McKenney	eaf649e	2008-01-25 21:08:25 +0100	[diff] [blame]	1417	register_cpu_notifier(&rcu_nb);
				1418
				1419	/*
				1420	* We don't need protection against CPU-Hotplug here
				1421	* since
				1422	* a) If a CPU comes online while we are iterating over the
				1423	* cpu_online_map below, we would only end up making a
				1424	* duplicate call to rcu_online_cpu() which sets the corresponding
				1425	* CPU's mask in the rcu_cpu_online_map.
				1426	*
				1427	* b) A CPU cannot go offline at this point in time since the user
				1428	* does not have access to the sysfs interface, nor do we
				1429	* suspend the system.
				1430	*/
				1431	for_each_online_cpu(cpu)
				1432	rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE, (void *)(long) cpu);
				1433
Carlos R. Mafra	962cf36	2008-05-15 11:15:37 -0300	[diff] [blame]	1434	open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	1435	}
				1436
				1437	/*
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	1438	* Late-boot-time RCU initialization that must wait until after scheduler
				1439	* has been initialized.
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	1440	*/
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	1441	void __init rcu_init_sched(void)
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	1442	{
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	1443	rcu_sched_grace_period_task = kthread_run(rcu_sched_grace_period,
				1444	NULL,
				1445	"rcu_sched_grace_period");
				1446	WARN_ON(IS_ERR(rcu_sched_grace_period_task));
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	1447	}
				1448
				1449	#ifdef CONFIG_RCU_TRACE
				1450	long *rcupreempt_flipctr(int cpu)
				1451	{
				1452	return &RCU_DATA_CPU(cpu)->rcu_flipctr[0];
				1453	}
				1454	EXPORT_SYMBOL_GPL(rcupreempt_flipctr);
				1455
				1456	int rcupreempt_flip_flag(int cpu)
				1457	{
				1458	return per_cpu(rcu_flip_flag, cpu);
				1459	}
				1460	EXPORT_SYMBOL_GPL(rcupreempt_flip_flag);
				1461
				1462	int rcupreempt_mb_flag(int cpu)
				1463	{
				1464	return per_cpu(rcu_mb_flag, cpu);
				1465	}
				1466	EXPORT_SYMBOL_GPL(rcupreempt_mb_flag);
				1467
				1468	char *rcupreempt_try_flip_state_name(void)
				1469	{
				1470	return rcu_try_flip_state_names[rcu_ctrlblk.rcu_try_flip_state];
				1471	}
				1472	EXPORT_SYMBOL_GPL(rcupreempt_try_flip_state_name);
				1473
				1474	struct rcupreempt_trace *rcupreempt_trace_cpu(int cpu)
				1475	{
				1476	struct rcu_data *rdp = RCU_DATA_CPU(cpu);
				1477
				1478	return &rdp->trace;
				1479	}
				1480	EXPORT_SYMBOL_GPL(rcupreempt_trace_cpu);
				1481
#endif /* #ifdef CONFIG_RCU_TRACE */