10 лет назад · 8ff4fbfd69
--- a/Documentation/RCU/stallwarn.txt
+++ b/Documentation/RCU/stallwarn.txt
@@ -26,12 +26,6 @@ CONFIG_RCU_CPU_STALL_TIMEOUT
 
				 	Stall-warning messages may be enabled and disabled completely via
			
 
				 	/sys/module/rcupdate/parameters/rcu_cpu_stall_suppress.
			
 
				 
			
 
				-CONFIG_RCU_CPU_STALL_INFO
			
 
				-
			
 
				-	This kernel configuration parameter causes the stall warning to
			
 
				-	print out additional per-CPU diagnostic information, including
			
 
				-	information on scheduling-clock ticks and RCU's idle-CPU tracking.
			
 
				-
			
 
				 RCU_STALL_DELAY_DELTA
			
 
				 
			
 
				 	Although the lockdep facility is extremely useful, it does add
			
@@ -101,15 +95,13 @@ interact.  Please note that it is not possible to entirely eliminate this
 
				 sort of false positive without resorting to things like stop_machine(),
			
 
				 which is overkill for this sort of problem.
			
 
				 
			
 
				-If the CONFIG_RCU_CPU_STALL_INFO kernel configuration parameter is set,
			
 
				-more information is printed with the stall-warning message, for example:
			
 
				+Recent kernels will print a long form of the stall-warning message:
			
 
				 
			
 
				 	INFO: rcu_preempt detected stall on CPU
			
 
				 	0: (63959 ticks this GP) idle=241/3fffffffffffffff/0 softirq=82/543
			
 
				 	   (t=65000 jiffies)
			
 
				 
			
 
				-In kernels with CONFIG_RCU_FAST_NO_HZ, even more information is
			
 
				-printed:
			
 
				+In kernels with CONFIG_RCU_FAST_NO_HZ, more information is printed:
			
 
				 
			
 
				 	INFO: rcu_preempt detected stall on CPU
			
 
				 	0: (64628 ticks this GP) idle=dd5/3fffffffffffffff/0 softirq=82/543 last_accelerate: a345/d342 nonlazy_posted: 25 .D
			
@@ -171,6 +163,23 @@ message will be about three times the interval between the beginning
 
				 of the stall and the first message.
			
 
				 
			
 
				 
			
 
				+Stall Warnings for Expedited Grace Periods
			
 
				+
			
 
				+If an expedited grace period detects a stall, it will place a message
			
 
				+like the following in dmesg:
			
 
				+
			
 
				+	INFO: rcu_sched detected expedited stalls on CPUs: { 1 2 6 } 26009 jiffies s: 1043
			
 
				+
			
 
				+This indicates that CPUs 1, 2, and 6 have failed to respond to a
			
 
				+CPU-stop request, that the expedited grace period has been going on for
			
 
				+26,009 jiffies, and that the expedited grace-period sequence counter is
			
 
				+1043.  The fact that this last value is odd indicates that an expedited
			
 
				+grace period is in flight.
			
 
				+
			
 
				+It is entirely possible to see stall warnings from normal and from
			
 
				+expedited grace periods at about the same time from the same run.
			
 
				+
			
 
				+
			
 
				 What Causes RCU CPU Stall Warnings?
			
 
				 
			
 
				 So your kernel printed an RCU CPU stall warning.  The next question is
			
--- a/Documentation/RCU/trace.txt
+++ b/Documentation/RCU/trace.txt
@@ -237,42 +237,26 @@ o	"ktl" is the low-order 16 bits (in hexadecimal) of the count of
 
				 
			
 
				 The output of "cat rcu/rcu_preempt/rcuexp" looks as follows:
			
 
				 
			
 
				-s=21872 d=21872 w=0 tf=0 wd1=0 wd2=0 n=0 sc=21872 dt=21872 dl=0 dx=21872
			
 
				+s=21872 wd0=0 wd1=0 wd2=0 wd3=5 n=0 enq=0 sc=21872
			
 
				 
			
 
				 These fields are as follows:
			
 
				 
			
 
				-o	"s" is the starting sequence number.
			
 
				+o	"s" is the sequence number, with an odd number indicating that
			
 
				+	an expedited grace period is in progress.
			
 
				 
			
 
				-o	"d" is the ending sequence number.  When the starting and ending
			
 
				-	numbers differ, there is an expedited grace period in progress.
			
 
				-
			
 
				-o	"w" is the number of times that the sequence numbers have been
			
 
				-	in danger of wrapping.
			
 
				-
			
 
				-o	"tf" is the number of times that contention has resulted in a
			
 
				-	failure to begin an expedited grace period.
			
 
				-
			
 
				-o	"wd1" and "wd2" are the number of times that an attempt to
			
 
				-	start an expedited grace period found that someone else had
			
 
				-	completed an expedited grace period that satisfies the
			
 
				+o	"wd0", "wd1", "wd2", and "wd3" are the number of times that an
			
 
				+	attempt to start an expedited grace period found that someone
			
 
				+	else had completed an expedited grace period that satisfies the
			
 
				 	attempted request.  "Our work is done."
			
 
				 
			
 
				-o	"n" is number of times that contention was so great that
			
 
				-	the request was demoted from an expedited grace period to
			
 
				-	a normal grace period.
			
 
				+o	"n" is the number of times that a concurrent CPU-hotplug operation
			
 
				+	forced a fallback to a normal grace period.
			
 
				+
			
 
				+o	"enq" is the number of quiescent states still outstanding.
			
 
				 
			
 
				 o	"sc" is the number of times that the attempt to start a
			
 
				 	new expedited grace period succeeded.
			
 
				 
			
 
				-o	"dt" is the number of times that we attempted to update
			
 
				-	the "d" counter.
			
 
				-
			
 
				-o	"dl" is the number of times that we failed to update the "d"
			
 
				-	counter.
			
 
				-
			
 
				-o	"dx" is the number of times that we succeeded in updating
			
 
				-	the "d" counter.
			
 
				-
			
 
				 
			
 
				 The output of "cat rcu/rcu_preempt/rcugp" looks as follows:
			
 
				 
			
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -661,7 +661,6 @@ TRACE_EVENT(rcu_torture_read,
 
				  * Tracepoint for _rcu_barrier() execution.  The string "s" describes
			
 
				  * the _rcu_barrier phase:
			
 
				  *	"Begin": _rcu_barrier() started.
			
 
				- *	"Check": _rcu_barrier() checking for piggybacking.
			
 
				  *	"EarlyExit": _rcu_barrier() piggybacked, thus early exit.
			
 
				  *	"Inc1": _rcu_barrier() piggyback check counter incremented.
			
 
				  *	"OfflineNoCB": _rcu_barrier() found callback on never-online CPU
			
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -70,6 +70,8 @@ MODULE_ALIAS("rcutree");
 
				 
			
 
				 static struct lock_class_key rcu_node_class[RCU_NUM_LVLS];
			
 
				 static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS];
			
 
				+static struct lock_class_key rcu_exp_class[RCU_NUM_LVLS];
			
 
				+static struct lock_class_key rcu_exp_sched_class[RCU_NUM_LVLS];
			
 
				 
			
 
				 /*
			
 
				  * In order to export the rcu_state name to the tracing tools, it
			
@@ -124,13 +126,8 @@ module_param(rcu_fanout_exact, bool, 0444);
 
				 static int rcu_fanout_leaf = RCU_FANOUT_LEAF;
			
 
				 module_param(rcu_fanout_leaf, int, 0444);
			
 
				 int rcu_num_lvls __read_mostly = RCU_NUM_LVLS;
			
 
				-static int num_rcu_lvl[] = {  /* Number of rcu_nodes at specified level. */
			
 
				-	NUM_RCU_LVL_0,
			
 
				-	NUM_RCU_LVL_1,
			
 
				-	NUM_RCU_LVL_2,
			
 
				-	NUM_RCU_LVL_3,
			
 
				-	NUM_RCU_LVL_4,
			
 
				-};
			
 
				+/* Number of rcu_nodes at specified level. */
			
 
				+static int num_rcu_lvl[] = NUM_RCU_LVL_INIT;
			
 
				 int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total # rcu_nodes in use. */
			
 
				 
			
 
				 /*
			
@@ -1178,9 +1175,11 @@ static void rcu_check_gp_kthread_starvation(struct rcu_state *rsp)
 
				 	j = jiffies;
			
 
				 	gpa = READ_ONCE(rsp->gp_activity);
			
 
				 	if (j - gpa > 2 * HZ)
			
 
				-		pr_err("%s kthread starved for %ld jiffies! g%lu c%lu f%#x\n",
			
 
				+		pr_err("%s kthread starved for %ld jiffies! g%lu c%lu f%#x s%d ->state=%#lx\n",
			
 
				 		       rsp->name, j - gpa,
			
 
				-		       rsp->gpnum, rsp->completed, rsp->gp_flags);
			
 
				+		       rsp->gpnum, rsp->completed,
			
 
				+		       rsp->gp_flags, rsp->gp_state,
			
 
				+		       rsp->gp_kthread ? rsp->gp_kthread->state : 0);
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -1905,6 +1904,26 @@ static int rcu_gp_init(struct rcu_state *rsp)
 
				 	return 1;
			
 
				 }
			
 
				 
			
 
				+/*
			
 
				+ * Helper function for wait_event_interruptible_timeout() wakeup
			
 
				+ * at force-quiescent-state time.
			
 
				+ */
			
 
				+static bool rcu_gp_fqs_check_wake(struct rcu_state *rsp, int *gfp)
			
 
				+{
			
 
				+	struct rcu_node *rnp = rcu_get_root(rsp);
			
 
				+
			
 
				+	/* Someone like call_rcu() requested a force-quiescent-state scan. */
			
 
				+	*gfp = READ_ONCE(rsp->gp_flags);
			
 
				+	if (*gfp & RCU_GP_FLAG_FQS)
			
 
				+		return true;
			
 
				+
			
 
				+	/* The current grace period has completed. */
			
 
				+	if (!READ_ONCE(rnp->qsmask) && !rcu_preempt_blocked_readers_cgp(rnp))
			
 
				+		return true;
			
 
				+
			
 
				+	return false;
			
 
				+}
			
 
				+
			
 
				 /*
			
 
				  * Do one round of quiescent-state forcing.
			
 
				  */
			
@@ -2041,6 +2060,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
 
				 			wait_event_interruptible(rsp->gp_wq,
			
 
				 						 READ_ONCE(rsp->gp_flags) &
			
 
				 						 RCU_GP_FLAG_INIT);
			
 
				+			rsp->gp_state = RCU_GP_DONE_GPS;
			
 
				 			/* Locking provides needed memory barrier. */
			
 
				 			if (rcu_gp_init(rsp))
			
 
				 				break;
			
@@ -2068,11 +2088,8 @@ static int __noreturn rcu_gp_kthread(void *arg)
 
				 					       TPS("fqswait"));
			
 
				 			rsp->gp_state = RCU_GP_WAIT_FQS;
			
 
				 			ret = wait_event_interruptible_timeout(rsp->gp_wq,
			
 
				-					((gf = READ_ONCE(rsp->gp_flags)) &
			
 
				-					 RCU_GP_FLAG_FQS) ||
			
 
				-					(!READ_ONCE(rnp->qsmask) &&
			
 
				-					 !rcu_preempt_blocked_readers_cgp(rnp)),
			
 
				-					j);
			
 
				+					rcu_gp_fqs_check_wake(rsp, &gf), j);
			
 
				+			rsp->gp_state = RCU_GP_DOING_FQS;
			
 
				 			/* Locking provides needed memory barriers. */
			
 
				 			/* If grace period done, leave loop. */
			
 
				 			if (!READ_ONCE(rnp->qsmask) &&
			
@@ -2110,7 +2127,9 @@ static int __noreturn rcu_gp_kthread(void *arg)
 
				 		}
			
 
				 
			
 
				 		/* Handle grace-period end. */
			
 
				+		rsp->gp_state = RCU_GP_CLEANUP;
			
 
				 		rcu_gp_cleanup(rsp);
			
 
				+		rsp->gp_state = RCU_GP_CLEANED;
			
 
				 	}
			
 
				 }
			
 
				 
			
@@ -3305,23 +3324,195 @@ void cond_synchronize_sched(unsigned long oldstate)
 
				 }
			
 
				 EXPORT_SYMBOL_GPL(cond_synchronize_sched);
			
 
				 
			
 
				-static int synchronize_sched_expedited_cpu_stop(void *data)
			
 
				+/* Adjust sequence number for start of update-side operation. */
			
 
				+static void rcu_seq_start(unsigned long *sp)
			
 
				+{
			
 
				+	WRITE_ONCE(*sp, *sp + 1);
			
 
				+	smp_mb(); /* Ensure update-side operation after counter increment. */
			
 
				+	WARN_ON_ONCE(!(*sp & 0x1));
			
 
				+}
			
 
				+
			
 
				+/* Adjust sequence number for end of update-side operation. */
			
 
				+static void rcu_seq_end(unsigned long *sp)
			
 
				+{
			
 
				+	smp_mb(); /* Ensure update-side operation before counter increment. */
			
 
				+	WRITE_ONCE(*sp, *sp + 1);
			
 
				+	WARN_ON_ONCE(*sp & 0x1);
			
 
				+}
			
 
				+
			
 
				+/* Take a snapshot of the update side's sequence number. */
			
 
				+static unsigned long rcu_seq_snap(unsigned long *sp)
			
 
				+{
			
 
				+	unsigned long s;
			
 
				+
			
 
				+	smp_mb(); /* Caller's modifications seen first by other CPUs. */
			
 
				+	s = (READ_ONCE(*sp) + 3) & ~0x1;
			
 
				+	smp_mb(); /* Above access must not bleed into critical section. */
			
 
				+	return s;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Given a snapshot from rcu_seq_snap(), determine whether or not a
			
 
				+ * full update-side operation has occurred.
			
 
				+ */
			
 
				+static bool rcu_seq_done(unsigned long *sp, unsigned long s)
			
 
				+{
			
 
				+	return ULONG_CMP_GE(READ_ONCE(*sp), s);
			
 
				+}
			
 
				+
			
 
				+/* Wrapper functions for expedited grace periods.  */
			
 
				+static void rcu_exp_gp_seq_start(struct rcu_state *rsp)
			
 
				+{
			
 
				+	rcu_seq_start(&rsp->expedited_sequence);
			
 
				+}
			
 
				+static void rcu_exp_gp_seq_end(struct rcu_state *rsp)
			
 
				+{
			
 
				+	rcu_seq_end(&rsp->expedited_sequence);
			
 
				+	smp_mb(); /* Ensure that consecutive grace periods serialize. */
			
 
				+}
			
 
				+static unsigned long rcu_exp_gp_seq_snap(struct rcu_state *rsp)
			
 
				+{
			
 
				+	return rcu_seq_snap(&rsp->expedited_sequence);
			
 
				+}
			
 
				+static bool rcu_exp_gp_seq_done(struct rcu_state *rsp, unsigned long s)
			
 
				+{
			
 
				+	return rcu_seq_done(&rsp->expedited_sequence, s);
			
 
				+}
			
 
				+
			
 
				+/* Common code for synchronize_{rcu,sched}_expedited() work-done checking. */
			
 
				+static bool sync_exp_work_done(struct rcu_state *rsp, struct rcu_node *rnp,
			
 
				+			       struct rcu_data *rdp,
			
 
				+			       atomic_long_t *stat, unsigned long s)
			
 
				 {
			
 
				+	if (rcu_exp_gp_seq_done(rsp, s)) {
			
 
				+		if (rnp)
			
 
				+			mutex_unlock(&rnp->exp_funnel_mutex);
			
 
				+		else if (rdp)
			
 
				+			mutex_unlock(&rdp->exp_funnel_mutex);
			
 
				+		/* Ensure test happens before caller kfree(). */
			
 
				+		smp_mb__before_atomic(); /* ^^^ */
			
 
				+		atomic_long_inc(stat);
			
 
				+		return true;
			
 
				+	}
			
 
				+	return false;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Funnel-lock acquisition for expedited grace periods.  Returns a
			
 
				+ * pointer to the root rcu_node structure, or NULL if some other
			
 
				+ * task did the expedited grace period for us.
			
 
				+ */
			
 
				+static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
			
 
				+{
			
 
				+	struct rcu_data *rdp;
			
 
				+	struct rcu_node *rnp0;
			
 
				+	struct rcu_node *rnp1 = NULL;
			
 
				+
			
 
				 	/*
			
 
				-	 * There must be a full memory barrier on each affected CPU
			
 
				-	 * between the time that try_stop_cpus() is called and the
			
 
				-	 * time that it returns.
			
 
				-	 *
			
 
				-	 * In the current initial implementation of cpu_stop, the
			
 
				-	 * above condition is already met when the control reaches
			
 
				-	 * this point and the following smp_mb() is not strictly
			
 
				-	 * necessary.  Do smp_mb() anyway for documentation and
			
 
				-	 * robustness against future implementation changes.
			
 
				+	 * First try directly acquiring the root lock in order to reduce
			
 
				+	 * latency in the common case where expedited grace periods are
			
 
				+	 * rare.  We check mutex_is_locked() to avoid pathological levels of
			
 
				+	 * memory contention on ->exp_funnel_mutex in the heavy-load case.
			
 
				 	 */
			
 
				-	smp_mb(); /* See above comment block. */
			
 
				+	rnp0 = rcu_get_root(rsp);
			
 
				+	if (!mutex_is_locked(&rnp0->exp_funnel_mutex)) {
			
 
				+		if (mutex_trylock(&rnp0->exp_funnel_mutex)) {
			
 
				+			if (sync_exp_work_done(rsp, rnp0, NULL,
			
 
				+					       &rsp->expedited_workdone0, s))
			
 
				+				return NULL;
			
 
				+			return rnp0;
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * Each pass through the following loop works its way
			
 
				+	 * up the rcu_node tree, returning if others have done the
			
 
				+	 * work or otherwise falls through holding the root rnp's
			
 
				+	 * ->exp_funnel_mutex.  The mapping from CPU to rcu_node structure
			
 
				+	 * can be inexact, as it is just promoting locality and is not
			
 
				+	 * strictly needed for correctness.
			
 
				+	 */
			
 
				+	rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id());
			
 
				+	if (sync_exp_work_done(rsp, NULL, NULL, &rsp->expedited_workdone1, s))
			
 
				+		return NULL;
			
 
				+	mutex_lock(&rdp->exp_funnel_mutex);
			
 
				+	rnp0 = rdp->mynode;
			
 
				+	for (; rnp0 != NULL; rnp0 = rnp0->parent) {
			
 
				+		if (sync_exp_work_done(rsp, rnp1, rdp,
			
 
				+				       &rsp->expedited_workdone2, s))
			
 
				+			return NULL;
			
 
				+		mutex_lock(&rnp0->exp_funnel_mutex);
			
 
				+		if (rnp1)
			
 
				+			mutex_unlock(&rnp1->exp_funnel_mutex);
			
 
				+		else
			
 
				+			mutex_unlock(&rdp->exp_funnel_mutex);
			
 
				+		rnp1 = rnp0;
			
 
				+	}
			
 
				+	if (sync_exp_work_done(rsp, rnp1, rdp,
			
 
				+			       &rsp->expedited_workdone3, s))
			
 
				+		return NULL;
			
 
				+	return rnp1;
			
 
				+}
			
 
				+
			
 
				+/* Invoked on each online non-idle CPU for expedited quiescent state. */
			
 
				+static int synchronize_sched_expedited_cpu_stop(void *data)
			
 
				+{
			
 
				+	struct rcu_data *rdp = data;
			
 
				+	struct rcu_state *rsp = rdp->rsp;
			
 
				+
			
 
				+	/* We are here: If we are last, do the wakeup. */
			
 
				+	rdp->exp_done = true;
			
 
				+	if (atomic_dec_and_test(&rsp->expedited_need_qs))
			
 
				+		wake_up(&rsp->expedited_wq);
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				+static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
			
 
				+{
			
 
				+	int cpu;
			
 
				+	unsigned long jiffies_stall;
			
 
				+	unsigned long jiffies_start;
			
 
				+	struct rcu_data *rdp;
			
 
				+	int ret;
			
 
				+
			
 
				+	jiffies_stall = rcu_jiffies_till_stall_check();
			
 
				+	jiffies_start = jiffies;
			
 
				+
			
 
				+	for (;;) {
			
 
				+		ret = wait_event_interruptible_timeout(
			
 
				+				rsp->expedited_wq,
			
 
				+				!atomic_read(&rsp->expedited_need_qs),
			
 
				+				jiffies_stall);
			
 
				+		if (ret > 0)
			
 
				+			return;
			
 
				+		if (ret < 0) {
			
 
				+			/* Hit a signal, disable CPU stall warnings. */
			
 
				+			wait_event(rsp->expedited_wq,
			
 
				+				   !atomic_read(&rsp->expedited_need_qs));
			
 
				+			return;
			
 
				+		}
			
 
				+		pr_err("INFO: %s detected expedited stalls on CPUs: {",
			
 
				+		       rsp->name);
			
 
				+		for_each_online_cpu(cpu) {
			
 
				+			rdp = per_cpu_ptr(rsp->rda, cpu);
			
 
				+
			
 
				+			if (rdp->exp_done)
			
 
				+				continue;
			
 
				+			pr_cont(" %d", cpu);
			
 
				+		}
			
 
				+		pr_cont(" } %lu jiffies s: %lu\n",
			
 
				+			jiffies - jiffies_start, rsp->expedited_sequence);
			
 
				+		for_each_online_cpu(cpu) {
			
 
				+			rdp = per_cpu_ptr(rsp->rda, cpu);
			
 
				+
			
 
				+			if (rdp->exp_done)
			
 
				+				continue;
			
 
				+			dump_cpu_task(cpu);
			
 
				+		}
			
 
				+		jiffies_stall = 3 * rcu_jiffies_till_stall_check() + 3;
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				 /**
			
 
				  * synchronize_sched_expedited - Brute-force RCU-sched grace period
			
 
				  *
			
@@ -3333,58 +3524,21 @@ static int synchronize_sched_expedited_cpu_stop(void *data)
 
				  * restructure your code to batch your updates, and then use a single
			
 
				  * synchronize_sched() instead.
			
 
				  *
			
 
				- * This implementation can be thought of as an application of ticket
			
 
				- * locking to RCU, with sync_sched_expedited_started and
			
 
				- * sync_sched_expedited_done taking on the roles of the halves
			
 
				- * of the ticket-lock word.  Each task atomically increments
			
 
				- * sync_sched_expedited_started upon entry, snapshotting the old value,
			
 
				- * then attempts to stop all the CPUs.  If this succeeds, then each
			
 
				- * CPU will have executed a context switch, resulting in an RCU-sched
			
 
				- * grace period.  We are then done, so we use atomic_cmpxchg() to
			
 
				- * update sync_sched_expedited_done to match our snapshot -- but
			
 
				- * only if someone else has not already advanced past our snapshot.
			
 
				- *
			
 
				- * On the other hand, if try_stop_cpus() fails, we check the value
			
 
				- * of sync_sched_expedited_done.  If it has advanced past our
			
 
				- * initial snapshot, then someone else must have forced a grace period
			
 
				- * some time after we took our snapshot.  In this case, our work is
			
 
				- * done for us, and we can simply return.  Otherwise, we try again,
			
 
				- * but keep our initial snapshot for purposes of checking for someone
			
 
				- * doing our work for us.
			
 
				- *
			
 
				- * If we fail too many times in a row, we fall back to synchronize_sched().
			
 
				+ * This implementation can be thought of as an application of sequence
			
 
				+ * locking to expedited grace periods, but using the sequence counter to
			
 
				+ * determine when someone else has already done the work instead of for
			
 
				+ * retrying readers.
			
 
				  */
			
 
				 void synchronize_sched_expedited(void)
			
 
				 {
			
 
				-	cpumask_var_t cm;
			
 
				-	bool cma = false;
			
 
				 	int cpu;
			
 
				-	long firstsnap, s, snap;
			
 
				-	int trycount = 0;
			
 
				+	unsigned long s;
			
 
				+	struct rcu_node *rnp;
			
 
				 	struct rcu_state *rsp = &rcu_sched_state;
			
 
				 
			
 
				-	/*
			
 
				-	 * If we are in danger of counter wrap, just do synchronize_sched().
			
 
				-	 * By allowing sync_sched_expedited_started to advance no more than
			
 
				-	 * ULONG_MAX/8 ahead of sync_sched_expedited_done, we are ensuring
			
 
				-	 * that more than 3.5 billion CPUs would be required to force a
			
 
				-	 * counter wrap on a 32-bit system.  Quite a few more CPUs would of
			
 
				-	 * course be required on a 64-bit system.
			
 
				-	 */
			
 
				-	if (ULONG_CMP_GE((ulong)atomic_long_read(&rsp->expedited_start),
			
 
				-			 (ulong)atomic_long_read(&rsp->expedited_done) +
			
 
				-			 ULONG_MAX / 8)) {
			
 
				-		wait_rcu_gp(call_rcu_sched);
			
 
				-		atomic_long_inc(&rsp->expedited_wrap);
			
 
				-		return;
			
 
				-	}
			
 
				+	/* Take a snapshot of the sequence number.  */
			
 
				+	s = rcu_exp_gp_seq_snap(rsp);
			
 
				 
			
 
				-	/*
			
 
				-	 * Take a ticket.  Note that atomic_inc_return() implies a
			
 
				-	 * full memory barrier.
			
 
				-	 */
			
 
				-	snap = atomic_long_inc_return(&rsp->expedited_start);
			
 
				-	firstsnap = snap;
			
 
				 	if (!try_get_online_cpus()) {
			
 
				 		/* CPU hotplug operation in flight, fall back to normal GP. */
			
 
				 		wait_rcu_gp(call_rcu_sched);
			
@@ -3393,100 +3547,38 @@ void synchronize_sched_expedited(void)
 
				 	}
			
 
				 	WARN_ON_ONCE(cpu_is_offline(raw_smp_processor_id()));
			
 
				 
			
 
				-	/* Offline CPUs, idle CPUs, and any CPU we run on are quiescent. */
			
 
				-	cma = zalloc_cpumask_var(&cm, GFP_KERNEL);
			
 
				-	if (cma) {
			
 
				-		cpumask_copy(cm, cpu_online_mask);
			
 
				-		cpumask_clear_cpu(raw_smp_processor_id(), cm);
			
 
				-		for_each_cpu(cpu, cm) {
			
 
				-			struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
			
 
				-
			
 
				-			if (!(atomic_add_return(0, &rdtp->dynticks) & 0x1))
			
 
				-				cpumask_clear_cpu(cpu, cm);
			
 
				-		}
			
 
				-		if (cpumask_weight(cm) == 0)
			
 
				-			goto all_cpus_idle;
			
 
				+	rnp = exp_funnel_lock(rsp, s);
			
 
				+	if (rnp == NULL) {
			
 
				+		put_online_cpus();
			
 
				+		return;  /* Someone else did our work for us. */
			
 
				 	}
			
 
				 
			
 
				-	/*
			
 
				-	 * Each pass through the following loop attempts to force a
			
 
				-	 * context switch on each CPU.
			
 
				-	 */
			
 
				-	while (try_stop_cpus(cma ? cm : cpu_online_mask,
			
 
				-			     synchronize_sched_expedited_cpu_stop,
			
 
				-			     NULL) == -EAGAIN) {
			
 
				-		put_online_cpus();
			
 
				-		atomic_long_inc(&rsp->expedited_tryfail);
			
 
				-
			
 
				-		/* Check to see if someone else did our work for us. */
			
 
				-		s = atomic_long_read(&rsp->expedited_done);
			
 
				-		if (ULONG_CMP_GE((ulong)s, (ulong)firstsnap)) {
			
 
				-			/* ensure test happens before caller kfree */
			
 
				-			smp_mb__before_atomic(); /* ^^^ */
			
 
				-			atomic_long_inc(&rsp->expedited_workdone1);
			
 
				-			free_cpumask_var(cm);
			
 
				-			return;
			
 
				-		}
			
 
				+	rcu_exp_gp_seq_start(rsp);
			
 
				 
			
 
				-		/* No joy, try again later.  Or just synchronize_sched(). */
			
 
				-		if (trycount++ < 10) {
			
 
				-			udelay(trycount * num_online_cpus());
			
 
				-		} else {
			
 
				-			wait_rcu_gp(call_rcu_sched);
			
 
				-			atomic_long_inc(&rsp->expedited_normal);
			
 
				-			free_cpumask_var(cm);
			
 
				-			return;
			
 
				-		}
			
 
				+	/* Stop each CPU that is online, non-idle, and not us. */
			
 
				+	init_waitqueue_head(&rsp->expedited_wq);
			
 
				+	atomic_set(&rsp->expedited_need_qs, 1); /* Extra count avoids race. */
			
 
				+	for_each_online_cpu(cpu) {
			
 
				+		struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
			
 
				+		struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
			
 
				 
			
 
				-		/* Recheck to see if someone else did our work for us. */
			
 
				-		s = atomic_long_read(&rsp->expedited_done);
			
 
				-		if (ULONG_CMP_GE((ulong)s, (ulong)firstsnap)) {
			
 
				-			/* ensure test happens before caller kfree */
			
 
				-			smp_mb__before_atomic(); /* ^^^ */
			
 
				-			atomic_long_inc(&rsp->expedited_workdone2);
			
 
				-			free_cpumask_var(cm);
			
 
				-			return;
			
 
				-		}
			
 
				+		rdp->exp_done = false;
			
 
				 
			
 
				-		/*
			
 
				-		 * Refetching sync_sched_expedited_started allows later
			
 
				-		 * callers to piggyback on our grace period.  We retry
			
 
				-		 * after they started, so our grace period works for them,
			
 
				-		 * and they started after our first try, so their grace
			
 
				-		 * period works for us.
			
 
				-		 */
			
 
				-		if (!try_get_online_cpus()) {
			
 
				-			/* CPU hotplug operation in flight, use normal GP. */
			
 
				-			wait_rcu_gp(call_rcu_sched);
			
 
				-			atomic_long_inc(&rsp->expedited_normal);
			
 
				-			free_cpumask_var(cm);
			
 
				-			return;
			
 
				-		}
			
 
				-		snap = atomic_long_read(&rsp->expedited_start);
			
 
				-		smp_mb(); /* ensure read is before try_stop_cpus(). */
			
 
				+		/* Skip our CPU and any idle CPUs. */
			
 
				+		if (raw_smp_processor_id() == cpu ||
			
 
				+		    !(atomic_add_return(0, &rdtp->dynticks) & 0x1))
			
 
				+			continue;
			
 
				+		atomic_inc(&rsp->expedited_need_qs);
			
 
				+		stop_one_cpu_nowait(cpu, synchronize_sched_expedited_cpu_stop,
			
 
				+				    rdp, &rdp->exp_stop_work);
			
 
				 	}
			
 
				-	atomic_long_inc(&rsp->expedited_stoppedcpus);
			
 
				 
			
 
				-all_cpus_idle:
			
 
				-	free_cpumask_var(cm);
			
 
				+	/* Remove extra count and, if necessary, wait for CPUs to stop. */
			
 
				+	if (!atomic_dec_and_test(&rsp->expedited_need_qs))
			
 
				+		synchronize_sched_expedited_wait(rsp);
			
 
				 
			
 
				-	/*
			
 
				-	 * Everyone up to our most recent fetch is covered by our grace
			
 
				-	 * period.  Update the counter, but only if our work is still
			
 
				-	 * relevant -- which it won't be if someone who started later
			
 
				-	 * than we did already did their update.
			
 
				-	 */
			
 
				-	do {
			
 
				-		atomic_long_inc(&rsp->expedited_done_tries);
			
 
				-		s = atomic_long_read(&rsp->expedited_done);
			
 
				-		if (ULONG_CMP_GE((ulong)s, (ulong)snap)) {
			
 
				-			/* ensure test happens before caller kfree */
			
 
				-			smp_mb__before_atomic(); /* ^^^ */
			
 
				-			atomic_long_inc(&rsp->expedited_done_lost);
			
 
				-			break;
			
 
				-		}
			
 
				-	} while (atomic_long_cmpxchg(&rsp->expedited_done, s, snap) != s);
			
 
				-	atomic_long_inc(&rsp->expedited_done_exit);
			
 
				+	rcu_exp_gp_seq_end(rsp);
			
 
				+	mutex_unlock(&rnp->exp_funnel_mutex);
			
 
				 
			
 
				 	put_online_cpus();
			
 
				 }
			
@@ -3623,10 +3715,10 @@ static void rcu_barrier_callback(struct rcu_head *rhp)
 
				 	struct rcu_state *rsp = rdp->rsp;
			
 
				 
			
 
				 	if (atomic_dec_and_test(&rsp->barrier_cpu_count)) {
			
 
				-		_rcu_barrier_trace(rsp, "LastCB", -1, rsp->n_barrier_done);
			
 
				+		_rcu_barrier_trace(rsp, "LastCB", -1, rsp->barrier_sequence);
			
 
				 		complete(&rsp->barrier_completion);
			
 
				 	} else {
			
 
				-		_rcu_barrier_trace(rsp, "CB", -1, rsp->n_barrier_done);
			
 
				+		_rcu_barrier_trace(rsp, "CB", -1, rsp->barrier_sequence);
			
 
				 	}
			
 
				 }
			
 
				 
			
@@ -3638,7 +3730,7 @@ static void rcu_barrier_func(void *type)
 
				 	struct rcu_state *rsp = type;
			
 
				 	struct rcu_data *rdp = raw_cpu_ptr(rsp->rda);
			
 
				 
			
 
				-	_rcu_barrier_trace(rsp, "IRQ", -1, rsp->n_barrier_done);
			
 
				+	_rcu_barrier_trace(rsp, "IRQ", -1, rsp->barrier_sequence);
			
 
				 	atomic_inc(&rsp->barrier_cpu_count);
			
 
				 	rsp->call(&rdp->barrier_head, rcu_barrier_callback);
			
 
				 }
			
@@ -3651,55 +3743,24 @@ static void _rcu_barrier(struct rcu_state *rsp)
 
				 {
			
 
				 	int cpu;
			
 
				 	struct rcu_data *rdp;
			
 
				-	unsigned long snap = READ_ONCE(rsp->n_barrier_done);
			
 
				-	unsigned long snap_done;
			
 
				+	unsigned long s = rcu_seq_snap(&rsp->barrier_sequence);
			
 
				 
			
 
				-	_rcu_barrier_trace(rsp, "Begin", -1, snap);
			
 
				+	_rcu_barrier_trace(rsp, "Begin", -1, s);
			
 
				 
			
 
				 	/* Take mutex to serialize concurrent rcu_barrier() requests. */
			
 
				 	mutex_lock(&rsp->barrier_mutex);
			
 
				 
			
 
				-	/*
			
 
				-	 * Ensure that all prior references, including to ->n_barrier_done,
			
 
				-	 * are ordered before the _rcu_barrier() machinery.
			
 
				-	 */
			
 
				-	smp_mb();  /* See above block comment. */
			
 
				-
			
 
				-	/*
			
 
				-	 * Recheck ->n_barrier_done to see if others did our work for us.
			
 
				-	 * This means checking ->n_barrier_done for an even-to-odd-to-even
			
 
				-	 * transition.  The "if" expression below therefore rounds the old
			
 
				-	 * value up to the next even number and adds two before comparing.
			
 
				-	 */
			
 
				-	snap_done = rsp->n_barrier_done;
			
 
				-	_rcu_barrier_trace(rsp, "Check", -1, snap_done);
			
 
				-
			
 
				-	/*
			
 
				-	 * If the value in snap is odd, we needed to wait for the current
			
 
				-	 * rcu_barrier() to complete, then wait for the next one, in other
			
 
				-	 * words, we need the value of snap_done to be three larger than
			
 
				-	 * the value of snap.  On the other hand, if the value in snap is
			
 
				-	 * even, we only had to wait for the next rcu_barrier() to complete,
			
 
				-	 * in other words, we need the value of snap_done to be only two
			
 
				-	 * greater than the value of snap.  The "(snap + 3) & ~0x1" computes
			
 
				-	 * this for us (thank you, Linus!).
			
 
				-	 */
			
 
				-	if (ULONG_CMP_GE(snap_done, (snap + 3) & ~0x1)) {
			
 
				-		_rcu_barrier_trace(rsp, "EarlyExit", -1, snap_done);
			
 
				+	/* Did someone else do our work for us? */
			
 
				+	if (rcu_seq_done(&rsp->barrier_sequence, s)) {
			
 
				+		_rcu_barrier_trace(rsp, "EarlyExit", -1, rsp->barrier_sequence);
			
 
				 		smp_mb(); /* caller's subsequent code after above check. */
			
 
				 		mutex_unlock(&rsp->barrier_mutex);
			
 
				 		return;
			
 
				 	}
			
 
				 
			
 
				-	/*
			
 
				-	 * Increment ->n_barrier_done to avoid duplicate work.  Use
			
 
				-	 * WRITE_ONCE() to prevent the compiler from speculating
			
 
				-	 * the increment to precede the early-exit check.
			
 
				-	 */
			
 
				-	WRITE_ONCE(rsp->n_barrier_done, rsp->n_barrier_done + 1);
			
 
				-	WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 1);
			
 
				-	_rcu_barrier_trace(rsp, "Inc1", -1, rsp->n_barrier_done);
			
 
				-	smp_mb(); /* Order ->n_barrier_done increment with below mechanism. */
			
 
				+	/* Mark the start of the barrier operation. */
			
 
				+	rcu_seq_start(&rsp->barrier_sequence);
			
 
				+	_rcu_barrier_trace(rsp, "Inc1", -1, rsp->barrier_sequence);
			
 
				 
			
 
				 	/*
			
 
				 	 * Initialize the count to one rather than to zero in order to
			
@@ -3723,10 +3784,10 @@ static void _rcu_barrier(struct rcu_state *rsp)
 
				 		if (rcu_is_nocb_cpu(cpu)) {
			
 
				 			if (!rcu_nocb_cpu_needs_barrier(rsp, cpu)) {
			
 
				 				_rcu_barrier_trace(rsp, "OfflineNoCB", cpu,
			
 
				-						   rsp->n_barrier_done);
			
 
				+						   rsp->barrier_sequence);
			
 
				 			} else {
			
 
				 				_rcu_barrier_trace(rsp, "OnlineNoCB", cpu,
			
 
				-						   rsp->n_barrier_done);
			
 
				+						   rsp->barrier_sequence);
			
 
				 				smp_mb__before_atomic();
			
 
				 				atomic_inc(&rsp->barrier_cpu_count);
			
 
				 				__call_rcu(&rdp->barrier_head,
			
@@ -3734,11 +3795,11 @@ static void _rcu_barrier(struct rcu_state *rsp)
 
				 			}
			
 
				 		} else if (READ_ONCE(rdp->qlen)) {
			
 
				 			_rcu_barrier_trace(rsp, "OnlineQ", cpu,
			
 
				-					   rsp->n_barrier_done);
			
 
				+					   rsp->barrier_sequence);
			
 
				 			smp_call_function_single(cpu, rcu_barrier_func, rsp, 1);
			
 
				 		} else {
			
 
				 			_rcu_barrier_trace(rsp, "OnlineNQ", cpu,
			
 
				-					   rsp->n_barrier_done);
			
 
				+					   rsp->barrier_sequence);
			
 
				 		}
			
 
				 	}
			
 
				 	put_online_cpus();
			
@@ -3750,16 +3811,13 @@ static void _rcu_barrier(struct rcu_state *rsp)
 
				 	if (atomic_dec_and_test(&rsp->barrier_cpu_count))
			
 
				 		complete(&rsp->barrier_completion);
			
 
				 
			
 
				-	/* Increment ->n_barrier_done to prevent duplicate work. */
			
 
				-	smp_mb(); /* Keep increment after above mechanism. */
			
 
				-	WRITE_ONCE(rsp->n_barrier_done, rsp->n_barrier_done + 1);
			
 
				-	WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 0);
			
 
				-	_rcu_barrier_trace(rsp, "Inc2", -1, rsp->n_barrier_done);
			
 
				-	smp_mb(); /* Keep increment before caller's subsequent code. */
			
 
				-
			
 
				 	/* Wait for all rcu_barrier_callback() callbacks to be invoked. */
			
 
				 	wait_for_completion(&rsp->barrier_completion);
			
 
				 
			
 
				+	/* Mark the end of the barrier operation. */
			
 
				+	_rcu_barrier_trace(rsp, "Inc2", -1, rsp->barrier_sequence);
			
 
				+	rcu_seq_end(&rsp->barrier_sequence);
			
 
				+
			
 
				 	/* Other rcu_barrier() invocations can now safely proceed. */
			
 
				 	mutex_unlock(&rsp->barrier_mutex);
			
 
				 }
			
@@ -3822,6 +3880,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
 
				 	WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1);
			
 
				 	rdp->cpu = cpu;
			
 
				 	rdp->rsp = rsp;
			
 
				+	mutex_init(&rdp->exp_funnel_mutex);
			
 
				 	rcu_boot_init_nocb_percpu_data(rdp);
			
 
				 	raw_spin_unlock_irqrestore(&rnp->lock, flags);
			
 
				 }
			
@@ -4013,22 +4072,22 @@ void rcu_scheduler_starting(void)
 
				  * Compute the per-level fanout, either using the exact fanout specified
			
 
				  * or balancing the tree, depending on the rcu_fanout_exact boot parameter.
			
 
				  */
			
 
				-static void __init rcu_init_levelspread(struct rcu_state *rsp)
			
 
				+static void __init rcu_init_levelspread(int *levelspread, const int *levelcnt)
			
 
				 {
			
 
				 	int i;
			
 
				 
			
 
				 	if (rcu_fanout_exact) {
			
 
				-		rsp->levelspread[rcu_num_lvls - 1] = rcu_fanout_leaf;
			
 
				+		levelspread[rcu_num_lvls - 1] = rcu_fanout_leaf;
			
 
				 		for (i = rcu_num_lvls - 2; i >= 0; i--)
			
 
				-			rsp->levelspread[i] = RCU_FANOUT;
			
 
				+			levelspread[i] = RCU_FANOUT;
			
 
				 	} else {
			
 
				 		int ccur;
			
 
				 		int cprv;
			
 
				 
			
 
				 		cprv = nr_cpu_ids;
			
 
				 		for (i = rcu_num_lvls - 1; i >= 0; i--) {
			
 
				-			ccur = rsp->levelcnt[i];
			
 
				-			rsp->levelspread[i] = (cprv + ccur - 1) / ccur;
			
 
				+			ccur = levelcnt[i];
			
 
				+			levelspread[i] = (cprv + ccur - 1) / ccur;
			
 
				 			cprv = ccur;
			
 
				 		}
			
 
				 	}
			
@@ -4040,23 +4099,20 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp)
 
				 static void __init rcu_init_one(struct rcu_state *rsp,
			
 
				 		struct rcu_data __percpu *rda)
			
 
				 {
			
 
				-	static const char * const buf[] = {
			
 
				-		"rcu_node_0",
			
 
				-		"rcu_node_1",
			
 
				-		"rcu_node_2",
			
 
				-		"rcu_node_3" };  /* Match MAX_RCU_LVLS */
			
 
				-	static const char * const fqs[] = {
			
 
				-		"rcu_node_fqs_0",
			
 
				-		"rcu_node_fqs_1",
			
 
				-		"rcu_node_fqs_2",
			
 
				-		"rcu_node_fqs_3" };  /* Match MAX_RCU_LVLS */
			
 
				+	static const char * const buf[] = RCU_NODE_NAME_INIT;
			
 
				+	static const char * const fqs[] = RCU_FQS_NAME_INIT;
			
 
				+	static const char * const exp[] = RCU_EXP_NAME_INIT;
			
 
				+	static const char * const exp_sched[] = RCU_EXP_SCHED_NAME_INIT;
			
 
				 	static u8 fl_mask = 0x1;
			
 
				+
			
 
				+	int levelcnt[RCU_NUM_LVLS];		/* # nodes in each level. */
			
 
				+	int levelspread[RCU_NUM_LVLS];		/* kids/node in each level. */
			
 
				 	int cpustride = 1;
			
 
				 	int i;
			
 
				 	int j;
			
 
				 	struct rcu_node *rnp;
			
 
				 
			
 
				-	BUILD_BUG_ON(MAX_RCU_LVLS > ARRAY_SIZE(buf));  /* Fix buf[] init! */
			
 
				+	BUILD_BUG_ON(RCU_NUM_LVLS > ARRAY_SIZE(buf));  /* Fix buf[] init! */
			
 
				 
			
 
				 	/* Silence gcc 4.8 false positive about array index out of range. */
			
 
				 	if (rcu_num_lvls <= 0 || rcu_num_lvls > RCU_NUM_LVLS)
			
@@ -4065,19 +4121,19 @@ static void __init rcu_init_one(struct rcu_state *rsp,
 
				 	/* Initialize the level-tracking arrays. */
			
 
				 
			
 
				 	for (i = 0; i < rcu_num_lvls; i++)
			
 
				-		rsp->levelcnt[i] = num_rcu_lvl[i];
			
 
				+		levelcnt[i] = num_rcu_lvl[i];
			
 
				 	for (i = 1; i < rcu_num_lvls; i++)
			
 
				-		rsp->level[i] = rsp->level[i - 1] + rsp->levelcnt[i - 1];
			
 
				-	rcu_init_levelspread(rsp);
			
 
				+		rsp->level[i] = rsp->level[i - 1] + levelcnt[i - 1];
			
 
				+	rcu_init_levelspread(levelspread, levelcnt);
			
 
				 	rsp->flavor_mask = fl_mask;
			
 
				 	fl_mask <<= 1;
			
 
				 
			
 
				 	/* Initialize the elements themselves, starting from the leaves. */
			
 
				 
			
 
				 	for (i = rcu_num_lvls - 1; i >= 0; i--) {
			
 
				-		cpustride *= rsp->levelspread[i];
			
 
				+		cpustride *= levelspread[i];
			
 
				 		rnp = rsp->level[i];
			
 
				-		for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) {
			
 
				+		for (j = 0; j < levelcnt[i]; j++, rnp++) {
			
 
				 			raw_spin_lock_init(&rnp->lock);
			
 
				 			lockdep_set_class_and_name(&rnp->lock,
			
 
				 						   &rcu_node_class[i], buf[i]);
			
@@ -4097,14 +4153,23 @@ static void __init rcu_init_one(struct rcu_state *rsp,
 
				 				rnp->grpmask = 0;
			
 
				 				rnp->parent = NULL;
			
 
				 			} else {
			
 
				-				rnp->grpnum = j % rsp->levelspread[i - 1];
			
 
				+				rnp->grpnum = j % levelspread[i - 1];
			
 
				 				rnp->grpmask = 1UL << rnp->grpnum;
			
 
				 				rnp->parent = rsp->level[i - 1] +
			
 
				-					      j / rsp->levelspread[i - 1];
			
 
				+					      j / levelspread[i - 1];
			
 
				 			}
			
 
				 			rnp->level = i;
			
 
				 			INIT_LIST_HEAD(&rnp->blkd_tasks);
			
 
				 			rcu_init_one_nocb(rnp);
			
 
				+			mutex_init(&rnp->exp_funnel_mutex);
			
 
				+			if (rsp == &rcu_sched_state)
			
 
				+				lockdep_set_class_and_name(
			
 
				+					&rnp->exp_funnel_mutex,
			
 
				+					&rcu_exp_sched_class[i], exp_sched[i]);
			
 
				+			else
			
 
				+				lockdep_set_class_and_name(
			
 
				+					&rnp->exp_funnel_mutex,
			
 
				+					&rcu_exp_class[i], exp[i]);
			
 
				 		}
			
 
				 	}
			
 
				 
			
@@ -4128,9 +4193,7 @@ static void __init rcu_init_geometry(void)
 
				 {
			
 
				 	ulong d;
			
 
				 	int i;
			
 
				-	int j;
			
 
				-	int n = nr_cpu_ids;
			
 
				-	int rcu_capacity[MAX_RCU_LVLS + 1];
			
 
				+	int rcu_capacity[RCU_NUM_LVLS];
			
 
				 
			
 
				 	/*
			
 
				 	 * Initialize any unspecified boot parameters.
			
@@ -4152,48 +4215,50 @@ static void __init rcu_init_geometry(void)
 
				 	pr_info("RCU: Adjusting geometry for rcu_fanout_leaf=%d, nr_cpu_ids=%d\n",
			
 
				 		rcu_fanout_leaf, nr_cpu_ids);
			
 
				 
			
 
				-	/*
			
 
				-	 * Compute number of nodes that can be handled an rcu_node tree
			
 
				-	 * with the given number of levels.  Setting rcu_capacity[0] makes
			
 
				-	 * some of the arithmetic easier.
			
 
				-	 */
			
 
				-	rcu_capacity[0] = 1;
			
 
				-	rcu_capacity[1] = rcu_fanout_leaf;
			
 
				-	for (i = 2; i <= MAX_RCU_LVLS; i++)
			
 
				-		rcu_capacity[i] = rcu_capacity[i - 1] * RCU_FANOUT;
			
 
				-
			
 
				 	/*
			
 
				 	 * The boot-time rcu_fanout_leaf parameter is only permitted
			
 
				 	 * to increase the leaf-level fanout, not decrease it.  Of course,
			
 
				 	 * the leaf-level fanout cannot exceed the number of bits in
			
 
				-	 * the rcu_node masks.  Finally, the tree must be able to accommodate
			
 
				-	 * the configured number of CPUs.  Complain and fall back to the
			
 
				-	 * compile-time values if these limits are exceeded.
			
 
				+	 * the rcu_node masks.  Complain and fall back to the compile-
			
 
				+	 * time values if these limits are exceeded.
			
 
				 	 */
			
 
				 	if (rcu_fanout_leaf < RCU_FANOUT_LEAF ||
			
 
				-	    rcu_fanout_leaf > sizeof(unsigned long) * 8 ||
			
 
				-	    n > rcu_capacity[MAX_RCU_LVLS]) {
			
 
				+	    rcu_fanout_leaf > sizeof(unsigned long) * 8) {
			
 
				+		rcu_fanout_leaf = RCU_FANOUT_LEAF;
			
 
				 		WARN_ON(1);
			
 
				 		return;
			
 
				 	}
			
 
				 
			
 
				+	/*
			
 
				+	 * Compute number of nodes that can be handled by an rcu_node tree
			
 
				+	 * with the given number of levels.
			
 
				+	 */
			
 
				+	rcu_capacity[0] = rcu_fanout_leaf;
			
 
				+	for (i = 1; i < RCU_NUM_LVLS; i++)
			
 
				+		rcu_capacity[i] = rcu_capacity[i - 1] * RCU_FANOUT;
			
 
				+
			
 
				+	/*
			
 
				+	 * The tree must be able to accommodate the configured number of CPUs.
			
 
				+	 * If this limit is exceeded, then we have a serious problem elsewhere.
			
 
				+	 */
			
 
				+	if (nr_cpu_ids > rcu_capacity[RCU_NUM_LVLS - 1])
			
 
				+		panic("rcu_init_geometry: rcu_capacity[] is too small");
			
 
				+
			
 
				+	/* Calculate the number of levels in the tree. */
			
 
				+	for (i = 0; nr_cpu_ids > rcu_capacity[i]; i++) {
			
 
				+	}
			
 
				+	rcu_num_lvls = i + 1;
			
 
				+
			
 
				 	/* Calculate the number of rcu_nodes at each level of the tree. */
			
 
				-	for (i = 1; i <= MAX_RCU_LVLS; i++)
			
 
				-		if (n <= rcu_capacity[i]) {
			
 
				-			for (j = 0; j <= i; j++)
			
 
				-				num_rcu_lvl[j] =
			
 
				-					DIV_ROUND_UP(n, rcu_capacity[i - j]);
			
 
				-			rcu_num_lvls = i;
			
 
				-			for (j = i + 1; j <= MAX_RCU_LVLS; j++)
			
 
				-				num_rcu_lvl[j] = 0;
			
 
				-			break;
			
 
				-		}
			
 
				+	for (i = 0; i < rcu_num_lvls; i++) {
			
 
				+		int cap = rcu_capacity[(rcu_num_lvls - 1) - i];
			
 
				+		num_rcu_lvl[i] = DIV_ROUND_UP(nr_cpu_ids, cap);
			
 
				+	}
			
 
				 
			
 
				 	/* Calculate the total number of rcu_node structures. */
			
 
				 	rcu_num_nodes = 0;
			
 
				-	for (i = 0; i <= MAX_RCU_LVLS; i++)
			
 
				+	for (i = 0; i < rcu_num_lvls; i++)
			
 
				 		rcu_num_nodes += num_rcu_lvl[i];
			
 
				-	rcu_num_nodes -= n;
			
 
				 }
			
 
				 
			
 
				 /*
			
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -27,6 +27,7 @@
 
				 #include <linux/threads.h>
			
 
				 #include <linux/cpumask.h>
			
 
				 #include <linux/seqlock.h>
			
 
				+#include <linux/stop_machine.h>
			
 
				 
			
 
				 /*
			
 
				  * Define shape of hierarchy based on NR_CPUS, CONFIG_RCU_FANOUT, and
			
@@ -36,8 +37,6 @@
 
				  * Of course, your mileage may vary.
			
 
				  */
			
 
				 
			
 
				-#define MAX_RCU_LVLS 4
			
 
				-
			
 
				 #ifdef CONFIG_RCU_FANOUT
			
 
				 #define RCU_FANOUT CONFIG_RCU_FANOUT
			
 
				 #else /* #ifdef CONFIG_RCU_FANOUT */
			
@@ -66,38 +65,53 @@
 
				 #if NR_CPUS <= RCU_FANOUT_1
			
 
				 #  define RCU_NUM_LVLS	      1
			
 
				 #  define NUM_RCU_LVL_0	      1
			
 
				-#  define NUM_RCU_LVL_1	      (NR_CPUS)
			
 
				-#  define NUM_RCU_LVL_2	      0
			
 
				-#  define NUM_RCU_LVL_3	      0
			
 
				-#  define NUM_RCU_LVL_4	      0
			
 
				+#  define NUM_RCU_NODES	      NUM_RCU_LVL_0
			
 
				+#  define NUM_RCU_LVL_INIT    { NUM_RCU_LVL_0 }
			
 
				+#  define RCU_NODE_NAME_INIT  { "rcu_node_0" }
			
 
				+#  define RCU_FQS_NAME_INIT   { "rcu_node_fqs_0" }
			
 
				+#  define RCU_EXP_NAME_INIT   { "rcu_node_exp_0" }
			
 
				+#  define RCU_EXP_SCHED_NAME_INIT \
			
 
				+			      { "rcu_node_exp_sched_0" }
			
 
				 #elif NR_CPUS <= RCU_FANOUT_2
			
 
				 #  define RCU_NUM_LVLS	      2
			
 
				 #  define NUM_RCU_LVL_0	      1
			
 
				 #  define NUM_RCU_LVL_1	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
			
 
				-#  define NUM_RCU_LVL_2	      (NR_CPUS)
			
 
				-#  define NUM_RCU_LVL_3	      0
			
 
				-#  define NUM_RCU_LVL_4	      0
			
 
				+#  define NUM_RCU_NODES	      (NUM_RCU_LVL_0 + NUM_RCU_LVL_1)
			
 
				+#  define NUM_RCU_LVL_INIT    { NUM_RCU_LVL_0, NUM_RCU_LVL_1 }
			
 
				+#  define RCU_NODE_NAME_INIT  { "rcu_node_0", "rcu_node_1" }
			
 
				+#  define RCU_FQS_NAME_INIT   { "rcu_node_fqs_0", "rcu_node_fqs_1" }
			
 
				+#  define RCU_EXP_NAME_INIT   { "rcu_node_exp_0", "rcu_node_exp_1" }
			
 
				+#  define RCU_EXP_SCHED_NAME_INIT \
			
 
				+			      { "rcu_node_exp_sched_0", "rcu_node_exp_sched_1" }
			
 
				 #elif NR_CPUS <= RCU_FANOUT_3
			
 
				 #  define RCU_NUM_LVLS	      3
			
 
				 #  define NUM_RCU_LVL_0	      1
			
 
				 #  define NUM_RCU_LVL_1	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
			
 
				 #  define NUM_RCU_LVL_2	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
			
 
				-#  define NUM_RCU_LVL_3	      (NR_CPUS)
			
 
				-#  define NUM_RCU_LVL_4	      0
			
 
				+#  define NUM_RCU_NODES	      (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2)
			
 
				+#  define NUM_RCU_LVL_INIT    { NUM_RCU_LVL_0, NUM_RCU_LVL_1, NUM_RCU_LVL_2 }
			
 
				+#  define RCU_NODE_NAME_INIT  { "rcu_node_0", "rcu_node_1", "rcu_node_2" }
			
 
				+#  define RCU_FQS_NAME_INIT   { "rcu_node_fqs_0", "rcu_node_fqs_1", "rcu_node_fqs_2" }
			
 
				+#  define RCU_EXP_NAME_INIT   { "rcu_node_exp_0", "rcu_node_exp_1", "rcu_node_exp_2" }
			
 
				+#  define RCU_EXP_SCHED_NAME_INIT \
			
 
				+			      { "rcu_node_exp_sched_0", "rcu_node_exp_sched_1", "rcu_node_exp_sched_2" }
			
 
				 #elif NR_CPUS <= RCU_FANOUT_4
			
 
				 #  define RCU_NUM_LVLS	      4
			
 
				 #  define NUM_RCU_LVL_0	      1
			
 
				 #  define NUM_RCU_LVL_1	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_3)
			
 
				 #  define NUM_RCU_LVL_2	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
			
 
				 #  define NUM_RCU_LVL_3	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
			
 
				-#  define NUM_RCU_LVL_4	      (NR_CPUS)
			
 
				+#  define NUM_RCU_NODES	      (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3)
			
 
				+#  define NUM_RCU_LVL_INIT    { NUM_RCU_LVL_0, NUM_RCU_LVL_1, NUM_RCU_LVL_2, NUM_RCU_LVL_3 }
			
 
				+#  define RCU_NODE_NAME_INIT  { "rcu_node_0", "rcu_node_1", "rcu_node_2", "rcu_node_3" }
			
 
				+#  define RCU_FQS_NAME_INIT   { "rcu_node_fqs_0", "rcu_node_fqs_1", "rcu_node_fqs_2", "rcu_node_fqs_3" }
			
 
				+#  define RCU_EXP_NAME_INIT   { "rcu_node_exp_0", "rcu_node_exp_1", "rcu_node_exp_2", "rcu_node_exp_3" }
			
 
				+#  define RCU_EXP_SCHED_NAME_INIT \
			
 
				+			      { "rcu_node_exp_sched_0", "rcu_node_exp_sched_1", "rcu_node_exp_sched_2", "rcu_node_exp_sched_3" }
			
 
				 #else
			
 
				 # error "CONFIG_RCU_FANOUT insufficient for NR_CPUS"
			
 
				 #endif /* #if (NR_CPUS) <= RCU_FANOUT_1 */
			
 
				 
			
 
				-#define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3 + NUM_RCU_LVL_4)
			
 
				-#define NUM_RCU_NODES (RCU_SUM - NR_CPUS)
			
 
				-
			
 
				 extern int rcu_num_lvls;
			
 
				 extern int rcu_num_nodes;
			
 
				 
			
@@ -236,6 +250,8 @@ struct rcu_node {
 
				 	int need_future_gp[2];
			
 
				 				/* Counts of upcoming no-CB GP requests. */
			
 
				 	raw_spinlock_t fqslock ____cacheline_internodealigned_in_smp;
			
 
				+
			
 
				+	struct mutex exp_funnel_mutex ____cacheline_internodealigned_in_smp;
			
 
				 } ____cacheline_internodealigned_in_smp;
			
 
				 
			
 
				 /*
			
@@ -287,12 +303,13 @@ struct rcu_data {
 
				 	bool		gpwrap;		/* Possible gpnum/completed wrap. */
			
 
				 	struct rcu_node *mynode;	/* This CPU's leaf of hierarchy */
			
 
				 	unsigned long grpmask;		/* Mask to apply to leaf qsmask. */
			
 
				-#ifdef CONFIG_RCU_CPU_STALL_INFO
			
 
				 	unsigned long	ticks_this_gp;	/* The number of scheduling-clock */
			
 
				 					/*  ticks this CPU has handled */
			
 
				 					/*  during and after the last grace */
			
 
				 					/* period it is aware of. */
			
 
				-#endif /* #ifdef CONFIG_RCU_CPU_STALL_INFO */
			
 
				+	struct cpu_stop_work exp_stop_work;
			
 
				+					/* Expedited grace-period control */
			
 
				+					/*  for CPU stopping. */
			
 
				 
			
 
				 	/* 2) batch handling */
			
 
				 	/*
			
@@ -355,11 +372,13 @@ struct rcu_data {
 
				 	unsigned long n_rp_nocb_defer_wakeup;
			
 
				 	unsigned long n_rp_need_nothing;
			
 
				 
			
 
				-	/* 6) _rcu_barrier() and OOM callbacks. */
			
 
				+	/* 6) _rcu_barrier(), OOM callbacks, and expediting. */
			
 
				 	struct rcu_head barrier_head;
			
 
				 #ifdef CONFIG_RCU_FAST_NO_HZ
			
 
				 	struct rcu_head oom_head;
			
 
				 #endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
			
 
				+	struct mutex exp_funnel_mutex;
			
 
				+	bool exp_done;			/* Expedited QS for this CPU? */
			
 
				 
			
 
				 	/* 7) Callback offloading. */
			
 
				 #ifdef CONFIG_RCU_NOCB_CPU
			
@@ -387,9 +406,7 @@ struct rcu_data {
 
				 #endif /* #ifdef CONFIG_RCU_NOCB_CPU */
			
 
				 
			
 
				 	/* 8) RCU CPU stall data. */
			
 
				-#ifdef CONFIG_RCU_CPU_STALL_INFO
			
 
				 	unsigned int softirq_snap;	/* Snapshot of softirq activity. */
			
 
				-#endif /* #ifdef CONFIG_RCU_CPU_STALL_INFO */
			
 
				 
			
 
				 	int cpu;
			
 
				 	struct rcu_state *rsp;
			
@@ -442,9 +459,9 @@ do {									\
 
				  */
			
 
				 struct rcu_state {
			
 
				 	struct rcu_node node[NUM_RCU_NODES];	/* Hierarchy. */
			
 
				-	struct rcu_node *level[RCU_NUM_LVLS];	/* Hierarchy levels. */
			
 
				-	u32 levelcnt[MAX_RCU_LVLS + 1];		/* # nodes in each level. */
			
 
				-	u8 levelspread[RCU_NUM_LVLS];		/* kids/node in each level. */
			
 
				+	struct rcu_node *level[RCU_NUM_LVLS + 1];
			
 
				+						/* Hierarchy levels (+1 to */
			
 
				+						/*  shut bogus gcc warning) */
			
 
				 	u8 flavor_mask;				/* bit in flavor mask. */
			
 
				 	struct rcu_data __percpu *rda;		/* pointer of percu rcu_data. */
			
 
				 	void (*call)(struct rcu_head *head,	/* call_rcu() flavor. */
			
@@ -479,21 +496,18 @@ struct rcu_state {
 
				 	struct mutex barrier_mutex;		/* Guards barrier fields. */
			
 
				 	atomic_t barrier_cpu_count;		/* # CPUs waiting on. */
			
 
				 	struct completion barrier_completion;	/* Wake at barrier end. */
			
 
				-	unsigned long n_barrier_done;		/* ++ at start and end of */
			
 
				+	unsigned long barrier_sequence;		/* ++ at start and end of */
			
 
				 						/*  _rcu_barrier(). */
			
 
				 	/* End of fields guarded by barrier_mutex. */
			
 
				 
			
 
				-	atomic_long_t expedited_start;		/* Starting ticket. */
			
 
				-	atomic_long_t expedited_done;		/* Done ticket. */
			
 
				-	atomic_long_t expedited_wrap;		/* # near-wrap incidents. */
			
 
				-	atomic_long_t expedited_tryfail;	/* # acquisition failures. */
			
 
				+	unsigned long expedited_sequence;	/* Take a ticket. */
			
 
				+	atomic_long_t expedited_workdone0;	/* # done by others #0. */
			
 
				 	atomic_long_t expedited_workdone1;	/* # done by others #1. */
			
 
				 	atomic_long_t expedited_workdone2;	/* # done by others #2. */
			
 
				+	atomic_long_t expedited_workdone3;	/* # done by others #3. */
			
 
				 	atomic_long_t expedited_normal;		/* # fallbacks to normal. */
			
 
				-	atomic_long_t expedited_stoppedcpus;	/* # successful stop_cpus. */
			
 
				-	atomic_long_t expedited_done_tries;	/* # tries to update _done. */
			
 
				-	atomic_long_t expedited_done_lost;	/* # times beaten to _done. */
			
 
				-	atomic_long_t expedited_done_exit;	/* # times exited _done loop. */
			
 
				+	atomic_t expedited_need_qs;		/* # CPUs left to check in. */
			
 
				+	wait_queue_head_t expedited_wq;		/* Wait for check-ins. */
			
 
				 
			
 
				 	unsigned long jiffies_force_qs;		/* Time at which to invoke */
			
 
				 						/*  force_quiescent_state(). */
			
@@ -527,7 +541,11 @@ struct rcu_state {
 
				 /* Values for rcu_state structure's gp_flags field. */
			
 
				 #define RCU_GP_WAIT_INIT 0	/* Initial state. */
			
 
				 #define RCU_GP_WAIT_GPS  1	/* Wait for grace-period start. */
			
 
				-#define RCU_GP_WAIT_FQS  2	/* Wait for force-quiescent-state time. */
			
 
				+#define RCU_GP_DONE_GPS  2	/* Wait done for grace-period start. */
			
 
				+#define RCU_GP_WAIT_FQS  3	/* Wait for force-quiescent-state time. */
			
 
				+#define RCU_GP_DOING_FQS 4	/* Wait done for force-quiescent-state time. */
			
 
				+#define RCU_GP_CLEANUP   5	/* Grace-period cleanup started. */
			
 
				+#define RCU_GP_CLEANED   6	/* Grace-period cleanup complete. */
			
 
				 
			
 
				 extern struct list_head rcu_struct_flavors;
			
 
				 
			
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -82,10 +82,8 @@ static void __init rcu_bootup_announce_oddness(void)
 
				 		pr_info("\tRCU lockdep checking is enabled.\n");
			
 
				 	if (IS_ENABLED(CONFIG_RCU_TORTURE_TEST_RUNNABLE))
			
 
				 		pr_info("\tRCU torture testing starts during boot.\n");
			
 
				-	if (IS_ENABLED(CONFIG_RCU_CPU_STALL_INFO))
			
 
				-		pr_info("\tAdditional per-CPU info printed with stalls.\n");
			
 
				-	if (NUM_RCU_LVL_4 != 0)
			
 
				-		pr_info("\tFour-level hierarchy is enabled.\n");
			
 
				+	if (RCU_NUM_LVLS >= 4)
			
 
				+		pr_info("\tFour(or more)-level hierarchy is enabled.\n");
			
 
				 	if (RCU_FANOUT_LEAF != 16)
			
 
				 		pr_info("\tBuild-time adjustment of leaf fanout to %d.\n",
			
 
				 			RCU_FANOUT_LEAF);
			
@@ -418,8 +416,6 @@ static void rcu_print_detail_task_stall(struct rcu_state *rsp)
 
				 		rcu_print_detail_task_stall_rnp(rnp);
			
 
				 }
			
 
				 
			
 
				-#ifdef CONFIG_RCU_CPU_STALL_INFO
			
 
				-
			
 
				 static void rcu_print_task_stall_begin(struct rcu_node *rnp)
			
 
				 {
			
 
				 	pr_err("\tTasks blocked on level-%d rcu_node (CPUs %d-%d):",
			
@@ -431,18 +427,6 @@ static void rcu_print_task_stall_end(void)
 
				 	pr_cont("\n");
			
 
				 }
			
 
				 
			
 
				-#else /* #ifdef CONFIG_RCU_CPU_STALL_INFO */
			
 
				-
			
 
				-static void rcu_print_task_stall_begin(struct rcu_node *rnp)
			
 
				-{
			
 
				-}
			
 
				-
			
 
				-static void rcu_print_task_stall_end(void)
			
 
				-{
			
 
				-}
			
 
				-
			
 
				-#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_INFO */
			
 
				-
			
 
				 /*
			
 
				  * Scan the current list of tasks blocked within RCU read-side critical
			
 
				  * sections, printing out the tid of each.
			
@@ -552,8 +536,6 @@ void synchronize_rcu(void)
 
				 EXPORT_SYMBOL_GPL(synchronize_rcu);
			
 
				 
			
 
				 static DECLARE_WAIT_QUEUE_HEAD(sync_rcu_preempt_exp_wq);
			
 
				-static unsigned long sync_rcu_preempt_exp_count;
			
 
				-static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex);
			
 
				 
			
 
				 /*
			
 
				  * Return non-zero if there are any tasks in RCU read-side critical
			
@@ -573,7 +555,7 @@ static int rcu_preempted_readers_exp(struct rcu_node *rnp)
 
				  * for the current expedited grace period.  Works only for preemptible
			
 
				  * RCU -- other RCU implementation use other means.
			
 
				  *
			
 
				- * Caller must hold sync_rcu_preempt_exp_mutex.
			
 
				+ * Caller must hold the root rcu_node's exp_funnel_mutex.
			
 
				  */
			
 
				 static int sync_rcu_preempt_exp_done(struct rcu_node *rnp)
			
 
				 {
			
@@ -589,7 +571,7 @@ static int sync_rcu_preempt_exp_done(struct rcu_node *rnp)
 
				  * recursively up the tree.  (Calm down, calm down, we do the recursion
			
 
				  * iteratively!)
			
 
				  *
			
 
				- * Caller must hold sync_rcu_preempt_exp_mutex.
			
 
				+ * Caller must hold the root rcu_node's exp_funnel_mutex.
			
 
				  */
			
 
				 static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
			
 
				 			       bool wake)
			
@@ -628,7 +610,7 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
 
				  * set the ->expmask bits on the leaf rcu_node structures to tell phase 2
			
 
				  * that work is needed here.
			
 
				  *
			
 
				- * Caller must hold sync_rcu_preempt_exp_mutex.
			
 
				+ * Caller must hold the root rcu_node's exp_funnel_mutex.
			
 
				  */
			
 
				 static void
			
 
				 sync_rcu_preempt_exp_init1(struct rcu_state *rsp, struct rcu_node *rnp)
			
@@ -671,7 +653,7 @@ sync_rcu_preempt_exp_init1(struct rcu_state *rsp, struct rcu_node *rnp)
 
				  * invoke rcu_report_exp_rnp() to clear out the upper-level ->expmask bits,
			
 
				  * enabling rcu_read_unlock_special() to do the bit-clearing.
			
 
				  *
			
 
				- * Caller must hold sync_rcu_preempt_exp_mutex.
			
 
				+ * Caller must hold the root rcu_node's exp_funnel_mutex.
			
 
				  */
			
 
				 static void
			
 
				 sync_rcu_preempt_exp_init2(struct rcu_state *rsp, struct rcu_node *rnp)
			
@@ -719,51 +701,17 @@ sync_rcu_preempt_exp_init2(struct rcu_state *rsp, struct rcu_node *rnp)
 
				 void synchronize_rcu_expedited(void)
			
 
				 {
			
 
				 	struct rcu_node *rnp;
			
 
				+	struct rcu_node *rnp_unlock;
			
 
				 	struct rcu_state *rsp = rcu_state_p;
			
 
				-	unsigned long snap;
			
 
				-	int trycount = 0;
			
 
				+	unsigned long s;
			
 
				 
			
 
				-	smp_mb(); /* Caller's modifications seen first by other CPUs. */
			
 
				-	snap = READ_ONCE(sync_rcu_preempt_exp_count) + 1;
			
 
				-	smp_mb(); /* Above access cannot bleed into critical section. */
			
 
				+	s = rcu_exp_gp_seq_snap(rsp);
			
 
				 
			
 
				-	/*
			
 
				-	 * Block CPU-hotplug operations.  This means that any CPU-hotplug
			
 
				-	 * operation that finds an rcu_node structure with tasks in the
			
 
				-	 * process of being boosted will know that all tasks blocking
			
 
				-	 * this expedited grace period will already be in the process of
			
 
				-	 * being boosted.  This simplifies the process of moving tasks
			
 
				-	 * from leaf to root rcu_node structures.
			
 
				-	 */
			
 
				-	if (!try_get_online_cpus()) {
			
 
				-		/* CPU-hotplug operation in flight, fall back to normal GP. */
			
 
				-		wait_rcu_gp(call_rcu);
			
 
				-		return;
			
 
				-	}
			
 
				+	rnp_unlock = exp_funnel_lock(rsp, s);
			
 
				+	if (rnp_unlock == NULL)
			
 
				+		return;  /* Someone else did our work for us. */
			
 
				 
			
 
				-	/*
			
 
				-	 * Acquire lock, falling back to synchronize_rcu() if too many
			
 
				-	 * lock-acquisition failures.  Of course, if someone does the
			
 
				-	 * expedited grace period for us, just leave.
			
 
				-	 */
			
 
				-	while (!mutex_trylock(&sync_rcu_preempt_exp_mutex)) {
			
 
				-		if (ULONG_CMP_LT(snap,
			
 
				-		    READ_ONCE(sync_rcu_preempt_exp_count))) {
			
 
				-			put_online_cpus();
			
 
				-			goto mb_ret; /* Others did our work for us. */
			
 
				-		}
			
 
				-		if (trycount++ < 10) {
			
 
				-			udelay(trycount * num_online_cpus());
			
 
				-		} else {
			
 
				-			put_online_cpus();
			
 
				-			wait_rcu_gp(call_rcu);
			
 
				-			return;
			
 
				-		}
			
 
				-	}
			
 
				-	if (ULONG_CMP_LT(snap, READ_ONCE(sync_rcu_preempt_exp_count))) {
			
 
				-		put_online_cpus();
			
 
				-		goto unlock_mb_ret; /* Others did our work for us. */
			
 
				-	}
			
 
				+	rcu_exp_gp_seq_start(rsp);
			
 
				 
			
 
				 	/* force all RCU readers onto ->blkd_tasks lists. */
			
 
				 	synchronize_sched_expedited();
			
@@ -779,20 +727,14 @@ void synchronize_rcu_expedited(void)
 
				 	rcu_for_each_leaf_node(rsp, rnp)
			
 
				 		sync_rcu_preempt_exp_init2(rsp, rnp);
			
 
				 
			
 
				-	put_online_cpus();
			
 
				-
			
 
				 	/* Wait for snapshotted ->blkd_tasks lists to drain. */
			
 
				 	rnp = rcu_get_root(rsp);
			
 
				 	wait_event(sync_rcu_preempt_exp_wq,
			
 
				 		   sync_rcu_preempt_exp_done(rnp));
			
 
				 
			
 
				 	/* Clean up and exit. */
			
 
				-	smp_mb(); /* ensure expedited GP seen before counter increment. */
			
 
				-	WRITE_ONCE(sync_rcu_preempt_exp_count, sync_rcu_preempt_exp_count + 1);
			
 
				-unlock_mb_ret:
			
 
				-	mutex_unlock(&sync_rcu_preempt_exp_mutex);
			
 
				-mb_ret:
			
 
				-	smp_mb(); /* ensure subsequent action seen after grace period. */
			
 
				+	rcu_exp_gp_seq_end(rsp);
			
 
				+	mutex_unlock(&rnp_unlock->exp_funnel_mutex);
			
 
				 }
			
 
				 EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
			
 
				 
			
@@ -1703,8 +1645,6 @@ early_initcall(rcu_register_oom_notifier);
 
				 
			
 
				 #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */
			
 
				 
			
 
				-#ifdef CONFIG_RCU_CPU_STALL_INFO
			
 
				-
			
 
				 #ifdef CONFIG_RCU_FAST_NO_HZ
			
 
				 
			
 
				 static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
			
@@ -1793,33 +1733,6 @@ static void increment_cpu_stall_ticks(void)
 
				 		raw_cpu_inc(rsp->rda->ticks_this_gp);
			
 
				 }
			
 
				 
			
 
				-#else /* #ifdef CONFIG_RCU_CPU_STALL_INFO */
			
 
				-
			
 
				-static void print_cpu_stall_info_begin(void)
			
 
				-{
			
 
				-	pr_cont(" {");
			
 
				-}
			
 
				-
			
 
				-static void print_cpu_stall_info(struct rcu_state *rsp, int cpu)
			
 
				-{
			
 
				-	pr_cont(" %d", cpu);
			
 
				-}
			
 
				-
			
 
				-static void print_cpu_stall_info_end(void)
			
 
				-{
			
 
				-	pr_cont("} ");
			
 
				-}
			
 
				-
			
 
				-static void zero_cpu_stall_ticks(struct rcu_data *rdp)
			
 
				-{
			
 
				-}
			
 
				-
			
 
				-static void increment_cpu_stall_ticks(void)
			
 
				-{
			
 
				-}
			
 
				-
			
 
				-#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_INFO */
			
 
				-
			
 
				 #ifdef CONFIG_RCU_NOCB_CPU
			
 
				 
			
 
				 /*
			
--- a/kernel/rcu/tree_trace.c
+++ b/kernel/rcu/tree_trace.c
@@ -81,9 +81,9 @@ static void r_stop(struct seq_file *m, void *v)
 
				 static int show_rcubarrier(struct seq_file *m, void *v)
			
 
				 {
			
 
				 	struct rcu_state *rsp = (struct rcu_state *)m->private;
			
 
				-	seq_printf(m, "bcc: %d nbd: %lu\n",
			
 
				+	seq_printf(m, "bcc: %d bseq: %lu\n",
			
 
				 		   atomic_read(&rsp->barrier_cpu_count),
			
 
				-		   rsp->n_barrier_done);
			
 
				+		   rsp->barrier_sequence);
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
@@ -185,18 +185,15 @@ static int show_rcuexp(struct seq_file *m, void *v)
 
				 {
			
 
				 	struct rcu_state *rsp = (struct rcu_state *)m->private;
			
 
				 
			
 
				-	seq_printf(m, "s=%lu d=%lu w=%lu tf=%lu wd1=%lu wd2=%lu n=%lu sc=%lu dt=%lu dl=%lu dx=%lu\n",
			
 
				-		   atomic_long_read(&rsp->expedited_start),
			
 
				-		   atomic_long_read(&rsp->expedited_done),
			
 
				-		   atomic_long_read(&rsp->expedited_wrap),
			
 
				-		   atomic_long_read(&rsp->expedited_tryfail),
			
 
				+	seq_printf(m, "s=%lu wd0=%lu wd1=%lu wd2=%lu wd3=%lu n=%lu enq=%d sc=%lu\n",
			
 
				+		   rsp->expedited_sequence,
			
 
				+		   atomic_long_read(&rsp->expedited_workdone0),
			
 
				 		   atomic_long_read(&rsp->expedited_workdone1),
			
 
				 		   atomic_long_read(&rsp->expedited_workdone2),
			
 
				+		   atomic_long_read(&rsp->expedited_workdone3),
			
 
				 		   atomic_long_read(&rsp->expedited_normal),
			
 
				-		   atomic_long_read(&rsp->expedited_stoppedcpus),
			
 
				-		   atomic_long_read(&rsp->expedited_done_tries),
			
 
				-		   atomic_long_read(&rsp->expedited_done_lost),
			
 
				-		   atomic_long_read(&rsp->expedited_done_exit));
			
 
				+		   atomic_read(&rsp->expedited_need_qs),
			
 
				+		   rsp->expedited_sequence / 2);
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1353,20 +1353,6 @@ config RCU_CPU_STALL_TIMEOUT
 
				 	  RCU grace period persists, additional CPU stall warnings are
			
 
				 	  printed at more widely spaced intervals.
			
 
				 
			
 
				-config RCU_CPU_STALL_INFO
			
 
				-	bool "Print additional diagnostics on RCU CPU stall"
			
 
				-	depends on (TREE_RCU || PREEMPT_RCU) && DEBUG_KERNEL
			
 
				-	default y
			
 
				-	help
			
 
				-	  For each stalled CPU that is aware of the current RCU grace
			
 
				-	  period, print out additional per-CPU diagnostic information
			
 
				-	  regarding scheduling-clock ticks, idle state, and,
			
 
				-	  for RCU_FAST_NO_HZ kernels, idle-entry state.
			
 
				-
			
 
				-	  Say N if you are unsure.
			
 
				-
			
 
				-	  Say Y if you want to enable such diagnostics.
			
 
				-
			
 
				 config RCU_TRACE
			
 
				 	bool "Enable tracing for RCU"
			
 
				 	depends on DEBUG_KERNEL
			
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE01
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE01
@@ -13,7 +13,6 @@ CONFIG_MAXSMP=y
 
				 CONFIG_RCU_NOCB_CPU=y
			
 
				 CONFIG_RCU_NOCB_CPU_ZERO=y
			
 
				 CONFIG_DEBUG_LOCK_ALLOC=n
			
 
				-CONFIG_RCU_CPU_STALL_INFO=n
			
 
				 CONFIG_RCU_BOOST=n
			
 
				 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
			
 
				 CONFIG_RCU_EXPERT=y
			
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE02
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE02
@@ -17,7 +17,6 @@ CONFIG_RCU_FANOUT_LEAF=3
 
				 CONFIG_RCU_NOCB_CPU=n
			
 
				 CONFIG_DEBUG_LOCK_ALLOC=y
			
 
				 CONFIG_PROVE_LOCKING=n
			
 
				-CONFIG_RCU_CPU_STALL_INFO=n
			
 
				 CONFIG_RCU_BOOST=n
			
 
				 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
			
 
				 CONFIG_RCU_EXPERT=y
			
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE02-T
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE02-T
@@ -17,6 +17,5 @@ CONFIG_RCU_FANOUT_LEAF=3
 
				 CONFIG_RCU_NOCB_CPU=n
			
 
				 CONFIG_DEBUG_LOCK_ALLOC=y
			
 
				 CONFIG_PROVE_LOCKING=n
			
 
				-CONFIG_RCU_CPU_STALL_INFO=n
			
 
				 CONFIG_RCU_BOOST=n
			
 
				 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
			
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE03
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE03
@@ -13,7 +13,6 @@ CONFIG_RCU_FANOUT=2
 
				 CONFIG_RCU_FANOUT_LEAF=2
			
 
				 CONFIG_RCU_NOCB_CPU=n
			
 
				 CONFIG_DEBUG_LOCK_ALLOC=n
			
 
				-CONFIG_RCU_CPU_STALL_INFO=n
			
 
				 CONFIG_RCU_BOOST=y
			
 
				 CONFIG_RCU_KTHREAD_PRIO=2
			
 
				 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
			
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE04
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE04
@@ -17,6 +17,5 @@ CONFIG_RCU_FANOUT=4
 
				 CONFIG_RCU_FANOUT_LEAF=4
			
 
				 CONFIG_RCU_NOCB_CPU=n
			
 
				 CONFIG_DEBUG_LOCK_ALLOC=n
			
 
				-CONFIG_RCU_CPU_STALL_INFO=n
			
 
				 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
			
 
				 CONFIG_RCU_EXPERT=y
			
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE05
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE05
@@ -17,6 +17,5 @@ CONFIG_RCU_NOCB_CPU_NONE=y
 
				 CONFIG_DEBUG_LOCK_ALLOC=y
			
 
				 CONFIG_PROVE_LOCKING=y
			
 
				 #CHECK#CONFIG_PROVE_RCU=y
			
 
				-CONFIG_RCU_CPU_STALL_INFO=n
			
 
				 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
			
 
				 CONFIG_RCU_EXPERT=y
			
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE06
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE06
@@ -18,6 +18,5 @@ CONFIG_RCU_NOCB_CPU=n
 
				 CONFIG_DEBUG_LOCK_ALLOC=y
			
 
				 CONFIG_PROVE_LOCKING=y
			
 
				 #CHECK#CONFIG_PROVE_RCU=y
			
 
				-CONFIG_RCU_CPU_STALL_INFO=n
			
 
				 CONFIG_DEBUG_OBJECTS_RCU_HEAD=y
			
 
				 CONFIG_RCU_EXPERT=y
			
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE07
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE07
@@ -17,6 +17,5 @@ CONFIG_RCU_FANOUT=2
 
				 CONFIG_RCU_FANOUT_LEAF=2
			
 
				 CONFIG_RCU_NOCB_CPU=n
			
 
				 CONFIG_DEBUG_LOCK_ALLOC=n
			
 
				-CONFIG_RCU_CPU_STALL_INFO=n
			
 
				 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
			
 
				 CONFIG_RCU_EXPERT=y
			
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE08
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE08
@@ -19,7 +19,6 @@ CONFIG_RCU_NOCB_CPU_ALL=y
 
				 CONFIG_DEBUG_LOCK_ALLOC=n
			
 
				 CONFIG_PROVE_LOCKING=y
			
 
				 #CHECK#CONFIG_PROVE_RCU=y
			
 
				-CONFIG_RCU_CPU_STALL_INFO=n
			
 
				 CONFIG_RCU_BOOST=n
			
 
				 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
			
 
				 CONFIG_RCU_EXPERT=y
			
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE08-T
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE08-T
@@ -17,6 +17,5 @@ CONFIG_RCU_FANOUT_LEAF=2
 
				 CONFIG_RCU_NOCB_CPU=y
			
 
				 CONFIG_RCU_NOCB_CPU_ALL=y
			
 
				 CONFIG_DEBUG_LOCK_ALLOC=n
			
 
				-CONFIG_RCU_CPU_STALL_INFO=n
			
 
				 CONFIG_RCU_BOOST=n
			
 
				 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
			
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE09
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE09
@@ -13,7 +13,6 @@ CONFIG_SUSPEND=n
 
				 CONFIG_HIBERNATION=n
			
 
				 CONFIG_RCU_NOCB_CPU=n
			
 
				 CONFIG_DEBUG_LOCK_ALLOC=n
			
 
				-CONFIG_RCU_CPU_STALL_INFO=n
			
 
				 CONFIG_RCU_BOOST=n
			
 
				 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
			
 
				 #CHECK#CONFIG_RCU_EXPERT=n
			
--- a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt
+++ b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt
@@ -16,7 +16,6 @@ CONFIG_PROVE_LOCKING -- Do several, covering CONFIG_DEBUG_LOCK_ALLOC=y and not.
 
				 CONFIG_PROVE_RCU -- Hardwired to CONFIG_PROVE_LOCKING.
			
 
				 CONFIG_RCU_BOOST -- one of PREEMPT_RCU.
			
 
				 CONFIG_RCU_KTHREAD_PRIO -- set to 2 for _BOOST testing.
			
 
				-CONFIG_RCU_CPU_STALL_INFO -- Now default, avoid at least twice.
			
 
				 CONFIG_RCU_FANOUT -- Cover hierarchy, but overlap with others.
			
 
				 CONFIG_RCU_FANOUT_LEAF -- Do one non-default.
			
 
				 CONFIG_RCU_FAST_NO_HZ -- Do one, but not with CONFIG_RCU_NOCB_CPU_ALL.