@@ -156,6 +156,10 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
 static void invoke_rcu_core(void);
 static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp);
 
+/* rcuc/rcub kthread realtime priority */
+static int kthread_prio = CONFIG_RCU_KTHREAD_PRIO;
+module_param(kthread_prio, int, 0644);
+
 /*
  * Track the rcutorture test sequence number and the update version
  * number within a given test. The rcutorture_testseq is incremented
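
The new kthread_prio module parameter sets the SCHED_FIFO priority used for RCU's rcuc/rcub kthreads (and, in the final hunk below, the grace-period kthreads). Since the parameter lives in the built-in RCU tree code, it would normally be set on the kernel command line with module-parameter syntax; a sketch, assuming the standard "rcutree." prefix for these parameters:

	rcutree.kthread_prio=50

The 0644 permissions expose the value under /sys/module/rcutree/parameters/, but rcu_spawn_gp_kthread() below reads it only once, at boot.
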
@@ -215,6 +219,9 @@ static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
 #endif /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
 };
 
+DEFINE_PER_CPU_SHARED_ALIGNED(unsigned long, rcu_qs_ctr);
+EXPORT_PER_CPU_SYMBOL_GPL(rcu_qs_ctr);
+
 /*
  * Let the RCU core know that this CPU has gone through the scheduler,
  * which is a quiescent state. This is called when the need for a
@@ -284,6 +291,22 @@ void rcu_note_context_switch(void)
 }
 EXPORT_SYMBOL_GPL(rcu_note_context_switch);
 
+/*
+ * Register a quiescent state for all RCU flavors. If there is an
+ * emergency, invoke rcu_momentary_dyntick_idle() to do a heavy-weight
+ * dyntick-idle quiescent state visible to other CPUs (but only for those
+ * RCU flavors in desperate need of a quiescent state, which will normally
+ * be none of them). Either way, do a lightweight quiescent state for
+ * all RCU flavors.
+ */
+void rcu_all_qs(void)
+{
+	if (unlikely(raw_cpu_read(rcu_sched_qs_mask)))
+		rcu_momentary_dyntick_idle();
+	this_cpu_inc(rcu_qs_ctr);
+}
+EXPORT_SYMBOL_GPL(rcu_all_qs);
+
 static long blimit = 10;	/* Maximum callbacks per rcu_do_batch. */
 static long qhimark = 10000;	/* If this many pending, ignore blimit. */
 static long qlowmark = 100;	/* Once only this many pending, use blimit. */
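
rcu_all_qs() is the new lightweight quiescent-state hook: the common case is a single per-CPU counter increment, with the heavy-weight rcu_momentary_dyntick_idle() reserved for flavors that have explicitly requested help via rcu_sched_qs_mask. The intended caller is cond_resched_rcu_qs(); a minimal sketch of that wiring (the rcupdate.h side is not part of this hunk, so treat the exact definition as an assumption):

	#define cond_resched_rcu_qs() \
	do { \
		if (!cond_resched()) \
			rcu_all_qs(); \
	} while (0)
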
@@ -315,18 +338,54 @@ static void force_quiescent_state(struct rcu_state *rsp);
 static int rcu_pending(void);
 
 /*
- * Return the number of RCU-sched batches processed thus far for debug & stats.
+ * Return the number of RCU batches started thus far for debug & stats.
+ */
+unsigned long rcu_batches_started(void)
+{
+	return rcu_state_p->gpnum;
+}
+EXPORT_SYMBOL_GPL(rcu_batches_started);
+
+/*
+ * Return the number of RCU-sched batches started thus far for debug & stats.
+ */
+unsigned long rcu_batches_started_sched(void)
+{
+	return rcu_sched_state.gpnum;
+}
+EXPORT_SYMBOL_GPL(rcu_batches_started_sched);
+
+/*
+ * Return the number of RCU BH batches started thus far for debug & stats.
+ */
+unsigned long rcu_batches_started_bh(void)
+{
+	return rcu_bh_state.gpnum;
+}
+EXPORT_SYMBOL_GPL(rcu_batches_started_bh);
+
+/*
+ * Return the number of RCU batches completed thus far for debug & stats.
+ */
+unsigned long rcu_batches_completed(void)
+{
+	return rcu_state_p->completed;
+}
+EXPORT_SYMBOL_GPL(rcu_batches_completed);
+
+/*
+ * Return the number of RCU-sched batches completed thus far for debug & stats.
  */
-long rcu_batches_completed_sched(void)
+unsigned long rcu_batches_completed_sched(void)
 {
 	return rcu_sched_state.completed;
 }
 EXPORT_SYMBOL_GPL(rcu_batches_completed_sched);
 
 /*
- * Return the number of RCU BH batches processed thus far for debug & stats.
+ * Return the number of RCU BH batches completed thus far for debug & stats.
  */
-long rcu_batches_completed_bh(void)
+unsigned long rcu_batches_completed_bh(void)
 {
 	return rcu_bh_state.completed;
 }
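
These accessors split the old "completed" interface into started/completed pairs and widen the return type to unsigned long so callers can do wrap-safe comparisons. A hypothetical debug caller (gp_snap and both helper names are illustrative, not part of this patch) might use them to detect grace-period progress:

	static unsigned long gp_snap;

	static void gp_snapshot(void)
	{
		gp_snap = rcu_batches_started();
	}

	static bool gp_completed_since_snapshot(void)
	{
		/* Wrap-safe: the GP that had most recently started at the
		 * snapshot has finished once ->completed catches up with it. */
		return ULONG_CMP_GE(rcu_batches_completed(), gp_snap);
	}
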
@@ -930,16 +989,13 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp,
 		trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti"));
 		return 1;
 	} else {
+		if (ULONG_CMP_LT(ACCESS_ONCE(rdp->gpnum) + ULONG_MAX / 4,
+				 rdp->mynode->gpnum))
+			ACCESS_ONCE(rdp->gpwrap) = true;
 		return 0;
 	}
 }
 
-/*
- * This function really isn't for public consumption, but RCU is special in
- * that context switches can allow the state machine to make progress.
- */
-extern void resched_cpu(int cpu);
-
 /*
  * Return true if the specified CPU has passed through a quiescent
  * state by virtue of being in or having passed through a dynticks
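
The new check flags rdp->gpwrap when this CPU's view of the grace-period number has fallen a quarter of the counter space behind its leaf rcu_node: the ->gpnum counters are free-running unsigned longs, so a CPU idle for a very long time could otherwise be fooled by wraparound. ULONG_CMP_LT() is the wrap-safe comparator from rcupdate.h:

	#define ULONG_CMP_LT(a, b)	(ULONG_MAX / 2 < (a) - (b))

With a = rdp->gpnum + ULONG_MAX / 4 and b = rdp->mynode->gpnum, the condition fires once the leaf's counter has run more than a quarter of the counter range ahead of this CPU's stale snapshot, well before a true wrap could make rdp->gpnum appear current again.
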
@@ -1043,6 +1099,22 @@ static void record_gp_stall_check_time(struct rcu_state *rsp)
 	j1 = rcu_jiffies_till_stall_check();
 	ACCESS_ONCE(rsp->jiffies_stall) = j + j1;
 	rsp->jiffies_resched = j + j1 / 2;
+	rsp->n_force_qs_gpstart = ACCESS_ONCE(rsp->n_force_qs);
+}
+
+/*
+ * Complain about starvation of grace-period kthread.
+ */
+static void rcu_check_gp_kthread_starvation(struct rcu_state *rsp)
+{
+	unsigned long gpa;
+	unsigned long j;
+
+	j = jiffies;
+	gpa = ACCESS_ONCE(rsp->gp_activity);
+	if (j - gpa > 2 * HZ)
+		pr_err("%s kthread starved for %ld jiffies!\n",
+		       rsp->name, j - gpa);
 }
 
 /*
@@ -1065,11 +1137,13 @@ static void rcu_dump_cpu_stacks(struct rcu_state *rsp)
 	}
 }
 
-static void print_other_cpu_stall(struct rcu_state *rsp)
+static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum)
 {
 	int cpu;
 	long delta;
 	unsigned long flags;
+	unsigned long gpa;
+	unsigned long j;
 	int ndetected = 0;
 	struct rcu_node *rnp = rcu_get_root(rsp);
 	long totqlen = 0;
@@ -1107,30 +1181,34 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
 		raw_spin_unlock_irqrestore(&rnp->lock, flags);
 	}
 
-	/*
-	 * Now rat on any tasks that got kicked up to the root rcu_node
-	 * due to CPU offlining.
-	 */
-	rnp = rcu_get_root(rsp);
-	raw_spin_lock_irqsave(&rnp->lock, flags);
-	ndetected += rcu_print_task_stall(rnp);
-	raw_spin_unlock_irqrestore(&rnp->lock, flags);
-
 	print_cpu_stall_info_end();
 	for_each_possible_cpu(cpu)
 		totqlen += per_cpu_ptr(rsp->rda, cpu)->qlen;
 	pr_cont("(detected by %d, t=%ld jiffies, g=%ld, c=%ld, q=%lu)\n",
 	       smp_processor_id(), (long)(jiffies - rsp->gp_start),
 	       (long)rsp->gpnum, (long)rsp->completed, totqlen);
-	if (ndetected == 0)
-		pr_err("INFO: Stall ended before state dump start\n");
-	else
+	if (ndetected) {
 		rcu_dump_cpu_stacks(rsp);
+	} else {
+		if (ACCESS_ONCE(rsp->gpnum) != gpnum ||
+		    ACCESS_ONCE(rsp->completed) == gpnum) {
+			pr_err("INFO: Stall ended before state dump start\n");
+		} else {
+			j = jiffies;
+			gpa = ACCESS_ONCE(rsp->gp_activity);
+			pr_err("All QSes seen, last %s kthread activity %ld (%ld-%ld), jiffies_till_next_fqs=%ld\n",
+			       rsp->name, j - gpa, j, gpa,
+			       jiffies_till_next_fqs);
+			/* In this case, the current CPU might be at fault. */
+			sched_show_task(current);
+		}
+	}
 
 	/* Complain about tasks blocking the grace period. */
-
 	rcu_print_detail_task_stall(rsp);
 
+	rcu_check_gp_kthread_starvation(rsp);
+
 	force_quiescent_state(rsp);  /* Kick them all. */
 }
 
@@ -1155,6 +1233,9 @@ static void print_cpu_stall(struct rcu_state *rsp)
 	pr_cont(" (t=%lu jiffies g=%ld c=%ld q=%lu)\n",
 		jiffies - rsp->gp_start,
 		(long)rsp->gpnum, (long)rsp->completed, totqlen);
+
+	rcu_check_gp_kthread_starvation(rsp);
+
 	rcu_dump_cpu_stacks(rsp);
 
 	raw_spin_lock_irqsave(&rnp->lock, flags);
@@ -1225,7 +1306,7 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
 		   ULONG_CMP_GE(j, js + RCU_STALL_RAT_DELAY)) {
 
 		/* They had a few time units to dump stack, so complain. */
-		print_other_cpu_stall(rsp);
+		print_other_cpu_stall(rsp, gpnum);
 	}
 }
@@ -1562,7 +1643,8 @@ static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp,
 	bool ret;
 
 	/* Handle the ends of any preceding grace periods first. */
-	if (rdp->completed == rnp->completed) {
+	if (rdp->completed == rnp->completed &&
+	    !unlikely(ACCESS_ONCE(rdp->gpwrap))) {
 
 		/* No grace period end, so just accelerate recent callbacks. */
 		ret = rcu_accelerate_cbs(rsp, rnp, rdp);
@@ -1577,7 +1659,7 @@ static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp,
 		trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuend"));
 	}
 
-	if (rdp->gpnum != rnp->gpnum) {
+	if (rdp->gpnum != rnp->gpnum || unlikely(ACCESS_ONCE(rdp->gpwrap))) {
 		/*
 		 * If the current grace period is waiting for this CPU,
 		 * set up to detect a quiescent state, otherwise don't
@@ -1586,8 +1668,10 @@ static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp,
 		rdp->gpnum = rnp->gpnum;
 		trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpustart"));
 		rdp->passed_quiesce = 0;
+		rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr);
 		rdp->qs_pending = !!(rnp->qsmask & rdp->grpmask);
 		zero_cpu_stall_ticks(rdp);
+		ACCESS_ONCE(rdp->gpwrap) = false;
 	}
 	return ret;
 }
@@ -1601,7 +1685,8 @@ static void note_gp_changes(struct rcu_state *rsp, struct rcu_data *rdp)
 	local_irq_save(flags);
 	rnp = rdp->mynode;
 	if ((rdp->gpnum == ACCESS_ONCE(rnp->gpnum) &&
-	     rdp->completed == ACCESS_ONCE(rnp->completed)) || /* w/out lock. */
+	     rdp->completed == ACCESS_ONCE(rnp->completed) &&
+	     !unlikely(ACCESS_ONCE(rdp->gpwrap))) || /* w/out lock. */
 	    !raw_spin_trylock(&rnp->lock)) { /* irqs already off, so later. */
 		local_irq_restore(flags);
 		return;
@@ -1621,6 +1706,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
 	struct rcu_data *rdp;
 	struct rcu_node *rnp = rcu_get_root(rsp);
 
+	ACCESS_ONCE(rsp->gp_activity) = jiffies;
 	rcu_bind_gp_kthread();
 	raw_spin_lock_irq(&rnp->lock);
 	smp_mb__after_unlock_lock();
@@ -1681,6 +1767,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
 					    rnp->grphi, rnp->qsmask);
 		raw_spin_unlock_irq(&rnp->lock);
 		cond_resched_rcu_qs();
+		ACCESS_ONCE(rsp->gp_activity) = jiffies;
 	}
 
 	mutex_unlock(&rsp->onoff_mutex);
@@ -1697,6 +1784,7 @@ static int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in)
 	unsigned long maxj;
 	struct rcu_node *rnp = rcu_get_root(rsp);
 
+	ACCESS_ONCE(rsp->gp_activity) = jiffies;
 	rsp->n_force_qs++;
 	if (fqs_state == RCU_SAVE_DYNTICK) {
 		/* Collect dyntick-idle snapshots. */
@@ -1735,6 +1823,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
 	struct rcu_data *rdp;
 	struct rcu_node *rnp = rcu_get_root(rsp);
 
+	ACCESS_ONCE(rsp->gp_activity) = jiffies;
 	raw_spin_lock_irq(&rnp->lock);
 	smp_mb__after_unlock_lock();
 	gp_duration = jiffies - rsp->gp_start;
@@ -1771,6 +1860,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
 		nocb += rcu_future_gp_cleanup(rsp, rnp);
 		raw_spin_unlock_irq(&rnp->lock);
 		cond_resched_rcu_qs();
+		ACCESS_ONCE(rsp->gp_activity) = jiffies;
 	}
 	rnp = rcu_get_root(rsp);
 	raw_spin_lock_irq(&rnp->lock);
@@ -1820,6 +1910,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
 			if (rcu_gp_init(rsp))
 				break;
 			cond_resched_rcu_qs();
+			ACCESS_ONCE(rsp->gp_activity) = jiffies;
 			WARN_ON(signal_pending(current));
 			trace_rcu_grace_period(rsp->name,
 					       ACCESS_ONCE(rsp->gpnum),
@@ -1863,9 +1954,11 @@ static int __noreturn rcu_gp_kthread(void *arg)
 						       ACCESS_ONCE(rsp->gpnum),
 						       TPS("fqsend"));
 				cond_resched_rcu_qs();
+				ACCESS_ONCE(rsp->gp_activity) = jiffies;
 			} else {
 				/* Deal with stray signal. */
 				cond_resched_rcu_qs();
+				ACCESS_ONCE(rsp->gp_activity) = jiffies;
 				WARN_ON(signal_pending(current));
 				trace_rcu_grace_period(rsp->name,
 						       ACCESS_ONCE(rsp->gpnum),
@@ -2042,8 +2135,10 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp)
 	rnp = rdp->mynode;
 	raw_spin_lock_irqsave(&rnp->lock, flags);
 	smp_mb__after_unlock_lock();
-	if (rdp->passed_quiesce == 0 || rdp->gpnum != rnp->gpnum ||
-	    rnp->completed == rnp->gpnum) {
+	if ((rdp->passed_quiesce == 0 &&
+	     rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_qs_ctr)) ||
+	    rdp->gpnum != rnp->gpnum || rnp->completed == rnp->gpnum ||
+	    rdp->gpwrap) {
 
 		/*
 		 * The grace period in which this quiescent state was
@@ -2052,6 +2147,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp)
 		 * within the current grace period.
 		 */
 		rdp->passed_quiesce = 0;	/* need qs for new gp. */
+		rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr);
 		raw_spin_unlock_irqrestore(&rnp->lock, flags);
 		return;
 	}
@@ -2096,7 +2192,8 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp)
 	 * Was there a quiescent state since the beginning of the grace
 	 * period? If no, then exit and wait for the next call.
 	 */
-	if (!rdp->passed_quiesce)
+	if (!rdp->passed_quiesce &&
+	    rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_qs_ctr))
 		return;
 
 	/*
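
The passed_quiesce checks above now accept a second source of quiescent states: a change in the per-CPU rcu_qs_ctr relative to the snapshot taken at grace-period start. The idiom in miniature (a standalone illustration, not kernel code):

	static unsigned long qs_ctr;		/* bumped at each quiescent state */
	static unsigned long qs_ctr_snap;	/* snapshot taken at GP start */

	static void note_qs(void)		{ qs_ctr++; }
	static void snap_qs(void)		{ qs_ctr_snap = qs_ctr; }
	static bool qs_since_snap(void)		{ return qs_ctr != qs_ctr_snap; }

Any increment between snap_qs() and qs_since_snap() is evidence of at least one quiescent state, without the checking side ever writing to the counter.
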
@@ -2226,6 +2323,46 @@ static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
 			       TPS("cpuofl"));
 }
 
+/*
+ * All CPUs for the specified rcu_node structure have gone offline,
+ * and all tasks that were preempted within an RCU read-side critical
+ * section while running on one of those CPUs have since exited their RCU
+ * read-side critical section. Some other CPU is reporting this fact with
+ * the specified rcu_node structure's ->lock held and interrupts disabled.
+ * This function therefore goes up the tree of rcu_node structures,
+ * clearing the corresponding bits in the ->qsmaskinit fields. Note that
+ * the leaf rcu_node structure's ->qsmaskinit field has already been
+ * updated.
+ *
+ * This function does check that the specified rcu_node structure has
+ * all CPUs offline and no blocked tasks, so it is OK to invoke it
+ * prematurely. That said, invoking it after the fact will cost you
+ * a needless lock acquisition. So once it has done its work, don't
+ * invoke it again.
+ */
+static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf)
+{
+	long mask;
+	struct rcu_node *rnp = rnp_leaf;
+
+	if (rnp->qsmaskinit || rcu_preempt_has_tasks(rnp))
+		return;
+	for (;;) {
+		mask = rnp->grpmask;
+		rnp = rnp->parent;
+		if (!rnp)
+			break;
+		raw_spin_lock(&rnp->lock); /* irqs already disabled. */
+		smp_mb__after_unlock_lock(); /* GP memory ordering. */
+		rnp->qsmaskinit &= ~mask;
+		if (rnp->qsmaskinit) {
+			raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
+			return;
+		}
+		raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
+	}
+}
+
 /*
  * The CPU has been completely removed, and some other CPU is reporting
  * this fact from process context. Do the remainder of the cleanup,
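
rcu_cleanup_dead_rnp() factors the old inline mask-propagation loop out of rcu_cleanup_dead_cpu() (see the following hunk). The walk in isolation, with generic names rather than the kernel's rcu_node type: clear this node's bit in each ancestor's mask, stopping at the first ancestor that still has other children present.

	struct node {
		struct node *parent;
		unsigned long mask;	/* one bit per child still present */
		unsigned long grpmask;	/* this node's bit in parent->mask */
	};

	static void cleanup_dead_node(struct node *leaf)
	{
		struct node *n = leaf;
		unsigned long bit;

		for (;;) {
			bit = n->grpmask;
			n = n->parent;
			if (!n)
				break;		/* cleared all the way to the root */
			n->mask &= ~bit;
			if (n->mask)
				return;		/* siblings remain; stop here */
		}
	}
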
@@ -2236,8 +2373,6 @@ static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
 static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
 {
 	unsigned long flags;
-	unsigned long mask;
-	int need_report = 0;
 	struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
 	struct rcu_node *rnp = rdp->mynode;  /* Outgoing CPU's rdp & rnp. */
 
@@ -2251,40 +2386,15 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
 	/* Orphan the dead CPU's callbacks, and adopt them if appropriate. */
 	rcu_send_cbs_to_orphanage(cpu, rsp, rnp, rdp);
 	rcu_adopt_orphan_cbs(rsp, flags);
+	raw_spin_unlock_irqrestore(&rsp->orphan_lock, flags);
 
-	/* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */
-	mask = rdp->grpmask;	/* rnp->grplo is constant. */
-	do {
-		raw_spin_lock(&rnp->lock);	/* irqs already disabled. */
-		smp_mb__after_unlock_lock();
-		rnp->qsmaskinit &= ~mask;
-		if (rnp->qsmaskinit != 0) {
-			if (rnp != rdp->mynode)
-				raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
-			break;
-		}
-		if (rnp == rdp->mynode)
-			need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp);
-		else
-			raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
-		mask = rnp->grpmask;
-		rnp = rnp->parent;
-	} while (rnp != NULL);
-
-	/*
-	 * We still hold the leaf rcu_node structure lock here, and
-	 * irqs are still disabled. The reason for this subterfuge is
-	 * because invoking rcu_report_unblock_qs_rnp() with ->orphan_lock
-	 * held leads to deadlock.
-	 */
-	raw_spin_unlock(&rsp->orphan_lock); /* irqs remain disabled. */
-	rnp = rdp->mynode;
-	if (need_report & RCU_OFL_TASKS_NORM_GP)
-		rcu_report_unblock_qs_rnp(rnp, flags);
-	else
-		raw_spin_unlock_irqrestore(&rnp->lock, flags);
-	if (need_report & RCU_OFL_TASKS_EXP_GP)
-		rcu_report_exp_rnp(rsp, rnp, true);
+	/* Remove outgoing CPU from mask in the leaf rcu_node structure. */
+	raw_spin_lock_irqsave(&rnp->lock, flags);
+	smp_mb__after_unlock_lock();	/* Enforce GP memory-order guarantee. */
+	rnp->qsmaskinit &= ~rdp->grpmask;
+	if (rnp->qsmaskinit == 0 && !rcu_preempt_has_tasks(rnp))
+		rcu_cleanup_dead_rnp(rnp);
+	rcu_report_qs_rnp(rdp->grpmask, rsp, rnp, flags); /* Releases rnp->lock. */
 	WARN_ONCE(rdp->qlen != 0 || rdp->nxtlist != NULL,
 		  "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, nxtlist=%p\n",
 		  cpu, rdp->qlen, rdp->nxtlist);
@@ -2300,6 +2410,10 @@ static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
 {
 }
 
+static void __maybe_unused rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf)
+{
+}
+
 static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
 {
 }
@@ -2496,12 +2610,6 @@ static void force_qs_rnp(struct rcu_state *rsp,
 		}
 		raw_spin_unlock_irqrestore(&rnp->lock, flags);
 	}
-	rnp = rcu_get_root(rsp);
-	if (rnp->qsmask == 0) {
-		raw_spin_lock_irqsave(&rnp->lock, flags);
-		smp_mb__after_unlock_lock();
-		rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */
-	}
 }
 
 /*
@@ -2601,7 +2709,7 @@ static void rcu_process_callbacks(struct softirq_action *unused)
  * Schedule RCU callback invocation. If the specified type of RCU
  * does not support RCU priority boosting, just do a direct call,
  * otherwise wake up the per-CPU kernel kthread. Note that because we
- * are running on the current CPU with interrupts disabled, the
+ * are running on the current CPU with softirqs disabled, the
  * rcu_cpu_kthread_task cannot disappear out from under us.
  */
 static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
@@ -3141,9 +3249,12 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
 
 	/* Is the RCU core waiting for a quiescent state from this CPU? */
 	if (rcu_scheduler_fully_active &&
-	    rdp->qs_pending && !rdp->passed_quiesce) {
+	    rdp->qs_pending && !rdp->passed_quiesce &&
+	    rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_qs_ctr)) {
 		rdp->n_rp_qs_pending++;
-	} else if (rdp->qs_pending && rdp->passed_quiesce) {
+	} else if (rdp->qs_pending &&
+		   (rdp->passed_quiesce ||
+		    rdp->rcu_qs_ctr_snap != __this_cpu_read(rcu_qs_ctr))) {
 		rdp->n_rp_report_qs++;
 		return 1;
 	}
@@ -3167,7 +3278,8 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
 	}
 
 	/* Has a new RCU grace period started? */
-	if (ACCESS_ONCE(rnp->gpnum) != rdp->gpnum) { /* outside lock */
+	if (ACCESS_ONCE(rnp->gpnum) != rdp->gpnum ||
+	    unlikely(ACCESS_ONCE(rdp->gpwrap))) { /* outside lock */
 		rdp->n_rp_gp_started++;
 		return 1;
 	}
@@ -3350,6 +3462,7 @@ static void _rcu_barrier(struct rcu_state *rsp)
 		} else {
 			_rcu_barrier_trace(rsp, "OnlineNoCB", cpu,
 					   rsp->n_barrier_done);
+			smp_mb__before_atomic();
 			atomic_inc(&rsp->barrier_cpu_count);
 			__call_rcu(&rdp->barrier_head,
 				   rcu_barrier_callback, rsp, cpu, 0);
@@ -3417,9 +3530,6 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
 	/* Set up local state, ensuring consistent view of global state. */
 	raw_spin_lock_irqsave(&rnp->lock, flags);
 	rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo);
-	init_callback_list(rdp);
-	rdp->qlen_lazy = 0;
-	ACCESS_ONCE(rdp->qlen) = 0;
 	rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
 	WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE);
 	WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1);
@@ -3476,6 +3586,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
 	rdp->gpnum = rnp->completed;
 	rdp->completed = rnp->completed;
 	rdp->passed_quiesce = 0;
+	rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr);
 	rdp->qs_pending = 0;
 	trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuonl"));
 }
@@ -3567,17 +3678,35 @@ static int rcu_pm_notify(struct notifier_block *self,
 static int __init rcu_spawn_gp_kthread(void)
 {
 	unsigned long flags;
+	int kthread_prio_in = kthread_prio;
 	struct rcu_node *rnp;
 	struct rcu_state *rsp;
+	struct sched_param sp;
 	struct task_struct *t;
 
+	/* Force priority into range. */
+	if (IS_ENABLED(CONFIG_RCU_BOOST) && kthread_prio < 1)
+		kthread_prio = 1;
+	else if (kthread_prio < 0)
+		kthread_prio = 0;
+	else if (kthread_prio > 99)
+		kthread_prio = 99;
+	if (kthread_prio != kthread_prio_in)
+		pr_alert("rcu_spawn_gp_kthread(): Limited prio to %d from %d\n",
+			 kthread_prio, kthread_prio_in);
+
 	rcu_scheduler_fully_active = 1;
 	for_each_rcu_flavor(rsp) {
-		t = kthread_run(rcu_gp_kthread, rsp, "%s", rsp->name);
+		t = kthread_create(rcu_gp_kthread, rsp, "%s", rsp->name);
 		BUG_ON(IS_ERR(t));
 		rnp = rcu_get_root(rsp);
 		raw_spin_lock_irqsave(&rnp->lock, flags);
 		rsp->gp_kthread = t;
+		if (kthread_prio) {
+			sp.sched_priority = kthread_prio;
+			sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
+		}
+		wake_up_process(t);
 		raw_spin_unlock_irqrestore(&rnp->lock, flags);
 	}
 	rcu_spawn_nocb_kthreads();
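
The kthread_run() call is split into kthread_create() plus an explicit wake_up_process() so that the SCHED_FIFO priority is in place before the grace-period kthread executes its first instruction. The same pattern in isolation (a sketch; fn, arg, and prio are placeholders, not names from this patch):

	struct sched_param sp = { .sched_priority = prio };
	struct task_struct *t;

	t = kthread_create(fn, arg, "example");
	if (!IS_ERR(t)) {
		if (prio)
			sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
		wake_up_process(t);	/* first runs at the chosen priority */
	}

Note also the clamping above it: with CONFIG_RCU_BOOST the priority is forced to at least 1, since boosting needs a realtime priority to be useful, and anything outside 0..99 is pulled back into the valid SCHED_FIFO range.
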