tree_exp.h 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760
  1. /*
  2. * RCU expedited grace periods
  3. *
  4. * This program is free software; you can redistribute it and/or modify
  5. * it under the terms of the GNU General Public License as published by
  6. * the Free Software Foundation; either version 2 of the License, or
  7. * (at your option) any later version.
  8. *
  9. * This program is distributed in the hope that it will be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. * GNU General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program; if not, you can access it online at
  16. * http://www.gnu.org/licenses/gpl-2.0.html.
  17. *
  18. * Copyright IBM Corporation, 2016
  19. *
  20. * Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
  21. */
  22. /*
  23. * Record the start of an expedited grace period.
  24. */
  25. static void rcu_exp_gp_seq_start(struct rcu_state *rsp)
  26. {
  27. rcu_seq_start(&rsp->expedited_sequence);
  28. }
  29. /*
  30. * Return then value that expedited-grace-period counter will have
  31. * at the end of the current grace period.
  32. */
  33. static __maybe_unused unsigned long rcu_exp_gp_seq_endval(struct rcu_state *rsp)
  34. {
  35. return rcu_seq_endval(&rsp->expedited_sequence);
  36. }
  37. /*
  38. * Record the end of an expedited grace period.
  39. */
  40. static void rcu_exp_gp_seq_end(struct rcu_state *rsp)
  41. {
  42. rcu_seq_end(&rsp->expedited_sequence);
  43. smp_mb(); /* Ensure that consecutive grace periods serialize. */
  44. }
  45. /*
  46. * Take a snapshot of the expedited-grace-period counter.
  47. */
  48. static unsigned long rcu_exp_gp_seq_snap(struct rcu_state *rsp)
  49. {
  50. unsigned long s;
  51. smp_mb(); /* Caller's modifications seen first by other CPUs. */
  52. s = rcu_seq_snap(&rsp->expedited_sequence);
  53. trace_rcu_exp_grace_period(rsp->name, s, TPS("snap"));
  54. return s;
  55. }
  56. /*
  57. * Given a counter snapshot from rcu_exp_gp_seq_snap(), return true
  58. * if a full expedited grace period has elapsed since that snapshot
  59. * was taken.
  60. */
  61. static bool rcu_exp_gp_seq_done(struct rcu_state *rsp, unsigned long s)
  62. {
  63. return rcu_seq_done(&rsp->expedited_sequence, s);
  64. }
  65. /*
  66. * Reset the ->expmaskinit values in the rcu_node tree to reflect any
  67. * recent CPU-online activity. Note that these masks are not cleared
  68. * when CPUs go offline, so they reflect the union of all CPUs that have
  69. * ever been online. This means that this function normally takes its
  70. * no-work-to-do fastpath.
  71. */
  72. static void sync_exp_reset_tree_hotplug(struct rcu_state *rsp)
  73. {
  74. bool done;
  75. unsigned long flags;
  76. unsigned long mask;
  77. unsigned long oldmask;
  78. int ncpus = smp_load_acquire(&rsp->ncpus); /* Order against locking. */
  79. struct rcu_node *rnp;
  80. struct rcu_node *rnp_up;
  81. /* If no new CPUs onlined since last time, nothing to do. */
  82. if (likely(ncpus == rsp->ncpus_snap))
  83. return;
  84. rsp->ncpus_snap = ncpus;
  85. /*
  86. * Each pass through the following loop propagates newly onlined
  87. * CPUs for the current rcu_node structure up the rcu_node tree.
  88. */
  89. rcu_for_each_leaf_node(rsp, rnp) {
  90. raw_spin_lock_irqsave_rcu_node(rnp, flags);
  91. if (rnp->expmaskinit == rnp->expmaskinitnext) {
  92. raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
  93. continue; /* No new CPUs, nothing to do. */
  94. }
  95. /* Update this node's mask, track old value for propagation. */
  96. oldmask = rnp->expmaskinit;
  97. rnp->expmaskinit = rnp->expmaskinitnext;
  98. raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
  99. /* If was already nonzero, nothing to propagate. */
  100. if (oldmask)
  101. continue;
  102. /* Propagate the new CPU up the tree. */
  103. mask = rnp->grpmask;
  104. rnp_up = rnp->parent;
  105. done = false;
  106. while (rnp_up) {
  107. raw_spin_lock_irqsave_rcu_node(rnp_up, flags);
  108. if (rnp_up->expmaskinit)
  109. done = true;
  110. rnp_up->expmaskinit |= mask;
  111. raw_spin_unlock_irqrestore_rcu_node(rnp_up, flags);
  112. if (done)
  113. break;
  114. mask = rnp_up->grpmask;
  115. rnp_up = rnp_up->parent;
  116. }
  117. }
  118. }
  119. /*
  120. * Reset the ->expmask values in the rcu_node tree in preparation for
  121. * a new expedited grace period.
  122. */
  123. static void __maybe_unused sync_exp_reset_tree(struct rcu_state *rsp)
  124. {
  125. unsigned long flags;
  126. struct rcu_node *rnp;
  127. sync_exp_reset_tree_hotplug(rsp);
  128. rcu_for_each_node_breadth_first(rsp, rnp) {
  129. raw_spin_lock_irqsave_rcu_node(rnp, flags);
  130. WARN_ON_ONCE(rnp->expmask);
  131. rnp->expmask = rnp->expmaskinit;
  132. raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
  133. }
  134. }
  135. /*
  136. * Return non-zero if there is no RCU expedited grace period in progress
  137. * for the specified rcu_node structure, in other words, if all CPUs and
  138. * tasks covered by the specified rcu_node structure have done their bit
  139. * for the current expedited grace period. Works only for preemptible
  140. * RCU -- other RCU implementation use other means.
  141. *
  142. * Caller must hold the rcu_state's exp_mutex.
  143. */
  144. static bool sync_rcu_preempt_exp_done(struct rcu_node *rnp)
  145. {
  146. return rnp->exp_tasks == NULL &&
  147. READ_ONCE(rnp->expmask) == 0;
  148. }
  149. /*
  150. * Report the exit from RCU read-side critical section for the last task
  151. * that queued itself during or before the current expedited preemptible-RCU
  152. * grace period. This event is reported either to the rcu_node structure on
  153. * which the task was queued or to one of that rcu_node structure's ancestors,
  154. * recursively up the tree. (Calm down, calm down, we do the recursion
  155. * iteratively!)
  156. *
  157. * Caller must hold the rcu_state's exp_mutex and the specified rcu_node
  158. * structure's ->lock.
  159. */
  160. static void __rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
  161. bool wake, unsigned long flags)
  162. __releases(rnp->lock)
  163. {
  164. unsigned long mask;
  165. for (;;) {
  166. if (!sync_rcu_preempt_exp_done(rnp)) {
  167. if (!rnp->expmask)
  168. rcu_initiate_boost(rnp, flags);
  169. else
  170. raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
  171. break;
  172. }
  173. if (rnp->parent == NULL) {
  174. raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
  175. if (wake) {
  176. smp_mb(); /* EGP done before wake_up(). */
  177. swake_up(&rsp->expedited_wq);
  178. }
  179. break;
  180. }
  181. mask = rnp->grpmask;
  182. raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled */
  183. rnp = rnp->parent;
  184. raw_spin_lock_rcu_node(rnp); /* irqs already disabled */
  185. WARN_ON_ONCE(!(rnp->expmask & mask));
  186. rnp->expmask &= ~mask;
  187. }
  188. }
  189. /*
  190. * Report expedited quiescent state for specified node. This is a
  191. * lock-acquisition wrapper function for __rcu_report_exp_rnp().
  192. *
  193. * Caller must hold the rcu_state's exp_mutex.
  194. */
  195. static void __maybe_unused rcu_report_exp_rnp(struct rcu_state *rsp,
  196. struct rcu_node *rnp, bool wake)
  197. {
  198. unsigned long flags;
  199. raw_spin_lock_irqsave_rcu_node(rnp, flags);
  200. __rcu_report_exp_rnp(rsp, rnp, wake, flags);
  201. }
  202. /*
  203. * Report expedited quiescent state for multiple CPUs, all covered by the
  204. * specified leaf rcu_node structure. Caller must hold the rcu_state's
  205. * exp_mutex.
  206. */
  207. static void rcu_report_exp_cpu_mult(struct rcu_state *rsp, struct rcu_node *rnp,
  208. unsigned long mask, bool wake)
  209. {
  210. unsigned long flags;
  211. raw_spin_lock_irqsave_rcu_node(rnp, flags);
  212. if (!(rnp->expmask & mask)) {
  213. raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
  214. return;
  215. }
  216. rnp->expmask &= ~mask;
  217. __rcu_report_exp_rnp(rsp, rnp, wake, flags); /* Releases rnp->lock. */
  218. }
  219. /*
  220. * Report expedited quiescent state for specified rcu_data (CPU).
  221. */
  222. static void rcu_report_exp_rdp(struct rcu_state *rsp, struct rcu_data *rdp,
  223. bool wake)
  224. {
  225. rcu_report_exp_cpu_mult(rsp, rdp->mynode, rdp->grpmask, wake);
  226. }
  227. /* Common code for synchronize_{rcu,sched}_expedited() work-done checking. */
  228. static bool sync_exp_work_done(struct rcu_state *rsp, atomic_long_t *stat,
  229. unsigned long s)
  230. {
  231. if (rcu_exp_gp_seq_done(rsp, s)) {
  232. trace_rcu_exp_grace_period(rsp->name, s, TPS("done"));
  233. /* Ensure test happens before caller kfree(). */
  234. smp_mb__before_atomic(); /* ^^^ */
  235. atomic_long_inc(stat);
  236. return true;
  237. }
  238. return false;
  239. }
  240. /*
  241. * Funnel-lock acquisition for expedited grace periods. Returns true
  242. * if some other task completed an expedited grace period that this task
  243. * can piggy-back on, and with no mutex held. Otherwise, returns false
  244. * with the mutex held, indicating that the caller must actually do the
  245. * expedited grace period.
  246. */
  247. static bool exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
  248. {
  249. struct rcu_data *rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id());
  250. struct rcu_node *rnp = rdp->mynode;
  251. struct rcu_node *rnp_root = rcu_get_root(rsp);
  252. /* Low-contention fastpath. */
  253. if (ULONG_CMP_LT(READ_ONCE(rnp->exp_seq_rq), s) &&
  254. (rnp == rnp_root ||
  255. ULONG_CMP_LT(READ_ONCE(rnp_root->exp_seq_rq), s)) &&
  256. mutex_trylock(&rsp->exp_mutex))
  257. goto fastpath;
  258. /*
  259. * Each pass through the following loop works its way up
  260. * the rcu_node tree, returning if others have done the work or
  261. * otherwise falls through to acquire rsp->exp_mutex. The mapping
  262. * from CPU to rcu_node structure can be inexact, as it is just
  263. * promoting locality and is not strictly needed for correctness.
  264. */
  265. for (; rnp != NULL; rnp = rnp->parent) {
  266. if (sync_exp_work_done(rsp, &rdp->exp_workdone1, s))
  267. return true;
  268. /* Work not done, either wait here or go up. */
  269. spin_lock(&rnp->exp_lock);
  270. if (ULONG_CMP_GE(rnp->exp_seq_rq, s)) {
  271. /* Someone else doing GP, so wait for them. */
  272. spin_unlock(&rnp->exp_lock);
  273. trace_rcu_exp_funnel_lock(rsp->name, rnp->level,
  274. rnp->grplo, rnp->grphi,
  275. TPS("wait"));
  276. wait_event(rnp->exp_wq[rcu_seq_ctr(s) & 0x3],
  277. sync_exp_work_done(rsp,
  278. &rdp->exp_workdone2, s));
  279. return true;
  280. }
  281. rnp->exp_seq_rq = s; /* Followers can wait on us. */
  282. spin_unlock(&rnp->exp_lock);
  283. trace_rcu_exp_funnel_lock(rsp->name, rnp->level, rnp->grplo,
  284. rnp->grphi, TPS("nxtlvl"));
  285. }
  286. mutex_lock(&rsp->exp_mutex);
  287. fastpath:
  288. if (sync_exp_work_done(rsp, &rdp->exp_workdone3, s)) {
  289. mutex_unlock(&rsp->exp_mutex);
  290. return true;
  291. }
  292. rcu_exp_gp_seq_start(rsp);
  293. trace_rcu_exp_grace_period(rsp->name, s, TPS("start"));
  294. return false;
  295. }
  296. /* Invoked on each online non-idle CPU for expedited quiescent state. */
  297. static void sync_sched_exp_handler(void *data)
  298. {
  299. struct rcu_data *rdp;
  300. struct rcu_node *rnp;
  301. struct rcu_state *rsp = data;
  302. rdp = this_cpu_ptr(rsp->rda);
  303. rnp = rdp->mynode;
  304. if (!(READ_ONCE(rnp->expmask) & rdp->grpmask) ||
  305. __this_cpu_read(rcu_sched_data.cpu_no_qs.b.exp))
  306. return;
  307. if (rcu_is_cpu_rrupt_from_idle()) {
  308. rcu_report_exp_rdp(&rcu_sched_state,
  309. this_cpu_ptr(&rcu_sched_data), true);
  310. return;
  311. }
  312. __this_cpu_write(rcu_sched_data.cpu_no_qs.b.exp, true);
  313. /* Store .exp before .rcu_urgent_qs. */
  314. smp_store_release(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs), true);
  315. resched_cpu(smp_processor_id());
  316. }
  317. /* Send IPI for expedited cleanup if needed at end of CPU-hotplug operation. */
  318. static void sync_sched_exp_online_cleanup(int cpu)
  319. {
  320. struct rcu_data *rdp;
  321. int ret;
  322. struct rcu_node *rnp;
  323. struct rcu_state *rsp = &rcu_sched_state;
  324. rdp = per_cpu_ptr(rsp->rda, cpu);
  325. rnp = rdp->mynode;
  326. if (!(READ_ONCE(rnp->expmask) & rdp->grpmask))
  327. return;
  328. ret = smp_call_function_single(cpu, sync_sched_exp_handler, rsp, 0);
  329. WARN_ON_ONCE(ret);
  330. }
  331. /*
  332. * Select the nodes that the upcoming expedited grace period needs
  333. * to wait for.
  334. */
  335. static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,
  336. smp_call_func_t func)
  337. {
  338. int cpu;
  339. unsigned long flags;
  340. unsigned long mask_ofl_test;
  341. unsigned long mask_ofl_ipi;
  342. int ret;
  343. struct rcu_node *rnp;
  344. trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("reset"));
  345. sync_exp_reset_tree(rsp);
  346. trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("select"));
  347. rcu_for_each_leaf_node(rsp, rnp) {
  348. raw_spin_lock_irqsave_rcu_node(rnp, flags);
  349. /* Each pass checks a CPU for identity, offline, and idle. */
  350. mask_ofl_test = 0;
  351. for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) {
  352. unsigned long mask = leaf_node_cpu_bit(rnp, cpu);
  353. struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
  354. struct rcu_dynticks *rdtp = per_cpu_ptr(&rcu_dynticks, cpu);
  355. int snap;
  356. if (raw_smp_processor_id() == cpu ||
  357. !(rnp->qsmaskinitnext & mask)) {
  358. mask_ofl_test |= mask;
  359. } else {
  360. snap = rcu_dynticks_snap(rdtp);
  361. if (rcu_dynticks_in_eqs(snap))
  362. mask_ofl_test |= mask;
  363. else
  364. rdp->exp_dynticks_snap = snap;
  365. }
  366. }
  367. mask_ofl_ipi = rnp->expmask & ~mask_ofl_test;
  368. /*
  369. * Need to wait for any blocked tasks as well. Note that
  370. * additional blocking tasks will also block the expedited
  371. * GP until such time as the ->expmask bits are cleared.
  372. */
  373. if (rcu_preempt_has_tasks(rnp))
  374. rnp->exp_tasks = rnp->blkd_tasks.next;
  375. raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
  376. /* IPI the remaining CPUs for expedited quiescent state. */
  377. for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) {
  378. unsigned long mask = leaf_node_cpu_bit(rnp, cpu);
  379. struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
  380. if (!(mask_ofl_ipi & mask))
  381. continue;
  382. retry_ipi:
  383. if (rcu_dynticks_in_eqs_since(rdp->dynticks,
  384. rdp->exp_dynticks_snap)) {
  385. mask_ofl_test |= mask;
  386. continue;
  387. }
  388. ret = smp_call_function_single(cpu, func, rsp, 0);
  389. if (!ret) {
  390. mask_ofl_ipi &= ~mask;
  391. continue;
  392. }
  393. /* Failed, raced with CPU hotplug operation. */
  394. raw_spin_lock_irqsave_rcu_node(rnp, flags);
  395. if ((rnp->qsmaskinitnext & mask) &&
  396. (rnp->expmask & mask)) {
  397. /* Online, so delay for a bit and try again. */
  398. raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
  399. trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("selectofl"));
  400. schedule_timeout_uninterruptible(1);
  401. goto retry_ipi;
  402. }
  403. /* CPU really is offline, so we can ignore it. */
  404. if (!(rnp->expmask & mask))
  405. mask_ofl_ipi &= ~mask;
  406. raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
  407. }
  408. /* Report quiescent states for those that went offline. */
  409. mask_ofl_test |= mask_ofl_ipi;
  410. if (mask_ofl_test)
  411. rcu_report_exp_cpu_mult(rsp, rnp, mask_ofl_test, false);
  412. }
  413. }
  414. static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
  415. {
  416. int cpu;
  417. unsigned long jiffies_stall;
  418. unsigned long jiffies_start;
  419. unsigned long mask;
  420. int ndetected;
  421. struct rcu_node *rnp;
  422. struct rcu_node *rnp_root = rcu_get_root(rsp);
  423. int ret;
  424. trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("startwait"));
  425. jiffies_stall = rcu_jiffies_till_stall_check();
  426. jiffies_start = jiffies;
  427. for (;;) {
  428. ret = swait_event_timeout(
  429. rsp->expedited_wq,
  430. sync_rcu_preempt_exp_done(rnp_root),
  431. jiffies_stall);
  432. if (ret > 0 || sync_rcu_preempt_exp_done(rnp_root))
  433. return;
  434. WARN_ON(ret < 0); /* workqueues should not be signaled. */
  435. if (rcu_cpu_stall_suppress)
  436. continue;
  437. panic_on_rcu_stall();
  438. pr_err("INFO: %s detected expedited stalls on CPUs/tasks: {",
  439. rsp->name);
  440. ndetected = 0;
  441. rcu_for_each_leaf_node(rsp, rnp) {
  442. ndetected += rcu_print_task_exp_stall(rnp);
  443. for_each_leaf_node_possible_cpu(rnp, cpu) {
  444. struct rcu_data *rdp;
  445. mask = leaf_node_cpu_bit(rnp, cpu);
  446. if (!(rnp->expmask & mask))
  447. continue;
  448. ndetected++;
  449. rdp = per_cpu_ptr(rsp->rda, cpu);
  450. pr_cont(" %d-%c%c%c", cpu,
  451. "O."[!!cpu_online(cpu)],
  452. "o."[!!(rdp->grpmask & rnp->expmaskinit)],
  453. "N."[!!(rdp->grpmask & rnp->expmaskinitnext)]);
  454. }
  455. }
  456. pr_cont(" } %lu jiffies s: %lu root: %#lx/%c\n",
  457. jiffies - jiffies_start, rsp->expedited_sequence,
  458. rnp_root->expmask, ".T"[!!rnp_root->exp_tasks]);
  459. if (ndetected) {
  460. pr_err("blocking rcu_node structures:");
  461. rcu_for_each_node_breadth_first(rsp, rnp) {
  462. if (rnp == rnp_root)
  463. continue; /* printed unconditionally */
  464. if (sync_rcu_preempt_exp_done(rnp))
  465. continue;
  466. pr_cont(" l=%u:%d-%d:%#lx/%c",
  467. rnp->level, rnp->grplo, rnp->grphi,
  468. rnp->expmask,
  469. ".T"[!!rnp->exp_tasks]);
  470. }
  471. pr_cont("\n");
  472. }
  473. rcu_for_each_leaf_node(rsp, rnp) {
  474. for_each_leaf_node_possible_cpu(rnp, cpu) {
  475. mask = leaf_node_cpu_bit(rnp, cpu);
  476. if (!(rnp->expmask & mask))
  477. continue;
  478. dump_cpu_task(cpu);
  479. }
  480. }
  481. jiffies_stall = 3 * rcu_jiffies_till_stall_check() + 3;
  482. }
  483. }
  484. /*
  485. * Wait for the current expedited grace period to complete, and then
  486. * wake up everyone who piggybacked on the just-completed expedited
  487. * grace period. Also update all the ->exp_seq_rq counters as needed
  488. * in order to avoid counter-wrap problems.
  489. */
  490. static void rcu_exp_wait_wake(struct rcu_state *rsp, unsigned long s)
  491. {
  492. struct rcu_node *rnp;
  493. synchronize_sched_expedited_wait(rsp);
  494. rcu_exp_gp_seq_end(rsp);
  495. trace_rcu_exp_grace_period(rsp->name, s, TPS("end"));
  496. /*
  497. * Switch over to wakeup mode, allowing the next GP, but -only- the
  498. * next GP, to proceed.
  499. */
  500. mutex_lock(&rsp->exp_wake_mutex);
  501. rcu_for_each_node_breadth_first(rsp, rnp) {
  502. if (ULONG_CMP_LT(READ_ONCE(rnp->exp_seq_rq), s)) {
  503. spin_lock(&rnp->exp_lock);
  504. /* Recheck, avoid hang in case someone just arrived. */
  505. if (ULONG_CMP_LT(rnp->exp_seq_rq, s))
  506. rnp->exp_seq_rq = s;
  507. spin_unlock(&rnp->exp_lock);
  508. }
  509. smp_mb(); /* All above changes before wakeup. */
  510. wake_up_all(&rnp->exp_wq[rcu_seq_ctr(rsp->expedited_sequence) & 0x3]);
  511. }
  512. trace_rcu_exp_grace_period(rsp->name, s, TPS("endwake"));
  513. mutex_unlock(&rsp->exp_wake_mutex);
  514. }
  515. /* Let the workqueue handler know what it is supposed to do. */
  516. struct rcu_exp_work {
  517. smp_call_func_t rew_func;
  518. struct rcu_state *rew_rsp;
  519. unsigned long rew_s;
  520. struct work_struct rew_work;
  521. };
  522. /*
  523. * Common code to drive an expedited grace period forward, used by
  524. * workqueues and mid-boot-time tasks.
  525. */
  526. static void rcu_exp_sel_wait_wake(struct rcu_state *rsp,
  527. smp_call_func_t func, unsigned long s)
  528. {
  529. /* Initialize the rcu_node tree in preparation for the wait. */
  530. sync_rcu_exp_select_cpus(rsp, func);
  531. /* Wait and clean up, including waking everyone. */
  532. rcu_exp_wait_wake(rsp, s);
  533. }
  534. /*
  535. * Work-queue handler to drive an expedited grace period forward.
  536. */
  537. static void wait_rcu_exp_gp(struct work_struct *wp)
  538. {
  539. struct rcu_exp_work *rewp;
  540. rewp = container_of(wp, struct rcu_exp_work, rew_work);
  541. rcu_exp_sel_wait_wake(rewp->rew_rsp, rewp->rew_func, rewp->rew_s);
  542. }
  543. /*
  544. * Given an rcu_state pointer and a smp_call_function() handler, kick
  545. * off the specified flavor of expedited grace period.
  546. */
  547. static void _synchronize_rcu_expedited(struct rcu_state *rsp,
  548. smp_call_func_t func)
  549. {
  550. struct rcu_data *rdp;
  551. struct rcu_exp_work rew;
  552. struct rcu_node *rnp;
  553. unsigned long s;
  554. /* If expedited grace periods are prohibited, fall back to normal. */
  555. if (rcu_gp_is_normal()) {
  556. wait_rcu_gp(rsp->call);
  557. return;
  558. }
  559. /* Take a snapshot of the sequence number. */
  560. s = rcu_exp_gp_seq_snap(rsp);
  561. if (exp_funnel_lock(rsp, s))
  562. return; /* Someone else did our work for us. */
  563. /* Ensure that load happens before action based on it. */
  564. if (unlikely(rcu_scheduler_active == RCU_SCHEDULER_INIT)) {
  565. /* Direct call during scheduler init and early_initcalls(). */
  566. rcu_exp_sel_wait_wake(rsp, func, s);
  567. } else {
  568. /* Marshall arguments & schedule the expedited grace period. */
  569. rew.rew_func = func;
  570. rew.rew_rsp = rsp;
  571. rew.rew_s = s;
  572. INIT_WORK_ONSTACK(&rew.rew_work, wait_rcu_exp_gp);
  573. queue_work(rcu_gp_wq, &rew.rew_work);
  574. }
  575. /* Wait for expedited grace period to complete. */
  576. rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id());
  577. rnp = rcu_get_root(rsp);
  578. wait_event(rnp->exp_wq[rcu_seq_ctr(s) & 0x3],
  579. sync_exp_work_done(rsp, &rdp->exp_workdone0, s));
  580. smp_mb(); /* Workqueue actions happen before return. */
  581. /* Let the next expedited grace period start. */
  582. mutex_unlock(&rsp->exp_mutex);
  583. }
  584. /**
  585. * synchronize_sched_expedited - Brute-force RCU-sched grace period
  586. *
  587. * Wait for an RCU-sched grace period to elapse, but use a "big hammer"
  588. * approach to force the grace period to end quickly. This consumes
  589. * significant time on all CPUs and is unfriendly to real-time workloads,
  590. * so is thus not recommended for any sort of common-case code. In fact,
  591. * if you are using synchronize_sched_expedited() in a loop, please
  592. * restructure your code to batch your updates, and then use a single
  593. * synchronize_sched() instead.
  594. *
  595. * This implementation can be thought of as an application of sequence
  596. * locking to expedited grace periods, but using the sequence counter to
  597. * determine when someone else has already done the work instead of for
  598. * retrying readers.
  599. */
  600. void synchronize_sched_expedited(void)
  601. {
  602. struct rcu_state *rsp = &rcu_sched_state;
  603. RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) ||
  604. lock_is_held(&rcu_lock_map) ||
  605. lock_is_held(&rcu_sched_lock_map),
  606. "Illegal synchronize_sched_expedited() in RCU read-side critical section");
  607. /* If only one CPU, this is automatically a grace period. */
  608. if (rcu_blocking_is_gp())
  609. return;
  610. _synchronize_rcu_expedited(rsp, sync_sched_exp_handler);
  611. }
  612. EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
  613. #ifdef CONFIG_PREEMPT_RCU
  614. /*
  615. * Remote handler for smp_call_function_single(). If there is an
  616. * RCU read-side critical section in effect, request that the
  617. * next rcu_read_unlock() record the quiescent state up the
  618. * ->expmask fields in the rcu_node tree. Otherwise, immediately
  619. * report the quiescent state.
  620. */
  621. static void sync_rcu_exp_handler(void *info)
  622. {
  623. struct rcu_data *rdp;
  624. struct rcu_state *rsp = info;
  625. struct task_struct *t = current;
  626. /*
  627. * Within an RCU read-side critical section, request that the next
  628. * rcu_read_unlock() report. Unless this RCU read-side critical
  629. * section has already blocked, in which case it is already set
  630. * up for the expedited grace period to wait on it.
  631. */
  632. if (t->rcu_read_lock_nesting > 0 &&
  633. !t->rcu_read_unlock_special.b.blocked) {
  634. t->rcu_read_unlock_special.b.exp_need_qs = true;
  635. return;
  636. }
  637. /*
  638. * We are either exiting an RCU read-side critical section (negative
  639. * values of t->rcu_read_lock_nesting) or are not in one at all
  640. * (zero value of t->rcu_read_lock_nesting). Or we are in an RCU
  641. * read-side critical section that blocked before this expedited
  642. * grace period started. Either way, we can immediately report
  643. * the quiescent state.
  644. */
  645. rdp = this_cpu_ptr(rsp->rda);
  646. rcu_report_exp_rdp(rsp, rdp, true);
  647. }
  648. /**
  649. * synchronize_rcu_expedited - Brute-force RCU grace period
  650. *
  651. * Wait for an RCU-preempt grace period, but expedite it. The basic
  652. * idea is to IPI all non-idle non-nohz online CPUs. The IPI handler
  653. * checks whether the CPU is in an RCU-preempt critical section, and
  654. * if so, it sets a flag that causes the outermost rcu_read_unlock()
  655. * to report the quiescent state. On the other hand, if the CPU is
  656. * not in an RCU read-side critical section, the IPI handler reports
  657. * the quiescent state immediately.
  658. *
  659. * Although this is a greate improvement over previous expedited
  660. * implementations, it is still unfriendly to real-time workloads, so is
  661. * thus not recommended for any sort of common-case code. In fact, if
  662. * you are using synchronize_rcu_expedited() in a loop, please restructure
  663. * your code to batch your updates, and then Use a single synchronize_rcu()
  664. * instead.
  665. */
  666. void synchronize_rcu_expedited(void)
  667. {
  668. struct rcu_state *rsp = rcu_state_p;
  669. RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) ||
  670. lock_is_held(&rcu_lock_map) ||
  671. lock_is_held(&rcu_sched_lock_map),
  672. "Illegal synchronize_rcu_expedited() in RCU read-side critical section");
  673. if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE)
  674. return;
  675. _synchronize_rcu_expedited(rsp, sync_rcu_exp_handler);
  676. }
  677. EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
  678. #else /* #ifdef CONFIG_PREEMPT_RCU */
  679. /*
  680. * Wait for an rcu-preempt grace period, but make it happen quickly.
  681. * But because preemptible RCU does not exist, map to rcu-sched.
  682. */
  683. void synchronize_rcu_expedited(void)
  684. {
  685. synchronize_sched_expedited();
  686. }
  687. EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
  688. #endif /* #else #ifdef CONFIG_PREEMPT_RCU */