tree_exp.h

/*
 * RCU expedited grace periods
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, you can access it online at
 * http://www.gnu.org/licenses/gpl-2.0.html.
 *
 * Copyright IBM Corporation, 2016
 *
 * Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
 */

/* Wrapper functions for expedited grace periods. */
static void rcu_exp_gp_seq_start(struct rcu_state *rsp)
{
        rcu_seq_start(&rsp->expedited_sequence);
}

static void rcu_exp_gp_seq_end(struct rcu_state *rsp)
{
        rcu_seq_end(&rsp->expedited_sequence);
        smp_mb(); /* Ensure that consecutive grace periods serialize. */
}

static unsigned long rcu_exp_gp_seq_snap(struct rcu_state *rsp)
{
        unsigned long s;

        smp_mb(); /* Caller's modifications seen first by other CPUs. */
        s = rcu_seq_snap(&rsp->expedited_sequence);
        trace_rcu_exp_grace_period(rsp->name, s, TPS("snap"));
        return s;
}

static bool rcu_exp_gp_seq_done(struct rcu_state *rsp, unsigned long s)
{
        return rcu_seq_done(&rsp->expedited_sequence, s);
}
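
/*
 * Illustration (a sketch, not used by the code): the rcu_seq_*() helpers
 * defined elsewhere in the RCU core treat the low-order bit of
 * ->expedited_sequence as a grace-period-in-progress flag, which is also
 * why the wait queues below are indexed with "(s >> 1) & 0x3".  Assuming
 * that encoding, with ->expedited_sequence initially at 4 (even, so no
 * expedited grace period in flight):
 *
 *      s = rcu_exp_gp_seq_snap(rsp);   // s == 6
 *      rcu_exp_gp_seq_start(rsp);      // 4 -> 5, GP in progress
 *      rcu_exp_gp_seq_end(rsp);        // 5 -> 6, GP complete
 *      rcu_exp_gp_seq_done(rsp, s);    // true: a full expedited GP has
 *                                      //   elapsed since the snapshot
 */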

/*
 * Reset the ->expmaskinit values in the rcu_node tree to reflect any
 * recent CPU-online activity. Note that these masks are not cleared
 * when CPUs go offline, so they reflect the union of all CPUs that have
 * ever been online. This means that this function normally takes its
 * no-work-to-do fastpath.
 */
static void sync_exp_reset_tree_hotplug(struct rcu_state *rsp)
{
        bool done;
        unsigned long flags;
        unsigned long mask;
        unsigned long oldmask;
        int ncpus = READ_ONCE(rsp->ncpus);
        struct rcu_node *rnp;
        struct rcu_node *rnp_up;

        /* If no new CPUs onlined since last time, nothing to do. */
        if (likely(ncpus == rsp->ncpus_snap))
                return;
        rsp->ncpus_snap = ncpus;

        /*
         * Each pass through the following loop propagates newly onlined
         * CPUs for the current rcu_node structure up the rcu_node tree.
         */
        rcu_for_each_leaf_node(rsp, rnp) {
                raw_spin_lock_irqsave_rcu_node(rnp, flags);
                if (rnp->expmaskinit == rnp->expmaskinitnext) {
                        raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
                        continue;  /* No new CPUs, nothing to do. */
                }

                /* Update this node's mask, track old value for propagation. */
                oldmask = rnp->expmaskinit;
                rnp->expmaskinit = rnp->expmaskinitnext;
                raw_spin_unlock_irqrestore_rcu_node(rnp, flags);

                /* If was already nonzero, nothing to propagate. */
                if (oldmask)
                        continue;

                /* Propagate the new CPU up the tree. */
                mask = rnp->grpmask;
                rnp_up = rnp->parent;
                done = false;
                while (rnp_up) {
                        raw_spin_lock_irqsave_rcu_node(rnp_up, flags);
                        if (rnp_up->expmaskinit)
                                done = true;
                        rnp_up->expmaskinit |= mask;
                        raw_spin_unlock_irqrestore_rcu_node(rnp_up, flags);
                        if (done)
                                break;
                        mask = rnp_up->grpmask;
                        rnp_up = rnp_up->parent;
                }
        }
}

/*
 * Reset the ->expmask values in the rcu_node tree in preparation for
 * a new expedited grace period.
 */
static void __maybe_unused sync_exp_reset_tree(struct rcu_state *rsp)
{
        unsigned long flags;
        struct rcu_node *rnp;

        sync_exp_reset_tree_hotplug(rsp);
        rcu_for_each_node_breadth_first(rsp, rnp) {
                raw_spin_lock_irqsave_rcu_node(rnp, flags);
                WARN_ON_ONCE(rnp->expmask);
                rnp->expmask = rnp->expmaskinit;
                raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
        }
}

/*
 * Return non-zero if there is no RCU expedited grace period in progress
 * for the specified rcu_node structure, in other words, if all CPUs and
 * tasks covered by the specified rcu_node structure have done their bit
 * for the current expedited grace period. Works only for preemptible
 * RCU -- other RCU implementations use other means.
 *
 * Caller must hold the rcu_state's exp_mutex.
 */
static int sync_rcu_preempt_exp_done(struct rcu_node *rnp)
{
        return rnp->exp_tasks == NULL &&
               READ_ONCE(rnp->expmask) == 0;
}

/*
 * Report the exit from RCU read-side critical section for the last task
 * that queued itself during or before the current expedited preemptible-RCU
 * grace period. This event is reported either to the rcu_node structure on
 * which the task was queued or to one of that rcu_node structure's ancestors,
 * recursively up the tree. (Calm down, calm down, we do the recursion
 * iteratively!)
 *
 * Caller must hold the rcu_state's exp_mutex and the specified rcu_node
 * structure's ->lock.
 */
static void __rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
                                 bool wake, unsigned long flags)
        __releases(rnp->lock)
{
        unsigned long mask;

        for (;;) {
                if (!sync_rcu_preempt_exp_done(rnp)) {
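                        /*
                         * This node still blocks the expedited grace
                         * period.  If no CPUs remain (->expmask is zero),
                         * only blocked tasks are left, so give RCU
                         * priority boosting a chance to push them along.
                         */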
                        if (!rnp->expmask)
                                rcu_initiate_boost(rnp, flags);
                        else
                                raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
                        break;
                }
                if (rnp->parent == NULL) {
                        raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
                        if (wake) {
                                smp_mb(); /* EGP done before wake_up(). */
                                swake_up(&rsp->expedited_wq);
                        }
                        break;
                }
                mask = rnp->grpmask;
                raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled */
                rnp = rnp->parent;
                raw_spin_lock_rcu_node(rnp); /* irqs already disabled */
                WARN_ON_ONCE(!(rnp->expmask & mask));
                rnp->expmask &= ~mask;
        }
}

/*
 * Report expedited quiescent state for specified node. This is a
 * lock-acquisition wrapper function for __rcu_report_exp_rnp().
 *
 * Caller must hold the rcu_state's exp_mutex.
 */
static void __maybe_unused rcu_report_exp_rnp(struct rcu_state *rsp,
                                              struct rcu_node *rnp, bool wake)
{
        unsigned long flags;

        raw_spin_lock_irqsave_rcu_node(rnp, flags);
        __rcu_report_exp_rnp(rsp, rnp, wake, flags);
}

/*
 * Report expedited quiescent state for multiple CPUs, all covered by the
 * specified leaf rcu_node structure. Caller must hold the rcu_state's
 * exp_mutex.
 */
static void rcu_report_exp_cpu_mult(struct rcu_state *rsp, struct rcu_node *rnp,
                                    unsigned long mask, bool wake)
{
        unsigned long flags;

        raw_spin_lock_irqsave_rcu_node(rnp, flags);
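        /* If none of these CPUs still block the expedited GP, nothing to do. */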
        if (!(rnp->expmask & mask)) {
                raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
                return;
        }
        rnp->expmask &= ~mask;
        __rcu_report_exp_rnp(rsp, rnp, wake, flags); /* Releases rnp->lock. */
}

/*
 * Report expedited quiescent state for specified rcu_data (CPU).
 */
static void rcu_report_exp_rdp(struct rcu_state *rsp, struct rcu_data *rdp,
                               bool wake)
{
        rcu_report_exp_cpu_mult(rsp, rdp->mynode, rdp->grpmask, wake);
}

/* Common code for synchronize_{rcu,sched}_expedited() work-done checking. */
static bool sync_exp_work_done(struct rcu_state *rsp, atomic_long_t *stat,
                               unsigned long s)
{
        if (rcu_exp_gp_seq_done(rsp, s)) {
                trace_rcu_exp_grace_period(rsp->name, s, TPS("done"));
                /* Ensure test happens before caller kfree(). */
                smp_mb__before_atomic(); /* ^^^ */
                atomic_long_inc(stat);
                return true;
        }
        return false;
}

/*
 * Funnel-lock acquisition for expedited grace periods. Returns true
 * if some other task completed an expedited grace period that this task
 * can piggy-back on, and with no mutex held. Otherwise, returns false
 * with the mutex held, indicating that the caller must actually do the
 * expedited grace period.
 */
static bool exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
{
        struct rcu_data *rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id());
        struct rcu_node *rnp = rdp->mynode;
        struct rcu_node *rnp_root = rcu_get_root(rsp);

        /* Low-contention fastpath. */
        if (ULONG_CMP_LT(READ_ONCE(rnp->exp_seq_rq), s) &&
            (rnp == rnp_root ||
             ULONG_CMP_LT(READ_ONCE(rnp_root->exp_seq_rq), s)) &&
            mutex_trylock(&rsp->exp_mutex))
                goto fastpath;

        /*
         * Each pass through the following loop works its way up
         * the rcu_node tree, returning early if some other task has
         * already done the work, and otherwise falling through to
         * acquire rsp->exp_mutex. The mapping from CPU to rcu_node
         * structure can be inexact, as it is just promoting locality
         * and is not strictly needed for correctness.
         */
        for (; rnp != NULL; rnp = rnp->parent) {
                if (sync_exp_work_done(rsp, &rdp->exp_workdone1, s))
                        return true;

                /* Work not done, either wait here or go up. */
                spin_lock(&rnp->exp_lock);
                if (ULONG_CMP_GE(rnp->exp_seq_rq, s)) {
                        /* Someone else doing GP, so wait for them. */
                        spin_unlock(&rnp->exp_lock);
                        trace_rcu_exp_funnel_lock(rsp->name, rnp->level,
                                                  rnp->grplo, rnp->grphi,
                                                  TPS("wait"));
                        wait_event(rnp->exp_wq[(s >> 1) & 0x3],
                                   sync_exp_work_done(rsp,
                                                      &rdp->exp_workdone2, s));
                        return true;
                }
                rnp->exp_seq_rq = s; /* Followers can wait on us. */
                spin_unlock(&rnp->exp_lock);
                trace_rcu_exp_funnel_lock(rsp->name, rnp->level, rnp->grplo,
                                          rnp->grphi, TPS("nxtlvl"));
        }
        mutex_lock(&rsp->exp_mutex);
fastpath:
        if (sync_exp_work_done(rsp, &rdp->exp_workdone3, s)) {
                mutex_unlock(&rsp->exp_mutex);
                return true;
        }
        rcu_exp_gp_seq_start(rsp);
        trace_rcu_exp_grace_period(rsp->name, s, TPS("start"));
        return false;
}
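
/*
 * Note on the "(s >> 1) & 0x3" indexing used above and in
 * rcu_exp_wait_wake(): because the low-order bit of the sequence counter
 * is the grace-period-in-progress flag, s >> 1 numbers the expedited
 * grace periods, and the low two bits of that number select one of the
 * four ->exp_wq[] wait queues.  Waiters for a given expedited grace
 * period therefore sleep on the queue chosen by their snapshot, and
 * rcu_exp_wait_wake() wakes that same queue once the grace period ends.
 */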

/* Invoked on each online non-idle CPU for expedited quiescent state. */
static void sync_sched_exp_handler(void *data)
{
        struct rcu_data *rdp;
        struct rcu_node *rnp;
        struct rcu_state *rsp = data;

        rdp = this_cpu_ptr(rsp->rda);
        rnp = rdp->mynode;
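
        /*
         * Nothing to do if this CPU's bit in ->expmask is already clear
         * or if a quiescent state has already been requested.  If the
         * IPI interrupted the idle loop, that is itself a quiescent
         * state, so report it immediately.  Otherwise, note the request
         * and force a reschedule so that the quiescent state is reported
         * when this CPU next passes through the scheduler.
         */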
        if (!(READ_ONCE(rnp->expmask) & rdp->grpmask) ||
            __this_cpu_read(rcu_sched_data.cpu_no_qs.b.exp))
                return;
        if (rcu_is_cpu_rrupt_from_idle()) {
                rcu_report_exp_rdp(&rcu_sched_state,
                                   this_cpu_ptr(&rcu_sched_data), true);
                return;
        }
        __this_cpu_write(rcu_sched_data.cpu_no_qs.b.exp, true);
        resched_cpu(smp_processor_id());
}

/* Send IPI for expedited cleanup if needed at end of CPU-hotplug operation. */
static void sync_sched_exp_online_cleanup(int cpu)
{
        struct rcu_data *rdp;
        int ret;
        struct rcu_node *rnp;
        struct rcu_state *rsp = &rcu_sched_state;

        rdp = per_cpu_ptr(rsp->rda, cpu);
        rnp = rdp->mynode;
        if (!(READ_ONCE(rnp->expmask) & rdp->grpmask))
                return;
        ret = smp_call_function_single(cpu, sync_sched_exp_handler, rsp, 0);
        WARN_ON_ONCE(ret);
}

/*
 * Select the nodes that the upcoming expedited grace period needs
 * to wait for.
 */
static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,
                                     smp_call_func_t func)
{
        int cpu;
        unsigned long flags;
        unsigned long mask_ofl_test;
        unsigned long mask_ofl_ipi;
        int ret;
        struct rcu_node *rnp;

        sync_exp_reset_tree(rsp);
        rcu_for_each_leaf_node(rsp, rnp) {
                raw_spin_lock_irqsave_rcu_node(rnp, flags);

                /* Each pass checks a CPU for identity, offline, and idle. */
                mask_ofl_test = 0;
                for_each_leaf_node_possible_cpu(rnp, cpu) {
                        struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
                        struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
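
                        /*
                         * Snapshot this CPU's dynticks counter with full
                         * ordering.  An even value means the CPU is in
                         * dynticks-idle from RCU's point of view, so it
                         * is already in an extended quiescent state and
                         * need not be IPIed.
                         */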
                        rdp->exp_dynticks_snap =
                                atomic_add_return(0, &rdtp->dynticks);
                        if (raw_smp_processor_id() == cpu ||
                            !(rdp->exp_dynticks_snap & 0x1) ||
                            !(rnp->qsmaskinitnext & rdp->grpmask))
                                mask_ofl_test |= rdp->grpmask;
                }
                mask_ofl_ipi = rnp->expmask & ~mask_ofl_test;

                /*
                 * Need to wait for any blocked tasks as well. Note that
                 * additional blocking tasks will also block the expedited
                 * GP until such time as the ->expmask bits are cleared.
                 */
                if (rcu_preempt_has_tasks(rnp))
                        rnp->exp_tasks = rnp->blkd_tasks.next;
                raw_spin_unlock_irqrestore_rcu_node(rnp, flags);

                /* IPI the remaining CPUs for expedited quiescent state. */
                for_each_leaf_node_possible_cpu(rnp, cpu) {
                        unsigned long mask = leaf_node_cpu_bit(rnp, cpu);
                        struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
                        struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);

                        if (!(mask_ofl_ipi & mask))
                                continue;
retry_ipi:
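                        /*
                         * If the dynticks counter changed since the
                         * snapshot, this CPU has at least entered
                         * dynticks-idle in the meantime, which is itself
                         * a quiescent state, so no IPI is needed.
                         */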
                        if (atomic_add_return(0, &rdtp->dynticks) !=
                            rdp->exp_dynticks_snap) {
                                mask_ofl_test |= mask;
                                continue;
                        }
                        ret = smp_call_function_single(cpu, func, rsp, 0);
                        if (!ret) {
                                mask_ofl_ipi &= ~mask;
                                continue;
                        }
                        /* Failed, raced with CPU hotplug operation. */
                        raw_spin_lock_irqsave_rcu_node(rnp, flags);
                        if ((rnp->qsmaskinitnext & mask) &&
                            (rnp->expmask & mask)) {
                                /* Online, so delay for a bit and try again. */
                                raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
                                schedule_timeout_uninterruptible(1);
                                goto retry_ipi;
                        }
                        /* CPU really is offline, so we can ignore it. */
                        if (!(rnp->expmask & mask))
                                mask_ofl_ipi &= ~mask;
                        raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
                }
                /* Report quiescent states for those that went offline. */
                mask_ofl_test |= mask_ofl_ipi;
                if (mask_ofl_test)
                        rcu_report_exp_cpu_mult(rsp, rnp, mask_ofl_test, false);
        }
}

static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
{
        int cpu;
        unsigned long jiffies_stall;
        unsigned long jiffies_start;
        unsigned long mask;
        int ndetected;
        struct rcu_node *rnp;
        struct rcu_node *rnp_root = rcu_get_root(rsp);
        int ret;

        jiffies_stall = rcu_jiffies_till_stall_check();
        jiffies_start = jiffies;
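
        /*
         * Loop until the expedited grace period completes, emitting an
         * expedited-stall warning (unless suppressed) each time the
         * stall timeout expires.
         */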
        for (;;) {
                ret = swait_event_timeout(
                                rsp->expedited_wq,
                                sync_rcu_preempt_exp_done(rnp_root),
                                jiffies_stall);
                if (ret > 0 || sync_rcu_preempt_exp_done(rnp_root))
                        return;
                WARN_ON(ret < 0); /* workqueues should not be signaled. */
                if (rcu_cpu_stall_suppress)
                        continue;
                panic_on_rcu_stall();
                pr_err("INFO: %s detected expedited stalls on CPUs/tasks: {",
                       rsp->name);
                ndetected = 0;
                rcu_for_each_leaf_node(rsp, rnp) {
                        ndetected += rcu_print_task_exp_stall(rnp);
                        for_each_leaf_node_possible_cpu(rnp, cpu) {
                                struct rcu_data *rdp;

                                mask = leaf_node_cpu_bit(rnp, cpu);
                                if (!(rnp->expmask & mask))
                                        continue;
                                ndetected++;
                                rdp = per_cpu_ptr(rsp->rda, cpu);
                                pr_cont(" %d-%c%c%c", cpu,
                                        "O."[!!cpu_online(cpu)],
                                        "o."[!!(rdp->grpmask & rnp->expmaskinit)],
                                        "N."[!!(rdp->grpmask & rnp->expmaskinitnext)]);
                        }
                }
                pr_cont(" } %lu jiffies s: %lu root: %#lx/%c\n",
                        jiffies - jiffies_start, rsp->expedited_sequence,
                        rnp_root->expmask, ".T"[!!rnp_root->exp_tasks]);
                if (ndetected) {
                        pr_err("blocking rcu_node structures:");
                        rcu_for_each_node_breadth_first(rsp, rnp) {
                                if (rnp == rnp_root)
                                        continue; /* printed unconditionally */
                                if (sync_rcu_preempt_exp_done(rnp))
                                        continue;
                                pr_cont(" l=%u:%d-%d:%#lx/%c",
                                        rnp->level, rnp->grplo, rnp->grphi,
                                        rnp->expmask,
                                        ".T"[!!rnp->exp_tasks]);
                        }
                        pr_cont("\n");
                }
                rcu_for_each_leaf_node(rsp, rnp) {
                        for_each_leaf_node_possible_cpu(rnp, cpu) {
                                mask = leaf_node_cpu_bit(rnp, cpu);
                                if (!(rnp->expmask & mask))
                                        continue;
                                dump_cpu_task(cpu);
                        }
                }
                jiffies_stall = 3 * rcu_jiffies_till_stall_check() + 3;
        }
}

/*
 * Wait for the current expedited grace period to complete, and then
 * wake up everyone who piggybacked on the just-completed expedited
 * grace period. Also update all the ->exp_seq_rq counters as needed
 * in order to avoid counter-wrap problems.
 */
static void rcu_exp_wait_wake(struct rcu_state *rsp, unsigned long s)
{
        struct rcu_node *rnp;

        synchronize_sched_expedited_wait(rsp);
        rcu_exp_gp_seq_end(rsp);
        trace_rcu_exp_grace_period(rsp->name, s, TPS("end"));

        /*
         * Switch over to wakeup mode, allowing the next GP, but -only- the
         * next GP, to proceed.
         */
        mutex_lock(&rsp->exp_wake_mutex);

        rcu_for_each_node_breadth_first(rsp, rnp) {
                if (ULONG_CMP_LT(READ_ONCE(rnp->exp_seq_rq), s)) {
                        spin_lock(&rnp->exp_lock);
                        /* Recheck, avoid hang in case someone just arrived. */
                        if (ULONG_CMP_LT(rnp->exp_seq_rq, s))
                                rnp->exp_seq_rq = s;
                        spin_unlock(&rnp->exp_lock);
                }
                wake_up_all(&rnp->exp_wq[(rsp->expedited_sequence >> 1) & 0x3]);
        }
        trace_rcu_exp_grace_period(rsp->name, s, TPS("endwake"));
        mutex_unlock(&rsp->exp_wake_mutex);
}

/* Let the workqueue handler know what it is supposed to do. */
struct rcu_exp_work {
        smp_call_func_t rew_func;
        struct rcu_state *rew_rsp;
        unsigned long rew_s;
        struct work_struct rew_work;
};

/*
 * Common code to drive an expedited grace period forward, used by
 * workqueues and mid-boot-time tasks.
 */
static void rcu_exp_sel_wait_wake(struct rcu_state *rsp,
                                  smp_call_func_t func, unsigned long s)
{
        /* Initialize the rcu_node tree in preparation for the wait. */
        sync_rcu_exp_select_cpus(rsp, func);

        /* Wait and clean up, including waking everyone. */
        rcu_exp_wait_wake(rsp, s);
}

/*
 * Work-queue handler to drive an expedited grace period forward.
 */
static void wait_rcu_exp_gp(struct work_struct *wp)
{
        struct rcu_exp_work *rewp;

        rewp = container_of(wp, struct rcu_exp_work, rew_work);
        rcu_exp_sel_wait_wake(rewp->rew_rsp, rewp->rew_func, rewp->rew_s);
}

/*
 * Given an rcu_state pointer and a smp_call_function() handler, kick
 * off the specified flavor of expedited grace period.
 */
static void _synchronize_rcu_expedited(struct rcu_state *rsp,
                                       smp_call_func_t func)
{
        struct rcu_data *rdp;
        struct rcu_exp_work rew;
        struct rcu_node *rnp;
        unsigned long s;

        /* If expedited grace periods are prohibited, fall back to normal. */
        if (rcu_gp_is_normal()) {
                wait_rcu_gp(rsp->call);
                return;
        }

        /* Take a snapshot of the sequence number. */
        s = rcu_exp_gp_seq_snap(rsp);
        if (exp_funnel_lock(rsp, s))
                return;  /* Someone else did our work for us. */

        /* Ensure that load happens before action based on it. */
        if (unlikely(rcu_scheduler_active == RCU_SCHEDULER_INIT)) {
                /* Direct call during scheduler init and early_initcalls(). */
                rcu_exp_sel_wait_wake(rsp, func, s);
        } else {
                /* Marshall arguments & schedule the expedited grace period. */
                rew.rew_func = func;
                rew.rew_rsp = rsp;
                rew.rew_s = s;
                INIT_WORK_ONSTACK(&rew.rew_work, wait_rcu_exp_gp);
                schedule_work(&rew.rew_work);
        }

        /* Wait for expedited grace period to complete. */
        rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id());
        rnp = rcu_get_root(rsp);
        wait_event(rnp->exp_wq[(s >> 1) & 0x3],
                   sync_exp_work_done(rsp,
                                      &rdp->exp_workdone0, s));

        /* Let the next expedited grace period start. */
        mutex_unlock(&rsp->exp_mutex);
}

/**
 * synchronize_sched_expedited - Brute-force RCU-sched grace period
 *
 * Wait for an RCU-sched grace period to elapse, but use a "big hammer"
 * approach to force the grace period to end quickly. This consumes
 * significant time on all CPUs and is unfriendly to real-time workloads,
 * so is thus not recommended for any sort of common-case code. In fact,
 * if you are using synchronize_sched_expedited() in a loop, please
 * restructure your code to batch your updates, and then use a single
 * synchronize_sched() instead.
 *
 * This implementation can be thought of as an application of sequence
 * locking to expedited grace periods, but using the sequence counter to
 * determine when someone else has already done the work instead of for
 * retrying readers.
 */
void synchronize_sched_expedited(void)
{
        struct rcu_state *rsp = &rcu_sched_state;

        /* If only one CPU, this is automatically a grace period. */
        if (rcu_blocking_is_gp())
                return;

        _synchronize_rcu_expedited(rsp, sync_sched_exp_handler);
}
EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
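
/*
 * Typical usage sketch (illustrative only; "lock", "global_ptr", "old_p",
 * and "new_p" are hypothetical):
 *
 *      old_p = rcu_dereference_protected(global_ptr, lockdep_is_held(&lock));
 *      rcu_assign_pointer(global_ptr, new_p);  // unlink the old version
 *      synchronize_sched_expedited();          // wait for pre-existing
 *                                              //   preempt-disabled readers
 *      kfree(old_p);                           // now safe to free
 */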

#ifdef CONFIG_PREEMPT_RCU

/*
 * Remote handler for smp_call_function_single(). If there is an
 * RCU read-side critical section in effect, request that the
 * next rcu_read_unlock() record the quiescent state up the
 * ->expmask fields in the rcu_node tree. Otherwise, immediately
 * report the quiescent state.
 */
static void sync_rcu_exp_handler(void *info)
{
        struct rcu_data *rdp;
        struct rcu_state *rsp = info;
        struct task_struct *t = current;

        /*
         * Within an RCU read-side critical section, request that the next
         * rcu_read_unlock() report the quiescent state. Unless this RCU
         * read-side critical section has already blocked, in which case it
         * is already set up for the expedited grace period to wait on it.
         */
        if (t->rcu_read_lock_nesting > 0 &&
            !t->rcu_read_unlock_special.b.blocked) {
                t->rcu_read_unlock_special.b.exp_need_qs = true;
                return;
        }

        /*
         * We are either exiting an RCU read-side critical section (negative
         * values of t->rcu_read_lock_nesting) or are not in one at all
         * (zero value of t->rcu_read_lock_nesting). Or we are in an RCU
         * read-side critical section that blocked before this expedited
         * grace period started. Either way, we can immediately report
         * the quiescent state.
         */
        rdp = this_cpu_ptr(rsp->rda);
        rcu_report_exp_rdp(rsp, rdp, true);
}

/**
 * synchronize_rcu_expedited - Brute-force RCU grace period
 *
 * Wait for an RCU-preempt grace period, but expedite it. The basic
 * idea is to IPI all non-idle non-nohz online CPUs. The IPI handler
 * checks whether the CPU is in an RCU-preempt critical section, and
 * if so, it sets a flag that causes the outermost rcu_read_unlock()
 * to report the quiescent state. On the other hand, if the CPU is
 * not in an RCU read-side critical section, the IPI handler reports
 * the quiescent state immediately.
 *
 * Although this is a great improvement over previous expedited
 * implementations, it is still unfriendly to real-time workloads, so is
 * thus not recommended for any sort of common-case code. In fact, if
 * you are using synchronize_rcu_expedited() in a loop, please restructure
 * your code to batch your updates, and then use a single synchronize_rcu()
 * instead.
 */
void synchronize_rcu_expedited(void)
{
        struct rcu_state *rsp = rcu_state_p;

        if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE)
                return;
        _synchronize_rcu_expedited(rsp, sync_rcu_exp_handler);
}
EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);

#else /* #ifdef CONFIG_PREEMPT_RCU */

/*
 * Wait for an rcu-preempt grace period, but make it happen quickly.
 * But because preemptible RCU does not exist, map to rcu-sched.
 */
void synchronize_rcu_expedited(void)
{
        synchronize_sched_expedited();
}
EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);

#endif /* #else #ifdef CONFIG_PREEMPT_RCU */

/*
 * Switch to run-time mode once Tree RCU has fully initialized.
 */
static int __init rcu_exp_runtime_mode(void)
{
        rcu_test_sync_prims();
        rcu_scheduler_active = RCU_SCHEDULER_RUNNING;
        rcu_test_sync_prims();
        return 0;
}
core_initcall(rcu_exp_runtime_mode);