/*
 * linux/kernel/time/tick-sched.c
 *
 * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
 * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
 * Copyright(C) 2006-2007  Timesys Corp., Thomas Gleixner
 *
 * No idle tick implementation for low and high resolution timers
 *
 * Started by: Thomas Gleixner and Ingo Molnar
 *
 * Distribute under GPLv2.
 */
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>
#include <linux/percpu.h>
#include <linux/nmi.h>
#include <linux/profile.h>
#include <linux/sched/signal.h>
#include <linux/sched/clock.h>
#include <linux/sched/stat.h>
#include <linux/sched/nohz.h>
#include <linux/module.h>
#include <linux/irq_work.h>
#include <linux/posix-timers.h>
#include <linux/context_tracking.h>
#include <linux/mm.h>

#include <asm/irq_regs.h>

#include "tick-internal.h"

#include <trace/events/timer.h>

/*
 * Per-CPU nohz control structure
 */
static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched);
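/*
 * Return the per-CPU tick_sched control structure of the given CPU.
 */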
struct tick_sched *tick_get_tick_sched(int cpu)
{
	return &per_cpu(tick_cpu_sched, cpu);
}

#if defined(CONFIG_NO_HZ_COMMON) || defined(CONFIG_HIGH_RES_TIMERS)
/*
 * The time when the last jiffy update happened. Protected by jiffies_lock.
 */
static ktime_t last_jiffies_update;

/*
 * Must be called with interrupts disabled!
 */
static void tick_do_update_jiffies64(ktime_t now)
{
	unsigned long ticks = 0;
	ktime_t delta;

	/*
	 * Do a quick check without holding jiffies_lock:
	 */
	delta = ktime_sub(now, last_jiffies_update);
	if (delta < tick_period)
		return;

	/* Reevaluate with jiffies_lock held */
	write_seqlock(&jiffies_lock);

	delta = ktime_sub(now, last_jiffies_update);
	if (delta >= tick_period) {

		delta = ktime_sub(delta, tick_period);
		last_jiffies_update = ktime_add(last_jiffies_update,
						tick_period);

		/* Slow path for long timeouts */
		if (unlikely(delta >= tick_period)) {
			s64 incr = ktime_to_ns(tick_period);

			ticks = ktime_divns(delta, incr);

			last_jiffies_update = ktime_add_ns(last_jiffies_update,
							   incr * ticks);
		}
		do_timer(++ticks);

		/* Keep the tick_next_period variable up to date */
		tick_next_period = ktime_add(last_jiffies_update, tick_period);
	} else {
		write_sequnlock(&jiffies_lock);
		return;
	}
	write_sequnlock(&jiffies_lock);
	update_wall_time();
}

/*
 * Initialize and retrieve the jiffies update.
 */
static ktime_t tick_init_jiffy_update(void)
{
	ktime_t period;

	write_seqlock(&jiffies_lock);
	/* Did we start the jiffies update yet? */
	if (last_jiffies_update == 0)
		last_jiffies_update = tick_next_period;
	period = last_jiffies_update;
	write_sequnlock(&jiffies_lock);
	return period;
}
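/*
 * Handle the tick side of the periodic housekeeping: take over the
 * do_timer() duty if it is unassigned, update jiffies64 when this CPU
 * owns that duty, and note that the tick fired while idle.
 */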
static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
{
	int cpu = smp_processor_id();

#ifdef CONFIG_NO_HZ_COMMON
	/*
	 * Check if the do_timer duty was dropped. We don't care about
	 * concurrency: This happens only when the CPU in charge went
	 * into a long sleep. If two CPUs happen to assign themselves to
	 * this duty, then the jiffies update is still serialized by
	 * jiffies_lock.
	 */
	if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)
	    && !tick_nohz_full_cpu(cpu))
		tick_do_timer_cpu = cpu;
#endif

	/* Check if the jiffies need an update */
	if (tick_do_timer_cpu == cpu)
		tick_do_update_jiffies64(now);

	if (ts->inidle)
		ts->got_idle_tick = 1;
}
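/*
 * Perform the regular per-tick work: keep the watchdog and idle accounting
 * sane while the tick is stopped, then run process time accounting and
 * profiling.
 */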
static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
{
#ifdef CONFIG_NO_HZ_COMMON
	/*
	 * When we are idle and the tick is stopped, we have to touch
	 * the watchdog as we might not schedule for a really long
	 * time. This happens on complete idle SMP systems while
	 * waiting on the login prompt. We also increment the "start of
	 * idle" jiffy stamp so the idle accounting adjustment we do
	 * when we go busy again does not account too many ticks.
	 */
	if (ts->tick_stopped) {
		touch_softlockup_watchdog_sched();
		if (is_idle_task(current))
			ts->idle_jiffies++;
		/*
		 * In case the current tick fired too early past its expected
		 * expiration, make sure we don't bypass the next clock reprogramming
		 * to the same deadline.
		 */
		ts->next_tick = 0;
	}
#endif
	update_process_times(user_mode(regs));
	profile_tick(CPU_PROFILING);
}
#endif

#ifdef CONFIG_NO_HZ_FULL
cpumask_var_t tick_nohz_full_mask;
bool tick_nohz_full_running;
static atomic_t tick_dep_mask;
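/*
 * Check whether any tick dependency bit is set in @dep. Trace the first
 * dependency found so the reason for keeping the tick is visible.
 */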
static bool check_tick_dependency(atomic_t *dep)
{
	int val = atomic_read(dep);

	if (val & TICK_DEP_MASK_POSIX_TIMER) {
		trace_tick_stop(0, TICK_DEP_MASK_POSIX_TIMER);
		return true;
	}

	if (val & TICK_DEP_MASK_PERF_EVENTS) {
		trace_tick_stop(0, TICK_DEP_MASK_PERF_EVENTS);
		return true;
	}

	if (val & TICK_DEP_MASK_SCHED) {
		trace_tick_stop(0, TICK_DEP_MASK_SCHED);
		return true;
	}

	if (val & TICK_DEP_MASK_CLOCK_UNSTABLE) {
		trace_tick_stop(0, TICK_DEP_MASK_CLOCK_UNSTABLE);
		return true;
	}

	return false;
}

static bool can_stop_full_tick(int cpu, struct tick_sched *ts)
{
	lockdep_assert_irqs_disabled();

	if (unlikely(!cpu_online(cpu)))
		return false;

	if (check_tick_dependency(&tick_dep_mask))
		return false;

	if (check_tick_dependency(&ts->tick_dep_mask))
		return false;

	if (check_tick_dependency(&current->tick_dep_mask))
		return false;

	if (check_tick_dependency(&current->signal->tick_dep_mask))
		return false;

	return true;
}

static void nohz_full_kick_func(struct irq_work *work)
{
	/* Empty, the tick restart happens on tick_nohz_irq_exit() */
}

static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
	.func = nohz_full_kick_func,
};

/*
 * Kick this CPU if it's full dynticks in order to force it to
 * re-evaluate its dependency on the tick and restart it if necessary.
 * This kick, unlike tick_nohz_full_kick_cpu() and tick_nohz_full_kick_all(),
 * is NMI safe.
 */
static void tick_nohz_full_kick(void)
{
	if (!tick_nohz_full_cpu(smp_processor_id()))
		return;

	irq_work_queue(this_cpu_ptr(&nohz_full_kick_work));
}

/*
 * Kick the CPU if it's full dynticks in order to force it to
 * re-evaluate its dependency on the tick and restart it if necessary.
 */
void tick_nohz_full_kick_cpu(int cpu)
{
	if (!tick_nohz_full_cpu(cpu))
		return;

	irq_work_queue_on(&per_cpu(nohz_full_kick_work, cpu), cpu);
}

/*
 * Kick all full dynticks CPUs in order to force these to re-evaluate
 * their dependency on the tick and restart it if necessary.
 */
static void tick_nohz_full_kick_all(void)
{
	int cpu;

	if (!tick_nohz_full_running)
		return;

	preempt_disable();
	for_each_cpu_and(cpu, tick_nohz_full_mask, cpu_online_mask)
		tick_nohz_full_kick_cpu(cpu);
	preempt_enable();
}
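/*
 * Set a dependency bit in @dep and, if no dependency was previously set,
 * kick all full dynticks CPUs so they re-evaluate their tick state.
 */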
static void tick_nohz_dep_set_all(atomic_t *dep,
				  enum tick_dep_bits bit)
{
	int prev;

	prev = atomic_fetch_or(BIT(bit), dep);
	if (!prev)
		tick_nohz_full_kick_all();
}

/*
 * Set a global tick dependency. Used by perf events that rely on freq and
 * by unstable clock.
 */
void tick_nohz_dep_set(enum tick_dep_bits bit)
{
	tick_nohz_dep_set_all(&tick_dep_mask, bit);
}

void tick_nohz_dep_clear(enum tick_dep_bits bit)
{
	atomic_andnot(BIT(bit), &tick_dep_mask);
}

/*
 * Set per-CPU tick dependency. Used by scheduler and perf events in order to
 * manage events throttling.
 */
void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit)
{
	int prev;
	struct tick_sched *ts;

	ts = per_cpu_ptr(&tick_cpu_sched, cpu);

	prev = atomic_fetch_or(BIT(bit), &ts->tick_dep_mask);
	if (!prev) {
		preempt_disable();
		/* Perf needs local kick that is NMI safe */
		if (cpu == smp_processor_id()) {
			tick_nohz_full_kick();
		} else {
			/* Remote irq work not NMI-safe */
			if (!WARN_ON_ONCE(in_nmi()))
				tick_nohz_full_kick_cpu(cpu);
		}
		preempt_enable();
	}
}

void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit)
{
	struct tick_sched *ts = per_cpu_ptr(&tick_cpu_sched, cpu);

	atomic_andnot(BIT(bit), &ts->tick_dep_mask);
}

/*
 * Set a per-task tick dependency. Posix CPU timers need this in order to
 * elapse per-task timers.
 */
void tick_nohz_dep_set_task(struct task_struct *tsk, enum tick_dep_bits bit)
{
	/*
	 * We could optimize this with just kicking the target running the task
	 * if that noise matters for nohz full users.
	 */
	tick_nohz_dep_set_all(&tsk->tick_dep_mask, bit);
}

void tick_nohz_dep_clear_task(struct task_struct *tsk, enum tick_dep_bits bit)
{
	atomic_andnot(BIT(bit), &tsk->tick_dep_mask);
}

/*
 * Set a per-taskgroup tick dependency. Posix CPU timers need this in order to
 * elapse per-process timers.
 */
void tick_nohz_dep_set_signal(struct signal_struct *sig, enum tick_dep_bits bit)
{
	tick_nohz_dep_set_all(&sig->tick_dep_mask, bit);
}

void tick_nohz_dep_clear_signal(struct signal_struct *sig, enum tick_dep_bits bit)
{
	atomic_andnot(BIT(bit), &sig->tick_dep_mask);
}

/*
 * Re-evaluate the need for the tick as we switch the current task.
 * It might need the tick due to per task/process properties:
 * perf events, posix CPU timers, ...
 */
void __tick_nohz_task_switch(void)
{
	unsigned long flags;
	struct tick_sched *ts;

	local_irq_save(flags);

	if (!tick_nohz_full_cpu(smp_processor_id()))
		goto out;

	ts = this_cpu_ptr(&tick_cpu_sched);

	if (ts->tick_stopped) {
		if (atomic_read(&current->tick_dep_mask) ||
		    atomic_read(&current->signal->tick_dep_mask))
			tick_nohz_full_kick();
	}
out:
	local_irq_restore(flags);
}

/* Get the boot-time nohz CPU list from the kernel parameters. */
void __init tick_nohz_full_setup(cpumask_var_t cpumask)
{
	alloc_bootmem_cpumask_var(&tick_nohz_full_mask);
	cpumask_copy(tick_nohz_full_mask, cpumask);
	tick_nohz_full_running = true;
}

static int tick_nohz_cpu_down(unsigned int cpu)
{
	/*
	 * The boot CPU handles housekeeping duty (unbound timers,
	 * workqueues, timekeeping, ...) on behalf of full dynticks
	 * CPUs. It must remain online when nohz full is enabled.
	 */
	if (tick_nohz_full_running && tick_do_timer_cpu == cpu)
		return -EBUSY;
	return 0;
}

void __init tick_nohz_init(void)
{
	int cpu, ret;

	if (!tick_nohz_full_running)
		return;

	/*
	 * Full dynticks uses irq work to drive the tick rescheduling on safe
	 * locking contexts. But then we need irq work to raise its own
	 * interrupts to avoid circular dependency on the tick.
	 */
	if (!arch_irq_work_has_interrupt()) {
		pr_warn("NO_HZ: Can't run full dynticks because arch doesn't support irq work self-IPIs\n");
		cpumask_clear(tick_nohz_full_mask);
		tick_nohz_full_running = false;
		return;
	}

	cpu = smp_processor_id();

	if (cpumask_test_cpu(cpu, tick_nohz_full_mask)) {
		pr_warn("NO_HZ: Clearing %d from nohz_full range for timekeeping\n",
			cpu);
		cpumask_clear_cpu(cpu, tick_nohz_full_mask);
	}

	for_each_cpu(cpu, tick_nohz_full_mask)
		context_tracking_cpu_set(cpu);

	ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
					"kernel/nohz:predown", NULL,
					tick_nohz_cpu_down);
	WARN_ON(ret < 0);
	pr_info("NO_HZ: Full dynticks CPUs: %*pbl.\n",
		cpumask_pr_args(tick_nohz_full_mask));
}
#endif

/*
 * NOHZ - aka dynamic tick functionality
 */
#ifdef CONFIG_NO_HZ_COMMON
/*
 * NO HZ enabled ?
 */
bool tick_nohz_enabled __read_mostly = true;
unsigned long tick_nohz_active __read_mostly;
/*
 * Enable / Disable tickless mode
 */
static int __init setup_tick_nohz(char *str)
{
	return (kstrtobool(str, &tick_nohz_enabled) == 0);
}

__setup("nohz=", setup_tick_nohz);
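/*
 * Report whether the tick has been stopped (NOHZ mode) on the current CPU.
 */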
bool tick_nohz_tick_stopped(void)
{
	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);

	return ts->tick_stopped;
}

bool tick_nohz_tick_stopped_cpu(int cpu)
{
	struct tick_sched *ts = per_cpu_ptr(&tick_cpu_sched, cpu);

	return ts->tick_stopped;
}

/**
 * tick_nohz_update_jiffies - update jiffies when idle was interrupted
 *
 * Called from interrupt entry when the CPU was idle
 *
 * In case the sched_tick was stopped on this CPU, we have to check if jiffies
 * must be updated. Otherwise an interrupt handler could use a stale jiffy
 * value. We do this unconditionally on any CPU, as we don't know whether the
 * CPU which has the update task assigned is in a long sleep.
 */
static void tick_nohz_update_jiffies(ktime_t now)
{
	unsigned long flags;

	__this_cpu_write(tick_cpu_sched.idle_waketime, now);

	local_irq_save(flags);
	tick_do_update_jiffies64(now);
	local_irq_restore(flags);

	touch_softlockup_watchdog_sched();
}

/*
 * Updates the per-CPU time idle statistics counters
 */
static void
update_ts_time_stats(int cpu, struct tick_sched *ts, ktime_t now, u64 *last_update_time)
{
	ktime_t delta;

	if (ts->idle_active) {
		delta = ktime_sub(now, ts->idle_entrytime);
		if (nr_iowait_cpu(cpu) > 0)
			ts->iowait_sleeptime = ktime_add(ts->iowait_sleeptime, delta);
		else
			ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
		ts->idle_entrytime = now;
	}

	if (last_update_time)
		*last_update_time = ktime_to_us(now);
}
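/*
 * Account the idle period that just ended, mark idle accounting as inactive
 * and let the scheduler clock know about the wakeup.
 */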
static void tick_nohz_stop_idle(struct tick_sched *ts, ktime_t now)
{
	update_ts_time_stats(smp_processor_id(), ts, now, NULL);
	ts->idle_active = 0;

	sched_clock_idle_wakeup_event();
}

static void tick_nohz_start_idle(struct tick_sched *ts)
{
	ts->idle_entrytime = ktime_get();
	ts->idle_active = 1;
	sched_clock_idle_sleep_event();
}

/**
 * get_cpu_idle_time_us - get the total idle time of a CPU
 * @cpu: CPU number to query
 * @last_update_time: variable to store update time in. Do not update
 * counters if NULL.
 *
 * Return the cumulative idle time (since boot) for a given
 * CPU, in microseconds.
 *
 * This time is measured via accounting rather than sampling,
 * and is as accurate as ktime_get() is.
 *
 * This function returns -1 if NOHZ is not enabled.
 */
u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time)
{
	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
	ktime_t now, idle;

	if (!tick_nohz_active)
		return -1;

	now = ktime_get();
	if (last_update_time) {
		update_ts_time_stats(cpu, ts, now, last_update_time);
		idle = ts->idle_sleeptime;
	} else {
		if (ts->idle_active && !nr_iowait_cpu(cpu)) {
			ktime_t delta = ktime_sub(now, ts->idle_entrytime);

			idle = ktime_add(ts->idle_sleeptime, delta);
		} else {
			idle = ts->idle_sleeptime;
		}
	}

	return ktime_to_us(idle);
}
EXPORT_SYMBOL_GPL(get_cpu_idle_time_us);

/**
 * get_cpu_iowait_time_us - get the total iowait time of a CPU
 * @cpu: CPU number to query
 * @last_update_time: variable to store update time in. Do not update
 * counters if NULL.
 *
 * Return the cumulative iowait time (since boot) for a given
 * CPU, in microseconds.
 *
 * This time is measured via accounting rather than sampling,
 * and is as accurate as ktime_get() is.
 *
 * This function returns -1 if NOHZ is not enabled.
 */
u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time)
{
	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
	ktime_t now, iowait;

	if (!tick_nohz_active)
		return -1;

	now = ktime_get();
	if (last_update_time) {
		update_ts_time_stats(cpu, ts, now, last_update_time);
		iowait = ts->iowait_sleeptime;
	} else {
		if (ts->idle_active && nr_iowait_cpu(cpu) > 0) {
			ktime_t delta = ktime_sub(now, ts->idle_entrytime);

			iowait = ktime_add(ts->iowait_sleeptime, delta);
		} else {
			iowait = ts->iowait_sleeptime;
		}
	}

	return ktime_to_us(iowait);
}
EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us);
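/*
 * Rearm the tick timer for the next period after it was stopped, either via
 * the sched hrtimer (highres mode) or by reprogramming the clockevent
 * (lowres mode).
 */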
static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
{
	hrtimer_cancel(&ts->sched_timer);
	hrtimer_set_expires(&ts->sched_timer, ts->last_tick);

	/* Forward the time to expire in the future */
	hrtimer_forward(&ts->sched_timer, now, tick_period);

	if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
		hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED);
	else
		tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);

	/*
	 * Reset to make sure next tick stop doesn't get fooled by past
	 * cached clock deadline.
	 */
	ts->next_tick = 0;
}

static inline bool local_timer_softirq_pending(void)
{
	/* TIMER_SOFTIRQ is a bit number, so test the corresponding mask bit */
	return local_softirq_pending() & BIT(TIMER_SOFTIRQ);
}
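/*
 * Compute the time of the next tick event. The result is cached in
 * ts->timer_expires, together with the base timestamp in
 * ts->timer_expires_base. Returns 0 when the tick has to keep running
 * because the next timer is due within the current period and the tick
 * is not stopped yet.
 */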
static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
{
	u64 basemono, next_tick, next_tmr, next_rcu, delta, expires;
	unsigned long seq, basejiff;

	/* Read jiffies and the time when jiffies were updated last */
	do {
		seq = read_seqbegin(&jiffies_lock);
		basemono = last_jiffies_update;
		basejiff = jiffies;
	} while (read_seqretry(&jiffies_lock, seq));
	ts->last_jiffies = basejiff;
	ts->timer_expires_base = basemono;

	/*
	 * Keep the periodic tick, when RCU, architecture or irq_work
	 * requests it.
	 * Aside of that check whether the local timer softirq is
	 * pending. If so it's a bad idea to call get_next_timer_interrupt()
	 * because there is an already expired timer, so it will request
	 * immediate expiry, which rearms the hardware timer with a
	 * minimal delta which brings us back to this place
	 * immediately. Lather, rinse and repeat...
	 */
	if (rcu_needs_cpu(basemono, &next_rcu) || arch_needs_cpu() ||
	    irq_work_needs_cpu() || local_timer_softirq_pending()) {
		next_tick = basemono + TICK_NSEC;
	} else {
		/*
		 * Get the next pending timer. If high resolution
		 * timers are enabled this only takes the timer wheel
		 * timers into account. If high resolution timers are
		 * disabled this also looks at the next expiring
		 * hrtimer.
		 */
		next_tmr = get_next_timer_interrupt(basejiff, basemono);
		ts->next_timer = next_tmr;
		/* Take the next rcu event into account */
		next_tick = next_rcu < next_tmr ? next_rcu : next_tmr;
	}

	/*
	 * If the tick is due in the next period, keep it ticking or
	 * force prod the timer.
	 */
	delta = next_tick - basemono;
	if (delta <= (u64)TICK_NSEC) {
		/*
		 * Tell the timer code that the base is not idle, i.e. undo
		 * the effect of get_next_timer_interrupt():
		 */
		timer_clear_idle();
		/*
		 * We've not stopped the tick yet, and there's a timer in the
		 * next period, so no point in stopping it either, bail.
		 */
		if (!ts->tick_stopped) {
			ts->timer_expires = 0;
			goto out;
		}
	}

	/*
	 * If this CPU is the one which had the do_timer() duty last, we limit
	 * the sleep time to the timekeeping max_deferment value.
	 * Otherwise we can sleep as long as we want.
	 */
	delta = timekeeping_max_deferment();
	if (cpu != tick_do_timer_cpu &&
	    (tick_do_timer_cpu != TICK_DO_TIMER_NONE || !ts->do_timer_last))
		delta = KTIME_MAX;

	/* Calculate the next expiry time */
	if (delta < (KTIME_MAX - basemono))
		expires = basemono + delta;
	else
		expires = KTIME_MAX;

	ts->timer_expires = min_t(u64, expires, next_tick);

out:
	return ts->timer_expires;
}
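/*
 * Stop the tick: hand over the do_timer() duty if this CPU holds it, record
 * the last tick for the later restart and program the next event computed by
 * tick_nohz_next_event(), or cancel the tick timer entirely if no event is
 * pending (expiry == KTIME_MAX).
 */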
static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
{
	struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
	u64 basemono = ts->timer_expires_base;
	u64 expires = ts->timer_expires;
	ktime_t tick = expires;

	/* Make sure we won't be trying to stop it twice in a row. */
	ts->timer_expires_base = 0;

	/*
	 * If this CPU is the one which updates jiffies, then give up
	 * the assignment and let it be taken by the CPU which runs
	 * the tick timer next, which might be this CPU as well. If we
	 * don't drop this here the jiffies might be stale and
	 * do_timer() never invoked. Keep track of the fact that it
	 * was the one which had the do_timer() duty last.
	 */
	if (cpu == tick_do_timer_cpu) {
		tick_do_timer_cpu = TICK_DO_TIMER_NONE;
		ts->do_timer_last = 1;
	} else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
		ts->do_timer_last = 0;
	}

	/* Skip reprogram of event if it's not changed */
	if (ts->tick_stopped && (expires == ts->next_tick)) {
		/* Sanity check: make sure clockevent is actually programmed */
		if (tick == KTIME_MAX || ts->next_tick == hrtimer_get_expires(&ts->sched_timer))
			return;

		WARN_ON_ONCE(1);
		printk_once("basemono: %llu ts->next_tick: %llu dev->next_event: %llu timer->active: %d timer->expires: %llu\n",
			    basemono, ts->next_tick, dev->next_event,
			    hrtimer_active(&ts->sched_timer), hrtimer_get_expires(&ts->sched_timer));
	}

	/*
	 * nohz_stop_sched_tick can be called several times before
	 * the nohz_restart_sched_tick is called. This happens when
	 * interrupts arrive which do not cause a reschedule. In the
	 * first call we save the current tick time, so we can restart
	 * the scheduler tick in nohz_restart_sched_tick.
	 */
	if (!ts->tick_stopped) {
		calc_load_nohz_start();
		cpu_load_update_nohz_start();
		quiet_vmstat();

		ts->last_tick = hrtimer_get_expires(&ts->sched_timer);
		ts->tick_stopped = 1;
		trace_tick_stop(1, TICK_DEP_MASK_NONE);
	}

	ts->next_tick = tick;

	/*
	 * If the expiration time == KTIME_MAX, then we simply stop
	 * the tick timer.
	 */
	if (unlikely(expires == KTIME_MAX)) {
		if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
			hrtimer_cancel(&ts->sched_timer);
		return;
	}

	if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
		hrtimer_start(&ts->sched_timer, tick, HRTIMER_MODE_ABS_PINNED);
	} else {
		hrtimer_set_expires(&ts->sched_timer, tick);
		tick_program_event(tick, 1);
	}
}
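/*
 * Abort a pending tick stop: drop the base timestamp cached by
 * tick_nohz_next_event() so a later stop attempt starts from scratch.
 */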
static void tick_nohz_retain_tick(struct tick_sched *ts)
{
	ts->timer_expires_base = 0;
}

#ifdef CONFIG_NO_HZ_FULL
static void tick_nohz_stop_sched_tick(struct tick_sched *ts, int cpu)
{
	if (tick_nohz_next_event(ts, cpu))
		tick_nohz_stop_tick(ts, cpu);
	else
		tick_nohz_retain_tick(ts);
}
#endif /* CONFIG_NO_HZ_FULL */
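/*
 * Bring the periodic tick back after a nohz period: update jiffies, undo the
 * idle state of the timer base and load accounting, then rearm the tick.
 */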
static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
{
	/* Update jiffies first */
	tick_do_update_jiffies64(now);
	cpu_load_update_nohz_stop();

	/*
	 * Clear the timer idle flag, so we avoid IPIs on remote queueing and
	 * the clock forward checks in the enqueue path:
	 */
	timer_clear_idle();

	calc_load_nohz_stop();
	touch_softlockup_watchdog_sched();

	/*
	 * Cancel the scheduled timer and restore the tick
	 */
	ts->tick_stopped = 0;
	ts->idle_exittime = now;

	tick_nohz_restart(ts, now);
}
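/*
 * Re-evaluate the tick state of a full dynticks CPU on interrupt exit:
 * stop the tick if all dependencies allow it, or restart it if it was
 * stopped and a dependency showed up.
 */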
static void tick_nohz_full_update_tick(struct tick_sched *ts)
{
#ifdef CONFIG_NO_HZ_FULL
	int cpu = smp_processor_id();

	if (!tick_nohz_full_cpu(cpu))
		return;

	if (!ts->tick_stopped && ts->nohz_mode == NOHZ_MODE_INACTIVE)
		return;

	if (can_stop_full_tick(cpu, ts))
		tick_nohz_stop_sched_tick(ts, cpu);
	else if (ts->tick_stopped)
		tick_nohz_restart_sched_tick(ts, ktime_get());
#endif
}
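/*
 * Check whether the idle tick may be stopped on this CPU: the CPU must be
 * online, nohz must be active, nothing must need to reschedule, no softirq
 * may be pending, and on nohz_full systems the timekeeping duty must be
 * covered.
 */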
static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
{
	/*
	 * If this CPU is offline and it is the one which updates
	 * jiffies, then give up the assignment and let it be taken by
	 * the CPU which runs the tick timer next. If we don't drop
	 * this here the jiffies might be stale and do_timer() never
	 * invoked.
	 */
	if (unlikely(!cpu_online(cpu))) {
		if (cpu == tick_do_timer_cpu)
			tick_do_timer_cpu = TICK_DO_TIMER_NONE;
		/*
		 * Make sure the CPU doesn't get fooled by obsolete tick
		 * deadline if it comes back online later.
		 */
		ts->next_tick = 0;
		return false;
	}

	if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
		return false;

	if (need_resched())
		return false;

	if (unlikely(local_softirq_pending() && cpu_online(cpu))) {
		static int ratelimit;

		if (ratelimit < 10 &&
		    (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) {
			pr_warn("NOHZ: local_softirq_pending %02x\n",
				(unsigned int) local_softirq_pending());
			ratelimit++;
		}
		return false;
	}

	if (tick_nohz_full_enabled()) {
		/*
		 * Keep the tick alive to guarantee timekeeping progression
		 * if there are full dynticks CPUs around
		 */
		if (tick_do_timer_cpu == cpu)
			return false;
		/*
		 * Boot safety: make sure the timekeeping duty has been
		 * assigned before entering dyntick-idle mode.
		 */
		if (tick_do_timer_cpu == TICK_DO_TIMER_NONE)
			return false;
	}

	return true;
}
static void __tick_nohz_idle_stop_tick(struct tick_sched *ts)
{
	ktime_t expires;
	int cpu = smp_processor_id();

	/*
	 * If tick_nohz_get_sleep_length() ran tick_nohz_next_event(), the
	 * tick timer expiration time is known already.
	 */
	if (ts->timer_expires_base)
		expires = ts->timer_expires;
	else if (can_stop_idle_tick(cpu, ts))
		expires = tick_nohz_next_event(ts, cpu);
	else
		return;

	ts->idle_calls++;

	if (expires > 0LL) {
		int was_stopped = ts->tick_stopped;

		tick_nohz_stop_tick(ts, cpu);

		ts->idle_sleeps++;
		ts->idle_expires = expires;

		if (!was_stopped && ts->tick_stopped) {
			ts->idle_jiffies = ts->last_jiffies;
			nohz_balance_enter_idle(cpu);
		}
	} else {
		tick_nohz_retain_tick(ts);
	}
}

/**
 * tick_nohz_idle_stop_tick - stop the idle tick from the idle task
 *
 * When the next event is more than a tick into the future, stop the idle tick
 */
void tick_nohz_idle_stop_tick(void)
{
	__tick_nohz_idle_stop_tick(this_cpu_ptr(&tick_cpu_sched));
}

void tick_nohz_idle_retain_tick(void)
{
	tick_nohz_retain_tick(this_cpu_ptr(&tick_cpu_sched));
	/*
	 * Undo the effect of get_next_timer_interrupt() called from
	 * tick_nohz_next_event().
	 */
	timer_clear_idle();
}

/**
 * tick_nohz_idle_enter - prepare for entering idle on the current CPU
 *
 * Called when we start the idle loop.
 */
void tick_nohz_idle_enter(void)
{
	struct tick_sched *ts;

	lockdep_assert_irqs_enabled();

	local_irq_disable();

	ts = this_cpu_ptr(&tick_cpu_sched);

	WARN_ON_ONCE(ts->timer_expires_base);

	ts->inidle = 1;
	tick_nohz_start_idle(ts);

	local_irq_enable();
}

/**
 * tick_nohz_irq_exit - update next tick event from interrupt exit
 *
 * When an interrupt fires while we are idle and it doesn't cause
 * a reschedule, it may still add, modify or delete a timer, enqueue
 * an RCU callback, etc...
 * So we need to re-calculate and reprogram the next tick event.
 */
void tick_nohz_irq_exit(void)
{
	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);

	if (ts->inidle)
		tick_nohz_start_idle(ts);
	else
		tick_nohz_full_update_tick(ts);
}

/**
 * tick_nohz_idle_got_tick - Check whether or not the tick handler has run
 */
bool tick_nohz_idle_got_tick(void)
{
	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);

	if (ts->got_idle_tick) {
		ts->got_idle_tick = 0;
		return true;
	}
	return false;
}

/**
 * tick_nohz_get_sleep_length - return the expected length of the current sleep
 * @delta_next: duration until the next event if the tick cannot be stopped
 *
 * Called from power state control code with interrupts disabled
 */
ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next)
{
	struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
	int cpu = smp_processor_id();
	/*
	 * The idle entry time is expected to be a sufficient approximation of
	 * the current time at this point.
	 */
	ktime_t now = ts->idle_entrytime;
	ktime_t next_event;

	WARN_ON_ONCE(!ts->inidle);

	*delta_next = ktime_sub(dev->next_event, now);

	if (!can_stop_idle_tick(cpu, ts))
		return *delta_next;

	next_event = tick_nohz_next_event(ts, cpu);
	if (!next_event)
		return *delta_next;

	/*
	 * If the next highres timer to expire is earlier than next_event, the
	 * idle governor needs to know that.
	 */
	next_event = min_t(u64, next_event,
			   hrtimer_next_event_without(&ts->sched_timer));

	return ktime_sub(next_event, now);
}

/**
 * tick_nohz_get_idle_calls_cpu - return the current idle calls counter value
 * for a particular CPU.
 *
 * Called from the schedutil frequency scaling governor in scheduler context.
 */
unsigned long tick_nohz_get_idle_calls_cpu(int cpu)
{
	struct tick_sched *ts = tick_get_tick_sched(cpu);

	return ts->idle_calls;
}

/**
 * tick_nohz_get_idle_calls - return the current idle calls counter value
 *
 * Called from the schedutil frequency scaling governor in scheduler context.
 */
unsigned long tick_nohz_get_idle_calls(void)
{
	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);

	return ts->idle_calls;
}

static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
{
#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
	unsigned long ticks;

	if (vtime_accounting_cpu_enabled())
		return;
	/*
	 * We stopped the tick in idle. update_process_times() would miss the
	 * time we slept, as it only does a single tick of accounting.
	 * Enforce that this is accounted to idle!
	 */
	ticks = jiffies - ts->idle_jiffies;
	/*
	 * We might be one off. Do not randomly account a huge number of ticks!
	 */
	if (ticks && ticks < LONG_MAX)
		account_idle_ticks(ticks);
#endif
}

static void __tick_nohz_idle_restart_tick(struct tick_sched *ts, ktime_t now)
{
	tick_nohz_restart_sched_tick(ts, now);
	tick_nohz_account_idle_ticks(ts);
}

void tick_nohz_idle_restart_tick(void)
{
	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);

	if (ts->tick_stopped)
		__tick_nohz_idle_restart_tick(ts, ktime_get());
}

/**
 * tick_nohz_idle_exit - restart the idle tick from the idle task
 *
 * Restart the idle tick when the CPU is woken up from idle
 * This also exits the RCU extended quiescent state. The CPU
 * can use RCU again after this function is called.
 */
void tick_nohz_idle_exit(void)
{
	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
	bool idle_active, tick_stopped;
	ktime_t now;

	local_irq_disable();

	WARN_ON_ONCE(!ts->inidle);
	WARN_ON_ONCE(ts->timer_expires_base);

	ts->inidle = 0;
	idle_active = ts->idle_active;
	tick_stopped = ts->tick_stopped;

	if (idle_active || tick_stopped)
		now = ktime_get();

	if (idle_active)
		tick_nohz_stop_idle(ts, now);

	if (tick_stopped)
		__tick_nohz_idle_restart_tick(ts, now);

	local_irq_enable();
}

/*
 * The nohz low res interrupt handler
 */
static void tick_nohz_handler(struct clock_event_device *dev)
{
	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
	struct pt_regs *regs = get_irq_regs();
	ktime_t now = ktime_get();

	dev->next_event = KTIME_MAX;

	tick_sched_do_timer(ts, now);
	tick_sched_handle(ts, regs);

	/* No need to reprogram if we are running tickless */
	if (unlikely(ts->tick_stopped))
		return;

	hrtimer_forward(&ts->sched_timer, now, tick_period);
	tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
}
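/*
 * Record the nohz mode of this CPU and, on the first activation anywhere,
 * switch the timer wheel code over to nohz operation.
 */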
static inline void tick_nohz_activate(struct tick_sched *ts, int mode)
{
	if (!tick_nohz_enabled)
		return;
	ts->nohz_mode = mode;
	/* One update is enough */
	if (!test_and_set_bit(0, &tick_nohz_active))
		timers_update_nohz();
}

/**
 * tick_nohz_switch_to_nohz - switch to nohz mode
 */
static void tick_nohz_switch_to_nohz(void)
{
	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
	ktime_t next;

	if (!tick_nohz_enabled)
		return;

	if (tick_switch_to_oneshot(tick_nohz_handler))
		return;

	/*
	 * Recycle the hrtimer in ts, so we can share the
	 * hrtimer_forward with the highres code.
	 */
	hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	/* Get the next period */
	next = tick_init_jiffy_update();

	hrtimer_set_expires(&ts->sched_timer, next);
	hrtimer_forward_now(&ts->sched_timer, tick_period);
	tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
	tick_nohz_activate(ts, NOHZ_MODE_LOWRES);
}

static inline void tick_nohz_irq_enter(void)
{
	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
	ktime_t now;

	if (!ts->idle_active && !ts->tick_stopped)
		return;
	now = ktime_get();
	if (ts->idle_active)
		tick_nohz_stop_idle(ts, now);
	if (ts->tick_stopped)
		tick_nohz_update_jiffies(now);
}

#else

static inline void tick_nohz_switch_to_nohz(void) { }
static inline void tick_nohz_irq_enter(void) { }
static inline void tick_nohz_activate(struct tick_sched *ts, int mode) { }

#endif /* CONFIG_NO_HZ_COMMON */

/*
 * Called from irq_enter to notify about the possible interruption of idle()
 */
void tick_irq_enter(void)
{
	tick_check_oneshot_broadcast_this_cpu();
	tick_nohz_irq_enter();
}

/*
 * High resolution timer specific code
 */
#ifdef CONFIG_HIGH_RES_TIMERS
/*
 * We rearm the timer until we get disabled by the idle code.
 * Called with interrupts disabled.
 */
static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
{
	struct tick_sched *ts =
		container_of(timer, struct tick_sched, sched_timer);
	struct pt_regs *regs = get_irq_regs();
	ktime_t now = ktime_get();

	tick_sched_do_timer(ts, now);

	/*
	 * Do not call when we are not in irq context and have
	 * no valid regs pointer
	 */
	if (regs)
		tick_sched_handle(ts, regs);
	else
		ts->next_tick = 0;

	/* No need to reprogram if we are in idle or full dynticks mode */
	if (unlikely(ts->tick_stopped))
		return HRTIMER_NORESTART;

	hrtimer_forward(timer, now, tick_period);

	return HRTIMER_RESTART;
}
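/*
 * "skew_tick=1" on the kernel command line staggers the per-CPU tick to
 * reduce jiffies_lock contention on large systems; the offset is applied
 * in tick_setup_sched_timer() below.
 */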
static int sched_skew_tick;

static int __init skew_tick(char *str)
{
	get_option(&str, &sched_skew_tick);

	return 0;
}
early_param("skew_tick", skew_tick);

/**
 * tick_setup_sched_timer - setup the tick emulation timer
 */
void tick_setup_sched_timer(void)
{
	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
	ktime_t now = ktime_get();

	/*
	 * Emulate tick processing via per-CPU hrtimers:
	 */
	hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	ts->sched_timer.function = tick_sched_timer;

	/* Get the next period (per-CPU) */
	hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update());

	/* Offset the tick to avert jiffies_lock contention. */
	if (sched_skew_tick) {
		u64 offset = ktime_to_ns(tick_period) >> 1;
		do_div(offset, num_possible_cpus());
		offset *= smp_processor_id();
		hrtimer_add_expires_ns(&ts->sched_timer, offset);
	}

	hrtimer_forward(&ts->sched_timer, now, tick_period);
	hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED);
	tick_nohz_activate(ts, NOHZ_MODE_HIGHRES);
}
#endif /* HIGH_RES_TIMERS */

#if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS
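/*
 * Cancel the per-CPU tick emulation hrtimer and reset the CPU's tick_sched
 * state.
 */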
void tick_cancel_sched_timer(int cpu)
{
	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);

# ifdef CONFIG_HIGH_RES_TIMERS
	if (ts->sched_timer.base)
		hrtimer_cancel(&ts->sched_timer);
# endif

	memset(ts, 0, sizeof(*ts));
}
#endif

/**
 * Async notification about clocksource changes
 */
void tick_clock_notify(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		set_bit(0, &per_cpu(tick_cpu_sched, cpu).check_clocks);
}

/*
 * Async notification about clock event changes
 */
void tick_oneshot_notify(void)
{
	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);

	set_bit(0, &ts->check_clocks);
}

/**
 * Check whether a change happened which makes oneshot mode possible.
 *
 * Called cyclically from the hrtimer softirq (driven by the timer softirq).
 * allow_nohz signals that we can switch into low-res nohz mode, because high
 * resolution timers are disabled (either at compile time or at runtime).
 * Called with interrupts disabled.
 */
int tick_check_oneshot_change(int allow_nohz)
{
	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);

	if (!test_and_clear_bit(0, &ts->check_clocks))
		return 0;

	if (ts->nohz_mode != NOHZ_MODE_INACTIVE)
		return 0;

	if (!timekeeping_valid_for_hres() || !tick_is_oneshot_available())
		return 0;

	if (!allow_nohz)
		return 1;

	tick_nohz_switch_to_nohz();
	return 0;
}