/*
 * Detect hard and soft lockups on a system
 *
 * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc.
 *
 * Note: Most of this code is borrowed heavily from the original softlockup
 * detector, so thanks to Ingo for the initial implementation.
 * Some chunks also taken from the old x86-specific nmi watchdog code, thanks
 * to those contributors as well.
 */
#define pr_fmt(fmt) "watchdog: " fmt

#include <linux/mm.h>
#include <linux/cpu.h>
#include <linux/nmi.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/sysctl.h>
#include <linux/smpboot.h>
#include <linux/sched/rt.h>
#include <uapi/linux/sched/types.h>
#include <linux/tick.h>
#include <linux/workqueue.h>
#include <linux/sched/clock.h>
#include <linux/sched/debug.h>

#include <asm/irq_regs.h>
#include <linux/kvm_para.h>
#include <linux/kthread.h>

static DEFINE_MUTEX(watchdog_mutex);

#if defined(CONFIG_HARDLOCKUP_DETECTOR) || defined(CONFIG_HAVE_NMI_WATCHDOG)
# define WATCHDOG_DEFAULT	(SOFT_WATCHDOG_ENABLED | NMI_WATCHDOG_ENABLED)
# define NMI_WATCHDOG_DEFAULT	1
#else
# define WATCHDOG_DEFAULT	(SOFT_WATCHDOG_ENABLED)
# define NMI_WATCHDOG_DEFAULT	0
#endif

unsigned long __read_mostly watchdog_enabled;
int __read_mostly watchdog_user_enabled = 1;
int __read_mostly nmi_watchdog_user_enabled = NMI_WATCHDOG_DEFAULT;
int __read_mostly soft_watchdog_user_enabled = 1;
int __read_mostly watchdog_thresh = 10;
int __read_mostly nmi_watchdog_available;

struct cpumask watchdog_allowed_mask __read_mostly;

struct cpumask watchdog_cpumask __read_mostly;
unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);
#ifdef CONFIG_HARDLOCKUP_DETECTOR
/*
 * Should we panic when a soft-lockup or hard-lockup occurs:
 */
unsigned int __read_mostly hardlockup_panic =
			CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE;
/*
 * We may not want to enable hard lockup detection by default in all cases,
 * for example when running the kernel as a guest on a hypervisor. In these
 * cases this function can be called to disable hard lockup detection. This
 * function should only be executed once by the boot processor before the
 * kernel command line parameters are parsed, because otherwise it is not
 * possible to override this in hardlockup_panic_setup().
 */
void __init hardlockup_detector_disable(void)
{
	nmi_watchdog_user_enabled = 0;
}

static int __init hardlockup_panic_setup(char *str)
{
	if (!strncmp(str, "panic", 5))
		hardlockup_panic = 1;
	else if (!strncmp(str, "nopanic", 7))
		hardlockup_panic = 0;
	else if (!strncmp(str, "0", 1))
		nmi_watchdog_user_enabled = 0;
	else if (!strncmp(str, "1", 1))
		nmi_watchdog_user_enabled = 1;
	return 1;
}
__setup("nmi_watchdog=", hardlockup_panic_setup);

# ifdef CONFIG_SMP
int __read_mostly sysctl_hardlockup_all_cpu_backtrace;

static int __init hardlockup_all_cpu_backtrace_setup(char *str)
{
	sysctl_hardlockup_all_cpu_backtrace = !!simple_strtol(str, NULL, 0);
	return 1;
}
__setup("hardlockup_all_cpu_backtrace=", hardlockup_all_cpu_backtrace_setup);
# endif /* CONFIG_SMP */
#endif /* CONFIG_HARDLOCKUP_DETECTOR */
/*
 * These functions can be overridden if an architecture implements its
 * own hardlockup detector.
 *
 * watchdog_nmi_enable/disable can be implemented to start and stop when
 * softlockup watchdog threads start and stop. The arch must select the
 * SOFTLOCKUP_DETECTOR Kconfig.
 */
int __weak watchdog_nmi_enable(unsigned int cpu)
{
	hardlockup_detector_perf_enable();
	return 0;
}

void __weak watchdog_nmi_disable(unsigned int cpu)
{
	hardlockup_detector_perf_disable();
}

/* Return 0 if an NMI watchdog is available, an error code otherwise */
int __weak __init watchdog_nmi_probe(void)
{
	return hardlockup_detector_perf_init();
}

/**
 * watchdog_nmi_stop - Stop the watchdog for reconfiguration
 *
 * The reconfiguration steps are:
 * watchdog_nmi_stop();
 * update_variables();
 * watchdog_nmi_start();
 */
void __weak watchdog_nmi_stop(void) { }

/**
 * watchdog_nmi_start - Start the watchdog after reconfiguration
 *
 * Counterpart to watchdog_nmi_stop().
 *
 * The following variables have been updated in update_variables() and
 * contain the currently valid configuration:
 * - watchdog_enabled
 * - watchdog_thresh
 * - watchdog_cpumask
 */
void __weak watchdog_nmi_start(void) { }

/**
 * lockup_detector_update_enable - Update the sysctl enable bit
 *
 * Caller needs to make sure that the NMI/perf watchdogs are off, so this
 * can't race with watchdog_nmi_disable().
 */
static void lockup_detector_update_enable(void)
{
	watchdog_enabled = 0;
	if (!watchdog_user_enabled)
		return;
	if (nmi_watchdog_available && nmi_watchdog_user_enabled)
		watchdog_enabled |= NMI_WATCHDOG_ENABLED;
	if (soft_watchdog_user_enabled)
		watchdog_enabled |= SOFT_WATCHDOG_ENABLED;
}
#ifdef CONFIG_SOFTLOCKUP_DETECTOR

/* Global variables, exported for sysctl */
unsigned int __read_mostly softlockup_panic =
			CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;

static bool softlockup_threads_initialized __read_mostly;
static u64 __read_mostly sample_period;

static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer);
static DEFINE_PER_CPU(bool, softlockup_touch_sync);
static DEFINE_PER_CPU(bool, soft_watchdog_warn);
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
static DEFINE_PER_CPU(unsigned long, soft_lockup_hrtimer_cnt);
static DEFINE_PER_CPU(struct task_struct *, softlockup_task_ptr_saved);
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
static unsigned long soft_lockup_nmi_warn;

static int __init softlockup_panic_setup(char *str)
{
	softlockup_panic = simple_strtoul(str, NULL, 0);
	return 1;
}
__setup("softlockup_panic=", softlockup_panic_setup);

static int __init nowatchdog_setup(char *str)
{
	watchdog_user_enabled = 0;
	return 1;
}
__setup("nowatchdog", nowatchdog_setup);

static int __init nosoftlockup_setup(char *str)
{
	soft_watchdog_user_enabled = 0;
	return 1;
}
__setup("nosoftlockup", nosoftlockup_setup);

#ifdef CONFIG_SMP
int __read_mostly sysctl_softlockup_all_cpu_backtrace;

static int __init softlockup_all_cpu_backtrace_setup(char *str)
{
	sysctl_softlockup_all_cpu_backtrace = !!simple_strtol(str, NULL, 0);
	return 1;
}
__setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup);
#endif
static void __lockup_detector_cleanup(void);

/*
 * Hard-lockup warnings should be triggered after just a few seconds. Soft-
 * lockups can have false positives under extreme conditions. So we generally
 * want a higher threshold for soft lockups than for hard lockups. So we couple
 * the thresholds with a factor: we make the soft threshold twice the amount of
 * time the hard threshold is.
 */
static int get_softlockup_thresh(void)
{
	return watchdog_thresh * 2;
}

/*
 * Returns seconds, approximately. We don't need nanosecond
 * resolution, and we don't need to waste time with a big divide when
 * 2^30ns == 1.074s.
 */
static unsigned long get_timestamp(void)
{
	return running_clock() >> 30LL;  /* 2^30 ~= 10^9 */
}

static void set_sample_period(void)
{
	/*
	 * convert watchdog_thresh from seconds to ns
	 * the divide by 5 is to give hrtimer several chances (two
	 * or three with the current relation between the soft
	 * and hard thresholds) to increment before the
	 * hardlockup detector generates a warning
	 */
	sample_period = get_softlockup_thresh() * ((u64)NSEC_PER_SEC / 5);
	watchdog_update_hrtimer_threshold(sample_period);
}
/* Commands for resetting the watchdog */
static void __touch_watchdog(void)
{
	__this_cpu_write(watchdog_touch_ts, get_timestamp());
}

/**
 * touch_softlockup_watchdog_sched - touch watchdog on scheduler stalls
 *
 * Call when the scheduler may have stalled for legitimate reasons
 * preventing the watchdog task from executing - e.g. the scheduler
 * entering idle state. This should only be used for scheduler events.
 * Use touch_softlockup_watchdog() for everything else.
 */
void touch_softlockup_watchdog_sched(void)
{
	/*
	 * Preemption can be enabled. It doesn't matter which CPU's timestamp
	 * gets zeroed here, so use the raw_ operation.
	 */
	raw_cpu_write(watchdog_touch_ts, 0);
}

void touch_softlockup_watchdog(void)
{
	touch_softlockup_watchdog_sched();
	wq_watchdog_touch(raw_smp_processor_id());
}
EXPORT_SYMBOL(touch_softlockup_watchdog);

void touch_all_softlockup_watchdogs(void)
{
	int cpu;

	/*
	 * watchdog_mutex cannot be taken here, as this might be called
	 * from (soft)interrupt context, so the access to
	 * watchdog_allowed_cpumask might race with a concurrent update.
	 *
	 * The watchdog time stamp can race against a concurrent real
	 * update as well, the only side effect might be a cycle delay for
	 * the softlockup check.
	 */
	for_each_cpu(cpu, &watchdog_allowed_mask)
		per_cpu(watchdog_touch_ts, cpu) = 0;
	wq_watchdog_touch(-1);
}
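
/*
 * Like touch_softlockup_watchdog(), but also sets softlockup_touch_sync so
 * that watchdog_timer_fn() calls sched_clock_tick() to bring the scheduler
 * clock up to date before the timestamp is re-armed.
 */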
void touch_softlockup_watchdog_sync(void)
{
	__this_cpu_write(softlockup_touch_sync, true);
	__this_cpu_write(watchdog_touch_ts, 0);
}

static int is_softlockup(unsigned long touch_ts)
{
	unsigned long now = get_timestamp();

	if ((watchdog_enabled & SOFT_WATCHDOG_ENABLED) && watchdog_thresh) {
		/* Warn about unreasonable delays. */
		if (time_after(now, touch_ts + get_softlockup_thresh()))
			return now - touch_ts;
	}
	return 0;
}

/* watchdog detector functions */
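/*
 * Used by the hardlockup detector: the CPU is considered hard locked up
 * when hrtimer_interrupts has not advanced since the previous check, i.e.
 * the softlockup hrtimer has not fired for a full hardlockup period.
 */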
bool is_hardlockup(void)
{
	unsigned long hrint = __this_cpu_read(hrtimer_interrupts);

	if (__this_cpu_read(hrtimer_interrupts_saved) == hrint)
		return true;

	__this_cpu_write(hrtimer_interrupts_saved, hrint);
	return false;
}

static void watchdog_interrupt_count(void)
{
	__this_cpu_inc(hrtimer_interrupts);
}

/* watchdog kicker functions */
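/*
 * Per-CPU hrtimer callback, run every sample_period in hard interrupt
 * context: it bumps hrtimer_interrupts for the hardlockup detector, wakes
 * the per-CPU watchdog thread, and reports a soft lockup if that thread
 * has not refreshed watchdog_touch_ts within the softlockup threshold.
 */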
static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
{
	unsigned long touch_ts = __this_cpu_read(watchdog_touch_ts);
	struct pt_regs *regs = get_irq_regs();
	int duration;
	int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace;

	if (!watchdog_enabled)
		return HRTIMER_NORESTART;

	/* kick the hardlockup detector */
	watchdog_interrupt_count();

	/* kick the softlockup detector */
	wake_up_process(__this_cpu_read(softlockup_watchdog));

	/* .. and repeat */
	hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period));

	if (touch_ts == 0) {
		if (unlikely(__this_cpu_read(softlockup_touch_sync))) {
			/*
			 * If the time stamp was touched atomically
			 * make sure the scheduler tick is up to date.
			 */
			__this_cpu_write(softlockup_touch_sync, false);
			sched_clock_tick();
		}

		/* Clear the guest paused flag on watchdog reset */
		kvm_check_and_clear_guest_paused();
		__touch_watchdog();
		return HRTIMER_RESTART;
	}

	/* check for a softlockup
	 * This is done by making sure a high priority task is
	 * being scheduled. The task touches the watchdog to
	 * indicate it is getting cpu time. If it hasn't then
	 * this is a good indication some task is hogging the cpu
	 */
	duration = is_softlockup(touch_ts);
	if (unlikely(duration)) {
		/*
		 * If a virtual machine is stopped by the host it can look to
		 * the watchdog like a soft lockup, check to see if the host
		 * stopped the vm before we issue the warning
		 */
		if (kvm_check_and_clear_guest_paused())
			return HRTIMER_RESTART;

		/* only warn once */
		if (__this_cpu_read(soft_watchdog_warn) == true) {
			/*
			 * When multiple processes are causing softlockups the
			 * softlockup detector only warns on the first one
			 * because the code relies on a full quiet cycle to
			 * re-arm. The second process prevents the quiet cycle
			 * and never gets reported. Use task pointers to detect
			 * this.
			 */
			if (__this_cpu_read(softlockup_task_ptr_saved) !=
			    current) {
				__this_cpu_write(soft_watchdog_warn, false);
				__touch_watchdog();
			}
			return HRTIMER_RESTART;
		}

		if (softlockup_all_cpu_backtrace) {
			/* Prevent multiple soft-lockup reports if one cpu is
			 * already engaged in dumping cpu back traces
			 */
			if (test_and_set_bit(0, &soft_lockup_nmi_warn)) {
				/* Someone else will report us. Let's give up */
				__this_cpu_write(soft_watchdog_warn, true);
				return HRTIMER_RESTART;
			}
		}

		pr_emerg("BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
			smp_processor_id(), duration,
			current->comm, task_pid_nr(current));
		__this_cpu_write(softlockup_task_ptr_saved, current);
		print_modules();
		print_irqtrace_events(current);
		if (regs)
			show_regs(regs);
		else
			dump_stack();

		if (softlockup_all_cpu_backtrace) {
			/* Avoid generating two back traces for current
			 * given that one is already made above
			 */
			trigger_allbutself_cpu_backtrace();

			clear_bit(0, &soft_lockup_nmi_warn);
			/* Barrier to sync with other cpus */
			smp_mb__after_atomic();
		}

		add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
		if (softlockup_panic)
			panic("softlockup: hung tasks");
		__this_cpu_write(soft_watchdog_warn, true);
	} else
		__this_cpu_write(soft_watchdog_warn, false);

	return HRTIMER_RESTART;
}
static void watchdog_set_prio(unsigned int policy, unsigned int prio)
{
	struct sched_param param = { .sched_priority = prio };

	sched_setscheduler(current, policy, &param);
}
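
/*
 * smpboot setup/unpark callback, run on the target CPU: start the sample
 * hrtimer, arm the perf/NMI event if NMI_WATCHDOG_ENABLED is set, and raise
 * the watchdog thread to SCHED_FIFO.
 */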
static void watchdog_enable(unsigned int cpu)
{
	struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer);

	/*
	 * Start the timer first to prevent the NMI watchdog triggering
	 * before the timer has a chance to fire.
	 */
	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	hrtimer->function = watchdog_timer_fn;
	hrtimer_start(hrtimer, ns_to_ktime(sample_period),
		      HRTIMER_MODE_REL_PINNED);

	/* Initialize timestamp */
	__touch_watchdog();
	/* Enable the perf event */
	if (watchdog_enabled & NMI_WATCHDOG_ENABLED)
		watchdog_nmi_enable(cpu);

	watchdog_set_prio(SCHED_FIFO, MAX_RT_PRIO - 1);
}

static void watchdog_disable(unsigned int cpu)
{
	struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer);

	watchdog_set_prio(SCHED_NORMAL, 0);
	/*
	 * Disable the perf event first, so that a large delay between
	 * disabling the timer and disabling the perf event cannot cause
	 * the perf NMI to detect a false positive.
	 */
	watchdog_nmi_disable(cpu);
	hrtimer_cancel(hrtimer);
}

static void watchdog_cleanup(unsigned int cpu, bool online)
{
	watchdog_disable(cpu);
}
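
/*
 * smpboot thread_should_run callback: the watchdog thread only needs to run
 * when the hrtimer has fired since the thread last updated the timestamp,
 * i.e. when hrtimer_interrupts is ahead of soft_lockup_hrtimer_cnt.
 */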
static int watchdog_should_run(unsigned int cpu)
{
	return __this_cpu_read(hrtimer_interrupts) !=
		__this_cpu_read(soft_lockup_hrtimer_cnt);
}

/*
 * The watchdog thread function - touches the timestamp.
 *
 * It only runs once every sample_period seconds (4 seconds by
 * default) to reset the softlockup timestamp. If this gets delayed
 * for more than 2*watchdog_thresh seconds then the debug-printout
 * triggers in watchdog_timer_fn().
 */
static void watchdog(unsigned int cpu)
{
	__this_cpu_write(soft_lockup_hrtimer_cnt,
			 __this_cpu_read(hrtimer_interrupts));
	__touch_watchdog();
}
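
/*
 * Per-CPU smpboot thread descriptor: parking/unparking a CPU's watchdog
 * thread stops/starts its hrtimer and perf event via watchdog_disable()
 * and watchdog_enable().
 */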
static struct smp_hotplug_thread watchdog_threads = {
	.store			= &softlockup_watchdog,
	.thread_should_run	= watchdog_should_run,
	.thread_fn		= watchdog,
	.thread_comm		= "watchdog/%u",
	.setup			= watchdog_enable,
	.cleanup		= watchdog_cleanup,
	.park			= watchdog_disable,
	.unpark			= watchdog_enable,
};

static void softlockup_update_smpboot_threads(void)
{
	lockdep_assert_held(&watchdog_mutex);

	if (!softlockup_threads_initialized)
		return;

	smpboot_update_cpumask_percpu_thread(&watchdog_threads,
					     &watchdog_allowed_mask);
}

/* Temporarily park all watchdog threads */
static void softlockup_park_all_threads(void)
{
	cpumask_clear(&watchdog_allowed_mask);
	softlockup_update_smpboot_threads();
}

/* Unpark enabled threads */
static void softlockup_unpark_threads(void)
{
	cpumask_copy(&watchdog_allowed_mask, &watchdog_cpumask);
	softlockup_update_smpboot_threads();
}
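
/*
 * Apply a configuration change: with CPU hotplug excluded, stop the NMI
 * watchdog, park all softlockup threads, recompute sample_period and
 * watchdog_enabled, then unpark the threads and restart the NMI watchdog
 * only if the detector is still enabled.
 */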
static void lockup_detector_reconfigure(void)
{
	cpus_read_lock();
	watchdog_nmi_stop();
	softlockup_park_all_threads();
	set_sample_period();
	lockup_detector_update_enable();
	if (watchdog_enabled && watchdog_thresh)
		softlockup_unpark_threads();
	watchdog_nmi_start();
	cpus_read_unlock();
	/*
	 * Must be called outside the cpus locked section to prevent
	 * recursive locking in the perf code.
	 */
	__lockup_detector_cleanup();
}
/*
 * Create the watchdog thread infrastructure and configure the detector(s).
 *
 * The threads are not unparked as watchdog_allowed_mask is empty. When
 * the threads are successfully initialized, take the proper locks and
 * unpark the threads in the watchdog_cpumask if the watchdog is enabled.
 */
static __init void lockup_detector_setup(void)
{
	int ret;

	/*
	 * If sysctl is off and watchdog got disabled on the command line,
	 * nothing to do here.
	 */
	lockup_detector_update_enable();

	if (!IS_ENABLED(CONFIG_SYSCTL) &&
	    !(watchdog_enabled && watchdog_thresh))
		return;

	ret = smpboot_register_percpu_thread_cpumask(&watchdog_threads,
						     &watchdog_allowed_mask);
	if (ret) {
		pr_err("Failed to initialize soft lockup detector threads\n");
		return;
	}

	mutex_lock(&watchdog_mutex);
	softlockup_threads_initialized = true;
	lockup_detector_reconfigure();
	mutex_unlock(&watchdog_mutex);
}
#else /* CONFIG_SOFTLOCKUP_DETECTOR */
static inline int watchdog_park_threads(void) { return 0; }
static inline void watchdog_unpark_threads(void) { }
static inline int watchdog_enable_all_cpus(void) { return 0; }
static inline void watchdog_disable_all_cpus(void) { }

static void lockup_detector_reconfigure(void)
{
	cpus_read_lock();
	watchdog_nmi_stop();
	lockup_detector_update_enable();
	watchdog_nmi_start();
	cpus_read_unlock();
}

static inline void lockup_detector_setup(void)
{
	lockup_detector_reconfigure();
}
#endif /* !CONFIG_SOFTLOCKUP_DETECTOR */

static void __lockup_detector_cleanup(void)
{
	lockdep_assert_held(&watchdog_mutex);
	hardlockup_detector_perf_cleanup();
}

/**
 * lockup_detector_cleanup - Cleanup after cpu hotplug or sysctl changes
 *
 * Caller must not hold the cpu hotplug rwsem.
 */
void lockup_detector_cleanup(void)
{
	mutex_lock(&watchdog_mutex);
	__lockup_detector_cleanup();
	mutex_unlock(&watchdog_mutex);
}

/**
 * lockup_detector_soft_poweroff - Interface to stop lockup detector(s)
 *
 * Special interface for parisc. It prevents lockup detector warnings from
 * the default pm_poweroff() function which busy loops forever.
 */
void lockup_detector_soft_poweroff(void)
{
	watchdog_enabled = 0;
}
#ifdef CONFIG_SYSCTL

/* Propagate any changes to the watchdog threads */
static void proc_watchdog_update(void)
{
	/* Remove impossible cpus to keep sysctl output clean. */
	cpumask_and(&watchdog_cpumask, &watchdog_cpumask, cpu_possible_mask);
	lockup_detector_reconfigure();
}

/*
 * common function for watchdog, nmi_watchdog and soft_watchdog parameter
 *
 * caller             | table->data points to      | 'which'
 * -------------------|----------------------------|--------------------------
 * proc_watchdog      | watchdog_user_enabled      | NMI_WATCHDOG_ENABLED |
 *                    |                            | SOFT_WATCHDOG_ENABLED
 * -------------------|----------------------------|--------------------------
 * proc_nmi_watchdog  | nmi_watchdog_user_enabled  | NMI_WATCHDOG_ENABLED
 * -------------------|----------------------------|--------------------------
 * proc_soft_watchdog | soft_watchdog_user_enabled | SOFT_WATCHDOG_ENABLED
 */
static int proc_watchdog_common(int which, struct ctl_table *table, int write,
				void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int err, old, *param = table->data;

	mutex_lock(&watchdog_mutex);

	if (!write) {
		/*
		 * On read synchronize the userspace interface. This is a
		 * racy snapshot.
		 */
		*param = (watchdog_enabled & which) != 0;
		err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
	} else {
		old = READ_ONCE(*param);
		err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
		if (!err && old != READ_ONCE(*param))
			proc_watchdog_update();
	}
	mutex_unlock(&watchdog_mutex);
	return err;
}
/*
 * /proc/sys/kernel/watchdog
 */
int proc_watchdog(struct ctl_table *table, int write,
		  void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return proc_watchdog_common(NMI_WATCHDOG_ENABLED|SOFT_WATCHDOG_ENABLED,
				    table, write, buffer, lenp, ppos);
}

/*
 * /proc/sys/kernel/nmi_watchdog
 */
int proc_nmi_watchdog(struct ctl_table *table, int write,
		      void __user *buffer, size_t *lenp, loff_t *ppos)
{
	if (!nmi_watchdog_available && write)
		return -ENOTSUPP;
	return proc_watchdog_common(NMI_WATCHDOG_ENABLED,
				    table, write, buffer, lenp, ppos);
}

/*
 * /proc/sys/kernel/soft_watchdog
 */
int proc_soft_watchdog(struct ctl_table *table, int write,
		       void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return proc_watchdog_common(SOFT_WATCHDOG_ENABLED,
				    table, write, buffer, lenp, ppos);
}

/*
 * /proc/sys/kernel/watchdog_thresh
 */
int proc_watchdog_thresh(struct ctl_table *table, int write,
			 void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int err, old;

	mutex_lock(&watchdog_mutex);

	old = READ_ONCE(watchdog_thresh);
	err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);

	if (!err && write && old != READ_ONCE(watchdog_thresh))
		proc_watchdog_update();

	mutex_unlock(&watchdog_mutex);
	return err;
}

/*
 * The cpumask is the mask of possible cpus that the watchdog can run
 * on, not the mask of cpus it is actually running on. This allows the
 * user to specify a mask that will include cpus that have not yet
 * been brought online, if desired.
 */
int proc_watchdog_cpumask(struct ctl_table *table, int write,
			  void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int err;

	mutex_lock(&watchdog_mutex);

	err = proc_do_large_bitmap(table, write, buffer, lenp, ppos);
	if (!err && write)
		proc_watchdog_update();

	mutex_unlock(&watchdog_mutex);
	return err;
}
#endif /* CONFIG_SYSCTL */
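
/*
 * Boot-time entry point: pick the default watchdog_cpumask (housekeeping
 * CPUs only when nohz_full is enabled), probe for an NMI watchdog backend,
 * and set up the detector threads and configuration.
 */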
void __init lockup_detector_init(void)
{
#ifdef CONFIG_NO_HZ_FULL
	if (tick_nohz_full_enabled()) {
		pr_info("Disabling watchdog on nohz_full cores by default\n");
		cpumask_copy(&watchdog_cpumask, housekeeping_mask);
	} else
		cpumask_copy(&watchdog_cpumask, cpu_possible_mask);
#else
	cpumask_copy(&watchdog_cpumask, cpu_possible_mask);
#endif

	if (!watchdog_nmi_probe())
		nmi_watchdog_available = true;

	lockup_detector_setup();
}