trace_stack.c

/*
 * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
 *
 */
#include <linux/sched/task_stack.h>
#include <linux/stacktrace.h>
#include <linux/kallsyms.h>
#include <linux/seq_file.h>
#include <linux/spinlock.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <linux/module.h>
#include <linux/sysctl.h>
#include <linux/init.h>

#include <asm/setup.h>

#include "trace.h"

static unsigned long stack_dump_trace[STACK_TRACE_ENTRIES+1] =
        { [0 ... (STACK_TRACE_ENTRIES)] = ULONG_MAX };
unsigned stack_trace_index[STACK_TRACE_ENTRIES];

/*
 * Reserve one entry for the passed in ip. This will allow
 * us to remove most or all of the stack size overhead
 * added by the stack tracer itself.
 */
struct stack_trace stack_trace_max = {
        .max_entries            = STACK_TRACE_ENTRIES - 1,
        .entries                = &stack_dump_trace[0],
};

unsigned long stack_trace_max_size;
arch_spinlock_t stack_trace_max_lock =
        (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;

static DEFINE_PER_CPU(int, trace_active);
static DEFINE_MUTEX(stack_sysctl_mutex);

int stack_tracer_enabled;
static int last_stack_tracer_enabled;

/**
 * stack_tracer_disable - temporarily disable the stack tracer
 *
 * There are a few locations (namely in RCU) where stack tracing
 * cannot be executed. This function is used to disable stack
 * tracing during those critical sections.
 *
 * This function must be called with preemption or interrupts
 * disabled and stack_tracer_enable() must be called shortly after
 * while preemption or interrupts are still disabled.
 */
void stack_tracer_disable(void)
{
        /* Preemption or interrupts must be disabled */
        if (IS_ENABLED(CONFIG_PREEMPT_DEBUG))
                WARN_ON_ONCE(!preempt_count() || !irqs_disabled());

        this_cpu_inc(trace_active);
}

/**
 * stack_tracer_enable - re-enable the stack tracer
 *
 * After stack_tracer_disable() is called, stack_tracer_enable()
 * must be called shortly afterward.
 */
void stack_tracer_enable(void)
{
        if (IS_ENABLED(CONFIG_PREEMPT_DEBUG))
                WARN_ON_ONCE(!preempt_count() || !irqs_disabled());

        this_cpu_dec(trace_active);
}

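/*
 * Illustrative sketch, not part of this file: a caller is expected to
 * pair the two helpers above while preemption or interrupts remain
 * disabled for the whole window, e.g.:
 *
 *      local_irq_save(flags);
 *      stack_tracer_disable();
 *      ... section that must not be stack-traced ...
 *      stack_tracer_enable();
 *      local_irq_restore(flags);
 */
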
void stack_trace_print(void)
{
        long i;
        int size;

        pr_emerg("        Depth    Size   Location    (%d entries)\n"
                 "        -----    ----   --------\n",
                 stack_trace_max.nr_entries);

        for (i = 0; i < stack_trace_max.nr_entries; i++) {
                if (stack_dump_trace[i] == ULONG_MAX)
                        break;
                if (i+1 == stack_trace_max.nr_entries ||
                    stack_dump_trace[i+1] == ULONG_MAX)
                        size = stack_trace_index[i];
                else
                        size = stack_trace_index[i] - stack_trace_index[i+1];

                pr_emerg("%3ld) %8d   %5d   %pS\n", i, stack_trace_index[i],
                         size, (void *)stack_dump_trace[i]);
        }
}

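/*
 * Note on the columns printed above: stack_trace_index[i] ("Depth") is
 * the number of bytes between entry i's stack slot and the top of the
 * thread stack, and "Size" is the difference between neighbouring
 * depths, i.e. the stack consumed by that single frame.  A made-up
 * dump therefore looks roughly like:
 *
 *        Depth    Size   Location    (3 entries)
 *        -----    ----   --------
 *    0)    896      64   some_leaf_func+0x14/0x30
 *    1)    832     144   some_caller+0x88/0x140
 *    2)    688     688   some_syscall_entry+0x40/0xa0
 *
 * The last entry's Size always equals its Depth, since there is no
 * further entry to subtract.
 */
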
/*
 * When arch-specific code overrides this function, the following
 * data should be filled up, assuming stack_trace_max_lock is held to
 * prevent concurrent updates.
 *     stack_trace_index[]
 *     stack_trace_max
 *     stack_trace_max_size
 */
void __weak
check_stack(unsigned long ip, unsigned long *stack)
{
        unsigned long this_size, flags;
        unsigned long *p, *top, *start;
        static int tracer_frame;
        int frame_size = ACCESS_ONCE(tracer_frame);
        int i, x;

        this_size = ((unsigned long)stack) & (THREAD_SIZE-1);
        this_size = THREAD_SIZE - this_size;
        /* Remove the frame of the tracer */
        this_size -= frame_size;

        if (this_size <= stack_trace_max_size)
                return;

        /* we do not handle interrupt stacks yet */
        if (!object_is_on_stack(stack))
                return;

        /* Can't do this from NMI context (can cause deadlocks) */
        if (in_nmi())
                return;

        local_irq_save(flags);
        arch_spin_lock(&stack_trace_max_lock);

        /*
         * RCU may not be watching, make it see us.
         * The stack trace code uses rcu_sched.
         */
        rcu_irq_enter();

        /* In case another CPU set the tracer_frame on us */
        if (unlikely(!frame_size))
                this_size -= tracer_frame;

        /* a race could have already updated it */
        if (this_size <= stack_trace_max_size)
                goto out;

        stack_trace_max_size = this_size;

        stack_trace_max.nr_entries = 0;
        stack_trace_max.skip = 3;

        save_stack_trace(&stack_trace_max);

        /* Skip over the overhead of the stack tracer itself */
        for (i = 0; i < stack_trace_max.nr_entries; i++) {
                if (stack_dump_trace[i] == ip)
                        break;
        }

        /*
         * Some archs may not have the passed in ip in the dump.
         * If that happens, we need to show everything.
         */
        if (i == stack_trace_max.nr_entries)
                i = 0;

        /*
         * Now find where in the stack these are.
         */
        x = 0;
        start = stack;
        top = (unsigned long *)
                (((unsigned long)start & ~(THREAD_SIZE-1)) + THREAD_SIZE);

        /*
         * Loop through all the entries. One of the entries may
         * for some reason be missed on the stack, so we may
         * have to account for them. If they are all there, this
         * loop will only happen once. This code only takes place
         * on a new max, so it is far from a fast path.
         */
        while (i < stack_trace_max.nr_entries) {
                int found = 0;

                stack_trace_index[x] = this_size;
                p = start;

                for (; p < top && i < stack_trace_max.nr_entries; p++) {
                        if (stack_dump_trace[i] == ULONG_MAX)
                                break;
                        /*
                         * The READ_ONCE_NOCHECK is used to let KASAN know that
                         * this is not a stack-out-of-bounds error.
                         */
                        if ((READ_ONCE_NOCHECK(*p)) == stack_dump_trace[i]) {
                                stack_dump_trace[x] = stack_dump_trace[i++];
                                this_size = stack_trace_index[x++] =
                                        (top - p) * sizeof(unsigned long);
                                found = 1;
                                /* Start the search from here */
                                start = p + 1;
                                /*
                                 * We do not want to show the overhead
                                 * of the stack tracer stack in the
                                 * max stack. If we haven't figured
                                 * out what that is, then figure it out
                                 * now.
                                 */
                                if (unlikely(!tracer_frame)) {
                                        tracer_frame = (p - stack) *
                                                sizeof(unsigned long);
                                        stack_trace_max_size -= tracer_frame;
                                }
                        }
                }

                if (!found)
                        i++;
        }

        stack_trace_max.nr_entries = x;
        for (; x < i; x++)
                stack_dump_trace[x] = ULONG_MAX;

        if (task_stack_end_corrupted(current)) {
                stack_trace_print();
                BUG();
        }

 out:
        rcu_irq_exit();
        arch_spin_unlock(&stack_trace_max_lock);
        local_irq_restore(flags);
}

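/*
 * Note on tracer_frame, summarizing the logic above: the first time a
 * new maximum is recorded, check_stack() measures the distance between
 * the local 'stack' variable in stack_trace_call() and the stack slot
 * where the traced function's address was found, i.e. the stack used
 * by the tracer itself, and caches it in tracer_frame.  Later samples
 * subtract that amount up front so the reported maximum reflects only
 * the traced call chain.
 */
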
static void
stack_trace_call(unsigned long ip, unsigned long parent_ip,
                 struct ftrace_ops *op, struct pt_regs *pt_regs)
{
        unsigned long stack;

        preempt_disable_notrace();

        /* no atomic needed, we only modify this variable by this cpu */
        __this_cpu_inc(trace_active);
        if (__this_cpu_read(trace_active) != 1)
                goto out;

        ip += MCOUNT_INSN_SIZE;

        check_stack(ip, &stack);

 out:
        __this_cpu_dec(trace_active);
        /* prevent recursion in schedule */
        preempt_enable_notrace();
}

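/*
 * Note: the per-cpu trace_active counter checked above does double
 * duty.  A value other than 1 means either that this CPU is already
 * inside the callback (so a nested invocation bails out instead of
 * recursing), or that stack_tracer_disable(), stack_max_size_write()
 * or the stack_trace seq_file reader has bumped it to keep the tracer
 * away while stack_trace_max_lock is held.
 */
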
static struct ftrace_ops trace_ops __read_mostly =
{
        .func = stack_trace_call,
        .flags = FTRACE_OPS_FL_RECURSION_SAFE,
};

static ssize_t
stack_max_size_read(struct file *filp, char __user *ubuf,
                    size_t count, loff_t *ppos)
{
        unsigned long *ptr = filp->private_data;
        char buf[64];
        int r;

        r = snprintf(buf, sizeof(buf), "%ld\n", *ptr);
        if (r > sizeof(buf))
                r = sizeof(buf);
        return simple_read_from_buffer(ubuf, count, ppos, buf, r);
}

static ssize_t
stack_max_size_write(struct file *filp, const char __user *ubuf,
                     size_t count, loff_t *ppos)
{
        long *ptr = filp->private_data;
        unsigned long val, flags;
        int ret;

        ret = kstrtoul_from_user(ubuf, count, 10, &val);
        if (ret)
                return ret;

        local_irq_save(flags);

        /*
         * In case we trace inside arch_spin_lock() or after (NMI),
         * we will cause circular lock, so we also need to increase
         * the percpu trace_active here.
         */
        __this_cpu_inc(trace_active);

        arch_spin_lock(&stack_trace_max_lock);
        *ptr = val;
        arch_spin_unlock(&stack_trace_max_lock);

        __this_cpu_dec(trace_active);
        local_irq_restore(flags);

        return count;
}

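/*
 * Example usage, assuming tracefs/debugfs is mounted at the usual
 * location:
 *
 *      # cat /sys/kernel/debug/tracing/stack_max_size
 *      # echo 0 > /sys/kernel/debug/tracing/stack_max_size
 *
 * Writing 0 resets the recorded maximum so a fresh worst case can be
 * captured from that point on.
 */
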
static const struct file_operations stack_max_size_fops = {
        .open           = tracing_open_generic,
        .read           = stack_max_size_read,
        .write          = stack_max_size_write,
        .llseek         = default_llseek,
};

static void *
__next(struct seq_file *m, loff_t *pos)
{
        long n = *pos - 1;

        if (n > stack_trace_max.nr_entries || stack_dump_trace[n] == ULONG_MAX)
                return NULL;

        m->private = (void *)n;
        return &m->private;
}

static void *
t_next(struct seq_file *m, void *v, loff_t *pos)
{
        (*pos)++;
        return __next(m, pos);
}

static void *t_start(struct seq_file *m, loff_t *pos)
{
        local_irq_disable();

        __this_cpu_inc(trace_active);

        arch_spin_lock(&stack_trace_max_lock);

        if (*pos == 0)
                return SEQ_START_TOKEN;

        return __next(m, pos);
}

static void t_stop(struct seq_file *m, void *p)
{
        arch_spin_unlock(&stack_trace_max_lock);

        __this_cpu_dec(trace_active);

        local_irq_enable();
}

static void trace_lookup_stack(struct seq_file *m, long i)
{
        unsigned long addr = stack_dump_trace[i];

        seq_printf(m, "%pS\n", (void *)addr);
}

static void print_disabled(struct seq_file *m)
{
        seq_puts(m, "#\n"
                 "# Stack tracer disabled\n"
                 "#\n"
                 "# To enable the stack tracer, either add 'stacktrace' to the\n"
                 "# kernel command line\n"
                 "# or 'echo 1 > /proc/sys/kernel/stack_tracer_enabled'\n"
                 "#\n");
}

static int t_show(struct seq_file *m, void *v)
{
        long i;
        int size;

        if (v == SEQ_START_TOKEN) {
                seq_printf(m, "        Depth    Size   Location"
                           "    (%d entries)\n"
                           "        -----    ----   --------\n",
                           stack_trace_max.nr_entries);

                if (!stack_tracer_enabled && !stack_trace_max_size)
                        print_disabled(m);

                return 0;
        }

        i = *(long *)v;

        if (i >= stack_trace_max.nr_entries ||
            stack_dump_trace[i] == ULONG_MAX)
                return 0;

        if (i+1 == stack_trace_max.nr_entries ||
            stack_dump_trace[i+1] == ULONG_MAX)
                size = stack_trace_index[i];
        else
                size = stack_trace_index[i] - stack_trace_index[i+1];

        seq_printf(m, "%3ld) %8d   %5d   ", i, stack_trace_index[i], size);

        trace_lookup_stack(m, i);

        return 0;
}

static const struct seq_operations stack_trace_seq_ops = {
        .start          = t_start,
        .next           = t_next,
        .stop           = t_stop,
        .show           = t_show,
};

static int stack_trace_open(struct inode *inode, struct file *file)
{
        return seq_open(file, &stack_trace_seq_ops);
}

static const struct file_operations stack_trace_fops = {
        .open           = stack_trace_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = seq_release,
};

static int
stack_trace_filter_open(struct inode *inode, struct file *file)
{
        return ftrace_regex_open(&trace_ops, FTRACE_ITER_FILTER,
                                 inode, file);
}

static const struct file_operations stack_trace_filter_fops = {
        .open = stack_trace_filter_open,
        .read = seq_read,
        .write = ftrace_filter_write,
        .llseek = tracing_lseek,
        .release = ftrace_regex_release,
};

int
stack_trace_sysctl(struct ctl_table *table, int write,
                   void __user *buffer, size_t *lenp,
                   loff_t *ppos)
{
        int ret;

        mutex_lock(&stack_sysctl_mutex);

        ret = proc_dointvec(table, write, buffer, lenp, ppos);

        if (ret || !write ||
            (last_stack_tracer_enabled == !!stack_tracer_enabled))
                goto out;

        last_stack_tracer_enabled = !!stack_tracer_enabled;

        if (stack_tracer_enabled)
                register_ftrace_function(&trace_ops);
        else
                unregister_ftrace_function(&trace_ops);

 out:
        mutex_unlock(&stack_sysctl_mutex);
        return ret;
}

static char stack_trace_filter_buf[COMMAND_LINE_SIZE+1] __initdata;

static __init int enable_stacktrace(char *str)
{
        if (strncmp(str, "_filter=", 8) == 0)
                strncpy(stack_trace_filter_buf, str+8, COMMAND_LINE_SIZE);

        stack_tracer_enabled = 1;
        last_stack_tracer_enabled = 1;
        return 1;
}
__setup("stacktrace", enable_stacktrace);

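/*
 * Note: __setup("stacktrace", ...) matches any boot parameter that
 * starts with "stacktrace" and passes the remainder to the handler.
 * Booting with "stacktrace" just enables the tracer, while a
 * parameter such as "stacktrace_filter=vfs_read" (hypothetical
 * pattern) arrives here as str == "_filter=vfs_read" and also seeds
 * the early ftrace filter that stack_trace_init() applies below.
 */
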
static __init int stack_trace_init(void)
{
        struct dentry *d_tracer;

        d_tracer = tracing_init_dentry();
        if (IS_ERR(d_tracer))
                return 0;

        trace_create_file("stack_max_size", 0644, d_tracer,
                          &stack_trace_max_size, &stack_max_size_fops);

        trace_create_file("stack_trace", 0444, d_tracer,
                          NULL, &stack_trace_fops);

        trace_create_file("stack_trace_filter", 0444, d_tracer,
                          NULL, &stack_trace_filter_fops);

        if (stack_trace_filter_buf[0])
                ftrace_set_early_filter(&trace_ops, stack_trace_filter_buf, 1);

        if (stack_tracer_enabled)
                register_ftrace_function(&trace_ops);

        return 0;
}

device_initcall(stack_trace_init);