/*
 * thread-stack.c: Synthesize a thread's stack using call / return events
 * Copyright (c) 2014, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 */

#include <linux/rbtree.h>
#include <linux/list.h>
#include <errno.h>
#include "thread.h"
#include "event.h"
#include "machine.h"
#include "util.h"
#include "debug.h"
#include "symbol.h"
#include "comm.h"
#include "call-path.h"
#include "thread-stack.h"

#define STACK_GROWTH 2048

/**
 * struct thread_stack_entry - thread stack entry.
 * @ret_addr: return address
 * @timestamp: timestamp (if known)
 * @ref: external reference (e.g. db_id of sample)
 * @branch_count: the branch count when the entry was created
 * @cp: call path
 * @no_call: a 'call' was not seen
 * @trace_end: a 'call' but trace ended
 */
struct thread_stack_entry {
        u64 ret_addr;
        u64 timestamp;
        u64 ref;
        u64 branch_count;
        struct call_path *cp;
        bool no_call;
        bool trace_end;
};

/**
 * struct thread_stack - thread stack constructed from 'call' and 'return'
 *                       branch samples.
 * @stack: array that holds the stack
 * @cnt: number of entries in the stack
 * @sz: current maximum stack size
 * @trace_nr: current trace number
 * @branch_count: running branch count
 * @kernel_start: kernel start address
 * @last_time: last timestamp
 * @crp: call/return processor
 * @comm: current comm
 */
struct thread_stack {
        struct thread_stack_entry *stack;
        size_t cnt;
        size_t sz;
        u64 trace_nr;
        u64 branch_count;
        u64 kernel_start;
        u64 last_time;
        struct call_return_processor *crp;
        struct comm *comm;
};

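/*
 * Grow the stack array by STACK_GROWTH entries, preserving existing entries.
 * Returns 0 on success, -ENOMEM if reallocation fails.
 */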
static int thread_stack__grow(struct thread_stack *ts)
{
        struct thread_stack_entry *new_stack;
        size_t sz, new_sz;

        new_sz = ts->sz + STACK_GROWTH;
        sz = new_sz * sizeof(struct thread_stack_entry);

        new_stack = realloc(ts->stack, sz);
        if (!new_stack)
                return -ENOMEM;

        ts->stack = new_stack;
        ts->sz = new_sz;

        return 0;
}

static struct thread_stack *thread_stack__new(struct thread *thread,
                                              struct call_return_processor *crp)
{
        struct thread_stack *ts;

        ts = zalloc(sizeof(struct thread_stack));
        if (!ts)
                return NULL;

        if (thread_stack__grow(ts)) {
                free(ts);
                return NULL;
        }

        if (thread->mg && thread->mg->machine)
                ts->kernel_start = machine__kernel_start(thread->mg->machine);
        else
                ts->kernel_start = 1ULL << 63;
        ts->crp = crp;

        return ts;
}

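/*
 * Push a return address onto the stack.  Used when there is no call/return
 * processor.  If growing the stack fails, the stack is discarded but the new
 * entry is still recorded.
 */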
static int thread_stack__push(struct thread_stack *ts, u64 ret_addr,
                              bool trace_end)
{
        int err = 0;

        if (ts->cnt == ts->sz) {
                err = thread_stack__grow(ts);
                if (err) {
                        pr_warning("Out of memory: discarding thread stack\n");
                        ts->cnt = 0;
                }
        }

        ts->stack[ts->cnt].trace_end = trace_end;
        ts->stack[ts->cnt++].ret_addr = ret_addr;

        return err;
}

static void thread_stack__pop(struct thread_stack *ts, u64 ret_addr)
{
        size_t i;

        /*
         * In some cases there may be functions which are not seen to return.
         * For example when setjmp / longjmp has been used.  Or the perf
         * context switch in the kernel which doesn't stop and start tracing
         * in exactly the same code path.  When that happens the return
         * address will be further down the stack.  If the return address is
         * not found at all, we assume the opposite (i.e. this is a return
         * for a call that wasn't seen for some reason) and leave the stack
         * alone.
         */
        for (i = ts->cnt; i; ) {
                if (ts->stack[--i].ret_addr == ret_addr) {
                        ts->cnt = i;
                        return;
                }
        }
}

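/* Pop consecutive 'trace end' entries from the top of the stack. */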
static void thread_stack__pop_trace_end(struct thread_stack *ts)
{
        size_t i;

        for (i = ts->cnt; i; ) {
                if (ts->stack[--i].trace_end)
                        ts->cnt = i;
                else
                        return;
        }
}

static bool thread_stack__in_kernel(struct thread_stack *ts)
{
        if (!ts->cnt)
                return false;

        return ts->stack[ts->cnt - 1].cp->in_kernel;
}

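/*
 * Turn the stack entry at @idx into a call_return and pass it to the
 * call/return processor's process() callback.  @no_return marks returns that
 * were never actually seen (e.g. entries popped when the stack is flushed).
 */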
static int thread_stack__call_return(struct thread *thread,
                                     struct thread_stack *ts, size_t idx,
                                     u64 timestamp, u64 ref, bool no_return)
{
        struct call_return_processor *crp = ts->crp;
        struct thread_stack_entry *tse;
        struct call_return cr = {
                .thread = thread,
                .comm = ts->comm,
                .db_id = 0,
        };

        tse = &ts->stack[idx];
        cr.cp = tse->cp;
        cr.call_time = tse->timestamp;
        cr.return_time = timestamp;
        cr.branch_count = ts->branch_count - tse->branch_count;
        cr.call_ref = tse->ref;
        cr.return_ref = ref;
        if (tse->no_call)
                cr.flags |= CALL_RETURN_NO_CALL;
        if (no_return)
                cr.flags |= CALL_RETURN_NO_RETURN;

        return crp->process(&cr, crp->data);
}

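/*
 * Empty the stack.  When a call/return processor is in use, each remaining
 * entry is reported with the 'no return' flag set.
 */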
static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts)
{
        struct call_return_processor *crp = ts->crp;
        int err;

        if (!crp) {
                ts->cnt = 0;
                return 0;
        }

        while (ts->cnt) {
                err = thread_stack__call_return(thread, ts, --ts->cnt,
                                                ts->last_time, 0, true);
                if (err) {
                        pr_err("Error flushing thread stack!\n");
                        ts->cnt = 0;
                        return err;
                }
        }

        return 0;
}

int thread_stack__flush(struct thread *thread)
{
        if (thread->ts)
                return __thread_stack__flush(thread, thread->ts);

        return 0;
}

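/*
 * Keep the stack in sync with branch events: flush when the trace number
 * changes, then, unless a call/return processor is in use, push on 'call'
 * and pop on 'return'.
 */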
int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
                        u64 to_ip, u16 insn_len, u64 trace_nr)
{
        if (!thread)
                return -EINVAL;

        if (!thread->ts) {
                thread->ts = thread_stack__new(thread, NULL);
                if (!thread->ts) {
                        pr_warning("Out of memory: no thread stack\n");
                        return -ENOMEM;
                }
                thread->ts->trace_nr = trace_nr;
        }

        /*
         * When the trace is discontinuous, the trace_nr changes.  In that
         * case the stack might be completely invalid.  Better to report
         * nothing than to report something misleading, so flush the stack.
         */
        if (trace_nr != thread->ts->trace_nr) {
                if (thread->ts->trace_nr)
                        __thread_stack__flush(thread, thread->ts);
                thread->ts->trace_nr = trace_nr;
        }

        /* Stop here if thread_stack__process() is in use */
        if (thread->ts->crp)
                return 0;

        if (flags & PERF_IP_FLAG_CALL) {
                u64 ret_addr;

                if (!to_ip)
                        return 0;
                ret_addr = from_ip + insn_len;
                if (ret_addr == to_ip)
                        return 0; /* Zero-length calls are excluded */
                return thread_stack__push(thread->ts, ret_addr,
                                          flags & PERF_IP_FLAG_TRACE_END);
        } else if (flags & PERF_IP_FLAG_TRACE_BEGIN) {
                /*
                 * If the caller did not change the trace number (which would
                 * have flushed the stack) then try to make sense of the
                 * stack.  Possibly, tracing began after returning to the
                 * current address, so try to pop that.  Also, a call made
                 * when the trace ended is not expected to return, so pop
                 * that too.
                 */
                thread_stack__pop(thread->ts, to_ip);
                thread_stack__pop_trace_end(thread->ts);
        } else if ((flags & PERF_IP_FLAG_RETURN) && from_ip) {
                thread_stack__pop(thread->ts, to_ip);
        }

        return 0;
}

void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr)
{
        if (!thread || !thread->ts)
                return;

        if (trace_nr != thread->ts->trace_nr) {
                if (thread->ts->trace_nr)
                        __thread_stack__flush(thread, thread->ts);
                thread->ts->trace_nr = trace_nr;
        }
}

void thread_stack__free(struct thread *thread)
{
        if (thread->ts) {
                __thread_stack__flush(thread, thread->ts);
                zfree(&thread->ts->stack);
                zfree(&thread->ts);
        }
}

static inline u64 callchain_context(u64 ip, u64 kernel_start)
{
        return ip < kernel_start ? PERF_CONTEXT_USER : PERF_CONTEXT_KERNEL;
}

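/*
 * Synthesize a callchain from the thread stack: the current context and ip
 * first, then the stacked return addresses from the top down, inserting a
 * new context marker whenever the user/kernel boundary is crossed.
 */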
void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
                          size_t sz, u64 ip, u64 kernel_start)
{
        u64 context = callchain_context(ip, kernel_start);
        u64 last_context;
        size_t i, j;

        if (sz < 2) {
                chain->nr = 0;
                return;
        }

        chain->ips[0] = context;
        chain->ips[1] = ip;

        if (!thread || !thread->ts) {
                chain->nr = 2;
                return;
        }

        last_context = context;

        for (i = 2, j = 1; i < sz && j <= thread->ts->cnt; i++, j++) {
                ip = thread->ts->stack[thread->ts->cnt - j].ret_addr;
                context = callchain_context(ip, kernel_start);
                if (context != last_context) {
                        if (i >= sz - 1)
                                break;
                        chain->ips[i++] = context;
                        last_context = context;
                }
                chain->ips[i] = ip;
        }

        chain->nr = i;
}

struct call_return_processor *
call_return_processor__new(int (*process)(struct call_return *cr, void *data),
                           void *data)
{
        struct call_return_processor *crp;

        crp = zalloc(sizeof(struct call_return_processor));
        if (!crp)
                return NULL;
        crp->cpr = call_path_root__new();
        if (!crp->cpr)
                goto out_free;
        crp->process = process;
        crp->data = data;
        return crp;

out_free:
        free(crp);
        return NULL;
}

void call_return_processor__free(struct call_return_processor *crp)
{
        if (crp) {
                call_path_root__free(crp->cpr);
                free(crp);
        }
}

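/*
 * Push a full stack entry, including timestamp, external reference and call
 * path.  Used when a call/return processor is attached.
 */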
static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr,
                                 u64 timestamp, u64 ref, struct call_path *cp,
                                 bool no_call, bool trace_end)
{
        struct thread_stack_entry *tse;
        int err;

        if (ts->cnt == ts->sz) {
                err = thread_stack__grow(ts);
                if (err)
                        return err;
        }

        tse = &ts->stack[ts->cnt++];
        tse->ret_addr = ret_addr;
        tse->timestamp = timestamp;
        tse->ref = ref;
        tse->branch_count = ts->branch_count;
        tse->cp = cp;
        tse->no_call = no_call;
        tse->trace_end = trace_end;

        return 0;
}

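/*
 * Pop entries for a 'return'.  If the return address matches an entry
 * further down the stack, the intervening entries are reported as calls with
 * no seen return.  Returns 0 on success, a negative error code on failure,
 * or 1 if no matching entry was found.
 */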
static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts,
                                u64 ret_addr, u64 timestamp, u64 ref,
                                struct symbol *sym)
{
        int err;

        if (!ts->cnt)
                return 1;

        if (ts->cnt == 1) {
                struct thread_stack_entry *tse = &ts->stack[0];

                if (tse->cp->sym == sym)
                        return thread_stack__call_return(thread, ts, --ts->cnt,
                                                         timestamp, ref, false);
        }

        if (ts->stack[ts->cnt - 1].ret_addr == ret_addr) {
                return thread_stack__call_return(thread, ts, --ts->cnt,
                                                 timestamp, ref, false);
        } else {
                size_t i = ts->cnt - 1;

                while (i--) {
                        if (ts->stack[i].ret_addr != ret_addr)
                                continue;
                        i += 1;
                        while (ts->cnt > i) {
                                err = thread_stack__call_return(thread, ts,
                                                                --ts->cnt,
                                                                timestamp, ref,
                                                                true);
                                if (err)
                                        return err;
                        }
                        return thread_stack__call_return(thread, ts, --ts->cnt,
                                                         timestamp, ref, false);
                }
        }

        return 1;
}

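/*
 * Seed an empty stack with the current symbol, marked as having no seen
 * 'call', so that subsequent entries have a parent call path.
 */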
static int thread_stack__bottom(struct thread *thread, struct thread_stack *ts,
                                struct perf_sample *sample,
                                struct addr_location *from_al,
                                struct addr_location *to_al, u64 ref)
{
        struct call_path_root *cpr = ts->crp->cpr;
        struct call_path *cp;
        struct symbol *sym;
        u64 ip;

        if (sample->ip) {
                ip = sample->ip;
                sym = from_al->sym;
        } else if (sample->addr) {
                ip = sample->addr;
                sym = to_al->sym;
        } else {
                return 0;
        }

        cp = call_path__findnew(cpr, &cpr->call_path, sym, ip,
                                ts->kernel_start);
        if (!cp)
                return -ENOMEM;

        return thread_stack__push_cp(thread->ts, ip, sample->time, ref, cp,
                                     true, false);
}

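/*
 * Handle a 'return' whose matching 'call' was not seen.  Kernel entries are
 * popped when returning to userspace, then the return is represented by
 * pushing and immediately popping a call path for the current symbol.
 */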
static int thread_stack__no_call_return(struct thread *thread,
                                        struct thread_stack *ts,
                                        struct perf_sample *sample,
                                        struct addr_location *from_al,
                                        struct addr_location *to_al, u64 ref)
{
        struct call_path_root *cpr = ts->crp->cpr;
        struct call_path *cp, *parent;
        u64 ks = ts->kernel_start;
        int err;

        if (sample->ip >= ks && sample->addr < ks) {
                /* Return to userspace, so pop all kernel addresses */
                while (thread_stack__in_kernel(ts)) {
                        err = thread_stack__call_return(thread, ts, --ts->cnt,
                                                        sample->time, ref,
                                                        true);
                        if (err)
                                return err;
                }

                /* If the stack is empty, push the userspace address */
                if (!ts->cnt) {
                        cp = call_path__findnew(cpr, &cpr->call_path,
                                                to_al->sym, sample->addr,
                                                ts->kernel_start);
                        if (!cp)
                                return -ENOMEM;
                        return thread_stack__push_cp(ts, 0, sample->time, ref,
                                                     cp, true, false);
                }
        } else if (thread_stack__in_kernel(ts) && sample->ip < ks) {
                /* Return to userspace, so pop all kernel addresses */
                while (thread_stack__in_kernel(ts)) {
                        err = thread_stack__call_return(thread, ts, --ts->cnt,
                                                        sample->time, ref,
                                                        true);
                        if (err)
                                return err;
                }
        }

        if (ts->cnt)
                parent = ts->stack[ts->cnt - 1].cp;
        else
                parent = &cpr->call_path;

        /* This 'return' had no 'call', so push and pop top of stack */
        cp = call_path__findnew(cpr, parent, from_al->sym, sample->ip,
                                ts->kernel_start);
        if (!cp)
                return -ENOMEM;

        err = thread_stack__push_cp(ts, sample->addr, sample->time, ref, cp,
                                    true, false);
        if (err)
                return err;

        return thread_stack__pop_cp(thread, ts, sample->addr, sample->time,
                                    ref, to_al->sym);
}

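/* At a 'trace begin', pop and report a 'trace end' entry left on top of the stack. */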
static int thread_stack__trace_begin(struct thread *thread,
                                     struct thread_stack *ts, u64 timestamp,
                                     u64 ref)
{
        struct thread_stack_entry *tse;
        int err;

        if (!ts->cnt)
                return 0;

        /* Pop trace end */
        tse = &ts->stack[ts->cnt - 1];
        if (tse->trace_end) {
                err = thread_stack__call_return(thread, ts, --ts->cnt,
                                                timestamp, ref, false);
                if (err)
                        return err;
        }

        return 0;
}

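/*
 * At a 'trace end', push an entry whose return address is the instruction
 * following the current one, flagged as 'trace end', so it can be matched up
 * if tracing resumes.
 */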
static int thread_stack__trace_end(struct thread_stack *ts,
                                   struct perf_sample *sample, u64 ref)
{
        struct call_path_root *cpr = ts->crp->cpr;
        struct call_path *cp;
        u64 ret_addr;

        /* No point having 'trace end' on the bottom of the stack */
        if (!ts->cnt || (ts->cnt == 1 && ts->stack[0].ref == ref))
                return 0;

        cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, NULL, 0,
                                ts->kernel_start);
        if (!cp)
                return -ENOMEM;

        ret_addr = sample->ip + sample->insn_len;

        return thread_stack__push_cp(ts, ret_addr, sample->time, ref, cp,
                                     false, true);
}

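/*
 * Main entry point for call/return processing: maintain the stack from
 * branch samples and emit a call_return for every matched call and return.
 */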
int thread_stack__process(struct thread *thread, struct comm *comm,
                          struct perf_sample *sample,
                          struct addr_location *from_al,
                          struct addr_location *to_al, u64 ref,
                          struct call_return_processor *crp)
{
        struct thread_stack *ts = thread->ts;
        int err = 0;

        if (ts) {
                if (!ts->crp) {
                        /* Supersede thread_stack__event() */
                        thread_stack__free(thread);
                        thread->ts = thread_stack__new(thread, crp);
                        if (!thread->ts)
                                return -ENOMEM;
                        ts = thread->ts;
                        ts->comm = comm;
                }
        } else {
                thread->ts = thread_stack__new(thread, crp);
                if (!thread->ts)
                        return -ENOMEM;
                ts = thread->ts;
                ts->comm = comm;
        }

        /* Flush stack on exec */
        if (ts->comm != comm && thread->pid_ == thread->tid) {
                err = __thread_stack__flush(thread, ts);
                if (err)
                        return err;
                ts->comm = comm;
        }

        /* If the stack is empty, put the current symbol on the stack */
        if (!ts->cnt) {
                err = thread_stack__bottom(thread, ts, sample, from_al, to_al,
                                           ref);
                if (err)
                        return err;
        }

        ts->branch_count += 1;
        ts->last_time = sample->time;

        if (sample->flags & PERF_IP_FLAG_CALL) {
                bool trace_end = sample->flags & PERF_IP_FLAG_TRACE_END;
                struct call_path_root *cpr = ts->crp->cpr;
                struct call_path *cp;
                u64 ret_addr;

                if (!sample->ip || !sample->addr)
                        return 0;

                ret_addr = sample->ip + sample->insn_len;
                if (ret_addr == sample->addr)
                        return 0; /* Zero-length calls are excluded */

                cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp,
                                        to_al->sym, sample->addr,
                                        ts->kernel_start);
                if (!cp)
                        return -ENOMEM;
                err = thread_stack__push_cp(ts, ret_addr, sample->time, ref,
                                            cp, false, trace_end);
        } else if (sample->flags & PERF_IP_FLAG_RETURN) {
                if (!sample->ip || !sample->addr)
                        return 0;

                err = thread_stack__pop_cp(thread, ts, sample->addr,
                                           sample->time, ref, from_al->sym);
                if (err) {
                        if (err < 0)
                                return err;
                        err = thread_stack__no_call_return(thread, ts, sample,
                                                           from_al, to_al, ref);
                }
        } else if (sample->flags & PERF_IP_FLAG_TRACE_BEGIN) {
                err = thread_stack__trace_begin(thread, ts, sample->time, ref);
        } else if (sample->flags & PERF_IP_FLAG_TRACE_END) {
                err = thread_stack__trace_end(ts, sample, ref);
        }

        return err;
}

size_t thread_stack__depth(struct thread *thread)
{
        if (!thread->ts)
                return 0;
        return thread->ts->cnt;
}