/*
 * thread-stack.c: Synthesize a thread's stack using call / return events
 * Copyright (c) 2014, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 */

#include <linux/rbtree.h>
#include <linux/list.h>
#include <errno.h>
#include "thread.h"
#include "event.h"
#include "machine.h"
#include "util.h"
#include "debug.h"
#include "symbol.h"
#include "comm.h"
#include "call-path.h"
#include "thread-stack.h"

#define STACK_GROWTH 2048

/**
 * struct thread_stack_entry - thread stack entry.
 * @ret_addr: return address
 * @timestamp: timestamp (if known)
 * @ref: external reference (e.g. db_id of sample)
 * @branch_count: the branch count when the entry was created
 * @cp: call path
 * @no_call: a 'call' was not seen
 */
struct thread_stack_entry {
	u64 ret_addr;
	u64 timestamp;
	u64 ref;
	u64 branch_count;
	struct call_path *cp;
	bool no_call;
};

/**
 * struct thread_stack - thread stack constructed from 'call' and 'return'
 *                       branch samples.
 * @stack: array that holds the stack
 * @cnt: number of entries in the stack
 * @sz: current maximum stack size
 * @trace_nr: current trace number
 * @branch_count: running branch count
 * @kernel_start: kernel start address
 * @last_time: last timestamp
 * @crp: call/return processor
 * @comm: current comm
 */
struct thread_stack {
	struct thread_stack_entry *stack;
	size_t cnt;
	size_t sz;
	u64 trace_nr;
	u64 branch_count;
	u64 kernel_start;
	u64 last_time;
	struct call_return_processor *crp;
	struct comm *comm;
};
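
/*
 * Enlarge the stack array in fixed STACK_GROWTH increments, preserving the
 * existing entries via realloc().
 */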
static int thread_stack__grow(struct thread_stack *ts)
{
	struct thread_stack_entry *new_stack;
	size_t sz, new_sz;

	new_sz = ts->sz + STACK_GROWTH;
	sz = new_sz * sizeof(struct thread_stack_entry);

	new_stack = realloc(ts->stack, sz);
	if (!new_stack)
		return -ENOMEM;

	ts->stack = new_stack;
	ts->sz = new_sz;

	return 0;
}
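
/*
 * Allocate a new thread stack. kernel_start lets entries be classified as
 * kernel or user addresses; if the machine is unknown, treat the upper half
 * of the 64-bit address space as kernel.
 */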
static struct thread_stack *thread_stack__new(struct thread *thread,
					      struct call_return_processor *crp)
{
	struct thread_stack *ts;

	ts = zalloc(sizeof(struct thread_stack));
	if (!ts)
		return NULL;

	if (thread_stack__grow(ts)) {
		free(ts);
		return NULL;
	}

	if (thread->mg && thread->mg->machine)
		ts->kernel_start = machine__kernel_start(thread->mg->machine);
	else
		ts->kernel_start = 1ULL << 63;
	ts->crp = crp;

	return ts;
}
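
/*
 * Push a return address. If the stack cannot grow, warn and restart from an
 * empty stack: a failed realloc() leaves the old allocation intact, so
 * storing the entry at index zero is still safe.
 */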
static int thread_stack__push(struct thread_stack *ts, u64 ret_addr)
{
	int err = 0;

	if (ts->cnt == ts->sz) {
		err = thread_stack__grow(ts);
		if (err) {
			pr_warning("Out of memory: discarding thread stack\n");
			ts->cnt = 0;
		}
	}

	ts->stack[ts->cnt++].ret_addr = ret_addr;

	return err;
}

static void thread_stack__pop(struct thread_stack *ts, u64 ret_addr)
{
	size_t i;

	/*
	 * In some cases there may be functions which are not seen to return.
	 * For example when setjmp / longjmp has been used. Or the perf context
	 * switch in the kernel which doesn't stop and start tracing in exactly
	 * the same code path. When that happens the return address will be
	 * further down the stack. If the return address is not found at all,
	 * we assume the opposite (i.e. this is a return for a call that wasn't
	 * seen for some reason) and leave the stack alone.
	 */
	for (i = ts->cnt; i; ) {
		if (ts->stack[--i].ret_addr == ret_addr) {
			ts->cnt = i;
			return;
		}
	}
}

static bool thread_stack__in_kernel(struct thread_stack *ts)
{
	if (!ts->cnt)
		return false;

	return ts->stack[ts->cnt - 1].cp->in_kernel;
}
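
/*
 * Report one completed call/return pair (the entry at @idx plus the matching
 * return) to the registered call/return processor.
 */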
static int thread_stack__call_return(struct thread *thread,
				     struct thread_stack *ts, size_t idx,
				     u64 timestamp, u64 ref, bool no_return)
{
	struct call_return_processor *crp = ts->crp;
	struct thread_stack_entry *tse;
	struct call_return cr = {
		.thread = thread,
		.comm = ts->comm,
		.db_id = 0,
	};

	tse = &ts->stack[idx];
	cr.cp = tse->cp;
	cr.call_time = tse->timestamp;
	cr.return_time = timestamp;
	cr.branch_count = ts->branch_count - tse->branch_count;
	cr.call_ref = tse->ref;
	cr.return_ref = ref;
	if (tse->no_call)
		cr.flags |= CALL_RETURN_NO_CALL;
	if (no_return)
		cr.flags |= CALL_RETURN_NO_RETURN;

	return crp->process(&cr, crp->data);
}
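
/*
 * Empty the stack. With a call/return processor, each still-open function is
 * reported as if it returned at the last known timestamp (no_return == true).
 */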
static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts)
{
	struct call_return_processor *crp = ts->crp;
	int err;

	if (!crp) {
		ts->cnt = 0;
		return 0;
	}

	while (ts->cnt) {
		err = thread_stack__call_return(thread, ts, --ts->cnt,
						ts->last_time, 0, true);
		if (err) {
			pr_err("Error flushing thread stack!\n");
			ts->cnt = 0;
			return err;
		}
	}

	return 0;
}

int thread_stack__flush(struct thread *thread)
{
	if (thread->ts)
		return __thread_stack__flush(thread, thread->ts);

	return 0;
}
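
/*
 * Update the stack from one branch event: push the computed return address on
 * a 'call', pop back to the matching entry on a 'return'. Only used when no
 * call/return processor is registered.
 */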
int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
			u64 to_ip, u16 insn_len, u64 trace_nr)
{
	if (!thread)
		return -EINVAL;

	if (!thread->ts) {
		thread->ts = thread_stack__new(thread, NULL);
		if (!thread->ts) {
			pr_warning("Out of memory: no thread stack\n");
			return -ENOMEM;
		}
		thread->ts->trace_nr = trace_nr;
	}

	/*
	 * When the trace is discontinuous, the trace_nr changes. In that case
	 * the stack might be completely invalid. Better to report nothing than
	 * to report something misleading, so flush the stack.
	 */
	if (trace_nr != thread->ts->trace_nr) {
		if (thread->ts->trace_nr)
			__thread_stack__flush(thread, thread->ts);
		thread->ts->trace_nr = trace_nr;
	}

	/* Stop here if thread_stack__process() is in use */
	if (thread->ts->crp)
		return 0;

	if (flags & PERF_IP_FLAG_CALL) {
		u64 ret_addr;

		if (!to_ip)
			return 0;
		ret_addr = from_ip + insn_len;
		if (ret_addr == to_ip)
			return 0; /* Zero-length calls are excluded */
		return thread_stack__push(thread->ts, ret_addr);
	} else if (flags & PERF_IP_FLAG_RETURN) {
		if (!from_ip)
			return 0;
		thread_stack__pop(thread->ts, to_ip);
	}

	return 0;
}

void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr)
{
	if (!thread || !thread->ts)
		return;

	if (trace_nr != thread->ts->trace_nr) {
		if (thread->ts->trace_nr)
			__thread_stack__flush(thread, thread->ts);
		thread->ts->trace_nr = trace_nr;
	}
}

void thread_stack__free(struct thread *thread)
{
	if (thread->ts) {
		__thread_stack__flush(thread, thread->ts);
		zfree(&thread->ts->stack);
		zfree(&thread->ts);
	}
}
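
/*
 * Synthesize a callchain for @thread: the sampled ip first, then the saved
 * return addresses from the top of the stack downwards, capped at @sz entries.
 */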
void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
			  size_t sz, u64 ip)
{
	size_t i;

	if (!thread || !thread->ts)
		chain->nr = 1;
	else
		chain->nr = min(sz, thread->ts->cnt + 1);

	chain->ips[0] = ip;

	for (i = 1; i < chain->nr; i++)
		chain->ips[i] = thread->ts->stack[thread->ts->cnt - i].ret_addr;
}
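
/*
 * Create a call/return processor that owns a call path root and forwards
 * each completed call/return pair to @process along with @data.
 */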
struct call_return_processor *
call_return_processor__new(int (*process)(struct call_return *cr, void *data),
			   void *data)
{
	struct call_return_processor *crp;

	crp = zalloc(sizeof(struct call_return_processor));
	if (!crp)
		return NULL;
	crp->cpr = call_path_root__new();
	if (!crp->cpr)
		goto out_free;
	crp->process = process;
	crp->data = data;
	return crp;

out_free:
	free(crp);
	return NULL;
}

void call_return_processor__free(struct call_return_processor *crp)
{
	if (crp) {
		call_path_root__free(crp->cpr);
		free(crp);
	}
}
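
/*
 * Push an entry together with its call path and timing details, growing the
 * stack if necessary.
 */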
static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr,
				 u64 timestamp, u64 ref, struct call_path *cp,
				 bool no_call)
{
	struct thread_stack_entry *tse;
	int err;

	if (ts->cnt == ts->sz) {
		err = thread_stack__grow(ts);
		if (err)
			return err;
	}

	tse = &ts->stack[ts->cnt++];
	tse->ret_addr = ret_addr;
	tse->timestamp = timestamp;
	tse->ref = ref;
	tse->branch_count = ts->branch_count;
	tse->cp = cp;
	tse->no_call = no_call;

	return 0;
}
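
/*
 * Pop back to the entry matching @ret_addr (or @sym when only one entry
 * remains), reporting any intervening entries as functions that were not seen
 * to return. Returns 1 if no match is found, so the caller can treat the
 * event as a return without a call.
 */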
static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts,
				u64 ret_addr, u64 timestamp, u64 ref,
				struct symbol *sym)
{
	int err;

	if (!ts->cnt)
		return 1;

	if (ts->cnt == 1) {
		struct thread_stack_entry *tse = &ts->stack[0];

		if (tse->cp->sym == sym)
			return thread_stack__call_return(thread, ts, --ts->cnt,
							 timestamp, ref, false);
	}

	if (ts->stack[ts->cnt - 1].ret_addr == ret_addr) {
		return thread_stack__call_return(thread, ts, --ts->cnt,
						 timestamp, ref, false);
	} else {
		size_t i = ts->cnt - 1;

		while (i--) {
			if (ts->stack[i].ret_addr != ret_addr)
				continue;
			i += 1;
			while (ts->cnt > i) {
				err = thread_stack__call_return(thread, ts,
								--ts->cnt,
								timestamp, ref,
								true);
				if (err)
					return err;
			}
			return thread_stack__call_return(thread, ts, --ts->cnt,
							 timestamp, ref, false);
		}
	}

	return 1;
}
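
/*
 * The stack is empty, so fabricate a bottom entry from the current sample so
 * that subsequent calls and returns have a parent call path.
 */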
static int thread_stack__bottom(struct thread *thread, struct thread_stack *ts,
				struct perf_sample *sample,
				struct addr_location *from_al,
				struct addr_location *to_al, u64 ref)
{
	struct call_path_root *cpr = ts->crp->cpr;
	struct call_path *cp;
	struct symbol *sym;
	u64 ip;

	if (sample->ip) {
		ip = sample->ip;
		sym = from_al->sym;
	} else if (sample->addr) {
		ip = sample->addr;
		sym = to_al->sym;
	} else {
		return 0;
	}

	cp = call_path__findnew(cpr, &cpr->call_path, sym, ip,
				ts->kernel_start);
	if (!cp)
		return -ENOMEM;

	return thread_stack__push_cp(thread->ts, ip, sample->time, ref, cp,
				     true);
}
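
/*
 * Handle a 'return' whose 'call' was never seen, e.g. when control returns to
 * userspace past stale kernel entries: pop those kernel entries first, then
 * push and immediately pop a call path flagged with no_call.
 */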
static int thread_stack__no_call_return(struct thread *thread,
					struct thread_stack *ts,
					struct perf_sample *sample,
					struct addr_location *from_al,
					struct addr_location *to_al, u64 ref)
{
	struct call_path_root *cpr = ts->crp->cpr;
	struct call_path *cp, *parent;
	u64 ks = ts->kernel_start;
	int err;

	if (sample->ip >= ks && sample->addr < ks) {
		/* Return to userspace, so pop all kernel addresses */
		while (thread_stack__in_kernel(ts)) {
			err = thread_stack__call_return(thread, ts, --ts->cnt,
							sample->time, ref,
							true);
			if (err)
				return err;
		}

		/* If the stack is empty, push the userspace address */
		if (!ts->cnt) {
			cp = call_path__findnew(cpr, &cpr->call_path,
						to_al->sym, sample->addr,
						ts->kernel_start);
			if (!cp)
				return -ENOMEM;
			return thread_stack__push_cp(ts, 0, sample->time, ref,
						     cp, true);
		}
	} else if (thread_stack__in_kernel(ts) && sample->ip < ks) {
		/* Return to userspace, so pop all kernel addresses */
		while (thread_stack__in_kernel(ts)) {
			err = thread_stack__call_return(thread, ts, --ts->cnt,
							sample->time, ref,
							true);
			if (err)
				return err;
		}
	}

	if (ts->cnt)
		parent = ts->stack[ts->cnt - 1].cp;
	else
		parent = &cpr->call_path;

	/* This 'return' had no 'call', so push and pop top of stack */
	cp = call_path__findnew(cpr, parent, from_al->sym, sample->ip,
				ts->kernel_start);
	if (!cp)
		return -ENOMEM;

	err = thread_stack__push_cp(ts, sample->addr, sample->time, ref, cp,
				    true);
	if (err)
		return err;

	return thread_stack__pop_cp(thread, ts, sample->addr, sample->time, ref,
				    to_al->sym);
}
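
/*
 * Tracing has resumed: if the top of the stack is a 'trace end' placeholder
 * (NULL symbol, zero ip), retire it now.
 */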
static int thread_stack__trace_begin(struct thread *thread,
				     struct thread_stack *ts, u64 timestamp,
				     u64 ref)
{
	struct thread_stack_entry *tse;
	int err;

	if (!ts->cnt)
		return 0;

	/* Pop trace end */
	tse = &ts->stack[ts->cnt - 1];
	if (tse->cp->sym == NULL && tse->cp->ip == 0) {
		err = thread_stack__call_return(thread, ts, --ts->cnt,
						timestamp, ref, false);
		if (err)
			return err;
	}

	return 0;
}
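
/*
 * Tracing stopped: push a placeholder call path (NULL symbol, zero ip) so the
 * discontinuity in the trace remains visible.
 */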
static int thread_stack__trace_end(struct thread_stack *ts,
				   struct perf_sample *sample, u64 ref)
{
	struct call_path_root *cpr = ts->crp->cpr;
	struct call_path *cp;
	u64 ret_addr;

	/* No point having 'trace end' on the bottom of the stack */
	if (!ts->cnt || (ts->cnt == 1 && ts->stack[0].ref == ref))
		return 0;

	cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, NULL, 0,
				ts->kernel_start);
	if (!cp)
		return -ENOMEM;

	ret_addr = sample->ip + sample->insn_len;

	return thread_stack__push_cp(ts, ret_addr, sample->time, ref, cp,
				     false);
}
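
/*
 * Main entry point for call/return processing: (re)create the stack for use
 * with @crp if needed, flush on exec, seed an empty stack from the current
 * sample, then dispatch on the branch flags (call, return, trace begin,
 * trace end).
 */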
int thread_stack__process(struct thread *thread, struct comm *comm,
			  struct perf_sample *sample,
			  struct addr_location *from_al,
			  struct addr_location *to_al, u64 ref,
			  struct call_return_processor *crp)
{
	struct thread_stack *ts = thread->ts;
	int err = 0;

	if (ts) {
		if (!ts->crp) {
			/* Supersede thread_stack__event() */
			thread_stack__free(thread);
			thread->ts = thread_stack__new(thread, crp);
			if (!thread->ts)
				return -ENOMEM;
			ts = thread->ts;
			ts->comm = comm;
		}
	} else {
		thread->ts = thread_stack__new(thread, crp);
		if (!thread->ts)
			return -ENOMEM;
		ts = thread->ts;
		ts->comm = comm;
	}

	/* Flush stack on exec */
	if (ts->comm != comm && thread->pid_ == thread->tid) {
		err = __thread_stack__flush(thread, ts);
		if (err)
			return err;
		ts->comm = comm;
	}

	/* If the stack is empty, put the current symbol on the stack */
	if (!ts->cnt) {
		err = thread_stack__bottom(thread, ts, sample, from_al, to_al,
					   ref);
		if (err)
			return err;
	}

	ts->branch_count += 1;
	ts->last_time = sample->time;

	if (sample->flags & PERF_IP_FLAG_CALL) {
		struct call_path_root *cpr = ts->crp->cpr;
		struct call_path *cp;
		u64 ret_addr;

		if (!sample->ip || !sample->addr)
			return 0;

		ret_addr = sample->ip + sample->insn_len;
		if (ret_addr == sample->addr)
			return 0; /* Zero-length calls are excluded */

		cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp,
					to_al->sym, sample->addr,
					ts->kernel_start);
		if (!cp)
			return -ENOMEM;
		err = thread_stack__push_cp(ts, ret_addr, sample->time, ref,
					    cp, false);
	} else if (sample->flags & PERF_IP_FLAG_RETURN) {
		if (!sample->ip || !sample->addr)
			return 0;

		err = thread_stack__pop_cp(thread, ts, sample->addr,
					   sample->time, ref, from_al->sym);
		if (err) {
			if (err < 0)
				return err;
			err = thread_stack__no_call_return(thread, ts, sample,
							   from_al, to_al, ref);
		}
	} else if (sample->flags & PERF_IP_FLAG_TRACE_BEGIN) {
		err = thread_stack__trace_begin(thread, ts, sample->time, ref);
	} else if (sample->flags & PERF_IP_FLAG_TRACE_END) {
		err = thread_stack__trace_end(ts, sample, ref);
	}

	return err;
}

size_t thread_stack__depth(struct thread *thread)
{
	if (!thread->ts)
		return 0;
	return thread->ts->cnt;
}