/*
 * intel_pt.c: Intel Processor Trace support
 * Copyright (c) 2013-2015, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 */

#include <stdio.h>
#include <stdbool.h>
#include <errno.h>
#include <linux/kernel.h>
#include <linux/types.h>

#include "../perf.h"
#include "session.h"
#include "machine.h"
#include "sort.h"
#include "tool.h"
#include "event.h"
#include "evlist.h"
#include "evsel.h"
#include "map.h"
#include "color.h"
#include "util.h"
#include "thread.h"
#include "thread-stack.h"
#include "symbol.h"
#include "callchain.h"
#include "dso.h"
#include "debug.h"
#include "auxtrace.h"
#include "tsc.h"
#include "intel-pt.h"
#include "config.h"

#include "intel-pt-decoder/intel-pt-log.h"
#include "intel-pt-decoder/intel-pt-decoder.h"
#include "intel-pt-decoder/intel-pt-insn-decoder.h"
#include "intel-pt-decoder/intel-pt-pkt-decoder.h"

#define MAX_TIMESTAMP (~0ULL)

struct intel_pt {
	struct auxtrace auxtrace;
	struct auxtrace_queues queues;
	struct auxtrace_heap heap;
	u32 auxtrace_type;
	struct perf_session *session;
	struct machine *machine;
	struct perf_evsel *switch_evsel;
	struct thread *unknown_thread;
	bool timeless_decoding;
	bool sampling_mode;
	bool snapshot_mode;
	bool per_cpu_mmaps;
	bool have_tsc;
	bool data_queued;
	bool est_tsc;
	bool sync_switch;
	bool mispred_all;
	int have_sched_switch;
	u32 pmu_type;
	u64 kernel_start;
	u64 switch_ip;
	u64 ptss_ip;
	struct perf_tsc_conversion tc;
	bool cap_user_time_zero;
	struct itrace_synth_opts synth_opts;
	bool sample_instructions;
	u64 instructions_sample_type;
	u64 instructions_sample_period;
	u64 instructions_id;
	bool sample_branches;
	u32 branches_filter;
	u64 branches_sample_type;
	u64 branches_id;
	bool sample_transactions;
	u64 transactions_sample_type;
	u64 transactions_id;
	bool synth_needs_swap;
	u64 tsc_bit;
	u64 mtc_bit;
	u64 mtc_freq_bits;
	u32 tsc_ctc_ratio_n;
	u32 tsc_ctc_ratio_d;
	u64 cyc_bit;
	u64 noretcomp_bit;
	unsigned max_non_turbo_ratio;
	unsigned long num_events;
};

enum switch_state {
	INTEL_PT_SS_NOT_TRACING,
	INTEL_PT_SS_UNKNOWN,
	INTEL_PT_SS_TRACING,
	INTEL_PT_SS_EXPECTING_SWITCH_EVENT,
	INTEL_PT_SS_EXPECTING_SWITCH_IP,
};

struct intel_pt_queue {
	struct intel_pt *pt;
	unsigned int queue_nr;
	struct auxtrace_buffer *buffer;
	void *decoder;
	const struct intel_pt_state *state;
	struct ip_callchain *chain;
	struct branch_stack *last_branch;
	struct branch_stack *last_branch_rb;
	size_t last_branch_pos;
	union perf_event *event_buf;
	bool on_heap;
	bool stop;
	bool step_through_buffers;
	bool use_buffer_pid_tid;
	pid_t pid, tid;
	int cpu;
	int switch_state;
	pid_t next_tid;
	struct thread *thread;
	bool exclude_kernel;
	bool have_sample;
	u64 time;
	u64 timestamp;
	u32 flags;
	u16 insn_len;
	u64 last_insn_cnt;
};
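
/*
 * Hex-dump the raw Intel PT data one packet at a time, printing a packet
 * description where the packet can be decoded and flagging undecodable
 * bytes as bad packets.
 */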
static void intel_pt_dump(struct intel_pt *pt __maybe_unused,
			  unsigned char *buf, size_t len)
{
	struct intel_pt_pkt packet;
	size_t pos = 0;
	int ret, pkt_len, i;
	char desc[INTEL_PT_PKT_DESC_MAX];
	const char *color = PERF_COLOR_BLUE;

	color_fprintf(stdout, color,
		      ". ... Intel Processor Trace data: size %zu bytes\n",
		      len);

	while (len) {
		ret = intel_pt_get_packet(buf, len, &packet);
		if (ret > 0)
			pkt_len = ret;
		else
			pkt_len = 1;
		printf(".");
		color_fprintf(stdout, color, "  %08x: ", pos);
		for (i = 0; i < pkt_len; i++)
			color_fprintf(stdout, color, " %02x", buf[i]);
		for (; i < 16; i++)
			color_fprintf(stdout, color, "   ");
		if (ret > 0) {
			ret = intel_pt_pkt_desc(&packet, desc,
						INTEL_PT_PKT_DESC_MAX);
			if (ret > 0)
				color_fprintf(stdout, color, " %s\n", desc);
		} else {
			color_fprintf(stdout, color, " Bad packet!\n");
		}
		pos += pkt_len;
		buf += pkt_len;
		len -= pkt_len;
	}
}

static void intel_pt_dump_event(struct intel_pt *pt, unsigned char *buf,
				size_t len)
{
	printf(".\n");
	intel_pt_dump(pt, buf, len);
}
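
/*
 * In snapshot mode consecutive buffers can contain overlapping data; find
 * where buffer 'b' really starts relative to buffer 'a' and trim it so the
 * same trace data is not decoded twice.
 */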
static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a,
				   struct auxtrace_buffer *b)
{
	void *start;

	start = intel_pt_find_overlap(a->data, a->size, b->data, b->size,
				      pt->have_tsc);
	if (!start)
		return -EINVAL;
	b->use_size = b->data + b->size - start;
	b->use_data = start;
	return 0;
}

static void intel_pt_use_buffer_pid_tid(struct intel_pt_queue *ptq,
					struct auxtrace_queue *queue,
					struct auxtrace_buffer *buffer)
{
	if (queue->cpu == -1 && buffer->cpu != -1)
		ptq->cpu = buffer->cpu;

	ptq->pid = buffer->pid;
	ptq->tid = buffer->tid;

	intel_pt_log("queue %u cpu %d pid %d tid %d\n",
		     ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid);

	thread__zput(ptq->thread);

	if (ptq->tid != -1) {
		if (ptq->pid != -1)
			ptq->thread = machine__findnew_thread(ptq->pt->machine,
							      ptq->pid,
							      ptq->tid);
		else
			ptq->thread = machine__find_thread(ptq->pt->machine, -1,
							   ptq->tid);
	}
}

/* This function assumes data is processed sequentially only */
static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data)
{
	struct intel_pt_queue *ptq = data;
	struct auxtrace_buffer *buffer = ptq->buffer, *old_buffer = buffer;
	struct auxtrace_queue *queue;

	if (ptq->stop) {
		b->len = 0;
		return 0;
	}

	queue = &ptq->pt->queues.queue_array[ptq->queue_nr];

	buffer = auxtrace_buffer__next(queue, buffer);
	if (!buffer) {
		if (old_buffer)
			auxtrace_buffer__drop_data(old_buffer);
		b->len = 0;
		return 0;
	}

	ptq->buffer = buffer;

	if (!buffer->data) {
		int fd = perf_data_file__fd(ptq->pt->session->file);

		buffer->data = auxtrace_buffer__get_data(buffer, fd);
		if (!buffer->data)
			return -ENOMEM;
	}

	if (ptq->pt->snapshot_mode && !buffer->consecutive && old_buffer &&
	    intel_pt_do_fix_overlap(ptq->pt, old_buffer, buffer))
		return -ENOMEM;

	if (old_buffer)
		auxtrace_buffer__drop_data(old_buffer);

	if (buffer->use_data) {
		b->len = buffer->use_size;
		b->buf = buffer->use_data;
	} else {
		b->len = buffer->size;
		b->buf = buffer->data;
	}
	b->ref_timestamp = buffer->reference;

	if (!old_buffer || ptq->pt->sampling_mode || (ptq->pt->snapshot_mode &&
						      !buffer->consecutive)) {
		b->consecutive = false;
		b->trace_nr = buffer->buffer_nr + 1;
	} else {
		b->consecutive = true;
	}

	if (ptq->use_buffer_pid_tid && (ptq->pid != buffer->pid ||
					ptq->tid != buffer->tid))
		intel_pt_use_buffer_pid_tid(ptq, queue, buffer);

	if (ptq->step_through_buffers)
		ptq->stop = true;

	if (!b->len)
		return intel_pt_get_trace(b, data);

	return 0;
}

struct intel_pt_cache_entry {
	struct auxtrace_cache_entry entry;
	u64 insn_cnt;
	u64 byte_cnt;
	enum intel_pt_insn_op op;
	enum intel_pt_insn_branch branch;
	int length;
	int32_t rel;
};

static int intel_pt_config_div(const char *var, const char *value, void *data)
{
	int *d = data;
	long val;

	if (!strcmp(var, "intel-pt.cache-divisor")) {
		val = strtol(value, NULL, 0);
		if (val > 0 && val <= INT_MAX)
			*d = val;
	}

	return 0;
}

static int intel_pt_cache_divisor(void)
{
	static int d;

	if (d)
		return d;

	perf_config(intel_pt_config_div, &d);

	if (!d)
		d = 64;

	return d;
}
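
/*
 * Choose the number of hash bits for the per-dso instruction cache from the
 * dso size divided by the configured divisor (default 64), clamped to a
 * reasonable range.
 */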
static unsigned int intel_pt_cache_size(struct dso *dso,
					struct machine *machine)
{
	off_t size;

	size = dso__data_size(dso, machine);
	size /= intel_pt_cache_divisor();
	if (size < 1000)
		return 10;
	if (size > (1 << 21))
		return 21;
	return 32 - __builtin_clz(size);
}

static struct auxtrace_cache *intel_pt_cache(struct dso *dso,
					     struct machine *machine)
{
	struct auxtrace_cache *c;
	unsigned int bits;

	if (dso->auxtrace_cache)
		return dso->auxtrace_cache;

	bits = intel_pt_cache_size(dso, machine);

	/* Ignoring cache creation failure */
	c = auxtrace_cache__new(bits, sizeof(struct intel_pt_cache_entry), 200);

	dso->auxtrace_cache = c;

	return c;
}

static int intel_pt_cache_add(struct dso *dso, struct machine *machine,
			      u64 offset, u64 insn_cnt, u64 byte_cnt,
			      struct intel_pt_insn *intel_pt_insn)
{
	struct auxtrace_cache *c = intel_pt_cache(dso, machine);
	struct intel_pt_cache_entry *e;
	int err;

	if (!c)
		return -ENOMEM;

	e = auxtrace_cache__alloc_entry(c);
	if (!e)
		return -ENOMEM;

	e->insn_cnt = insn_cnt;
	e->byte_cnt = byte_cnt;
	e->op = intel_pt_insn->op;
	e->branch = intel_pt_insn->branch;
	e->length = intel_pt_insn->length;
	e->rel = intel_pt_insn->rel;

	err = auxtrace_cache__add(c, offset, &e->entry);
	if (err)
		auxtrace_cache__free_entry(c, e);

	return err;
}

static struct intel_pt_cache_entry *
intel_pt_cache_lookup(struct dso *dso, struct machine *machine, u64 offset)
{
	struct auxtrace_cache *c = intel_pt_cache(dso, machine);

	if (!c)
		return NULL;

	return auxtrace_cache__lookup(dso->auxtrace_cache, offset);
}
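
/*
 * Walk object code from *ip, decoding one instruction at a time, until a
 * branch instruction, 'to_ip' or 'max_insn_cnt' is reached. Walks that stay
 * within a single map are cached per dso to avoid decoding the same code
 * repeatedly.
 */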
static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
				   uint64_t *insn_cnt_ptr, uint64_t *ip,
				   uint64_t to_ip, uint64_t max_insn_cnt,
				   void *data)
{
	struct intel_pt_queue *ptq = data;
	struct machine *machine = ptq->pt->machine;
	struct thread *thread;
	struct addr_location al;
	unsigned char buf[1024];
	size_t bufsz;
	ssize_t len;
	int x86_64;
	u8 cpumode;
	u64 offset, start_offset, start_ip;
	u64 insn_cnt = 0;
	bool one_map = true;

	if (to_ip && *ip == to_ip)
		goto out_no_cache;

	bufsz = intel_pt_insn_max_size();

	if (*ip >= ptq->pt->kernel_start)
		cpumode = PERF_RECORD_MISC_KERNEL;
	else
		cpumode = PERF_RECORD_MISC_USER;

	thread = ptq->thread;
	if (!thread) {
		if (cpumode != PERF_RECORD_MISC_KERNEL)
			return -EINVAL;
		thread = ptq->pt->unknown_thread;
	}

	while (1) {
		thread__find_addr_map(thread, cpumode, MAP__FUNCTION, *ip, &al);
		if (!al.map || !al.map->dso)
			return -EINVAL;

		if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR &&
		    dso__data_status_seen(al.map->dso,
					  DSO_DATA_STATUS_SEEN_ITRACE))
			return -ENOENT;

		offset = al.map->map_ip(al.map, *ip);

		if (!to_ip && one_map) {
			struct intel_pt_cache_entry *e;

			e = intel_pt_cache_lookup(al.map->dso, machine, offset);
			if (e &&
			    (!max_insn_cnt || e->insn_cnt <= max_insn_cnt)) {
				*insn_cnt_ptr = e->insn_cnt;
				*ip += e->byte_cnt;
				intel_pt_insn->op = e->op;
				intel_pt_insn->branch = e->branch;
				intel_pt_insn->length = e->length;
				intel_pt_insn->rel = e->rel;
				intel_pt_log_insn_no_data(intel_pt_insn, *ip);
				return 0;
			}
		}

		start_offset = offset;
		start_ip = *ip;

		/* Load maps to ensure dso->is_64_bit has been updated */
		map__load(al.map, machine->symbol_filter);

		x86_64 = al.map->dso->is_64_bit;

		while (1) {
			len = dso__data_read_offset(al.map->dso, machine,
						    offset, buf, bufsz);
			if (len <= 0)
				return -EINVAL;

			if (intel_pt_get_insn(buf, len, x86_64, intel_pt_insn))
				return -EINVAL;

			intel_pt_log_insn(intel_pt_insn, *ip);

			insn_cnt += 1;

			if (intel_pt_insn->branch != INTEL_PT_BR_NO_BRANCH)
				goto out;

			if (max_insn_cnt && insn_cnt >= max_insn_cnt)
				goto out_no_cache;

			*ip += intel_pt_insn->length;

			if (to_ip && *ip == to_ip)
				goto out_no_cache;

			if (*ip >= al.map->end)
				break;

			offset += intel_pt_insn->length;
		}
		one_map = false;
	}
out:
	*insn_cnt_ptr = insn_cnt;

	if (!one_map)
		goto out_no_cache;

	/*
	 * Didn't lookup in the 'to_ip' case, so do it now to prevent duplicate
	 * entries.
	 */
	if (to_ip) {
		struct intel_pt_cache_entry *e;

		e = intel_pt_cache_lookup(al.map->dso, machine, start_offset);
		if (e)
			return 0;
	}

	/* Ignore cache errors */
	intel_pt_cache_add(al.map->dso, machine, start_offset, insn_cnt,
			   *ip - start_ip, intel_pt_insn);

	return 0;

out_no_cache:
	*insn_cnt_ptr = insn_cnt;
	return 0;
}

static bool intel_pt_get_config(struct intel_pt *pt,
				struct perf_event_attr *attr, u64 *config)
{
	if (attr->type == pt->pmu_type) {
		if (config)
			*config = attr->config;
		return true;
	}

	return false;
}

static bool intel_pt_exclude_kernel(struct intel_pt *pt)
{
	struct perf_evsel *evsel;

	evlist__for_each_entry(pt->session->evlist, evsel) {
		if (intel_pt_get_config(pt, &evsel->attr, NULL) &&
		    !evsel->attr.exclude_kernel)
			return false;
	}
	return true;
}

static bool intel_pt_return_compression(struct intel_pt *pt)
{
	struct perf_evsel *evsel;
	u64 config;

	if (!pt->noretcomp_bit)
		return true;

	evlist__for_each_entry(pt->session->evlist, evsel) {
		if (intel_pt_get_config(pt, &evsel->attr, &config) &&
		    (config & pt->noretcomp_bit))
			return false;
	}
	return true;
}

static unsigned int intel_pt_mtc_period(struct intel_pt *pt)
{
	struct perf_evsel *evsel;
	unsigned int shift;
	u64 config;

	if (!pt->mtc_freq_bits)
		return 0;

	for (shift = 0, config = pt->mtc_freq_bits; !(config & 1); shift++)
		config >>= 1;

	evlist__for_each_entry(pt->session->evlist, evsel) {
		if (intel_pt_get_config(pt, &evsel->attr, &config))
			return (config & pt->mtc_freq_bits) >> shift;
	}
	return 0;
}
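
/*
 * Decoding is "timeless" when there are no usable timestamps in the trace,
 * in which case queues are processed per thread instead of being ordered on
 * the timestamp heap.
 */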
static bool intel_pt_timeless_decoding(struct intel_pt *pt)
{
	struct perf_evsel *evsel;
	bool timeless_decoding = true;
	u64 config;

	if (!pt->tsc_bit || !pt->cap_user_time_zero)
		return true;

	evlist__for_each_entry(pt->session->evlist, evsel) {
		if (!(evsel->attr.sample_type & PERF_SAMPLE_TIME))
			return true;
		if (intel_pt_get_config(pt, &evsel->attr, &config)) {
			if (config & pt->tsc_bit)
				timeless_decoding = false;
			else
				return true;
		}
	}
	return timeless_decoding;
}

static bool intel_pt_tracing_kernel(struct intel_pt *pt)
{
	struct perf_evsel *evsel;

	evlist__for_each_entry(pt->session->evlist, evsel) {
		if (intel_pt_get_config(pt, &evsel->attr, NULL) &&
		    !evsel->attr.exclude_kernel)
			return true;
	}
	return false;
}

static bool intel_pt_have_tsc(struct intel_pt *pt)
{
	struct perf_evsel *evsel;
	bool have_tsc = false;
	u64 config;

	if (!pt->tsc_bit)
		return false;

	evlist__for_each_entry(pt->session->evlist, evsel) {
		if (intel_pt_get_config(pt, &evsel->attr, &config)) {
			if (config & pt->tsc_bit)
				have_tsc = true;
			else
				return false;
		}
	}
	return have_tsc;
}
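
/*
 * Convert nanoseconds to TSC ticks by applying the inverse of the mult/shift
 * conversion held in pt->tc, splitting into quotient and remainder to avoid
 * overflow.
 */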
static u64 intel_pt_ns_to_ticks(const struct intel_pt *pt, u64 ns)
{
	u64 quot, rem;

	quot = ns / pt->tc.time_mult;
	rem = ns % pt->tc.time_mult;
	return (quot << pt->tc.time_shift) + (rem << pt->tc.time_shift) /
	       pt->tc.time_mult;
}

static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
						   unsigned int queue_nr)
{
	struct intel_pt_params params = { .get_trace = 0, };
	struct intel_pt_queue *ptq;

	ptq = zalloc(sizeof(struct intel_pt_queue));
	if (!ptq)
		return NULL;

	if (pt->synth_opts.callchain) {
		size_t sz = sizeof(struct ip_callchain);

		sz += pt->synth_opts.callchain_sz * sizeof(u64);
		ptq->chain = zalloc(sz);
		if (!ptq->chain)
			goto out_free;
	}

	if (pt->synth_opts.last_branch) {
		size_t sz = sizeof(struct branch_stack);

		sz += pt->synth_opts.last_branch_sz *
		      sizeof(struct branch_entry);
		ptq->last_branch = zalloc(sz);
		if (!ptq->last_branch)
			goto out_free;
		ptq->last_branch_rb = zalloc(sz);
		if (!ptq->last_branch_rb)
			goto out_free;
	}

	ptq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
	if (!ptq->event_buf)
		goto out_free;

	ptq->pt = pt;
	ptq->queue_nr = queue_nr;
	ptq->exclude_kernel = intel_pt_exclude_kernel(pt);
	ptq->pid = -1;
	ptq->tid = -1;
	ptq->cpu = -1;
	ptq->next_tid = -1;

	params.get_trace = intel_pt_get_trace;
	params.walk_insn = intel_pt_walk_next_insn;
	params.data = ptq;
	params.return_compression = intel_pt_return_compression(pt);
	params.max_non_turbo_ratio = pt->max_non_turbo_ratio;
	params.mtc_period = intel_pt_mtc_period(pt);
	params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n;
	params.tsc_ctc_ratio_d = pt->tsc_ctc_ratio_d;

	if (pt->synth_opts.instructions) {
		if (pt->synth_opts.period) {
			switch (pt->synth_opts.period_type) {
			case PERF_ITRACE_PERIOD_INSTRUCTIONS:
				params.period_type =
						INTEL_PT_PERIOD_INSTRUCTIONS;
				params.period = pt->synth_opts.period;
				break;
			case PERF_ITRACE_PERIOD_TICKS:
				params.period_type = INTEL_PT_PERIOD_TICKS;
				params.period = pt->synth_opts.period;
				break;
			case PERF_ITRACE_PERIOD_NANOSECS:
				params.period_type = INTEL_PT_PERIOD_TICKS;
				params.period = intel_pt_ns_to_ticks(pt,
							pt->synth_opts.period);
				break;
			default:
				break;
			}
		}

		if (!params.period) {
			params.period_type = INTEL_PT_PERIOD_INSTRUCTIONS;
			params.period = 1;
		}
	}

	ptq->decoder = intel_pt_decoder_new(&params);
	if (!ptq->decoder)
		goto out_free;

	return ptq;

out_free:
	zfree(&ptq->event_buf);
	zfree(&ptq->last_branch);
	zfree(&ptq->last_branch_rb);
	zfree(&ptq->chain);
	free(ptq);
	return NULL;
}

static void intel_pt_free_queue(void *priv)
{
	struct intel_pt_queue *ptq = priv;

	if (!ptq)
		return;
	thread__zput(ptq->thread);
	intel_pt_decoder_free(ptq->decoder);
	zfree(&ptq->event_buf);
	zfree(&ptq->last_branch);
	zfree(&ptq->last_branch_rb);
	zfree(&ptq->chain);
	free(ptq);
}

static void intel_pt_set_pid_tid_cpu(struct intel_pt *pt,
				     struct auxtrace_queue *queue)
{
	struct intel_pt_queue *ptq = queue->priv;

	if (queue->tid == -1 || pt->have_sched_switch) {
		ptq->tid = machine__get_current_tid(pt->machine, ptq->cpu);
		thread__zput(ptq->thread);
	}

	if (!ptq->thread && ptq->tid != -1)
		ptq->thread = machine__find_thread(pt->machine, -1, ptq->tid);

	if (ptq->thread) {
		ptq->pid = ptq->thread->pid_;
		if (queue->cpu == -1)
			ptq->cpu = ptq->thread->cpu;
	}
}
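
/*
 * Translate the decoder state for the current branch into perf sample flags
 * (async call/interrupt, TX abort, trace begin/end, etc.) and record the
 * instruction length.
 */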
static void intel_pt_sample_flags(struct intel_pt_queue *ptq)
{
	if (ptq->state->flags & INTEL_PT_ABORT_TX) {
		ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT;
	} else if (ptq->state->flags & INTEL_PT_ASYNC) {
		if (ptq->state->to_ip)
			ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL |
				     PERF_IP_FLAG_ASYNC |
				     PERF_IP_FLAG_INTERRUPT;
		else
			ptq->flags = PERF_IP_FLAG_BRANCH |
				     PERF_IP_FLAG_TRACE_END;
		ptq->insn_len = 0;
	} else {
		if (ptq->state->from_ip)
			ptq->flags = intel_pt_insn_type(ptq->state->insn_op);
		else
			ptq->flags = PERF_IP_FLAG_BRANCH |
				     PERF_IP_FLAG_TRACE_BEGIN;
		if (ptq->state->flags & INTEL_PT_IN_TX)
			ptq->flags |= PERF_IP_FLAG_IN_TX;
		ptq->insn_len = ptq->state->insn_len;
	}
}

static int intel_pt_setup_queue(struct intel_pt *pt,
				struct auxtrace_queue *queue,
				unsigned int queue_nr)
{
	struct intel_pt_queue *ptq = queue->priv;

	if (list_empty(&queue->head))
		return 0;

	if (!ptq) {
		ptq = intel_pt_alloc_queue(pt, queue_nr);
		if (!ptq)
			return -ENOMEM;
		queue->priv = ptq;

		if (queue->cpu != -1)
			ptq->cpu = queue->cpu;
		ptq->tid = queue->tid;

		if (pt->sampling_mode) {
			if (pt->timeless_decoding)
				ptq->step_through_buffers = true;
			if (pt->timeless_decoding || !pt->have_sched_switch)
				ptq->use_buffer_pid_tid = true;
		}
	}

	if (!ptq->on_heap &&
	    (!pt->sync_switch ||
	     ptq->switch_state != INTEL_PT_SS_EXPECTING_SWITCH_EVENT)) {
		const struct intel_pt_state *state;
		int ret;

		if (pt->timeless_decoding)
			return 0;

		intel_pt_log("queue %u getting timestamp\n", queue_nr);
		intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n",
			     queue_nr, ptq->cpu, ptq->pid, ptq->tid);
		while (1) {
			state = intel_pt_decode(ptq->decoder);
			if (state->err) {
				if (state->err == INTEL_PT_ERR_NODATA) {
					intel_pt_log("queue %u has no timestamp\n",
						     queue_nr);
					return 0;
				}
				continue;
			}
			if (state->timestamp)
				break;
		}

		ptq->timestamp = state->timestamp;
		intel_pt_log("queue %u timestamp 0x%" PRIx64 "\n",
			     queue_nr, ptq->timestamp);
		ptq->state = state;
		ptq->have_sample = true;
		intel_pt_sample_flags(ptq);
		ret = auxtrace_heap__add(&pt->heap, queue_nr, ptq->timestamp);
		if (ret)
			return ret;
		ptq->on_heap = true;
	}

	return 0;
}

static int intel_pt_setup_queues(struct intel_pt *pt)
{
	unsigned int i;
	int ret;

	for (i = 0; i < pt->queues.nr_queues; i++) {
		ret = intel_pt_setup_queue(pt, &pt->queues.queue_array[i], i);
		if (ret)
			return ret;
	}
	return 0;
}
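
/*
 * The last-branch ring buffer is filled backwards from last_branch_pos, so
 * copy it out in two chunks to produce a branch stack with the most recent
 * branch first.
 */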
static inline void intel_pt_copy_last_branch_rb(struct intel_pt_queue *ptq)
{
	struct branch_stack *bs_src = ptq->last_branch_rb;
	struct branch_stack *bs_dst = ptq->last_branch;
	size_t nr = 0;

	bs_dst->nr = bs_src->nr;

	if (!bs_src->nr)
		return;

	nr = ptq->pt->synth_opts.last_branch_sz - ptq->last_branch_pos;
	memcpy(&bs_dst->entries[0],
	       &bs_src->entries[ptq->last_branch_pos],
	       sizeof(struct branch_entry) * nr);

	if (bs_src->nr >= ptq->pt->synth_opts.last_branch_sz) {
		memcpy(&bs_dst->entries[nr],
		       &bs_src->entries[0],
		       sizeof(struct branch_entry) * ptq->last_branch_pos);
	}
}

static inline void intel_pt_reset_last_branch_rb(struct intel_pt_queue *ptq)
{
	ptq->last_branch_pos = 0;
	ptq->last_branch_rb->nr = 0;
}

static void intel_pt_update_last_branch_rb(struct intel_pt_queue *ptq)
{
	const struct intel_pt_state *state = ptq->state;
	struct branch_stack *bs = ptq->last_branch_rb;
	struct branch_entry *be;

	if (!ptq->last_branch_pos)
		ptq->last_branch_pos = ptq->pt->synth_opts.last_branch_sz;

	ptq->last_branch_pos -= 1;

	be = &bs->entries[ptq->last_branch_pos];
	be->from = state->from_ip;
	be->to = state->to_ip;
	be->flags.abort = !!(state->flags & INTEL_PT_ABORT_TX);
	be->flags.in_tx = !!(state->flags & INTEL_PT_IN_TX);
	/* No support for mispredict */
	be->flags.mispred = ptq->pt->mispred_all;

	if (bs->nr < ptq->pt->synth_opts.last_branch_sz)
		bs->nr += 1;
}

static int intel_pt_inject_event(union perf_event *event,
				 struct perf_sample *sample, u64 type,
				 bool swapped)
{
	event->header.size = perf_event__sample_event_size(sample, type, 0);
	return perf_event__synthesize_sample(event, type, 0, sample, swapped);
}

static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
{
	int ret;
	struct intel_pt *pt = ptq->pt;
	union perf_event *event = ptq->event_buf;
	struct perf_sample sample = { .ip = 0, };
	struct dummy_branch_stack {
		u64 nr;
		struct branch_entry entries;
	} dummy_bs;

	if (pt->branches_filter && !(pt->branches_filter & ptq->flags))
		return 0;

	if (pt->synth_opts.initial_skip &&
	    pt->num_events++ < pt->synth_opts.initial_skip)
		return 0;

	event->sample.header.type = PERF_RECORD_SAMPLE;
	event->sample.header.misc = PERF_RECORD_MISC_USER;
	event->sample.header.size = sizeof(struct perf_event_header);

	if (!pt->timeless_decoding)
		sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);

	sample.cpumode = PERF_RECORD_MISC_USER;
	sample.ip = ptq->state->from_ip;
	sample.pid = ptq->pid;
	sample.tid = ptq->tid;
	sample.addr = ptq->state->to_ip;
	sample.id = ptq->pt->branches_id;
	sample.stream_id = ptq->pt->branches_id;
	sample.period = 1;
	sample.cpu = ptq->cpu;
	sample.flags = ptq->flags;
	sample.insn_len = ptq->insn_len;

	/*
	 * perf report cannot handle events without a branch stack when using
	 * SORT_MODE__BRANCH so make a dummy one.
	 */
	if (pt->synth_opts.last_branch && sort__mode == SORT_MODE__BRANCH) {
		dummy_bs = (struct dummy_branch_stack){
			.nr = 1,
			.entries = {
				.from = sample.ip,
				.to = sample.addr,
			},
		};
		sample.branch_stack = (struct branch_stack *)&dummy_bs;
	}

	if (pt->synth_opts.inject) {
		ret = intel_pt_inject_event(event, &sample,
					    pt->branches_sample_type,
					    pt->synth_needs_swap);
		if (ret)
			return ret;
	}

	ret = perf_session__deliver_synth_event(pt->session, event, &sample);
	if (ret)
		pr_err("Intel Processor Trace: failed to deliver branch event, error %d\n",
		       ret);

	return ret;
}

static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
{
	int ret;
	struct intel_pt *pt = ptq->pt;
	union perf_event *event = ptq->event_buf;
	struct perf_sample sample = { .ip = 0, };

	if (pt->synth_opts.initial_skip &&
	    pt->num_events++ < pt->synth_opts.initial_skip)
		return 0;

	event->sample.header.type = PERF_RECORD_SAMPLE;
	event->sample.header.misc = PERF_RECORD_MISC_USER;
	event->sample.header.size = sizeof(struct perf_event_header);

	if (!pt->timeless_decoding)
		sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);

	sample.cpumode = PERF_RECORD_MISC_USER;
	sample.ip = ptq->state->from_ip;
	sample.pid = ptq->pid;
	sample.tid = ptq->tid;
	sample.addr = ptq->state->to_ip;
	sample.id = ptq->pt->instructions_id;
	sample.stream_id = ptq->pt->instructions_id;
	sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt;
	sample.cpu = ptq->cpu;
	sample.flags = ptq->flags;
	sample.insn_len = ptq->insn_len;

	ptq->last_insn_cnt = ptq->state->tot_insn_cnt;

	if (pt->synth_opts.callchain) {
		thread_stack__sample(ptq->thread, ptq->chain,
				     pt->synth_opts.callchain_sz, sample.ip);
		sample.callchain = ptq->chain;
	}

	if (pt->synth_opts.last_branch) {
		intel_pt_copy_last_branch_rb(ptq);
		sample.branch_stack = ptq->last_branch;
	}

	if (pt->synth_opts.inject) {
		ret = intel_pt_inject_event(event, &sample,
					    pt->instructions_sample_type,
					    pt->synth_needs_swap);
		if (ret)
			return ret;
	}

	ret = perf_session__deliver_synth_event(pt->session, event, &sample);
	if (ret)
		pr_err("Intel Processor Trace: failed to deliver instruction event, error %d\n",
		       ret);

	if (pt->synth_opts.last_branch)
		intel_pt_reset_last_branch_rb(ptq);

	return ret;
}

static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
{
	int ret;
	struct intel_pt *pt = ptq->pt;
	union perf_event *event = ptq->event_buf;
	struct perf_sample sample = { .ip = 0, };

	if (pt->synth_opts.initial_skip &&
	    pt->num_events++ < pt->synth_opts.initial_skip)
		return 0;

	event->sample.header.type = PERF_RECORD_SAMPLE;
	event->sample.header.misc = PERF_RECORD_MISC_USER;
	event->sample.header.size = sizeof(struct perf_event_header);

	if (!pt->timeless_decoding)
		sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);

	sample.cpumode = PERF_RECORD_MISC_USER;
	sample.ip = ptq->state->from_ip;
	sample.pid = ptq->pid;
	sample.tid = ptq->tid;
	sample.addr = ptq->state->to_ip;
	sample.id = ptq->pt->transactions_id;
	sample.stream_id = ptq->pt->transactions_id;
	sample.period = 1;
	sample.cpu = ptq->cpu;
	sample.flags = ptq->flags;
	sample.insn_len = ptq->insn_len;

	if (pt->synth_opts.callchain) {
		thread_stack__sample(ptq->thread, ptq->chain,
				     pt->synth_opts.callchain_sz, sample.ip);
		sample.callchain = ptq->chain;
	}

	if (pt->synth_opts.last_branch) {
		intel_pt_copy_last_branch_rb(ptq);
		sample.branch_stack = ptq->last_branch;
	}

	if (pt->synth_opts.inject) {
		ret = intel_pt_inject_event(event, &sample,
					    pt->transactions_sample_type,
					    pt->synth_needs_swap);
		if (ret)
			return ret;
	}

	ret = perf_session__deliver_synth_event(pt->session, event, &sample);
	if (ret)
		pr_err("Intel Processor Trace: failed to deliver transaction event, error %d\n",
		       ret);

	if (pt->synth_opts.last_branch)
		intel_pt_reset_last_branch_rb(ptq);

	return ret;
}

static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu,
				pid_t pid, pid_t tid, u64 ip)
{
	union perf_event event;
	char msg[MAX_AUXTRACE_ERROR_MSG];
	int err;

	intel_pt__strerror(code, msg, MAX_AUXTRACE_ERROR_MSG);

	auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
			     code, cpu, pid, tid, ip, msg);

	err = perf_session__deliver_synth_event(pt->session, &event, NULL);
	if (err)
		pr_err("Intel Processor Trace: failed to deliver error event, error %d\n",
		       err);

	return err;
}

static int intel_pt_next_tid(struct intel_pt *pt, struct intel_pt_queue *ptq)
{
	struct auxtrace_queue *queue;
	pid_t tid = ptq->next_tid;
	int err;

	if (tid == -1)
		return 0;

	intel_pt_log("switch: cpu %d tid %d\n", ptq->cpu, tid);

	err = machine__set_current_tid(pt->machine, ptq->cpu, -1, tid);

	queue = &pt->queues.queue_array[ptq->queue_nr];
	intel_pt_set_pid_tid_cpu(pt, queue);

	ptq->next_tid = -1;

	return err;
}

static inline bool intel_pt_is_switch_ip(struct intel_pt_queue *ptq, u64 ip)
{
	struct intel_pt *pt = ptq->pt;

	return ip == pt->switch_ip &&
	       (ptq->flags & PERF_IP_FLAG_BRANCH) &&
	       !(ptq->flags & (PERF_IP_FLAG_CONDITIONAL | PERF_IP_FLAG_ASYNC |
			       PERF_IP_FLAG_INTERRUPT | PERF_IP_FLAG_TX_ABORT));
}

static int intel_pt_sample(struct intel_pt_queue *ptq)
{
	const struct intel_pt_state *state = ptq->state;
	struct intel_pt *pt = ptq->pt;
	int err;

	if (!ptq->have_sample)
		return 0;

	ptq->have_sample = false;

	if (pt->sample_instructions &&
	    (state->type & INTEL_PT_INSTRUCTION) &&
	    (!pt->synth_opts.initial_skip ||
	     pt->num_events++ >= pt->synth_opts.initial_skip)) {
		err = intel_pt_synth_instruction_sample(ptq);
		if (err)
			return err;
	}

	if (pt->sample_transactions &&
	    (state->type & INTEL_PT_TRANSACTION) &&
	    (!pt->synth_opts.initial_skip ||
	     pt->num_events++ >= pt->synth_opts.initial_skip)) {
		err = intel_pt_synth_transaction_sample(ptq);
		if (err)
			return err;
	}

	if (!(state->type & INTEL_PT_BRANCH))
		return 0;

	if (pt->synth_opts.callchain || pt->synth_opts.thread_stack)
		thread_stack__event(ptq->thread, ptq->flags, state->from_ip,
				    state->to_ip, ptq->insn_len,
				    state->trace_nr);
	else
		thread_stack__set_trace_nr(ptq->thread, state->trace_nr);

	if (pt->sample_branches) {
		err = intel_pt_synth_branch_sample(ptq);
		if (err)
			return err;
	}

	if (pt->synth_opts.last_branch)
		intel_pt_update_last_branch_rb(ptq);

	if (!pt->sync_switch)
		return 0;

	if (intel_pt_is_switch_ip(ptq, state->to_ip)) {
		switch (ptq->switch_state) {
		case INTEL_PT_SS_UNKNOWN:
		case INTEL_PT_SS_EXPECTING_SWITCH_IP:
			err = intel_pt_next_tid(pt, ptq);
			if (err)
				return err;
			ptq->switch_state = INTEL_PT_SS_TRACING;
			break;
		default:
			ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_EVENT;
			return 1;
		}
	} else if (!state->to_ip) {
		ptq->switch_state = INTEL_PT_SS_NOT_TRACING;
	} else if (ptq->switch_state == INTEL_PT_SS_NOT_TRACING) {
		ptq->switch_state = INTEL_PT_SS_UNKNOWN;
	} else if (ptq->switch_state == INTEL_PT_SS_UNKNOWN &&
		   state->to_ip == pt->ptss_ip &&
		   (ptq->flags & PERF_IP_FLAG_CALL)) {
		ptq->switch_state = INTEL_PT_SS_TRACING;
	}

	return 0;
}
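
/*
 * Find the kernel address of __switch_to, which marks a context switch in
 * the trace, and of the sched_switch tracepoint or sched_out function used
 * to correlate with switch events.
 */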
static u64 intel_pt_switch_ip(struct intel_pt *pt, u64 *ptss_ip)
{
	struct machine *machine = pt->machine;
	struct map *map;
	struct symbol *sym, *start;
	u64 ip, switch_ip = 0;
	const char *ptss;

	if (ptss_ip)
		*ptss_ip = 0;

	map = machine__kernel_map(machine);
	if (!map)
		return 0;

	if (map__load(map, machine->symbol_filter))
		return 0;

	start = dso__first_symbol(map->dso, MAP__FUNCTION);

	for (sym = start; sym; sym = dso__next_symbol(sym)) {
		if (sym->binding == STB_GLOBAL &&
		    !strcmp(sym->name, "__switch_to")) {
			ip = map->unmap_ip(map, sym->start);
			if (ip >= map->start && ip < map->end) {
				switch_ip = ip;
				break;
			}
		}
	}

	if (!switch_ip || !ptss_ip)
		return 0;

	if (pt->have_sched_switch == 1)
		ptss = "perf_trace_sched_switch";
	else
		ptss = "__perf_event_task_sched_out";

	for (sym = start; sym; sym = dso__next_symbol(sym)) {
		if (!strcmp(sym->name, ptss)) {
			ip = map->unmap_ip(map, sym->start);
			if (ip >= map->start && ip < map->end) {
				*ptss_ip = ip;
				break;
			}
		}
	}

	return switch_ip;
}

static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp)
{
	const struct intel_pt_state *state = ptq->state;
	struct intel_pt *pt = ptq->pt;
	int err;

	if (!pt->kernel_start) {
		pt->kernel_start = machine__kernel_start(pt->machine);
		if (pt->per_cpu_mmaps &&
		    (pt->have_sched_switch == 1 || pt->have_sched_switch == 3) &&
		    !pt->timeless_decoding && intel_pt_tracing_kernel(pt) &&
		    !pt->sampling_mode) {
			pt->switch_ip = intel_pt_switch_ip(pt, &pt->ptss_ip);
			if (pt->switch_ip) {
				intel_pt_log("switch_ip: %"PRIx64" ptss_ip: %"PRIx64"\n",
					     pt->switch_ip, pt->ptss_ip);
				pt->sync_switch = true;
			}
		}
	}

	intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n",
		     ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid);
	while (1) {
		err = intel_pt_sample(ptq);
		if (err)
			return err;

		state = intel_pt_decode(ptq->decoder);
		if (state->err) {
			if (state->err == INTEL_PT_ERR_NODATA)
				return 1;
			if (pt->sync_switch &&
			    state->from_ip >= pt->kernel_start) {
				pt->sync_switch = false;
				intel_pt_next_tid(pt, ptq);
			}
			if (pt->synth_opts.errors) {
				err = intel_pt_synth_error(pt, state->err,
							   ptq->cpu, ptq->pid,
							   ptq->tid,
							   state->from_ip);
				if (err)
					return err;
			}
			continue;
		}

		ptq->state = state;
		ptq->have_sample = true;
		intel_pt_sample_flags(ptq);

		/* Use estimated TSC upon return to user space */
		if (pt->est_tsc &&
		    (state->from_ip >= pt->kernel_start || !state->from_ip) &&
		    state->to_ip && state->to_ip < pt->kernel_start) {
			intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n",
				     state->timestamp, state->est_timestamp);
			ptq->timestamp = state->est_timestamp;
		/* Use estimated TSC in unknown switch state */
		} else if (pt->sync_switch &&
			   ptq->switch_state == INTEL_PT_SS_UNKNOWN &&
			   intel_pt_is_switch_ip(ptq, state->to_ip) &&
			   ptq->next_tid == -1) {
			intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n",
				     state->timestamp, state->est_timestamp);
			ptq->timestamp = state->est_timestamp;
		} else if (state->timestamp > ptq->timestamp) {
			ptq->timestamp = state->timestamp;
		}

		if (!pt->timeless_decoding && ptq->timestamp >= *timestamp) {
			*timestamp = ptq->timestamp;
			return 0;
		}
	}
	return 0;
}

static inline int intel_pt_update_queues(struct intel_pt *pt)
{
	if (pt->queues.new_data) {
		pt->queues.new_data = false;
		return intel_pt_setup_queues(pt);
	}

	return 0;
}

static int intel_pt_process_queues(struct intel_pt *pt, u64 timestamp)
{
	unsigned int queue_nr;
	u64 ts;
	int ret;

	while (1) {
		struct auxtrace_queue *queue;
		struct intel_pt_queue *ptq;

		if (!pt->heap.heap_cnt)
			return 0;

		if (pt->heap.heap_array[0].ordinal >= timestamp)
			return 0;

		queue_nr = pt->heap.heap_array[0].queue_nr;
		queue = &pt->queues.queue_array[queue_nr];
		ptq = queue->priv;

		intel_pt_log("queue %u processing 0x%" PRIx64 " to 0x%" PRIx64 "\n",
			     queue_nr, pt->heap.heap_array[0].ordinal,
			     timestamp);

		auxtrace_heap__pop(&pt->heap);

		if (pt->heap.heap_cnt) {
			ts = pt->heap.heap_array[0].ordinal + 1;
			if (ts > timestamp)
				ts = timestamp;
		} else {
			ts = timestamp;
		}

		intel_pt_set_pid_tid_cpu(pt, queue);

		ret = intel_pt_run_decoder(ptq, &ts);

		if (ret < 0) {
			auxtrace_heap__add(&pt->heap, queue_nr, ts);
			return ret;
		}

		if (!ret) {
			ret = auxtrace_heap__add(&pt->heap, queue_nr, ts);
			if (ret < 0)
				return ret;
		} else {
			ptq->on_heap = false;
		}
	}

	return 0;
}

static int intel_pt_process_timeless_queues(struct intel_pt *pt, pid_t tid,
					    u64 time_)
{
	struct auxtrace_queues *queues = &pt->queues;
	unsigned int i;
	u64 ts = 0;

	for (i = 0; i < queues->nr_queues; i++) {
		struct auxtrace_queue *queue = &pt->queues.queue_array[i];
		struct intel_pt_queue *ptq = queue->priv;

		if (ptq && (tid == -1 || ptq->tid == tid)) {
			ptq->time = time_;
			intel_pt_set_pid_tid_cpu(pt, queue);
			intel_pt_run_decoder(ptq, &ts);
		}
	}
	return 0;
}

static int intel_pt_lost(struct intel_pt *pt, struct perf_sample *sample)
{
	return intel_pt_synth_error(pt, INTEL_PT_ERR_LOST, sample->cpu,
				    sample->pid, sample->tid, 0);
}
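
/*
 * Find the queue for a given CPU. Queues are usually allocated per CPU, so
 * start at the queue whose index matches the CPU and search outwards from
 * there.
 */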
static struct intel_pt_queue *intel_pt_cpu_to_ptq(struct intel_pt *pt, int cpu)
{
	unsigned i, j;

	if (cpu < 0 || !pt->queues.nr_queues)
		return NULL;

	if ((unsigned)cpu >= pt->queues.nr_queues)
		i = pt->queues.nr_queues - 1;
	else
		i = cpu;

	if (pt->queues.queue_array[i].cpu == cpu)
		return pt->queues.queue_array[i].priv;

	for (j = 0; i > 0; j++) {
		if (pt->queues.queue_array[--i].cpu == cpu)
			return pt->queues.queue_array[i].priv;
	}

	for (; j < pt->queues.nr_queues; j++) {
		if (pt->queues.queue_array[j].cpu == cpu)
			return pt->queues.queue_array[j].priv;
	}

	return NULL;
}

static int intel_pt_sync_switch(struct intel_pt *pt, int cpu, pid_t tid,
				u64 timestamp)
{
	struct intel_pt_queue *ptq;
	int err;

	if (!pt->sync_switch)
		return 1;

	ptq = intel_pt_cpu_to_ptq(pt, cpu);
	if (!ptq)
		return 1;

	switch (ptq->switch_state) {
	case INTEL_PT_SS_NOT_TRACING:
		ptq->next_tid = -1;
		break;
	case INTEL_PT_SS_UNKNOWN:
	case INTEL_PT_SS_TRACING:
		ptq->next_tid = tid;
		ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_IP;
		return 0;
	case INTEL_PT_SS_EXPECTING_SWITCH_EVENT:
		if (!ptq->on_heap) {
			ptq->timestamp = perf_time_to_tsc(timestamp,
							  &pt->tc);
			err = auxtrace_heap__add(&pt->heap, ptq->queue_nr,
						 ptq->timestamp);
			if (err)
				return err;
			ptq->on_heap = true;
		}
		ptq->switch_state = INTEL_PT_SS_TRACING;
		break;
	case INTEL_PT_SS_EXPECTING_SWITCH_IP:
		ptq->next_tid = tid;
		intel_pt_log("ERROR: cpu %d expecting switch ip\n", cpu);
		break;
	default:
		break;
	}

	return 1;
}

static int intel_pt_process_switch(struct intel_pt *pt,
				   struct perf_sample *sample)
{
	struct perf_evsel *evsel;
	pid_t tid;
	int cpu, ret;

	evsel = perf_evlist__id2evsel(pt->session->evlist, sample->id);
	if (evsel != pt->switch_evsel)
		return 0;

	tid = perf_evsel__intval(evsel, sample, "next_pid");
	cpu = sample->cpu;

	intel_pt_log("sched_switch: cpu %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
		     cpu, tid, sample->time, perf_time_to_tsc(sample->time,
		     &pt->tc));

	ret = intel_pt_sync_switch(pt, cpu, tid, sample->time);
	if (ret <= 0)
		return ret;

	return machine__set_current_tid(pt->machine, cpu, -1, tid);
}

static int intel_pt_context_switch(struct intel_pt *pt, union perf_event *event,
				   struct perf_sample *sample)
{
	bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
	pid_t pid, tid;
	int cpu, ret;

	cpu = sample->cpu;

	if (pt->have_sched_switch == 3) {
		if (!out)
			return 0;
		if (event->header.type != PERF_RECORD_SWITCH_CPU_WIDE) {
			pr_err("Expecting CPU-wide context switch event\n");
			return -EINVAL;
		}
		pid = event->context_switch.next_prev_pid;
		tid = event->context_switch.next_prev_tid;
	} else {
		if (out)
			return 0;
		pid = sample->pid;
		tid = sample->tid;
	}

	if (tid == -1) {
		pr_err("context_switch event has no tid\n");
		return -EINVAL;
	}

	intel_pt_log("context_switch: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
		     cpu, pid, tid, sample->time, perf_time_to_tsc(sample->time,
		     &pt->tc));

	ret = intel_pt_sync_switch(pt, cpu, tid, sample->time);
	if (ret <= 0)
		return ret;

	return machine__set_current_tid(pt->machine, cpu, pid, tid);
}

static int intel_pt_process_itrace_start(struct intel_pt *pt,
					 union perf_event *event,
					 struct perf_sample *sample)
{
	if (!pt->per_cpu_mmaps)
		return 0;

	intel_pt_log("itrace_start: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
		     sample->cpu, event->itrace_start.pid,
		     event->itrace_start.tid, sample->time,
		     perf_time_to_tsc(sample->time, &pt->tc));

	return machine__set_current_tid(pt->machine, sample->cpu,
					event->itrace_start.pid,
					event->itrace_start.tid);
}

static int intel_pt_process_event(struct perf_session *session,
				  union perf_event *event,
				  struct perf_sample *sample,
				  struct perf_tool *tool)
{
	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
					   auxtrace);
	u64 timestamp;
	int err = 0;

	if (dump_trace)
		return 0;

	if (!tool->ordered_events) {
		pr_err("Intel Processor Trace requires ordered events\n");
		return -EINVAL;
	}

	if (sample->time && sample->time != (u64)-1)
		timestamp = perf_time_to_tsc(sample->time, &pt->tc);
	else
		timestamp = 0;

	if (timestamp || pt->timeless_decoding) {
		err = intel_pt_update_queues(pt);
		if (err)
			return err;
	}

	if (pt->timeless_decoding) {
		if (event->header.type == PERF_RECORD_EXIT) {
			err = intel_pt_process_timeless_queues(pt,
							       event->fork.tid,
							       sample->time);
		}
	} else if (timestamp) {
		err = intel_pt_process_queues(pt, timestamp);
	}
	if (err)
		return err;

	if (event->header.type == PERF_RECORD_AUX &&
	    (event->aux.flags & PERF_AUX_FLAG_TRUNCATED) &&
	    pt->synth_opts.errors) {
		err = intel_pt_lost(pt, sample);
		if (err)
			return err;
	}

	if (pt->switch_evsel && event->header.type == PERF_RECORD_SAMPLE)
		err = intel_pt_process_switch(pt, sample);
	else if (event->header.type == PERF_RECORD_ITRACE_START)
		err = intel_pt_process_itrace_start(pt, event, sample);
	else if (event->header.type == PERF_RECORD_SWITCH ||
		 event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)
		err = intel_pt_context_switch(pt, event, sample);

	intel_pt_log("event %s (%u): cpu %d time %"PRIu64" tsc %#"PRIx64"\n",
		     perf_event__name(event->header.type), event->header.type,
		     sample->cpu, sample->time, timestamp);

	return err;
}

static int intel_pt_flush(struct perf_session *session, struct perf_tool *tool)
{
	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
					   auxtrace);
	int ret;

	if (dump_trace)
		return 0;

	if (!tool->ordered_events)
		return -EINVAL;

	ret = intel_pt_update_queues(pt);
	if (ret < 0)
		return ret;

	if (pt->timeless_decoding)
		return intel_pt_process_timeless_queues(pt, -1,
							MAX_TIMESTAMP - 1);

	return intel_pt_process_queues(pt, MAX_TIMESTAMP);
}
static void intel_pt_free_events(struct perf_session *session)
{
        struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
                                           auxtrace);
        struct auxtrace_queues *queues = &pt->queues;
        unsigned int i;

        for (i = 0; i < queues->nr_queues; i++) {
                intel_pt_free_queue(queues->queue_array[i].priv);
                queues->queue_array[i].priv = NULL;
        }
        intel_pt_log_disable();
        auxtrace_queues__free(queues);
}
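
/* Tear down the auxtrace instance set up by intel_pt_process_auxtrace_info(). */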
static void intel_pt_free(struct perf_session *session)
{
        struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
                                           auxtrace);

        auxtrace_heap__free(&pt->heap);
        intel_pt_free_events(session);
        session->auxtrace = NULL;
        thread__put(pt->unknown_thread);
        free(pt);
}
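
/*
 * Queue an AUX area trace buffer.  For file input only the file offset is
 * recorded here; the data is fetched later when it is decoded.  Piped data
 * has already been copied out of the pipe, so it can be dumped immediately.
 */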
static int intel_pt_process_auxtrace_event(struct perf_session *session,
                                           union perf_event *event,
                                           struct perf_tool *tool __maybe_unused)
{
        struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
                                           auxtrace);

        if (pt->sampling_mode)
                return 0;

        if (!pt->data_queued) {
                struct auxtrace_buffer *buffer;
                off_t data_offset;
                int fd = perf_data_file__fd(session->file);
                int err;

                if (perf_data_file__is_pipe(session->file)) {
                        data_offset = 0;
                } else {
                        data_offset = lseek(fd, 0, SEEK_CUR);
                        if (data_offset == -1)
                                return -errno;
                }

                err = auxtrace_queues__add_event(&pt->queues, session, event,
                                                 data_offset, &buffer);
                if (err)
                        return err;

                /* Dump here now we have copied a piped trace out of the pipe */
                if (dump_trace) {
                        if (auxtrace_buffer__get_data(buffer, fd)) {
                                intel_pt_dump_event(pt, buffer->data,
                                                    buffer->size);
                                auxtrace_buffer__put_data(buffer);
                        }
                }
        }

        return 0;
}
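
/*
 * Helpers for injecting synthesized event attributes into the session: a
 * dummy perf_tool lets perf_event__synthesize_attr() hand the attribute
 * event back to perf_session__deliver_synth_event().
 */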
struct intel_pt_synth {
        struct perf_tool dummy_tool;
        struct perf_session *session;
};

static int intel_pt_event_synth(struct perf_tool *tool,
                                union perf_event *event,
                                struct perf_sample *sample __maybe_unused,
                                struct machine *machine __maybe_unused)
{
        struct intel_pt_synth *intel_pt_synth =
                        container_of(tool, struct intel_pt_synth, dummy_tool);

        return perf_session__deliver_synth_event(intel_pt_synth->session, event,
                                                 NULL);
}

static int intel_pt_synth_event(struct perf_session *session,
                                struct perf_event_attr *attr, u64 id)
{
        struct intel_pt_synth intel_pt_synth;

        memset(&intel_pt_synth, 0, sizeof(struct intel_pt_synth));
        intel_pt_synth.session = session;

        return perf_event__synthesize_attr(&intel_pt_synth.dummy_tool, attr, 1,
                                           &id, intel_pt_event_synth);
}
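
/*
 * Create the "instructions", "transactions" and "branches" events that will
 * carry the samples synthesized from decoded trace, basing their attributes
 * on the selected Intel PT event.
 */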
static int intel_pt_synth_events(struct intel_pt *pt,
                                 struct perf_session *session)
{
        struct perf_evlist *evlist = session->evlist;
        struct perf_evsel *evsel;
        struct perf_event_attr attr;
        bool found = false;
        u64 id;
        int err;

        evlist__for_each_entry(evlist, evsel) {
                if (evsel->attr.type == pt->pmu_type && evsel->ids) {
                        found = true;
                        break;
                }
        }

        if (!found) {
                pr_debug("There are no selected events with Intel Processor Trace data\n");
                return 0;
        }

        memset(&attr, 0, sizeof(struct perf_event_attr));
        attr.size = sizeof(struct perf_event_attr);
        attr.type = PERF_TYPE_HARDWARE;
        attr.sample_type = evsel->attr.sample_type & PERF_SAMPLE_MASK;
        attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
                            PERF_SAMPLE_PERIOD;
        if (pt->timeless_decoding)
                attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
        else
                attr.sample_type |= PERF_SAMPLE_TIME;
        if (!pt->per_cpu_mmaps)
                attr.sample_type &= ~(u64)PERF_SAMPLE_CPU;
        attr.exclude_user = evsel->attr.exclude_user;
        attr.exclude_kernel = evsel->attr.exclude_kernel;
        attr.exclude_hv = evsel->attr.exclude_hv;
        attr.exclude_host = evsel->attr.exclude_host;
        attr.exclude_guest = evsel->attr.exclude_guest;
        attr.sample_id_all = evsel->attr.sample_id_all;
        attr.read_format = evsel->attr.read_format;

        id = evsel->id[0] + 1000000000;
        if (!id)
                id = 1;

        if (pt->synth_opts.instructions) {
                attr.config = PERF_COUNT_HW_INSTRUCTIONS;
                if (pt->synth_opts.period_type == PERF_ITRACE_PERIOD_NANOSECS)
                        attr.sample_period =
                                intel_pt_ns_to_ticks(pt, pt->synth_opts.period);
                else
                        attr.sample_period = pt->synth_opts.period;
                pt->instructions_sample_period = attr.sample_period;
                if (pt->synth_opts.callchain)
                        attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
                if (pt->synth_opts.last_branch)
                        attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
                pr_debug("Synthesizing 'instructions' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
                         id, (u64)attr.sample_type);
                err = intel_pt_synth_event(session, &attr, id);
                if (err) {
                        pr_err("%s: failed to synthesize 'instructions' event type\n",
                               __func__);
                        return err;
                }
                pt->sample_instructions = true;
                pt->instructions_sample_type = attr.sample_type;
                pt->instructions_id = id;
                id += 1;
        }

        if (pt->synth_opts.transactions) {
                attr.config = PERF_COUNT_HW_INSTRUCTIONS;
                attr.sample_period = 1;
                if (pt->synth_opts.callchain)
                        attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
                if (pt->synth_opts.last_branch)
                        attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
                pr_debug("Synthesizing 'transactions' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
                         id, (u64)attr.sample_type);
                err = intel_pt_synth_event(session, &attr, id);
                if (err) {
                        pr_err("%s: failed to synthesize 'transactions' event type\n",
                               __func__);
                        return err;
                }
                pt->sample_transactions = true;
                pt->transactions_id = id;
                id += 1;
                evlist__for_each_entry(evlist, evsel) {
                        if (evsel->id && evsel->id[0] == pt->transactions_id) {
                                if (evsel->name)
                                        zfree(&evsel->name);
                                evsel->name = strdup("transactions");
                                break;
                        }
                }
        }

        if (pt->synth_opts.branches) {
                attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
                attr.sample_period = 1;
                attr.sample_type |= PERF_SAMPLE_ADDR;
                attr.sample_type &= ~(u64)PERF_SAMPLE_CALLCHAIN;
                attr.sample_type &= ~(u64)PERF_SAMPLE_BRANCH_STACK;
                pr_debug("Synthesizing 'branches' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
                         id, (u64)attr.sample_type);
                err = intel_pt_synth_event(session, &attr, id);
                if (err) {
                        pr_err("%s: failed to synthesize 'branches' event type\n",
                               __func__);
                        return err;
                }
                pt->sample_branches = true;
                pt->branches_sample_type = attr.sample_type;
                pt->branches_id = id;
        }

        pt->synth_needs_swap = evsel->needs_swap;

        return 0;
}
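
/*
 * Context switches may have been recorded either as sched:sched_switch
 * tracepoint samples or via the context_switch attribute flag; these helpers
 * detect which.
 */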
static struct perf_evsel *intel_pt_find_sched_switch(struct perf_evlist *evlist)
{
        struct perf_evsel *evsel;

        evlist__for_each_entry_reverse(evlist, evsel) {
                const char *name = perf_evsel__name(evsel);

                if (!strcmp(name, "sched:sched_switch"))
                        return evsel;
        }

        return NULL;
}

static bool intel_pt_find_switch(struct perf_evlist *evlist)
{
        struct perf_evsel *evsel;

        evlist__for_each_entry(evlist, evsel) {
                if (evsel->attr.context_switch)
                        return true;
        }

        return false;
}
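
/* perf config callback: only intel-pt.mispred-all is recognized here. */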
static int intel_pt_perf_config(const char *var, const char *value, void *data)
{
        struct intel_pt *pt = data;

        if (!strcmp(var, "intel-pt.mispred-all"))
                pt->mispred_all = perf_config_bool(var, value);

        return 0;
}
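
/*
 * Format strings for dumping the AUXTRACE_INFO private data, indexed by the
 * INTEL_PT_* enumeration.
 */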
static const char * const intel_pt_info_fmts[] = {
        [INTEL_PT_PMU_TYPE]             = "  PMU Type            %"PRId64"\n",
        [INTEL_PT_TIME_SHIFT]           = "  Time Shift          %"PRIu64"\n",
        [INTEL_PT_TIME_MULT]            = "  Time Multiplier     %"PRIu64"\n",
        [INTEL_PT_TIME_ZERO]            = "  Time Zero           %"PRIu64"\n",
        [INTEL_PT_CAP_USER_TIME_ZERO]   = "  Cap Time Zero       %"PRId64"\n",
        [INTEL_PT_TSC_BIT]              = "  TSC bit             %#"PRIx64"\n",
        [INTEL_PT_NORETCOMP_BIT]        = "  NoRETComp bit       %#"PRIx64"\n",
        [INTEL_PT_HAVE_SCHED_SWITCH]    = "  Have sched_switch   %"PRId64"\n",
        [INTEL_PT_SNAPSHOT_MODE]        = "  Snapshot mode       %"PRId64"\n",
        [INTEL_PT_PER_CPU_MMAPS]        = "  Per-cpu maps        %"PRId64"\n",
        [INTEL_PT_MTC_BIT]              = "  MTC bit             %#"PRIx64"\n",
        [INTEL_PT_TSC_CTC_N]            = "  TSC:CTC numerator   %"PRIu64"\n",
        [INTEL_PT_TSC_CTC_D]            = "  TSC:CTC denominator %"PRIu64"\n",
        [INTEL_PT_CYC_BIT]              = "  CYC bit             %#"PRIx64"\n",
};

static void intel_pt_print_info(u64 *arr, int start, int finish)
{
        int i;

        if (!dump_trace)
                return;

        for (i = start; i <= finish; i++)
                fprintf(stdout, intel_pt_info_fmts[i], arr[i]);
}
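
/*
 * Set up Intel PT decoding for the session: validate the AUXTRACE_INFO
 * private data, populate struct intel_pt from it, register the auxtrace
 * callbacks and synthesize the requested event types.
 */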
int intel_pt_process_auxtrace_info(union perf_event *event,
                                   struct perf_session *session)
{
        struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info;
        size_t min_sz = sizeof(u64) * INTEL_PT_PER_CPU_MMAPS;
        struct intel_pt *pt;
        int err;

        if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event) +
                                        min_sz)
                return -EINVAL;

        pt = zalloc(sizeof(struct intel_pt));
        if (!pt)
                return -ENOMEM;

        perf_config(intel_pt_perf_config, pt);

        err = auxtrace_queues__init(&pt->queues);
        if (err)
                goto err_free;

        intel_pt_log_set_name(INTEL_PT_PMU_NAME);

        pt->session = session;
        pt->machine = &session->machines.host; /* No kvm support */
        pt->auxtrace_type = auxtrace_info->type;
        pt->pmu_type = auxtrace_info->priv[INTEL_PT_PMU_TYPE];
        pt->tc.time_shift = auxtrace_info->priv[INTEL_PT_TIME_SHIFT];
        pt->tc.time_mult = auxtrace_info->priv[INTEL_PT_TIME_MULT];
        pt->tc.time_zero = auxtrace_info->priv[INTEL_PT_TIME_ZERO];
        pt->cap_user_time_zero = auxtrace_info->priv[INTEL_PT_CAP_USER_TIME_ZERO];
        pt->tsc_bit = auxtrace_info->priv[INTEL_PT_TSC_BIT];
        pt->noretcomp_bit = auxtrace_info->priv[INTEL_PT_NORETCOMP_BIT];
        pt->have_sched_switch = auxtrace_info->priv[INTEL_PT_HAVE_SCHED_SWITCH];
        pt->snapshot_mode = auxtrace_info->priv[INTEL_PT_SNAPSHOT_MODE];
        pt->per_cpu_mmaps = auxtrace_info->priv[INTEL_PT_PER_CPU_MMAPS];
        intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_PMU_TYPE,
                            INTEL_PT_PER_CPU_MMAPS);

        if (auxtrace_info->header.size >= sizeof(struct auxtrace_info_event) +
                                        (sizeof(u64) * INTEL_PT_CYC_BIT)) {
                pt->mtc_bit = auxtrace_info->priv[INTEL_PT_MTC_BIT];
                pt->mtc_freq_bits = auxtrace_info->priv[INTEL_PT_MTC_FREQ_BITS];
                pt->tsc_ctc_ratio_n = auxtrace_info->priv[INTEL_PT_TSC_CTC_N];
                pt->tsc_ctc_ratio_d = auxtrace_info->priv[INTEL_PT_TSC_CTC_D];
                pt->cyc_bit = auxtrace_info->priv[INTEL_PT_CYC_BIT];
                intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_MTC_BIT,
                                    INTEL_PT_CYC_BIT);
        }

        pt->timeless_decoding = intel_pt_timeless_decoding(pt);
        pt->have_tsc = intel_pt_have_tsc(pt);
        pt->sampling_mode = false;
        pt->est_tsc = !pt->timeless_decoding;

        pt->unknown_thread = thread__new(999999999, 999999999);
        if (!pt->unknown_thread) {
                err = -ENOMEM;
                goto err_free_queues;
        }

        /*
         * Since this thread will not be kept in any rbtree nor in a
         * list, initialize its list node so that at thread__put() the
         * current thread lifetime assumption is kept and we don't segfault
         * at list_del_init().
         */
        INIT_LIST_HEAD(&pt->unknown_thread->node);

        err = thread__set_comm(pt->unknown_thread, "unknown", 0);
        if (err)
                goto err_delete_thread;
        if (thread__init_map_groups(pt->unknown_thread, pt->machine)) {
                err = -ENOMEM;
                goto err_delete_thread;
        }

        pt->auxtrace.process_event = intel_pt_process_event;
        pt->auxtrace.process_auxtrace_event = intel_pt_process_auxtrace_event;
        pt->auxtrace.flush_events = intel_pt_flush;
        pt->auxtrace.free_events = intel_pt_free_events;
        pt->auxtrace.free = intel_pt_free;
        session->auxtrace = &pt->auxtrace;

        if (dump_trace)
                return 0;

        if (pt->have_sched_switch == 1) {
                pt->switch_evsel = intel_pt_find_sched_switch(session->evlist);
                if (!pt->switch_evsel) {
                        pr_err("%s: missing sched_switch event\n", __func__);
                        err = -EINVAL;
                        goto err_delete_thread;
                }
        } else if (pt->have_sched_switch == 2 &&
                   !intel_pt_find_switch(session->evlist)) {
                pr_err("%s: missing context_switch attribute flag\n", __func__);
                err = -EINVAL;
                goto err_delete_thread;
        }

        if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
                pt->synth_opts = *session->itrace_synth_opts;
        } else {
                itrace_synth_opts__set_default(&pt->synth_opts);
                if (use_browser != -1) {
                        pt->synth_opts.branches = false;
                        pt->synth_opts.callchain = true;
                }
                if (session->itrace_synth_opts)
                        pt->synth_opts.thread_stack =
                                session->itrace_synth_opts->thread_stack;
        }

        if (pt->synth_opts.log)
                intel_pt_log_enable();

        /* Maximum non-turbo ratio is TSC freq / 100 MHz */
        if (pt->tc.time_mult) {
                u64 tsc_freq = intel_pt_ns_to_ticks(pt, 1000000000);

                pt->max_non_turbo_ratio = (tsc_freq + 50000000) / 100000000;
                intel_pt_log("TSC frequency %"PRIu64"\n", tsc_freq);
                intel_pt_log("Maximum non-turbo ratio %u\n",
                             pt->max_non_turbo_ratio);
        }

        if (pt->synth_opts.calls)
                pt->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC |
                                       PERF_IP_FLAG_TRACE_END;
        if (pt->synth_opts.returns)
                pt->branches_filter |= PERF_IP_FLAG_RETURN |
                                       PERF_IP_FLAG_TRACE_BEGIN;

        if (pt->synth_opts.callchain && !symbol_conf.use_callchain) {
                symbol_conf.use_callchain = true;
                if (callchain_register_param(&callchain_param) < 0) {
                        symbol_conf.use_callchain = false;
                        pt->synth_opts.callchain = false;
                }
        }

        err = intel_pt_synth_events(pt, session);
        if (err)
                goto err_delete_thread;

        err = auxtrace_queues__process_index(&pt->queues, session);
        if (err)
                goto err_delete_thread;

        if (pt->queues.populated)
                pt->data_queued = true;

        if (pt->timeless_decoding)
                pr_debug2("Intel PT decoding without timestamps\n");

        return 0;

err_delete_thread:
        thread__zput(pt->unknown_thread);
err_free_queues:
        intel_pt_log_disable();
        auxtrace_queues__free(&pt->queues);
        session->auxtrace = NULL;
err_free:
        free(pt);
        return err;
}