perf_event_intel_lbr.c

#include <linux/perf_event.h>
#include <linux/types.h>

#include <asm/perf_event.h>
#include <asm/msr.h>
#include <asm/insn.h>

#include "perf_event.h"

enum {
	LBR_FORMAT_32		= 0x00,
	LBR_FORMAT_LIP		= 0x01,
	LBR_FORMAT_EIP		= 0x02,
	LBR_FORMAT_EIP_FLAGS	= 0x03,
	LBR_FORMAT_EIP_FLAGS2	= 0x04,
	LBR_FORMAT_INFO		= 0x05,
	LBR_FORMAT_MAX_KNOWN	= LBR_FORMAT_INFO,
};

static enum {
	LBR_EIP_FLAGS		= 1,
	LBR_TSX			= 2,
} lbr_desc[LBR_FORMAT_MAX_KNOWN + 1] = {
	[LBR_FORMAT_EIP_FLAGS]  = LBR_EIP_FLAGS,
	[LBR_FORMAT_EIP_FLAGS2] = LBR_EIP_FLAGS | LBR_TSX,
};
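
/*
 * lbr_desc[] above records which flag bits a given LBR format packs into
 * the upper bits of the LBR_FROM MSR: the EIP_FLAGS formats carry a
 * mispredict flag, and EIP_FLAGS2 additionally carries the TSX in_tx/abort
 * flags. From LBR_FORMAT_INFO onwards this metadata lives in the separate
 * MSR_LBR_INFO_* registers instead.
 */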

/*
 * Intel LBR_SELECT bits
 * Intel Vol3a, April 2011, Section 16.7 Table 16-10
 *
 * Hardware branch filter (not available on all CPUs)
 */
#define LBR_KERNEL_BIT		0 /* do not capture at ring0 */
#define LBR_USER_BIT		1 /* do not capture at ring > 0 */
#define LBR_JCC_BIT		2 /* do not capture conditional branches */
#define LBR_REL_CALL_BIT	3 /* do not capture relative calls */
#define LBR_IND_CALL_BIT	4 /* do not capture indirect calls */
#define LBR_RETURN_BIT		5 /* do not capture near returns */
#define LBR_IND_JMP_BIT		6 /* do not capture indirect jumps */
#define LBR_REL_JMP_BIT		7 /* do not capture relative jumps */
#define LBR_FAR_BIT		8 /* do not capture far branches */
#define LBR_CALL_STACK_BIT	9 /* enable call stack */

#define LBR_KERNEL	(1 << LBR_KERNEL_BIT)
#define LBR_USER	(1 << LBR_USER_BIT)
#define LBR_JCC		(1 << LBR_JCC_BIT)
#define LBR_REL_CALL	(1 << LBR_REL_CALL_BIT)
#define LBR_IND_CALL	(1 << LBR_IND_CALL_BIT)
#define LBR_RETURN	(1 << LBR_RETURN_BIT)
#define LBR_REL_JMP	(1 << LBR_REL_JMP_BIT)
#define LBR_IND_JMP	(1 << LBR_IND_JMP_BIT)
#define LBR_FAR		(1 << LBR_FAR_BIT)
#define LBR_CALL_STACK	(1 << LBR_CALL_STACK_BIT)

#define LBR_PLM (LBR_KERNEL | LBR_USER)

#define LBR_SEL_MASK	0x1ff	/* valid bits in LBR_SELECT */
#define LBR_NOT_SUPP	-1	/* LBR filter not supported */
#define LBR_IGN		0	/* ignored */

#define LBR_ANY		 \
	(LBR_JCC	|\
	 LBR_REL_CALL	|\
	 LBR_IND_CALL	|\
	 LBR_RETURN	|\
	 LBR_REL_JMP	|\
	 LBR_IND_JMP	|\
	 LBR_FAR)

#define LBR_FROM_FLAG_MISPRED	(1ULL << 63)
#define LBR_FROM_FLAG_IN_TX	(1ULL << 62)
#define LBR_FROM_FLAG_ABORT	(1ULL << 61)
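
/*
 * The three LBR_FROM_FLAG_* bits above are only meaningful for the
 * EIP_FLAGS/EIP_FLAGS2 formats, where they are stored in the top bits of
 * the LBR_FROM MSR; intel_pmu_lbr_read_64() strips them off by
 * sign-extending the remaining address bits.
 */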

/*
 * x86 control flow change classification
 * x86 control flow changes include branches, interrupts, traps, faults
 */
enum {
	X86_BR_NONE		= 0,      /* unknown */

	X86_BR_USER		= 1 << 0, /* branch target is user */
	X86_BR_KERNEL		= 1 << 1, /* branch target is kernel */

	X86_BR_CALL		= 1 << 2, /* call */
	X86_BR_RET		= 1 << 3, /* return */
	X86_BR_SYSCALL		= 1 << 4, /* syscall */
	X86_BR_SYSRET		= 1 << 5, /* syscall return */
	X86_BR_INT		= 1 << 6, /* sw interrupt */
	X86_BR_IRET		= 1 << 7, /* return from interrupt */
	X86_BR_JCC		= 1 << 8, /* conditional */
	X86_BR_JMP		= 1 << 9, /* jump */
	X86_BR_IRQ		= 1 << 10,/* hw interrupt or trap or fault */
	X86_BR_IND_CALL		= 1 << 11,/* indirect calls */
	X86_BR_ABORT		= 1 << 12,/* transaction abort */
	X86_BR_IN_TX		= 1 << 13,/* in transaction */
	X86_BR_NO_TX		= 1 << 14,/* not in transaction */
	X86_BR_ZERO_CALL	= 1 << 15,/* zero length call */
	X86_BR_CALL_STACK	= 1 << 16,/* call stack */
	X86_BR_IND_JMP		= 1 << 17,/* indirect jump */
};

#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
#define X86_BR_ANYTX (X86_BR_NO_TX | X86_BR_IN_TX)

#define X86_BR_ANY		 \
	(X86_BR_CALL		|\
	 X86_BR_RET		|\
	 X86_BR_SYSCALL		|\
	 X86_BR_SYSRET		|\
	 X86_BR_INT		|\
	 X86_BR_IRET		|\
	 X86_BR_JCC		|\
	 X86_BR_JMP		|\
	 X86_BR_IRQ		|\
	 X86_BR_ABORT		|\
	 X86_BR_IND_CALL	|\
	 X86_BR_IND_JMP		|\
	 X86_BR_ZERO_CALL)

#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)

#define X86_BR_ANY_CALL		 \
	(X86_BR_CALL		|\
	 X86_BR_IND_CALL	|\
	 X86_BR_ZERO_CALL	|\
	 X86_BR_SYSCALL		|\
	 X86_BR_IRQ		|\
	 X86_BR_INT)
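
/*
 * The X86_BR_* classes form the software branch filter: branch_type() below
 * decodes the instruction at the "from" address into one of these classes,
 * and intel_pmu_lbr_filter() drops any recorded branch whose class does not
 * match the mask the user requested via branch_sample_type.
 */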

static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc);

/*
 * We only support LBR implementations that have FREEZE_LBRS_ON_PMI,
 * otherwise it becomes near impossible to get a reliable stack.
 */

static void __intel_pmu_lbr_enable(bool pmi)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	u64 debugctl, lbr_select = 0, orig_debugctl;

	/*
	 * No need to unfreeze manually, as v4 can do that as part
	 * of the GLOBAL_STATUS ack.
	 */
	if (pmi && x86_pmu.version >= 4)
		return;

	/*
	 * No need to reprogram LBR_SELECT in a PMI, as it
	 * did not change.
	 */
	if (cpuc->lbr_sel && !pmi) {
		lbr_select = cpuc->lbr_sel->config;
		wrmsrl(MSR_LBR_SELECT, lbr_select);
	}

	rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
	orig_debugctl = debugctl;
	debugctl |= DEBUGCTLMSR_LBR;
	/*
	 * LBR callstack does not work well with FREEZE_LBRS_ON_PMI.
	 * If FREEZE_LBRS_ON_PMI is set, PMI near call/return instructions
	 * may cause superfluous increase/decrease of LBR_TOS.
	 */
	if (!(lbr_select & LBR_CALL_STACK))
		debugctl |= DEBUGCTLMSR_FREEZE_LBRS_ON_PMI;
	if (orig_debugctl != debugctl)
		wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
}

static void __intel_pmu_lbr_disable(void)
{
	u64 debugctl;

	rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
	debugctl &= ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
	wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
}

static void intel_pmu_lbr_reset_32(void)
{
	int i;

	for (i = 0; i < x86_pmu.lbr_nr; i++)
		wrmsrl(x86_pmu.lbr_from + i, 0);
}

static void intel_pmu_lbr_reset_64(void)
{
	int i;

	for (i = 0; i < x86_pmu.lbr_nr; i++) {
		wrmsrl(x86_pmu.lbr_from + i, 0);
		wrmsrl(x86_pmu.lbr_to + i, 0);
		if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
			wrmsrl(MSR_LBR_INFO_0 + i, 0);
	}
}

void intel_pmu_lbr_reset(void)
{
	if (!x86_pmu.lbr_nr)
		return;

	if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
		intel_pmu_lbr_reset_32();
	else
		intel_pmu_lbr_reset_64();
}

/*
 * TOS = most recently recorded branch
 */
static inline u64 intel_pmu_lbr_tos(void)
{
	u64 tos;

	rdmsrl(x86_pmu.lbr_tos, tos);
	return tos;
}
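
/*
 * The LBR MSRs form a ring of x86_pmu.lbr_nr from/to pairs and TOS indexes
 * the most recent one. The code below walks newest-first, so logical entry i
 * lives at MSR index (tos - i) & (lbr_nr - 1); e.g. with lbr_nr = 16 and
 * tos = 3, entry 0 is pair 3, entry 1 is pair 2, entry 2 is pair 1, and so on.
 */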

enum {
	LBR_NONE,
	LBR_VALID,
};

static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx)
{
	int i;
	unsigned lbr_idx, mask;
	u64 tos;

	if (task_ctx->lbr_callstack_users == 0 ||
	    task_ctx->lbr_stack_state == LBR_NONE) {
		intel_pmu_lbr_reset();
		return;
	}

	mask = x86_pmu.lbr_nr - 1;
	tos = intel_pmu_lbr_tos();
	for (i = 0; i < tos; i++) {
		lbr_idx = (tos - i) & mask;
		wrmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]);
		wrmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]);
		if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
			wrmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
	}
	task_ctx->lbr_stack_state = LBR_NONE;
}

static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
{
	int i;
	unsigned lbr_idx, mask;
	u64 tos;

	if (task_ctx->lbr_callstack_users == 0) {
		task_ctx->lbr_stack_state = LBR_NONE;
		return;
	}

	mask = x86_pmu.lbr_nr - 1;
	tos = intel_pmu_lbr_tos();
	for (i = 0; i < tos; i++) {
		lbr_idx = (tos - i) & mask;
		rdmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]);
		rdmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]);
		if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
			rdmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
	}
	task_ctx->lbr_stack_state = LBR_VALID;
}
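
/*
 * The save/restore pair above only runs when a task context has LBR
 * callstack users; lbr_stack_state marks whether task_ctx currently holds
 * a saved stack (LBR_VALID), so a sched-in never restores stale or
 * already-consumed data and falls back to a plain LBR reset instead.
 */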

void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct x86_perf_task_context *task_ctx;

	/*
	 * If LBR callstack feature is enabled and the stack was saved when
	 * the task was scheduled out, restore the stack. Otherwise flush
	 * the LBR stack.
	 */
	task_ctx = ctx ? ctx->task_ctx_data : NULL;
	if (task_ctx) {
		if (sched_in) {
			__intel_pmu_lbr_restore(task_ctx);
			cpuc->lbr_context = ctx;
		} else {
			__intel_pmu_lbr_save(task_ctx);
		}
		return;
	}

	/*
	 * When sampling the branch stack in system-wide mode, it may be
	 * necessary to flush the stack on context switch. This happens
	 * when the branch stack does not tag its entries with the pid
	 * of the current task. Otherwise it becomes impossible to
	 * associate a branch entry with a task. This ambiguity is more
	 * likely to appear when the branch stack supports priv level
	 * filtering and the user sets it to monitor only at the user
	 * level (which could be a useful measurement in system-wide
	 * mode). In that case, the risk is high of having a branch
	 * stack with branches from multiple tasks.
	 */
	if (sched_in) {
		intel_pmu_lbr_reset();
		cpuc->lbr_context = ctx;
	}
}

static inline bool branch_user_callstack(unsigned br_sel)
{
	return (br_sel & X86_BR_USER) && (br_sel & X86_BR_CALL_STACK);
}

void intel_pmu_lbr_enable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct x86_perf_task_context *task_ctx;

	if (!x86_pmu.lbr_nr)
		return;

	/*
	 * Reset the LBR stack if we changed task context to
	 * avoid data leaks.
	 */
	if (event->ctx->task && cpuc->lbr_context != event->ctx) {
		intel_pmu_lbr_reset();
		cpuc->lbr_context = event->ctx;
	}
	cpuc->br_sel = event->hw.branch_reg.reg;

	if (branch_user_callstack(cpuc->br_sel) && event->ctx &&
					event->ctx->task_ctx_data) {
		task_ctx = event->ctx->task_ctx_data;
		task_ctx->lbr_callstack_users++;
	}

	cpuc->lbr_users++;
	perf_sched_cb_inc(event->ctx->pmu);
}

void intel_pmu_lbr_disable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct x86_perf_task_context *task_ctx;

	if (!x86_pmu.lbr_nr)
		return;

	if (branch_user_callstack(cpuc->br_sel) && event->ctx &&
					event->ctx->task_ctx_data) {
		task_ctx = event->ctx->task_ctx_data;
		task_ctx->lbr_callstack_users--;
	}

	cpuc->lbr_users--;
	WARN_ON_ONCE(cpuc->lbr_users < 0);
	perf_sched_cb_dec(event->ctx->pmu);

	if (cpuc->enabled && !cpuc->lbr_users) {
		__intel_pmu_lbr_disable();
		/* avoid stale pointer */
		cpuc->lbr_context = NULL;
	}
}

void intel_pmu_lbr_enable_all(bool pmi)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (cpuc->lbr_users)
		__intel_pmu_lbr_enable(pmi);
}

void intel_pmu_lbr_disable_all(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (cpuc->lbr_users)
		__intel_pmu_lbr_disable();
}

static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
{
	unsigned long mask = x86_pmu.lbr_nr - 1;
	u64 tos = intel_pmu_lbr_tos();
	int i;

	for (i = 0; i < x86_pmu.lbr_nr; i++) {
		unsigned long lbr_idx = (tos - i) & mask;
		union {
			struct {
				u32 from;
				u32 to;
			};
			u64	lbr;
		} msr_lastbranch;

		rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);

		cpuc->lbr_entries[i].from	= msr_lastbranch.from;
		cpuc->lbr_entries[i].to		= msr_lastbranch.to;
		cpuc->lbr_entries[i].mispred	= 0;
		cpuc->lbr_entries[i].predicted	= 0;
		cpuc->lbr_entries[i].reserved	= 0;
	}
	cpuc->lbr_stack.nr = i;
}
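
/*
 * In LBR_FORMAT_32 each entry is a single 64-bit MSR whose low half is the
 * branch source and whose high half is the destination, which is why the
 * read above goes through a from/to union instead of separate from/to MSRs.
 */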

/*
 * Due to lack of segmentation in Linux the effective address (offset)
 * is the same as the linear address, allowing us to merge the LIP and EIP
 * LBR formats.
 */
static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
{
	unsigned long mask = x86_pmu.lbr_nr - 1;
	int lbr_format = x86_pmu.intel_cap.lbr_format;
	u64 tos = intel_pmu_lbr_tos();
	int i;
	int out = 0;
	int num = x86_pmu.lbr_nr;

	if (cpuc->lbr_sel->config & LBR_CALL_STACK)
		num = tos;

	for (i = 0; i < num; i++) {
		unsigned long lbr_idx = (tos - i) & mask;
		u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0;
		int skip = 0;
		u16 cycles = 0;
		int lbr_flags = lbr_desc[lbr_format];

		rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
		rdmsrl(x86_pmu.lbr_to + lbr_idx, to);

		if (lbr_format == LBR_FORMAT_INFO) {
			u64 info;

			rdmsrl(MSR_LBR_INFO_0 + lbr_idx, info);
			mis = !!(info & LBR_INFO_MISPRED);
			pred = !mis;
			in_tx = !!(info & LBR_INFO_IN_TX);
			abort = !!(info & LBR_INFO_ABORT);
			cycles = (info & LBR_INFO_CYCLES);
		}
		if (lbr_flags & LBR_EIP_FLAGS) {
			mis = !!(from & LBR_FROM_FLAG_MISPRED);
			pred = !mis;
			skip = 1;
		}
		if (lbr_flags & LBR_TSX) {
			in_tx = !!(from & LBR_FROM_FLAG_IN_TX);
			abort = !!(from & LBR_FROM_FLAG_ABORT);
			skip = 3;
		}
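		/*
		 * Shift the flag bits out and sign-extend what is left, so
		 * a kernel "from" address gets its canonical high bits back.
		 */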
		from = (u64)((((s64)from) << skip) >> skip);

		/*
		 * Some CPUs report duplicated abort records,
		 * with the second entry not having an abort bit set.
		 * Skip them here. This loop runs backwards,
		 * so we need to undo the previous record.
		 * If the abort just happened outside the window
		 * the extra entry cannot be removed.
		 */
		if (abort && x86_pmu.lbr_double_abort && out > 0)
			out--;

		cpuc->lbr_entries[out].from	 = from;
		cpuc->lbr_entries[out].to	 = to;
		cpuc->lbr_entries[out].mispred	 = mis;
		cpuc->lbr_entries[out].predicted = pred;
		cpuc->lbr_entries[out].in_tx	 = in_tx;
		cpuc->lbr_entries[out].abort	 = abort;
		cpuc->lbr_entries[out].cycles	 = cycles;
		cpuc->lbr_entries[out].reserved	 = 0;
		out++;
	}
	cpuc->lbr_stack.nr = out;
}

void intel_pmu_lbr_read(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (!cpuc->lbr_users)
		return;

	if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
		intel_pmu_lbr_read_32(cpuc);
	else
		intel_pmu_lbr_read_64(cpuc);

	intel_pmu_lbr_filter(cpuc);
}

/*
 * SW filter is used:
 * - in case there is no HW filter
 * - in case the HW filter has errata or limitations
 */
static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
{
	u64 br_type = event->attr.branch_sample_type;
	int mask = 0;

	if (br_type & PERF_SAMPLE_BRANCH_USER)
		mask |= X86_BR_USER;

	if (br_type & PERF_SAMPLE_BRANCH_KERNEL)
		mask |= X86_BR_KERNEL;

	/* we ignore BRANCH_HV here */

	if (br_type & PERF_SAMPLE_BRANCH_ANY)
		mask |= X86_BR_ANY;

	if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL)
		mask |= X86_BR_ANY_CALL;

	if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
		mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET;

	if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
		mask |= X86_BR_IND_CALL;

	if (br_type & PERF_SAMPLE_BRANCH_ABORT_TX)
		mask |= X86_BR_ABORT;

	if (br_type & PERF_SAMPLE_BRANCH_IN_TX)
		mask |= X86_BR_IN_TX;

	if (br_type & PERF_SAMPLE_BRANCH_NO_TX)
		mask |= X86_BR_NO_TX;

	if (br_type & PERF_SAMPLE_BRANCH_COND)
		mask |= X86_BR_JCC;

	if (br_type & PERF_SAMPLE_BRANCH_CALL_STACK) {
		if (!x86_pmu_has_lbr_callstack())
			return -EOPNOTSUPP;
		if (mask & ~(X86_BR_USER | X86_BR_KERNEL))
			return -EINVAL;
		mask |= X86_BR_CALL | X86_BR_IND_CALL | X86_BR_RET |
			X86_BR_CALL_STACK;
	}

	if (br_type & PERF_SAMPLE_BRANCH_IND_JUMP)
		mask |= X86_BR_IND_JMP;

	/*
	 * stash actual user request into reg, it may
	 * be used by fixup code for some CPU
	 */
	event->hw.branch_reg.reg = mask;
	return 0;
}
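
/*
 * The X86_BR_* mask computed above is stashed in hw.branch_reg.reg and read
 * back as cpuc->br_sel when the event is enabled, so intel_pmu_lbr_filter()
 * can enforce the exact user request even when the hardware filter is
 * coarser than what was asked for.
 */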

/*
 * setup the HW LBR filter
 * Used only when available, may not be enough to disambiguate
 * all branches, may need the help of the SW filter
 */
static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
{
	struct hw_perf_event_extra *reg;
	u64 br_type = event->attr.branch_sample_type;
	u64 mask = 0, v;
	int i;

	for (i = 0; i < PERF_SAMPLE_BRANCH_MAX_SHIFT; i++) {
		if (!(br_type & (1ULL << i)))
			continue;

		v = x86_pmu.lbr_sel_map[i];
		if (v == LBR_NOT_SUPP)
			return -EOPNOTSUPP;

		if (v != LBR_IGN)
			mask |= v;
	}
	reg = &event->hw.branch_reg;
	reg->idx = EXTRA_REG_LBR;

	/*
	 * The first 9 bits (LBR_SEL_MASK) in LBR_SELECT operate
	 * in suppress mode. So LBR_SELECT should be set to
	 * (~mask & LBR_SEL_MASK) | (mask & ~LBR_SEL_MASK)
	 */
	reg->config = mask ^ x86_pmu.lbr_sel_mask;

	return 0;
}
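
/*
 * Worked example for the XOR above: asking for user-level conditional
 * branches only gives mask = LBR_USER | LBR_JCC = 0x006; with
 * lbr_sel_mask = LBR_SEL_MASK (0x1ff) the programmed value is
 * 0x006 ^ 0x1ff = 0x1f9, i.e. every suppress bit set except "user" and
 * "conditional", while non-suppress bits such as LBR_CALL_STACK pass
 * through unchanged.
 */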

int intel_pmu_setup_lbr_filter(struct perf_event *event)
{
	int ret = 0;

	/*
	 * no LBR on this PMU
	 */
	if (!x86_pmu.lbr_nr)
		return -EOPNOTSUPP;

	/*
	 * setup SW LBR filter
	 */
	ret = intel_pmu_setup_sw_lbr_filter(event);
	if (ret)
		return ret;

	/*
	 * setup HW LBR filter, if any
	 */
	if (x86_pmu.lbr_sel_map)
		ret = intel_pmu_setup_hw_lbr_filter(event);

	return ret;
}

/*
 * return the type of control flow change at address "from"
 * the instruction is not necessarily a branch (in case of interrupt).
 *
 * The branch type returned also includes the priv level of the
 * target of the control flow change (X86_BR_USER, X86_BR_KERNEL).
 *
 * If a branch type is unknown OR the instruction cannot be
 * decoded (e.g., text page not present), then X86_BR_NONE is
 * returned.
 */
static int branch_type(unsigned long from, unsigned long to, int abort)
{
	struct insn insn;
	void *addr;
	int bytes_read, bytes_left;
	int ret = X86_BR_NONE;
	int ext, to_plm, from_plm;
	u8 buf[MAX_INSN_SIZE];
	int is64 = 0;

	to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER;
	from_plm = kernel_ip(from) ? X86_BR_KERNEL : X86_BR_USER;

	/*
	 * may be zero if the LBR did not fill up after a reset by the time
	 * we get a PMU interrupt
	 */
	if (from == 0 || to == 0)
		return X86_BR_NONE;

	if (abort)
		return X86_BR_ABORT | to_plm;

	if (from_plm == X86_BR_USER) {
		/*
		 * can happen if measuring at the user level only
		 * and we interrupt in a kernel thread, e.g., idle.
		 */
		if (!current->mm)
			return X86_BR_NONE;

		/* may fail if text not present */
		bytes_left = copy_from_user_nmi(buf, (void __user *)from,
						MAX_INSN_SIZE);
		bytes_read = MAX_INSN_SIZE - bytes_left;
		if (!bytes_read)
			return X86_BR_NONE;

		addr = buf;
	} else {
		/*
		 * The LBR logs any address in the IP, even if the IP just
		 * faulted. This means userspace can control the from address.
		 * Ensure we don't blindly read any address by validating it is
		 * a known text address.
		 */
		if (kernel_text_address(from)) {
			addr = (void *)from;
			/*
			 * Assume we can get the maximum possible size
			 * when grabbing kernel data. This is not
			 * _strictly_ true since we could possibly be
			 * executing up next to a memory hole, but
			 * it is very unlikely to be a problem.
			 */
			bytes_read = MAX_INSN_SIZE;
		} else {
			return X86_BR_NONE;
		}
	}

	/*
	 * decoder needs to know the ABI especially
	 * on 64-bit systems running 32-bit apps
	 */
#ifdef CONFIG_X86_64
	is64 = kernel_ip((unsigned long)addr) || !test_thread_flag(TIF_IA32);
#endif
	insn_init(&insn, addr, bytes_read, is64);
	insn_get_opcode(&insn);
	if (!insn.opcode.got)
		return X86_BR_ABORT;

	switch (insn.opcode.bytes[0]) {
	case 0xf:
		switch (insn.opcode.bytes[1]) {
		case 0x05: /* syscall */
		case 0x34: /* sysenter */
			ret = X86_BR_SYSCALL;
			break;
		case 0x07: /* sysret */
		case 0x35: /* sysexit */
			ret = X86_BR_SYSRET;
			break;
		case 0x80 ... 0x8f: /* conditional */
			ret = X86_BR_JCC;
			break;
		default:
			ret = X86_BR_NONE;
		}
		break;
	case 0x70 ... 0x7f: /* conditional */
		ret = X86_BR_JCC;
		break;
	case 0xc2: /* near ret */
	case 0xc3: /* near ret */
	case 0xca: /* far ret */
	case 0xcb: /* far ret */
		ret = X86_BR_RET;
		break;
	case 0xcf: /* iret */
		ret = X86_BR_IRET;
		break;
	case 0xcc ... 0xce: /* int */
		ret = X86_BR_INT;
		break;
	case 0xe8: /* call near rel */
		insn_get_immediate(&insn);
		if (insn.immediate1.value == 0) {
			/* zero length call */
			ret = X86_BR_ZERO_CALL;
			break;
		}
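		/* non-zero displacement: fall through and treat as a call */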
	case 0x9a: /* call far absolute */
		ret = X86_BR_CALL;
		break;
	case 0xe0 ... 0xe3: /* loop jmp */
		ret = X86_BR_JCC;
		break;
	case 0xe9 ... 0xeb: /* jmp */
		ret = X86_BR_JMP;
		break;
	case 0xff: /* call near absolute, call far absolute ind */
		insn_get_modrm(&insn);
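		/*
		 * Bits 5:3 of the ModRM byte are the opcode extension: /2 and
		 * /3 are indirect calls, /4 and /5 are indirect jumps.
		 */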
		ext = (insn.modrm.bytes[0] >> 3) & 0x7;
		switch (ext) {
		case 2: /* near ind call */
		case 3: /* far ind call */
			ret = X86_BR_IND_CALL;
			break;
		case 4:
		case 5:
			ret = X86_BR_IND_JMP;
			break;
		}
		break;
	default:
		ret = X86_BR_NONE;
	}
	/*
	 * interrupts, traps, faults (and thus ring transitions) may
	 * occur on any instruction. Thus, to classify them correctly,
	 * we need to first look at the from and to priv levels. If they
	 * are different and to is in the kernel, then it indicates
	 * a ring transition. If the from instruction is not a ring
	 * transition instr (syscall, sysenter, int), then it means
	 * it was an irq, trap or fault.
	 *
	 * we have no way of detecting kernel to kernel faults.
	 */
	if (from_plm == X86_BR_USER && to_plm == X86_BR_KERNEL
	    && ret != X86_BR_SYSCALL && ret != X86_BR_INT)
		ret = X86_BR_IRQ;

	/*
	 * branch priv level determined by target as
	 * is done by HW when LBR_SELECT is implemented
	 */
	if (ret != X86_BR_NONE)
		ret |= to_plm;

	return ret;
}

/*
 * implement actual branch filter based on user demand.
 * Hardware may not exactly satisfy that request, thus
 * we need to inspect opcodes. Mismatched branches are
 * discarded. Therefore, the number of branches returned
 * in a PERF_SAMPLE_BRANCH_STACK sample may vary.
 */
static void
intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
{
	u64 from, to;
	int br_sel = cpuc->br_sel;
	int i, j, type;
	bool compress = false;

	/* if sampling all branches, then nothing to filter */
	if ((br_sel & X86_BR_ALL) == X86_BR_ALL)
		return;

	for (i = 0; i < cpuc->lbr_stack.nr; i++) {

		from = cpuc->lbr_entries[i].from;
		to = cpuc->lbr_entries[i].to;

		type = branch_type(from, to, cpuc->lbr_entries[i].abort);
		if (type != X86_BR_NONE && (br_sel & X86_BR_ANYTX)) {
			if (cpuc->lbr_entries[i].in_tx)
				type |= X86_BR_IN_TX;
			else
				type |= X86_BR_NO_TX;
		}

		/* if type does not correspond, then discard */
		if (type == X86_BR_NONE || (br_sel & type) != type) {
			cpuc->lbr_entries[i].from = 0;
			compress = true;
		}
	}

	if (!compress)
		return;

	/* remove all entries with from=0 */
	for (i = 0; i < cpuc->lbr_stack.nr; ) {
		if (!cpuc->lbr_entries[i].from) {
			j = i;
			while (++j < cpuc->lbr_stack.nr)
				cpuc->lbr_entries[j-1] = cpuc->lbr_entries[j];
			cpuc->lbr_stack.nr--;
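			/*
			 * The entry shifted into slot i may itself have
			 * from == 0; re-check before advancing so runs of
			 * discarded entries are fully compacted.
			 */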
			if (!cpuc->lbr_entries[i].from)
				continue;
		}
		i++;
	}
}

/*
 * Map interface branch filters onto LBR filters
 */
static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
	[PERF_SAMPLE_BRANCH_ANY_SHIFT]		= LBR_ANY,
	[PERF_SAMPLE_BRANCH_USER_SHIFT]		= LBR_USER,
	[PERF_SAMPLE_BRANCH_KERNEL_SHIFT]	= LBR_KERNEL,
	[PERF_SAMPLE_BRANCH_HV_SHIFT]		= LBR_IGN,
	[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]	= LBR_RETURN | LBR_REL_JMP
						| LBR_IND_JMP | LBR_FAR,
	/*
	 * NHM/WSM erratum: must include REL_JMP+IND_JMP to get CALL branches
	 */
	[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] =
	 LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR,
	/*
	 * NHM/WSM erratum: must include IND_JMP to capture IND_CALL
	 */
	[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]	= LBR_IND_CALL | LBR_IND_JMP,
	[PERF_SAMPLE_BRANCH_COND_SHIFT]		= LBR_JCC,
	[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]	= LBR_IND_JMP,
};

static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
	[PERF_SAMPLE_BRANCH_ANY_SHIFT]		= LBR_ANY,
	[PERF_SAMPLE_BRANCH_USER_SHIFT]		= LBR_USER,
	[PERF_SAMPLE_BRANCH_KERNEL_SHIFT]	= LBR_KERNEL,
	[PERF_SAMPLE_BRANCH_HV_SHIFT]		= LBR_IGN,
	[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]	= LBR_RETURN | LBR_FAR,
	[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT]	= LBR_REL_CALL | LBR_IND_CALL
						| LBR_FAR,
	[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]	= LBR_IND_CALL,
	[PERF_SAMPLE_BRANCH_COND_SHIFT]		= LBR_JCC,
	[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]	= LBR_IND_JMP,
};

static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
	[PERF_SAMPLE_BRANCH_ANY_SHIFT]		= LBR_ANY,
	[PERF_SAMPLE_BRANCH_USER_SHIFT]		= LBR_USER,
	[PERF_SAMPLE_BRANCH_KERNEL_SHIFT]	= LBR_KERNEL,
	[PERF_SAMPLE_BRANCH_HV_SHIFT]		= LBR_IGN,
	[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]	= LBR_RETURN | LBR_FAR,
	[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT]	= LBR_REL_CALL | LBR_IND_CALL
						| LBR_FAR,
	[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]	= LBR_IND_CALL,
	[PERF_SAMPLE_BRANCH_COND_SHIFT]		= LBR_JCC,
	[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT]	= LBR_REL_CALL | LBR_IND_CALL
						| LBR_RETURN | LBR_CALL_STACK,
	[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]	= LBR_IND_JMP,
};

/* core */
void __init intel_pmu_lbr_init_core(void)
{
	x86_pmu.lbr_nr     = 4;
	x86_pmu.lbr_tos    = MSR_LBR_TOS;
	x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
	x86_pmu.lbr_to     = MSR_LBR_CORE_TO;

	/*
	 * SW branch filter usage:
	 * - compensate for lack of HW filter
	 */
	pr_cont("4-deep LBR, ");
}

/* nehalem/westmere */
void __init intel_pmu_lbr_init_nhm(void)
{
	x86_pmu.lbr_nr     = 16;
	x86_pmu.lbr_tos    = MSR_LBR_TOS;
	x86_pmu.lbr_from   = MSR_LBR_NHM_FROM;
	x86_pmu.lbr_to     = MSR_LBR_NHM_TO;

	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
	x86_pmu.lbr_sel_map  = nhm_lbr_sel_map;

	/*
	 * SW branch filter usage:
	 * - workaround LBR_SEL errata (see above)
	 * - support syscall, sysret capture.
	 *   That requires LBR_FAR, but that means far
	 *   jmps need to be filtered out
	 */
	pr_cont("16-deep LBR, ");
}

/* sandy bridge */
void __init intel_pmu_lbr_init_snb(void)
{
	x86_pmu.lbr_nr	 = 16;
	x86_pmu.lbr_tos	 = MSR_LBR_TOS;
	x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
	x86_pmu.lbr_to	 = MSR_LBR_NHM_TO;

	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
	x86_pmu.lbr_sel_map  = snb_lbr_sel_map;

	/*
	 * SW branch filter usage:
	 * - support syscall, sysret capture.
	 *   That requires LBR_FAR, but that means far
	 *   jmps need to be filtered out
	 */
	pr_cont("16-deep LBR, ");
}

/* haswell */
void intel_pmu_lbr_init_hsw(void)
{
	x86_pmu.lbr_nr	 = 16;
	x86_pmu.lbr_tos	 = MSR_LBR_TOS;
	x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
	x86_pmu.lbr_to	 = MSR_LBR_NHM_TO;

	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
	x86_pmu.lbr_sel_map  = hsw_lbr_sel_map;

	pr_cont("16-deep LBR, ");
}

/* skylake */
__init void intel_pmu_lbr_init_skl(void)
{
	x86_pmu.lbr_nr	 = 32;
	x86_pmu.lbr_tos	 = MSR_LBR_TOS;
	x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
	x86_pmu.lbr_to	 = MSR_LBR_NHM_TO;

	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
	x86_pmu.lbr_sel_map  = hsw_lbr_sel_map;

	/*
	 * SW branch filter usage:
	 * - support syscall, sysret capture.
	 *   That requires LBR_FAR, but that means far
	 *   jmps need to be filtered out
	 */
	pr_cont("32-deep LBR, ");
}

/* atom */
void __init intel_pmu_lbr_init_atom(void)
{
	/*
	 * only models starting at stepping 10 seem
	 * to have an operational LBR which can freeze
	 * on PMU interrupt
	 */
	if (boot_cpu_data.x86_model == 28
	    && boot_cpu_data.x86_mask < 10) {
		pr_cont("LBR disabled due to erratum");
		return;
	}

	x86_pmu.lbr_nr	   = 8;
	x86_pmu.lbr_tos    = MSR_LBR_TOS;
	x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
	x86_pmu.lbr_to	   = MSR_LBR_CORE_TO;

	/*
	 * SW branch filter usage:
	 * - compensate for lack of HW filter
	 */
	pr_cont("8-deep LBR, ");
}