ibs.c

/*
 * Performance events - AMD IBS
 *
 * Copyright (C) 2011 Advanced Micro Devices, Inc., Robert Richter
 *
 * For licensing details see kernel-base/COPYING
 */

#include <linux/perf_event.h>
#include <linux/init.h>
#include <linux/export.h>
#include <linux/pci.h>
#include <linux/ptrace.h>
#include <linux/syscore_ops.h>
#include <linux/sched/clock.h>

#include <asm/apic.h>

#include "../perf_event.h"

static u32 ibs_caps;

#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)

#include <linux/kprobes.h>
#include <linux/hardirq.h>

#include <asm/nmi.h>

#define IBS_FETCH_CONFIG_MASK   (IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT)
#define IBS_OP_CONFIG_MASK      IBS_OP_MAX_CNT

/*
 * IBS states:
 *
 * ENABLED; tracks the pmu::add(), pmu::del() state, when set the counter is taken
 * and any further add()s must fail.
 *
 * STARTED/STOPPING/STOPPED; deal with pmu::start(), pmu::stop() state but are
 * complicated by the fact that the IBS hardware can send late NMIs (ie. after
 * we've cleared the EN bit).
 *
 * In order to consume these late NMIs we have the STOPPED state, any NMI that
 * happens after we've cleared the EN state will clear this bit and report the
 * NMI handled (this is fundamentally racy in the face of multiple NMI sources,
 * someone else can consume our BIT and our NMI will go unhandled).
 *
 * And since we cannot set/clear this separate bit together with the EN bit,
 * there are races; if we cleared STARTED early, an NMI could land in
 * between clearing STARTED and clearing the EN bit (in fact multiple NMIs
 * could happen if the period is small enough), and consume our STOPPED bit
 * and trigger streams of unhandled NMIs.
 *
 * If, however, we clear STARTED late, an NMI can hit between clearing the
 * EN bit and clearing STARTED, still see STARTED set and process the event.
 * If this event has the VALID bit clear, we bail properly, but this
 * is not a given. With VALID set we can end up calling pmu::stop() again
 * (the throttle logic) and trigger the WARNs in there.
 *
 * So what we do is set STOPPING before clearing EN to avoid the pmu::stop()
 * nesting, and clear STARTED late, so that we have a well defined state over
 * the clearing of the EN bit.
 *
 * XXX: we could probably be using !atomic bitops for all this.
 */
enum ibs_states {
        IBS_ENABLED     = 0,
        IBS_STARTED     = 1,
        IBS_STOPPING    = 2,
        IBS_STOPPED     = 3,

        IBS_MAX_STATES,
};

struct cpu_perf_ibs {
        struct perf_event       *event;
        unsigned long           state[BITS_TO_LONGS(IBS_MAX_STATES)];
};

struct perf_ibs {
        struct pmu                      pmu;
        unsigned int                    msr;
        u64                             config_mask;
        u64                             cnt_mask;
        u64                             enable_mask;
        u64                             valid_mask;
        u64                             max_period;
        unsigned long                   offset_mask[1];
        int                             offset_max;
        struct cpu_perf_ibs __percpu    *pcpu;

        struct attribute                **format_attrs;
        struct attribute_group          format_group;
        const struct attribute_group    *attr_groups[2];

        u64                             (*get_count)(u64 config);
};

struct perf_ibs_data {
        u32             size;
        union {
                u32     data[0];        /* data buffer starts here */
                u32     caps;
        };
        u64             regs[MSR_AMD64_IBS_REG_COUNT_MAX];
};
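
/*
 * Compute the hardware period for the next sample from the generic perf
 * state (sample_period/period_left), clamped to [min, max]. Returns
 * non-zero when the software period has overflowed and a sample should
 * be generated.
 */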
static int
perf_event_set_period(struct hw_perf_event *hwc, u64 min, u64 max, u64 *hw_period)
{
        s64 left = local64_read(&hwc->period_left);
        s64 period = hwc->sample_period;
        int overflow = 0;

        /*
         * If we are way outside a reasonable range then just skip forward:
         */
        if (unlikely(left <= -period)) {
                left = period;
                local64_set(&hwc->period_left, left);
                hwc->last_period = period;
                overflow = 1;
        }

        if (unlikely(left < (s64)min)) {
                left += period;
                local64_set(&hwc->period_left, left);
                hwc->last_period = period;
                overflow = 1;
        }

        /*
         * If the hw period that triggers the sw overflow is too short
         * we might hit the irq handler. This biases the results.
         * Thus we shorten the next-to-last period and set the last
         * period to the max period.
         */
        if (left > max) {
                left -= max;
                if (left > max)
                        left = max;
                else if (left < min)
                        left = min;
        }

        *hw_period = (u64)left;

        return overflow;
}
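
/*
 * Fold a new raw count into event->count and period_left. Returns 0 when
 * the cmpxchg of prev_count lost against a concurrent NMI update; the
 * caller is expected to re-read the count and retry.
 */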
static int
perf_event_try_update(struct perf_event *event, u64 new_raw_count, int width)
{
        struct hw_perf_event *hwc = &event->hw;
        int shift = 64 - width;
        u64 prev_raw_count;
        u64 delta;

        /*
         * Careful: an NMI might modify the previous event value.
         *
         * Our tactic to handle this is to first atomically read and
         * exchange a new raw count - then add that new-prev delta
         * count to the generic event atomically:
         */
        prev_raw_count = local64_read(&hwc->prev_count);
        if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
                            new_raw_count) != prev_raw_count)
                return 0;

        /*
         * Now we have the new raw value and have updated the prev
         * timestamp already. We can now calculate the elapsed delta
         * (event-)time and add that to the generic event.
         *
         * Careful, not all hw sign-extends above the physical width
         * of the count.
         */
        delta = (new_raw_count << shift) - (prev_raw_count << shift);
        delta >>= shift;

        local64_add(delta, &event->count);
        local64_sub(delta, &hwc->period_left);

        return 1;
}

static struct perf_ibs perf_ibs_fetch;
static struct perf_ibs perf_ibs_op;

static struct perf_ibs *get_ibs_pmu(int type)
{
        if (perf_ibs_fetch.pmu.type == type)
                return &perf_ibs_fetch;
        if (perf_ibs_op.pmu.type == type)
                return &perf_ibs_op;
        return NULL;
}

/*
 * Use IBS for precise event sampling:
 *
 *  perf record -a -e cpu-cycles:p ...    # use ibs op counting cycle count
 *  perf record -a -e r076:p ...          # same as -e cpu-cycles:p
 *  perf record -a -e r0C1:p ...          # use ibs op counting micro-ops
 *
 * IbsOpCntCtl (bit 19) of IBS Execution Control Register (IbsOpCtl,
 * MSRC001_1033) is used to select either cycle or micro-ops counting
 * mode.
 *
 * The rip of IBS samples has skid 0. Thus, IBS supports precise
 * levels 1 and 2 and the PERF_EFLAGS_EXACT is set. In rare cases the
 * rip is invalid when IBS was not able to record the rip correctly.
 * We clear PERF_EFLAGS_EXACT and take the rip from pt_regs then.
 */
static int perf_ibs_precise_event(struct perf_event *event, u64 *config)
{
        switch (event->attr.precise_ip) {
        case 0:
                return -ENOENT;
        case 1:
        case 2:
                break;
        default:
                return -EOPNOTSUPP;
        }

        switch (event->attr.type) {
        case PERF_TYPE_HARDWARE:
                switch (event->attr.config) {
                case PERF_COUNT_HW_CPU_CYCLES:
                        *config = 0;
                        return 0;
                }
                break;
        case PERF_TYPE_RAW:
                switch (event->attr.config) {
                case 0x0076:
                        *config = 0;
                        return 0;
                case 0x00C1:
                        *config = IBS_OP_CNT_CTL;
                        return 0;
                }
                break;
        default:
                return -ENOENT;
        }

        return -EOPNOTSUPP;
}

static const struct perf_event_attr ibs_notsupp = {
        .exclude_user   = 1,
        .exclude_kernel = 1,
        .exclude_hv     = 1,
        .exclude_idle   = 1,
        .exclude_host   = 1,
        .exclude_guest  = 1,
};
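
/*
 * pmu::event_init() for IBS: validate the requested config against the
 * PMU's config/cnt masks and derive the initial sample period. IBS counts
 * in units of 16, so the lower 4 bits of the period are ignored and the
 * minimum period is 0x10.
 */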
static int perf_ibs_init(struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;
        struct perf_ibs *perf_ibs;
        u64 max_cnt, config;
        int ret;

        perf_ibs = get_ibs_pmu(event->attr.type);
        if (perf_ibs) {
                config = event->attr.config;
        } else {
                perf_ibs = &perf_ibs_op;
                ret = perf_ibs_precise_event(event, &config);
                if (ret)
                        return ret;
        }

        if (event->pmu != &perf_ibs->pmu)
                return -ENOENT;

        if (perf_flags(&event->attr) & perf_flags(&ibs_notsupp))
                return -EINVAL;

        if (config & ~perf_ibs->config_mask)
                return -EINVAL;

        if (hwc->sample_period) {
                if (config & perf_ibs->cnt_mask)
                        /* raw max_cnt may not be set */
                        return -EINVAL;
                if (!event->attr.sample_freq && hwc->sample_period & 0x0f)
                        /*
                         * The lower 4 bits can not be set in the IBS max
                         * cnt, but we allow it in case we adjust the
                         * sample period to set a frequency.
                         */
                        return -EINVAL;
                hwc->sample_period &= ~0x0FULL;
                if (!hwc->sample_period)
                        hwc->sample_period = 0x10;
        } else {
                max_cnt = config & perf_ibs->cnt_mask;
                config &= ~perf_ibs->cnt_mask;
                event->attr.sample_period = max_cnt << 4;
                hwc->sample_period = event->attr.sample_period;
        }

        if (!hwc->sample_period)
                return -EINVAL;

        /*
         * If we modify hwc->sample_period, we also need to update
         * hwc->last_period and hwc->period_left.
         */
        hwc->last_period = hwc->sample_period;
        local64_set(&hwc->period_left, hwc->sample_period);

        hwc->config_base = perf_ibs->msr;
        hwc->config = config;

        return 0;
}

static int perf_ibs_set_period(struct perf_ibs *perf_ibs,
                               struct hw_perf_event *hwc, u64 *period)
{
        int overflow;

        /* ignore lower 4 bits in min count: */
        overflow = perf_event_set_period(hwc, 1<<4, perf_ibs->max_period, period);
        local64_set(&hwc->prev_count, 0);

        return overflow;
}

static u64 get_ibs_fetch_count(u64 config)
{
        return (config & IBS_FETCH_CNT) >> 12;
}
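
/*
 * Reconstruct the op count from IbsOpCtl: if the valid bit is set the
 * counter has rolled over, so the programmed max count contributes a
 * full period; on CPUs with the RDWROPCNT capability the current count
 * field is added on top.
 */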
static u64 get_ibs_op_count(u64 config)
{
        u64 count = 0;

        if (config & IBS_OP_VAL)
                count += (config & IBS_OP_MAX_CNT) << 4; /* cnt rolled over */

        if (ibs_caps & IBS_CAPS_RDWROPCNT)
                count += (config & IBS_OP_CUR_CNT) >> 32;

        return count;
}

static void
perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event,
                      u64 *config)
{
        u64 count = perf_ibs->get_count(*config);

        /*
         * Set width to 64 since we do not overflow on max width but
         * instead on max count. In perf_ibs_set_period() we clear
         * prev count manually on overflow.
         */
        while (!perf_event_try_update(event, count, 64)) {
                rdmsrl(event->hw.config_base, *config);
                count = perf_ibs->get_count(*config);
        }
}

static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs,
                                         struct hw_perf_event *hwc, u64 config)
{
        wrmsrl(hwc->config_base, hwc->config | config | perf_ibs->enable_mask);
}

/*
 * Erratum #420 Instruction-Based Sampling Engine May Generate
 * Interrupt that Cannot Be Cleared:
 *
 * Must clear counter mask first, then clear the enable bit. See
 * Revision Guide for AMD Family 10h Processors, Publication #41322.
 */
static inline void perf_ibs_disable_event(struct perf_ibs *perf_ibs,
                                          struct hw_perf_event *hwc, u64 config)
{
        config &= ~perf_ibs->cnt_mask;
        wrmsrl(hwc->config_base, config);
        config &= ~perf_ibs->enable_mask;
        wrmsrl(hwc->config_base, config);
}

/*
 * We cannot restore the ibs pmu state, so we always need to update
 * the event while stopping it and then reset the state when starting
 * again. Thus, we ignore the PERF_EF_RELOAD and PERF_EF_UPDATE flags
 * in perf_ibs_start()/perf_ibs_stop() and instead always do it.
 */
static void perf_ibs_start(struct perf_event *event, int flags)
{
        struct hw_perf_event *hwc = &event->hw;
        struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
        struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
        u64 period;

        if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
                return;

        WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
        hwc->state = 0;

        perf_ibs_set_period(perf_ibs, hwc, &period);
        /*
         * Set STARTED before enabling the hardware, such that a subsequent NMI
         * must observe it.
         */
        set_bit(IBS_STARTED, pcpu->state);
        clear_bit(IBS_STOPPING, pcpu->state);
        perf_ibs_enable_event(perf_ibs, hwc, period >> 4);

        perf_event_update_userpage(event);
}

static void perf_ibs_stop(struct perf_event *event, int flags)
{
        struct hw_perf_event *hwc = &event->hw;
        struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
        struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
        u64 config;
        int stopping;

        if (test_and_set_bit(IBS_STOPPING, pcpu->state))
                return;

        stopping = test_bit(IBS_STARTED, pcpu->state);

        if (!stopping && (hwc->state & PERF_HES_UPTODATE))
                return;

        rdmsrl(hwc->config_base, config);

        if (stopping) {
                /*
                 * Set STOPPED before disabling the hardware, such that it
                 * must be visible to NMIs the moment we clear the EN bit,
                 * at which point we can generate an !VALID sample which
                 * we need to consume.
                 */
                set_bit(IBS_STOPPED, pcpu->state);
                perf_ibs_disable_event(perf_ibs, hwc, config);
                /*
                 * Clear STARTED after disabling the hardware; if it were
                 * cleared before, an NMI that hits after the clear but
                 * before clearing the EN bit might think it a spurious
                 * NMI and not handle it.
                 *
                 * Clearing it after, however, creates the problem of the NMI
                 * handler seeing STARTED but not having a valid sample.
                 */
                clear_bit(IBS_STARTED, pcpu->state);
                WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
                hwc->state |= PERF_HES_STOPPED;
        }

        if (hwc->state & PERF_HES_UPTODATE)
                return;

        /*
         * Clear valid bit to not count rollovers on update, rollovers
         * are only updated in the irq handler.
         */
        config &= ~perf_ibs->valid_mask;

        perf_ibs_event_update(perf_ibs, event, &config);
        hwc->state |= PERF_HES_UPTODATE;
}
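
/*
 * There is one IBS counter of each kind per CPU, tracked via the
 * IBS_ENABLED bit: a second add() on the same CPU fails with -ENOSPC.
 */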
static int perf_ibs_add(struct perf_event *event, int flags)
{
        struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
        struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);

        if (test_and_set_bit(IBS_ENABLED, pcpu->state))
                return -ENOSPC;

        event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

        pcpu->event = event;

        if (flags & PERF_EF_START)
                perf_ibs_start(event, PERF_EF_RELOAD);

        return 0;
}

static void perf_ibs_del(struct perf_event *event, int flags)
{
        struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
        struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);

        if (!test_and_clear_bit(IBS_ENABLED, pcpu->state))
                return;

        perf_ibs_stop(event, PERF_EF_UPDATE);

        pcpu->event = NULL;

        perf_event_update_userpage(event);
}

static void perf_ibs_read(struct perf_event *event) { }

PMU_FORMAT_ATTR(rand_en, "config:57");
PMU_FORMAT_ATTR(cnt_ctl, "config:19");

static struct attribute *ibs_fetch_format_attrs[] = {
        &format_attr_rand_en.attr,
        NULL,
};

static struct attribute *ibs_op_format_attrs[] = {
        NULL,   /* &format_attr_cnt_ctl.attr if IBS_CAPS_OPCNT */
        NULL,
};

static struct perf_ibs perf_ibs_fetch = {
        .pmu = {
                .task_ctx_nr    = perf_invalid_context,
                .event_init     = perf_ibs_init,
                .add            = perf_ibs_add,
                .del            = perf_ibs_del,
                .start          = perf_ibs_start,
                .stop           = perf_ibs_stop,
                .read           = perf_ibs_read,
        },
        .msr            = MSR_AMD64_IBSFETCHCTL,
        .config_mask    = IBS_FETCH_CONFIG_MASK,
        .cnt_mask       = IBS_FETCH_MAX_CNT,
        .enable_mask    = IBS_FETCH_ENABLE,
        .valid_mask     = IBS_FETCH_VAL,
        .max_period     = IBS_FETCH_MAX_CNT << 4,
        .offset_mask    = { MSR_AMD64_IBSFETCH_REG_MASK },
        .offset_max     = MSR_AMD64_IBSFETCH_REG_COUNT,
        .format_attrs   = ibs_fetch_format_attrs,
        .get_count      = get_ibs_fetch_count,
};

static struct perf_ibs perf_ibs_op = {
        .pmu = {
                .task_ctx_nr    = perf_invalid_context,
                .event_init     = perf_ibs_init,
                .add            = perf_ibs_add,
                .del            = perf_ibs_del,
                .start          = perf_ibs_start,
                .stop           = perf_ibs_stop,
                .read           = perf_ibs_read,
        },
        .msr            = MSR_AMD64_IBSOPCTL,
        .config_mask    = IBS_OP_CONFIG_MASK,
        .cnt_mask       = IBS_OP_MAX_CNT,
        .enable_mask    = IBS_OP_ENABLE,
        .valid_mask     = IBS_OP_VAL,
        .max_period     = IBS_OP_MAX_CNT << 4,
        .offset_mask    = { MSR_AMD64_IBSOP_REG_MASK },
        .offset_max     = MSR_AMD64_IBSOP_REG_COUNT,
        .format_attrs   = ibs_op_format_attrs,
        .get_count      = get_ibs_op_count,
};
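
/*
 * NMI handler for one IBS PMU: bail out for spurious/late NMIs, read the
 * valid register block into a raw sample, fix up the instruction pointer
 * (PERF_EFLAGS_EXACT unless the hardware flagged the RIP as invalid),
 * hand the sample to perf_event_overflow() and re-arm the counter.
 */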
static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
{
        struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
        struct perf_event *event = pcpu->event;
        struct hw_perf_event *hwc;
        struct perf_sample_data data;
        struct perf_raw_record raw;
        struct pt_regs regs;
        struct perf_ibs_data ibs_data;
        int offset, size, check_rip, offset_max, throttle = 0;
        unsigned int msr;
        u64 *buf, *config, period;

        if (!test_bit(IBS_STARTED, pcpu->state)) {
fail:
                /*
                 * Catch spurious interrupts after stopping IBS: After
                 * disabling IBS there could still be incoming NMIs
                 * with samples that even have the valid bit cleared.
                 * Mark all these NMIs as handled.
                 */
                if (test_and_clear_bit(IBS_STOPPED, pcpu->state))
                        return 1;

                return 0;
        }

        if (WARN_ON_ONCE(!event))
                goto fail;

        hwc = &event->hw;
        msr = hwc->config_base;
        buf = ibs_data.regs;
        rdmsrl(msr, *buf);
        if (!(*buf++ & perf_ibs->valid_mask))
                goto fail;

        config = &ibs_data.regs[0];
        perf_ibs_event_update(perf_ibs, event, config);
        perf_sample_data_init(&data, 0, hwc->last_period);
        if (!perf_ibs_set_period(perf_ibs, hwc, &period))
                goto out;       /* no sw counter overflow */

        ibs_data.caps = ibs_caps;
        size = 1;
        offset = 1;
        check_rip = (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_RIPINVALIDCHK));
        if (event->attr.sample_type & PERF_SAMPLE_RAW)
                offset_max = perf_ibs->offset_max;
        else if (check_rip)
                offset_max = 2;
        else
                offset_max = 1;
        do {
                rdmsrl(msr + offset, *buf++);
                size++;
                offset = find_next_bit(perf_ibs->offset_mask,
                                       perf_ibs->offset_max,
                                       offset + 1);
        } while (offset < offset_max);
        if (event->attr.sample_type & PERF_SAMPLE_RAW) {
                /*
                 * Read IbsBrTarget and IbsOpData4 separately
                 * depending on their availability.
                 * Can't add to offset_max as they are staggered.
                 */
                if (ibs_caps & IBS_CAPS_BRNTRGT) {
                        rdmsrl(MSR_AMD64_IBSBRTARGET, *buf++);
                        size++;
                }
                if (ibs_caps & IBS_CAPS_OPDATA4) {
                        rdmsrl(MSR_AMD64_IBSOPDATA4, *buf++);
                        size++;
                }
        }
        ibs_data.size = sizeof(u64) * size;

        regs = *iregs;
        if (check_rip && (ibs_data.regs[2] & IBS_RIP_INVALID)) {
                regs.flags &= ~PERF_EFLAGS_EXACT;
        } else {
                set_linear_ip(&regs, ibs_data.regs[1]);
                regs.flags |= PERF_EFLAGS_EXACT;
        }

        if (event->attr.sample_type & PERF_SAMPLE_RAW) {
                raw = (struct perf_raw_record){
                        .frag = {
                                .size = sizeof(u32) + ibs_data.size,
                                .data = ibs_data.data,
                        },
                };
                data.raw = &raw;
        }

        throttle = perf_event_overflow(event, &data, &regs);
out:
        if (throttle)
                perf_ibs_stop(event, 0);
        else
                perf_ibs_enable_event(perf_ibs, hwc, period >> 4);

        perf_event_update_userpage(event);

        return 1;
}
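
/*
 * The local NMI handler polls both IBS PMUs and reports the NMI as
 * handled if either of them claimed it. The sched_clock() delta feeds
 * perf's sample-rate throttling via perf_sample_event_took().
 */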
static int
perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs)
{
        u64 stamp = sched_clock();
        int handled = 0;

        handled += perf_ibs_handle_irq(&perf_ibs_fetch, regs);
        handled += perf_ibs_handle_irq(&perf_ibs_op, regs);

        if (handled)
                inc_irq_stat(apic_perf_irqs);

        perf_sample_event_took(sched_clock() - stamp);

        return handled;
}
NOKPROBE_SYMBOL(perf_ibs_nmi_handler);

static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name)
{
        struct cpu_perf_ibs __percpu *pcpu;
        int ret;

        pcpu = alloc_percpu(struct cpu_perf_ibs);
        if (!pcpu)
                return -ENOMEM;

        perf_ibs->pcpu = pcpu;

        /* register attributes */
        if (perf_ibs->format_attrs[0]) {
                memset(&perf_ibs->format_group, 0, sizeof(perf_ibs->format_group));
                perf_ibs->format_group.name  = "format";
                perf_ibs->format_group.attrs = perf_ibs->format_attrs;

                memset(&perf_ibs->attr_groups, 0, sizeof(perf_ibs->attr_groups));
                perf_ibs->attr_groups[0]  = &perf_ibs->format_group;
                perf_ibs->pmu.attr_groups = perf_ibs->attr_groups;
        }

        ret = perf_pmu_register(&perf_ibs->pmu, name, -1);
        if (ret) {
                perf_ibs->pcpu = NULL;
                free_percpu(pcpu);
        }

        return ret;
}

static __init void perf_event_ibs_init(void)
{
        struct attribute **attr = ibs_op_format_attrs;

        perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");

        if (ibs_caps & IBS_CAPS_OPCNT) {
                perf_ibs_op.config_mask |= IBS_OP_CNT_CTL;
                *attr++ = &format_attr_cnt_ctl.attr;
        }
        perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");

        register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
        pr_info("perf: AMD IBS detected (0x%08x)\n", ibs_caps);
}

#else /* defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) */

static __init void perf_event_ibs_init(void) { }

#endif

/* IBS - apic initialization, for perf and oprofile */

static __init u32 __get_ibs_caps(void)
{
        u32 caps;
        unsigned int max_level;

        if (!boot_cpu_has(X86_FEATURE_IBS))
                return 0;

        /* check IBS cpuid feature flags */
        max_level = cpuid_eax(0x80000000);
        if (max_level < IBS_CPUID_FEATURES)
                return IBS_CAPS_DEFAULT;

        caps = cpuid_eax(IBS_CPUID_FEATURES);
        if (!(caps & IBS_CAPS_AVAIL))
                /* cpuid flags not valid */
                return IBS_CAPS_DEFAULT;

        return caps;
}

u32 get_ibs_caps(void)
{
        return ibs_caps;
}

EXPORT_SYMBOL(get_ibs_caps);

static inline int get_eilvt(int offset)
{
        return !setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 1);
}

static inline int put_eilvt(int offset)
{
        return !setup_APIC_eilvt(offset, 0, 0, 1);
}

/*
 * Check and reserve APIC extended interrupt LVT offset for IBS if available.
 */
static inline int ibs_eilvt_valid(void)
{
        int offset;
        u64 val;
        int valid = 0;

        preempt_disable();

        rdmsrl(MSR_AMD64_IBSCTL, val);
        offset = val & IBSCTL_LVT_OFFSET_MASK;

        if (!(val & IBSCTL_LVT_OFFSET_VALID)) {
                pr_err(FW_BUG "cpu %d, invalid IBS interrupt offset %d (MSR%08X=0x%016llx)\n",
                       smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
                goto out;
        }

        if (!get_eilvt(offset)) {
                pr_err(FW_BUG "cpu %d, IBS interrupt offset %d not available (MSR%08X=0x%016llx)\n",
                       smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
                goto out;
        }

        valid = 1;
out:
        preempt_enable();

        return valid;
}
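
/*
 * Write the chosen EILVT offset into the IBSCTL register of every
 * northbridge (one per node, reached via PCI config space) and read it
 * back to verify that the write took effect.
 */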
static int setup_ibs_ctl(int ibs_eilvt_off)
{
        struct pci_dev *cpu_cfg;
        int nodes;
        u32 value = 0;

        nodes = 0;
        cpu_cfg = NULL;
        do {
                cpu_cfg = pci_get_device(PCI_VENDOR_ID_AMD,
                                         PCI_DEVICE_ID_AMD_10H_NB_MISC,
                                         cpu_cfg);
                if (!cpu_cfg)
                        break;
                ++nodes;
                pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off
                                       | IBSCTL_LVT_OFFSET_VALID);
                pci_read_config_dword(cpu_cfg, IBSCTL, &value);
                if (value != (ibs_eilvt_off | IBSCTL_LVT_OFFSET_VALID)) {
                        pci_dev_put(cpu_cfg);
                        pr_debug("Failed to setup IBS LVT offset, IBSCTL = 0x%08x\n",
                                 value);
                        return -EINVAL;
                }
        } while (1);

        if (!nodes) {
                pr_debug("No CPU node configured for IBS\n");
                return -ENODEV;
        }

        return 0;
}

/*
 * This runs only on the current cpu. We try to find an LVT offset and
 * setup the local APIC. For this we must disable preemption. On
 * success we initialize all nodes with this offset. This then updates
 * the offset in the per-node IBS_CTL msr. The per-core APIC setup of
 * the IBS interrupt vector is handled by x86_pmu_amd_ibs_starting_cpu(),
 * which uses the new offset.
 */
static void force_ibs_eilvt_setup(void)
{
        int offset;
        int ret;

        preempt_disable();
        /* find the next free available EILVT entry, skip offset 0 */
        for (offset = 1; offset < APIC_EILVT_NR_MAX; offset++) {
                if (get_eilvt(offset))
                        break;
        }
        preempt_enable();

        if (offset == APIC_EILVT_NR_MAX) {
                pr_debug("No EILVT entry available\n");
                return;
        }

        ret = setup_ibs_ctl(offset);
        if (ret)
                goto out;

        if (!ibs_eilvt_valid())
                goto out;

        pr_info("LVT offset %d assigned\n", offset);

        return;
out:
        preempt_disable();
        put_eilvt(offset);
        preempt_enable();
        return;
}

static void ibs_eilvt_setup(void)
{
        /*
         * Force LVT offset assignment for family 10h: The offsets are
         * not assigned by the BIOS for this family, so the OS is
         * responsible for doing it. If the OS assignment fails, fall
         * back to the BIOS settings and try to set it up with those.
         */
        if (boot_cpu_data.x86 == 0x10)
                force_ibs_eilvt_setup();
}

static inline int get_ibs_lvt_offset(void)
{
        u64 val;

        rdmsrl(MSR_AMD64_IBSCTL, val);
        if (!(val & IBSCTL_LVT_OFFSET_VALID))
                return -EINVAL;

        return val & IBSCTL_LVT_OFFSET_MASK;
}
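
/*
 * Program (or clear) the local APIC extended LVT entry used for IBS NMIs
 * on this CPU, using the offset advertised in the IBS_CTL msr.
 */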
static void setup_APIC_ibs(void)
{
        int offset;

        offset = get_ibs_lvt_offset();
        if (offset < 0)
                goto failed;

        if (!setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 0))
                return;
failed:
        pr_warn("perf: IBS APIC setup failed on cpu #%d\n",
                smp_processor_id());
}

static void clear_APIC_ibs(void)
{
        int offset;

        offset = get_ibs_lvt_offset();
        if (offset >= 0)
                setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1);
}

static int x86_pmu_amd_ibs_starting_cpu(unsigned int cpu)
{
        setup_APIC_ibs();
        return 0;
}

#ifdef CONFIG_PM

static int perf_ibs_suspend(void)
{
        clear_APIC_ibs();
        return 0;
}

static void perf_ibs_resume(void)
{
        ibs_eilvt_setup();
        setup_APIC_ibs();
}

static struct syscore_ops perf_ibs_syscore_ops = {
        .resume         = perf_ibs_resume,
        .suspend        = perf_ibs_suspend,
};

static void perf_ibs_pm_init(void)
{
        register_syscore_ops(&perf_ibs_syscore_ops);
}

#else

static inline void perf_ibs_pm_init(void) { }

#endif

static int x86_pmu_amd_ibs_dying_cpu(unsigned int cpu)
{
        clear_APIC_ibs();
        return 0;
}
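
/*
 * Boot-time entry point: detect the IBS capabilities via cpuid, make sure
 * a valid LVT offset is programmed, then wire up the PM callbacks, the
 * CPU hotplug state and finally the two IBS PMUs.
 */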
static __init int amd_ibs_init(void)
{
        u32 caps;

        caps = __get_ibs_caps();
        if (!caps)
                return -ENODEV; /* ibs not supported by the cpu */

        ibs_eilvt_setup();

        if (!ibs_eilvt_valid())
                return -EINVAL;

        perf_ibs_pm_init();

        ibs_caps = caps;
        /* make ibs_caps visible to other cpus: */
        smp_mb();
        /*
         * x86_pmu_amd_ibs_starting_cpu will be called from core on
         * all online cpus.
         */
        cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_IBS_STARTING,
                          "perf/x86/amd/ibs:starting",
                          x86_pmu_amd_ibs_starting_cpu,
                          x86_pmu_amd_ibs_dying_cpu);

        perf_event_ibs_init();

        return 0;
}

/* Since we need the pci subsystem to init ibs we can't do this earlier: */
device_initcall(amd_ibs_init);