perf_event_intel_bts.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525
  1. /*
  2. * BTS PMU driver for perf
  3. * Copyright (c) 2013-2014, Intel Corporation.
  4. *
  5. * This program is free software; you can redistribute it and/or modify it
  6. * under the terms and conditions of the GNU General Public License,
  7. * version 2, as published by the Free Software Foundation.
  8. *
  9. * This program is distributed in the hope it will be useful, but WITHOUT
  10. * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11. * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
  12. * more details.
  13. */
  14. #undef DEBUG
  15. #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  16. #include <linux/bitops.h>
  17. #include <linux/types.h>
  18. #include <linux/slab.h>
  19. #include <linux/debugfs.h>
  20. #include <linux/device.h>
  21. #include <linux/coredump.h>
  22. #include <asm-generic/sizes.h>
  23. #include <asm/perf_event.h>
  24. #include "perf_event.h"
/*
 * Per-CPU BTS state: the perf AUX output handle, a backup of the
 * debug store (DS) fields this driver clobbers while active, and a
 * flag telling the PMI path whether the event should be (re)started.
 */
struct bts_ctx {
	struct perf_output_handle	handle;
	struct debug_store		ds_back;	/* saved in bts_event_add(), restored in bts_event_del() */
	int				started;	/* set/cleared in bts_event_start()/stop(), read in intel_bts_interrupt() */
};
static DEFINE_PER_CPU(struct bts_ctx, bts_ctx);

/* Size of one BTS record in the DS area — presumably three u64 fields
 * (from, to, flags); confirm against the Intel SDM DS-area layout. */
#define BTS_RECORD_SIZE 24
/* Headroom kept below the buffer end when placing the interrupt
 * threshold, see bts_config_buffer() and bts_buffer_reset(). */
#define BTS_SAFETY_MARGIN 4080
/*
 * One physically contiguous chunk of the AUX area.
 */
struct bts_phys {
	struct page	*page;		/* first page of the chunk */
	unsigned long	size;		/* usable bytes; trimmed to a multiple of BTS_RECORD_SIZE in setup_aux */
	unsigned long	offset;		/* byte offset of this chunk within the whole AUX buffer */
	unsigned long	displacement;	/* bytes skipped at the chunk start to keep records aligned */
};
  39. struct bts_buffer {
  40. size_t real_size; /* multiple of BTS_RECORD_SIZE */
  41. unsigned int nr_pages;
  42. unsigned int nr_bufs;
  43. unsigned int cur_buf;
  44. bool snapshot;
  45. local_t data_size;
  46. local_t lost;
  47. local_t head;
  48. unsigned long end;
  49. void **data_pages;
  50. struct bts_phys buf[0];
  51. };
struct pmu bts_pmu;

/* Implemented outside this file, in the core Intel PMU code. */
void intel_pmu_enable_bts(u64 config);
void intel_pmu_disable_bts(void);
  55. static size_t buf_size(struct page *page)
  56. {
  57. return 1 << (PAGE_SHIFT + page_private(page));
  58. }
/*
 * PMU ->setup_aux callback: build the software descriptor for an AUX
 * area made of @nr_pages pages at @pages, possibly grouped into
 * high-order physically contiguous chunks (marked via page_private()).
 *
 * Returns the new bts_buffer, or NULL if the page array is malformed,
 * overwrite mode is requested with more than one chunk, or allocation
 * fails.  The returned pointer is freed by bts_buffer_free_aux().
 */
static void *
bts_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool overwrite)
{
	struct bts_buffer *buf;
	struct page *page;
	/* cpu == -1 doubles as NUMA_NO_NODE for kzalloc_node() */
	int node = (cpu == -1) ? cpu : cpu_to_node(cpu);
	unsigned long offset;
	size_t size = nr_pages << PAGE_SHIFT;
	int pg, nbuf, pad;

	/* count all the high order buffers */
	for (pg = 0, nbuf = 0; pg < nr_pages;) {
		page = virt_to_page(pages[pg]);
		if (WARN_ON_ONCE(!PagePrivate(page) && nr_pages > 1))
			return NULL;
		pg += 1 << page_private(page);
		nbuf++;
	}

	/*
	 * to avoid interrupts in overwrite mode, only allow one physical
	 */
	if (overwrite && nbuf > 1)
		return NULL;

	/* one trailing bts_phys entry per physical chunk */
	buf = kzalloc_node(offsetof(struct bts_buffer, buf[nbuf]), GFP_KERNEL, node);
	if (!buf)
		return NULL;

	buf->nr_pages = nr_pages;
	buf->nr_bufs = nbuf;
	buf->snapshot = overwrite;
	buf->data_pages = pages;
	buf->real_size = size - size % BTS_RECORD_SIZE;

	/*
	 * Carve each chunk so it starts record-aligned: the pad left at
	 * the end of chunk N becomes the displacement of chunk N+1.
	 */
	for (pg = 0, nbuf = 0, offset = 0, pad = 0; nbuf < buf->nr_bufs; nbuf++) {
		unsigned int __nr_pages;

		page = virt_to_page(pages[pg]);
		__nr_pages = PagePrivate(page) ? 1 << page_private(page) : 1;
		buf->buf[nbuf].page = page;
		buf->buf[nbuf].offset = offset;
		buf->buf[nbuf].displacement = (pad ? BTS_RECORD_SIZE - pad : 0);
		buf->buf[nbuf].size = buf_size(page) - buf->buf[nbuf].displacement;
		pad = buf->buf[nbuf].size % BTS_RECORD_SIZE;
		buf->buf[nbuf].size -= pad;		/* keep size a record multiple */
		pg += __nr_pages;
		offset += __nr_pages << PAGE_SHIFT;
	}

	return buf;
}
  104. static void bts_buffer_free_aux(void *data)
  105. {
  106. kfree(data);
  107. }
  108. static unsigned long bts_buffer_offset(struct bts_buffer *buf, unsigned int idx)
  109. {
  110. return buf->buf[idx].offset + buf->buf[idx].displacement;
  111. }
/*
 * Program this CPU's Debug Store (DS) area to point at the current
 * physical chunk of @buf.
 *
 * Non-snapshot mode places an interrupt threshold below the window end
 * so the PMI fires with room to spare (BTS_SAFETY_MARGIN, or at least
 * one record); snapshot mode pushes the threshold past the absolute
 * maximum so it never fires.
 */
static void
bts_config_buffer(struct bts_buffer *buf)
{
	int cpu = raw_smp_processor_id();
	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
	struct bts_phys *phys = &buf->buf[buf->cur_buf];
	unsigned long index, thresh = 0, end = phys->size;
	struct page *page = phys->page;

	index = local_read(&buf->head);

	if (!buf->snapshot) {
		/* clamp the window to buf->end set by bts_buffer_reset() */
		if (buf->end < phys->offset + buf_size(page))
			end = buf->end - phys->offset - phys->displacement;

		/* make index relative to the start of this chunk */
		index -= phys->offset + phys->displacement;

		if (end - index > BTS_SAFETY_MARGIN)
			thresh = end - BTS_SAFETY_MARGIN;
		else if (end - index > BTS_RECORD_SIZE)
			thresh = end - BTS_RECORD_SIZE;
		else
			thresh = end;
	}

	ds->bts_buffer_base = (u64)(long)page_address(page) + phys->displacement;
	ds->bts_index = ds->bts_buffer_base + index;
	ds->bts_absolute_maximum = ds->bts_buffer_base + end;
	/* snapshot: threshold beyond the maximum so no PMI is ever raised */
	ds->bts_interrupt_threshold = !buf->snapshot
		? ds->bts_buffer_base + thresh
		: ds->bts_absolute_maximum + BTS_RECORD_SIZE;
}
  139. static void bts_buffer_pad_out(struct bts_phys *phys, unsigned long head)
  140. {
  141. unsigned long index = head - phys->offset;
  142. memset(page_address(phys->page) + index, 0, phys->size - index);
  143. }
  144. static bool bts_buffer_is_full(struct bts_buffer *buf, struct bts_ctx *bts)
  145. {
  146. if (buf->snapshot)
  147. return false;
  148. if (local_read(&buf->data_size) >= bts->handle.size ||
  149. bts->handle.size - local_read(&buf->data_size) < BTS_RECORD_SIZE)
  150. return true;
  151. return false;
  152. }
/*
 * Fold the hardware write pointer (ds->bts_index) back into the
 * software state: advance buf->head, accumulate the newly written
 * bytes into data_size, and count a loss if the hardware ran into
 * the absolute maximum.  Snapshot mode just mirrors head into
 * data_size.
 */
static void bts_update(struct bts_ctx *bts)
{
	int cpu = raw_smp_processor_id();
	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
	struct bts_buffer *buf = perf_get_aux(&bts->handle);
	unsigned long index = ds->bts_index - ds->bts_buffer_base, old, head;

	if (!buf)
		return;

	/* translate the chunk-relative hw index into a buffer-wide head */
	head = index + bts_buffer_offset(buf, buf->cur_buf);
	old = local_xchg(&buf->head, head);

	if (!buf->snapshot) {
		if (old == head)
			return;

		if (ds->bts_index >= ds->bts_absolute_maximum)
			local_inc(&buf->lost);

		/*
		 * old and head are always in the same physical buffer, so we
		 * can subtract them to get the data size.
		 */
		local_add(head - old, &buf->data_size);
	} else {
		local_set(&buf->data_size, head);
	}
}
  177. static void __bts_event_start(struct perf_event *event)
  178. {
  179. struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
  180. struct bts_buffer *buf = perf_get_aux(&bts->handle);
  181. u64 config = 0;
  182. if (!buf || bts_buffer_is_full(buf, bts))
  183. return;
  184. event->hw.state = 0;
  185. if (!buf->snapshot)
  186. config |= ARCH_PERFMON_EVENTSEL_INT;
  187. if (!event->attr.exclude_kernel)
  188. config |= ARCH_PERFMON_EVENTSEL_OS;
  189. if (!event->attr.exclude_user)
  190. config |= ARCH_PERFMON_EVENTSEL_USR;
  191. bts_config_buffer(buf);
  192. /*
  193. * local barrier to make sure that ds configuration made it
  194. * before we enable BTS
  195. */
  196. wmb();
  197. intel_pmu_enable_bts(config);
  198. }
/*
 * PMU ->start callback.  The started flag is set after the actual
 * start so intel_bts_interrupt()/intel_bts_enable_local() know this
 * event is live; ordering relative to __bts_event_start() is
 * deliberate.
 */
static void bts_event_start(struct perf_event *event, int flags)
{
	struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);

	__bts_event_start(event);

	/* PMI handler: this counter is running and likely generating PMIs */
	ACCESS_ONCE(bts->started) = 1;
}
/*
 * Switch BTS off and mark the event stopped.  The hardware is always
 * disabled first, even when the state already says STOPPED, before
 * the software state is touched.
 */
static void __bts_event_stop(struct perf_event *event)
{
	/*
	 * No extra synchronization is mandated by the documentation to have
	 * BTS data stores globally visible.
	 */
	intel_pmu_disable_bts();

	if (event->hw.state & PERF_HES_STOPPED)
		return;

	ACCESS_ONCE(event->hw.state) |= PERF_HES_STOPPED;
}
/*
 * PMU ->stop callback.  Clears the started flag *before* disabling
 * BTS so a racing intel_bts_interrupt()/intel_bts_enable_local()
 * won't restart the event; optionally flushes the final hardware
 * state into the buffer.
 */
static void bts_event_stop(struct perf_event *event, int flags)
{
	struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);

	/* PMI handler: don't restart this counter */
	ACCESS_ONCE(bts->started) = 0;

	__bts_event_stop(event);

	if (flags & PERF_EF_UPDATE)
		bts_update(bts);
}
  226. void intel_bts_enable_local(void)
  227. {
  228. struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
  229. if (bts->handle.event && bts->started)
  230. __bts_event_start(bts->handle.event);
  231. }
  232. void intel_bts_disable_local(void)
  233. {
  234. struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
  235. if (bts->handle.event)
  236. __bts_event_stop(bts->handle.event);
  237. }
/*
 * Compute the output window for the next batch of records and store
 * its end in buf->end.  Called with a freshly begun AUX handle, from
 * bts_event_add() and from the PMI path.
 *
 * When the current chunk is nearly exhausted (<= BTS_SAFETY_MARGIN)
 * and the next chunk offers more room, the tail of the current chunk
 * is zero-padded and the skipped bytes are accounted to the handle
 * before advancing cur_buf.  Returns 0, -EINVAL on a head mismatch,
 * or -ENOSPC when no window could be made.
 */
static int
bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle)
{
	unsigned long head, space, next_space, pad, gap, skip, wakeup;
	unsigned int next_buf;
	struct bts_phys *phys, *next_phys;
	int ret;

	if (buf->snapshot)
		return 0;

	/* handle->head modulo the buffer size must agree with our head */
	head = handle->head & ((buf->nr_pages << PAGE_SHIFT) - 1);
	if (WARN_ON_ONCE(head != local_read(&buf->head)))
		return -EINVAL;

	phys = &buf->buf[buf->cur_buf];
	space = phys->offset + phys->displacement + phys->size - head;
	pad = space;	/* bytes left in this chunk, before clamping to the handle */
	if (space > handle->size) {
		space = handle->size;
		space -= space % BTS_RECORD_SIZE;
	}
	if (space <= BTS_SAFETY_MARGIN) {
		/* See if next phys buffer has more space */
		next_buf = buf->cur_buf + 1;
		if (next_buf >= buf->nr_bufs)
			next_buf = 0;
		next_phys = &buf->buf[next_buf];
		/* unusable bytes between the chunks: current tail + next displacement */
		gap = buf_size(phys->page) - phys->displacement - phys->size +
		      next_phys->displacement;
		skip = pad + gap;
		if (handle->size >= skip) {
			next_space = next_phys->size;
			if (next_space + skip > handle->size) {
				next_space = handle->size - skip;
				next_space -= next_space % BTS_RECORD_SIZE;
			}
			if (next_space > space || !space) {
				if (pad)
					bts_buffer_pad_out(phys, head);
				ret = perf_aux_output_skip(handle, skip);
				if (ret)
					return ret;
				/* Advance to next phys buffer */
				phys = next_phys;
				space = next_space;
				head = phys->offset + phys->displacement;
				/*
				 * After this, cur_buf and head won't match ds
				 * anymore, so we must not be racing with
				 * bts_update().
				 */
				buf->cur_buf = next_buf;
				local_set(&buf->head, head);
			}
		}
	}

	/* Don't go far beyond wakeup watermark */
	wakeup = BTS_SAFETY_MARGIN + BTS_RECORD_SIZE + handle->wakeup -
		 handle->head;
	if (space > wakeup) {
		space = wakeup;
		space -= space % BTS_RECORD_SIZE;
	}

	buf->end = head + space;

	/*
	 * If we have no space, the lost notification would have been sent when
	 * we hit absolute_maximum - see bts_update()
	 */
	if (!space)
		return -ENOSPC;

	return 0;
}
/*
 * BTS portion of the PMU interrupt: flush what has been collected to
 * the AUX ring and re-arm with a fresh output handle.  Returns nonzero
 * when there is an active, non-snapshot BTS event on this CPU (i.e.
 * the interrupt was for us), 0 otherwise.
 */
int intel_bts_interrupt(void)
{
	struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
	struct perf_event *event = bts->handle.event;
	struct bts_buffer *buf;
	s64 old_head;
	int err;

	if (!event || !bts->started)
		return 0;

	buf = perf_get_aux(&bts->handle);
	/*
	 * Skip snapshot counters: they don't use the interrupt, but
	 * there's no other way of telling, because the pointer will
	 * keep moving
	 */
	if (!buf || buf->snapshot)
		return 0;

	old_head = local_read(&buf->head);
	bts_update(bts);

	/* no new data */
	if (old_head == local_read(&buf->head))
		return 0;

	/* hand the collected bytes (and any loss indication) to the ring */
	perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0),
			    !!local_xchg(&buf->lost, 0));

	buf = perf_aux_output_begin(&bts->handle, event);
	if (!buf)
		return 1;

	err = bts_buffer_reset(buf, &bts->handle);
	if (err)
		perf_aux_output_end(&bts->handle, 0, false);

	return 1;
}
/*
 * PMU ->del callback: stop the event, push any remaining AUX data out,
 * and restore the DS fields saved by bts_event_add().
 */
static void bts_event_del(struct perf_event *event, int mode)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
	struct bts_buffer *buf = perf_get_aux(&bts->handle);

	bts_event_stop(event, PERF_EF_UPDATE);

	if (buf) {
		/*
		 * Snapshot mode: move the handle's head to the current data
		 * size and report the full buffer; the xchg leaves the whole
		 * buffer size in data_size for the output_end below.
		 */
		if (buf->snapshot)
			bts->handle.head =
				local_xchg(&buf->data_size,
					   buf->nr_pages << PAGE_SHIFT);
		perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0),
				    !!local_xchg(&buf->lost, 0));
	}

	/* restore the DS area; index goes back to the (saved) buffer base */
	cpuc->ds->bts_index = bts->ds_back.bts_buffer_base;
	cpuc->ds->bts_buffer_base = bts->ds_back.bts_buffer_base;
	cpuc->ds->bts_absolute_maximum = bts->ds_back.bts_absolute_maximum;
	cpuc->ds->bts_interrupt_threshold = bts->ds_back.bts_interrupt_threshold;
}
  359. static int bts_event_add(struct perf_event *event, int mode)
  360. {
  361. struct bts_buffer *buf;
  362. struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
  363. struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
  364. struct hw_perf_event *hwc = &event->hw;
  365. int ret = -EBUSY;
  366. event->hw.state = PERF_HES_STOPPED;
  367. if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask))
  368. return -EBUSY;
  369. if (bts->handle.event)
  370. return -EBUSY;
  371. buf = perf_aux_output_begin(&bts->handle, event);
  372. if (!buf)
  373. return -EINVAL;
  374. ret = bts_buffer_reset(buf, &bts->handle);
  375. if (ret) {
  376. perf_aux_output_end(&bts->handle, 0, false);
  377. return ret;
  378. }
  379. bts->ds_back.bts_buffer_base = cpuc->ds->bts_buffer_base;
  380. bts->ds_back.bts_absolute_maximum = cpuc->ds->bts_absolute_maximum;
  381. bts->ds_back.bts_interrupt_threshold = cpuc->ds->bts_interrupt_threshold;
  382. if (mode & PERF_EF_START) {
  383. bts_event_start(event, 0);
  384. if (hwc->state & PERF_HES_STOPPED) {
  385. bts_event_del(event, 0);
  386. return -EBUSY;
  387. }
  388. }
  389. return 0;
  390. }
/* event->destroy: release the exclusive slot taken in bts_event_init(). */
static void bts_event_destroy(struct perf_event *event)
{
	x86_del_exclusive(x86_lbr_exclusive_bts);
}
/*
 * PMU ->event_init: -ENOENT for foreign event types so the core tries
 * other PMUs; otherwise grab the lbr/bts exclusive slot and install
 * the destroy callback that releases it.
 */
static int bts_event_init(struct perf_event *event)
{
	if (event->attr.type != bts_pmu.type)
		return -ENOENT;

	if (x86_add_exclusive(x86_lbr_exclusive_bts))
		return -EBUSY;

	event->destroy = bts_event_destroy;

	return 0;
}
/* Nothing to read: BTS delivers data through the AUX buffer, not a count. */
static void bts_event_read(struct perf_event *event)
{
}
/*
 * Register the BTS PMU.  Requires DTES64 (64-bit DS area) and a PMU
 * that advertises BTS; otherwise bail with -ENODEV.
 */
static __init int bts_init(void)
{
	if (!boot_cpu_has(X86_FEATURE_DTES64) || !x86_pmu.bts)
		return -ENODEV;

	bts_pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_ITRACE;
	bts_pmu.task_ctx_nr = perf_sw_context;
	bts_pmu.event_init = bts_event_init;
	bts_pmu.add = bts_event_add;
	bts_pmu.del = bts_event_del;
	bts_pmu.start = bts_event_start;
	bts_pmu.stop = bts_event_stop;
	bts_pmu.read = bts_event_read;
	bts_pmu.setup_aux = bts_buffer_setup_aux;
	bts_pmu.free_aux = bts_buffer_free_aux;

	return perf_pmu_register(&bts_pmu, "intel_bts", -1);
}
module_init(bts_init);