perf_event_intel_bts.c

/*
 * BTS PMU driver for perf
 * Copyright (c) 2013-2014, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 */

#undef DEBUG

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/debugfs.h>
#include <linux/device.h>
#include <linux/coredump.h>

#include <asm-generic/sizes.h>
#include <asm/perf_event.h>

#include "perf_event.h"
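
/*
 * Per-CPU BTS state: the perf AUX output handle, a backup of the
 * debug-store fields that this driver overwrites while BTS is active,
 * and a "started" flag that the PMI handler consults before deciding
 * whether to restart tracing.
 */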
struct bts_ctx {
	struct perf_output_handle	handle;
	struct debug_store		ds_back;
	int				started;
};

static DEFINE_PER_CPU(struct bts_ctx, bts_ctx);
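
/*
 * Each BTS record is 24 bytes: three 8-byte fields (branch-from address,
 * branch-to address, flags). The safety margin keeps the interrupt
 * threshold far enough from the end of the buffer that records still in
 * flight when the threshold interrupt fires do not run off the end.
 */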
#define BTS_RECORD_SIZE		24
#define BTS_SAFETY_MARGIN	4080
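
/*
 * One physically contiguous chunk of the AUX buffer. @offset is the
 * chunk's position within the AUX buffer; @displacement skips the first
 * few bytes so that records stay BTS_RECORD_SIZE-aligned across chunk
 * boundaries; @size is the usable, record-aligned length.
 */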
struct bts_phys {
	struct page	*page;
	unsigned long	size;
	unsigned long	offset;
	unsigned long	displacement;
};
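
/*
 * Driver-side view of the AUX buffer: an array of bts_phys chunks plus
 * bookkeeping for the software head, the amount of data collected and
 * any records lost to buffer overflow.
 */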
struct bts_buffer {
	size_t		real_size;	/* multiple of BTS_RECORD_SIZE */
	unsigned int	nr_pages;
	unsigned int	nr_bufs;
	unsigned int	cur_buf;
	bool		snapshot;
	local_t		data_size;
	local_t		lost;
	local_t		head;
	unsigned long	end;
	void		**data_pages;
	struct bts_phys	buf[0];
};

struct pmu bts_pmu;
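
/*
 * High-order AUX pages store their allocation order in page_private(),
 * so the byte size of the chunk starting at @page is PAGE_SIZE << order.
 */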
static size_t buf_size(struct page *page)
{
	return 1 << (PAGE_SHIFT + page_private(page));
}
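
/*
 * Build the driver's view of the AUX buffer: count the physically
 * contiguous chunks, then carve each chunk into a whole number of
 * BTS_RECORD_SIZE records, carrying the leftover padding forward as the
 * next chunk's displacement.
 */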
static void *
bts_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool overwrite)
{
	struct bts_buffer *buf;
	struct page *page;
	int node = (cpu == -1) ? cpu : cpu_to_node(cpu);
	unsigned long offset;
	size_t size = nr_pages << PAGE_SHIFT;
	int pg, nbuf, pad;

	/* count all the high order buffers */
	for (pg = 0, nbuf = 0; pg < nr_pages;) {
		page = virt_to_page(pages[pg]);
		if (WARN_ON_ONCE(!PagePrivate(page) && nr_pages > 1))
			return NULL;
		pg += 1 << page_private(page);
		nbuf++;
	}

	/*
	 * to avoid interrupts in overwrite mode, only allow one physical
	 * buffer
	 */
	if (overwrite && nbuf > 1)
		return NULL;

	buf = kzalloc_node(offsetof(struct bts_buffer, buf[nbuf]), GFP_KERNEL, node);
	if (!buf)
		return NULL;

	buf->nr_pages = nr_pages;
	buf->nr_bufs = nbuf;
	buf->snapshot = overwrite;
	buf->data_pages = pages;
	buf->real_size = size - size % BTS_RECORD_SIZE;

	for (pg = 0, nbuf = 0, offset = 0, pad = 0; nbuf < buf->nr_bufs; nbuf++) {
		unsigned int __nr_pages;

		page = virt_to_page(pages[pg]);
		__nr_pages = PagePrivate(page) ? 1 << page_private(page) : 1;
		buf->buf[nbuf].page = page;
		buf->buf[nbuf].offset = offset;
		buf->buf[nbuf].displacement = (pad ? BTS_RECORD_SIZE - pad : 0);
		buf->buf[nbuf].size = buf_size(page) - buf->buf[nbuf].displacement;
		pad = buf->buf[nbuf].size % BTS_RECORD_SIZE;
		buf->buf[nbuf].size -= pad;

		pg += __nr_pages;
		offset += __nr_pages << PAGE_SHIFT;
	}

	return buf;
}

static void bts_buffer_free_aux(void *data)
{
	kfree(data);
}

static unsigned long bts_buffer_offset(struct bts_buffer *buf, unsigned int idx)
{
	return buf->buf[idx].offset + buf->buf[idx].displacement;
}
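
/*
 * Program the debug store's BTS fields for the current chunk: base,
 * write index, absolute maximum and interrupt threshold. In snapshot
 * mode the threshold is placed past the absolute maximum so that no
 * threshold interrupt can ever fire.
 */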
static void
bts_config_buffer(struct bts_buffer *buf)
{
	int cpu = raw_smp_processor_id();
	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
	struct bts_phys *phys = &buf->buf[buf->cur_buf];
	unsigned long index, thresh = 0, end = phys->size;
	struct page *page = phys->page;

	index = local_read(&buf->head);

	if (!buf->snapshot) {
		if (buf->end < phys->offset + buf_size(page))
			end = buf->end - phys->offset - phys->displacement;

		index -= phys->offset + phys->displacement;

		if (end - index > BTS_SAFETY_MARGIN)
			thresh = end - BTS_SAFETY_MARGIN;
		else if (end - index > BTS_RECORD_SIZE)
			thresh = end - BTS_RECORD_SIZE;
		else
			thresh = end;
	}

	ds->bts_buffer_base = (u64)(long)page_address(page) + phys->displacement;
	ds->bts_index = ds->bts_buffer_base + index;
	ds->bts_absolute_maximum = ds->bts_buffer_base + end;
	ds->bts_interrupt_threshold = !buf->snapshot
		? ds->bts_buffer_base + thresh
		: ds->bts_absolute_maximum + BTS_RECORD_SIZE;
}
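
/* Zero-fill the unused tail of a chunk so consumers see no stale data. */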
static void bts_buffer_pad_out(struct bts_phys *phys, unsigned long head)
{
	unsigned long index = head - phys->offset;

	memset(page_address(phys->page) + index, 0, phys->size - index);
}

static bool bts_buffer_is_full(struct bts_buffer *buf, struct bts_ctx *bts)
{
	if (buf->snapshot)
		return false;

	if (local_read(&buf->data_size) >= bts->handle.size ||
	    bts->handle.size - local_read(&buf->data_size) < BTS_RECORD_SIZE)
		return true;

	return false;
}
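
/*
 * Fold the hardware write pointer (ds->bts_index) back into the software
 * head, account the newly written bytes and note a lost-record condition
 * if the hardware ran into the absolute maximum.
 */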
static void bts_update(struct bts_ctx *bts)
{
	int cpu = raw_smp_processor_id();
	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
	struct bts_buffer *buf = perf_get_aux(&bts->handle);
	unsigned long index = ds->bts_index - ds->bts_buffer_base, old, head;

	if (!buf)
		return;

	head = index + bts_buffer_offset(buf, buf->cur_buf);
	old = local_xchg(&buf->head, head);

	if (!buf->snapshot) {
		if (old == head)
			return;

		if (ds->bts_index >= ds->bts_absolute_maximum)
			local_inc(&buf->lost);

		/*
		 * old and head are always in the same physical buffer, so we
		 * can subtract them to get the data size.
		 */
		local_add(head - old, &buf->data_size);
	} else {
		local_set(&buf->data_size, head);
	}
}

static void __bts_event_start(struct perf_event *event)
{
	struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
	struct bts_buffer *buf = perf_get_aux(&bts->handle);
	u64 config = 0;

	if (!buf || bts_buffer_is_full(buf, bts))
		return;

	event->hw.itrace_started = 1;
	event->hw.state = 0;

	if (!buf->snapshot)
		config |= ARCH_PERFMON_EVENTSEL_INT;
	if (!event->attr.exclude_kernel)
		config |= ARCH_PERFMON_EVENTSEL_OS;
	if (!event->attr.exclude_user)
		config |= ARCH_PERFMON_EVENTSEL_USR;

	bts_config_buffer(buf);

	/*
	 * local barrier to make sure that ds configuration made it
	 * before we enable BTS
	 */
	wmb();

	intel_pmu_enable_bts(config);
}

static void bts_event_start(struct perf_event *event, int flags)
{
	struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);

	__bts_event_start(event);

	/* PMI handler: this counter is running and likely generating PMIs */
	ACCESS_ONCE(bts->started) = 1;
}

static void __bts_event_stop(struct perf_event *event)
{
	/*
	 * No extra synchronization is mandated by the documentation to have
	 * BTS data stores globally visible.
	 */
	intel_pmu_disable_bts();

	if (event->hw.state & PERF_HES_STOPPED)
		return;

	ACCESS_ONCE(event->hw.state) |= PERF_HES_STOPPED;
}

static void bts_event_stop(struct perf_event *event, int flags)
{
	struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);

	/* PMI handler: don't restart this counter */
	ACCESS_ONCE(bts->started) = 0;

	__bts_event_stop(event);

	if (flags & PERF_EF_UPDATE)
		bts_update(bts);
}
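
/*
 * Pause/resume hooks called from the core x86 PMU code, e.g. around the
 * PMI handler, while other per-CPU PMU state is being touched.
 */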
void intel_bts_enable_local(void)
{
	struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);

	if (bts->handle.event && bts->started)
		__bts_event_start(bts->handle.event);
}

void intel_bts_disable_local(void)
{
	struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);

	if (bts->handle.event)
		__bts_event_stop(bts->handle.event);
}
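
/*
 * Work out how much record-aligned space is usable from the current head
 * to the end of the current chunk (or to the wakeup watermark, whichever
 * comes first). If that is within the safety margin, consider skipping
 * ahead to the next chunk instead. Sets buf->end for bts_config_buffer().
 */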
static int
bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle)
{
	unsigned long head, space, next_space, pad, gap, skip, wakeup;
	unsigned int next_buf;
	struct bts_phys *phys, *next_phys;
	int ret;

	if (buf->snapshot)
		return 0;

	head = handle->head & ((buf->nr_pages << PAGE_SHIFT) - 1);
	if (WARN_ON_ONCE(head != local_read(&buf->head)))
		return -EINVAL;

	phys = &buf->buf[buf->cur_buf];
	space = phys->offset + phys->displacement + phys->size - head;
	pad = space;
	if (space > handle->size) {
		space = handle->size;
		space -= space % BTS_RECORD_SIZE;
	}
	if (space <= BTS_SAFETY_MARGIN) {
		/* See if next phys buffer has more space */
		next_buf = buf->cur_buf + 1;
		if (next_buf >= buf->nr_bufs)
			next_buf = 0;
		next_phys = &buf->buf[next_buf];
		gap = buf_size(phys->page) - phys->displacement - phys->size +
		      next_phys->displacement;
		skip = pad + gap;
		if (handle->size >= skip) {
			next_space = next_phys->size;
			if (next_space + skip > handle->size) {
				next_space = handle->size - skip;
				next_space -= next_space % BTS_RECORD_SIZE;
			}
			if (next_space > space || !space) {
				if (pad)
					bts_buffer_pad_out(phys, head);
				ret = perf_aux_output_skip(handle, skip);
				if (ret)
					return ret;

				/* Advance to next phys buffer */
				phys = next_phys;
				space = next_space;
				head = phys->offset + phys->displacement;
				/*
				 * After this, cur_buf and head won't match ds
				 * anymore, so we must not be racing with
				 * bts_update().
				 */
				buf->cur_buf = next_buf;
				local_set(&buf->head, head);
			}
		}
	}

	/* Don't go far beyond wakeup watermark */
	wakeup = BTS_SAFETY_MARGIN + BTS_RECORD_SIZE + handle->wakeup -
		 handle->head;
	if (space > wakeup) {
		space = wakeup;
		space -= space % BTS_RECORD_SIZE;
	}

	buf->end = head + space;

	/*
	 * If we have no space, the lost notification would have been sent when
	 * we hit absolute_maximum - see bts_update()
	 */
	if (!space)
		return -ENOSPC;

	return 0;
}
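
/*
 * PMI handler hook: flush whatever BTS data has accumulated, publish it
 * through the AUX API and re-arm the buffer. Returns nonzero if the
 * interrupt was consumed here.
 */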
int intel_bts_interrupt(void)
{
	struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
	struct perf_event *event = bts->handle.event;
	struct bts_buffer *buf;
	s64 old_head;
	int err;

	if (!event || !bts->started)
		return 0;

	buf = perf_get_aux(&bts->handle);
	/*
	 * Skip snapshot counters: they don't use the interrupt, but
	 * there's no other way of telling, because the pointer will
	 * keep moving
	 */
	if (!buf || buf->snapshot)
		return 0;

	old_head = local_read(&buf->head);
	bts_update(bts);

	/* no new data */
	if (old_head == local_read(&buf->head))
		return 0;

	perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0),
			    !!local_xchg(&buf->lost, 0));

	buf = perf_aux_output_begin(&bts->handle, event);
	if (!buf)
		return 1;

	err = bts_buffer_reset(buf, &bts->handle);
	if (err)
		perf_aux_output_end(&bts->handle, 0, false);

	return 1;
}
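
/*
 * Stop the event, hand any collected data back to the AUX machinery and
 * restore the debug-store fields that were saved in bts_event_add().
 */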
static void bts_event_del(struct perf_event *event, int mode)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
	struct bts_buffer *buf = perf_get_aux(&bts->handle);

	bts_event_stop(event, PERF_EF_UPDATE);

	if (buf) {
		if (buf->snapshot)
			bts->handle.head =
				local_xchg(&buf->data_size,
					   buf->nr_pages << PAGE_SHIFT);
		perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0),
				    !!local_xchg(&buf->lost, 0));
	}

	cpuc->ds->bts_index = bts->ds_back.bts_buffer_base;
	cpuc->ds->bts_buffer_base = bts->ds_back.bts_buffer_base;
	cpuc->ds->bts_absolute_maximum = bts->ds_back.bts_absolute_maximum;
	cpuc->ds->bts_interrupt_threshold = bts->ds_back.bts_interrupt_threshold;
}

static int bts_event_add(struct perf_event *event, int mode)
{
	struct bts_buffer *buf;
	struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;
	int ret = -EBUSY;

	event->hw.state = PERF_HES_STOPPED;

	if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask))
		return -EBUSY;

	if (bts->handle.event)
		return -EBUSY;

	buf = perf_aux_output_begin(&bts->handle, event);
	if (!buf)
		return -EINVAL;

	ret = bts_buffer_reset(buf, &bts->handle);
	if (ret) {
		perf_aux_output_end(&bts->handle, 0, false);
		return ret;
	}

	bts->ds_back.bts_buffer_base = cpuc->ds->bts_buffer_base;
	bts->ds_back.bts_absolute_maximum = cpuc->ds->bts_absolute_maximum;
	bts->ds_back.bts_interrupt_threshold = cpuc->ds->bts_interrupt_threshold;

	if (mode & PERF_EF_START) {
		bts_event_start(event, 0);
		if (hwc->state & PERF_HES_STOPPED) {
			bts_event_del(event, 0);
			return -EBUSY;
		}
	}

	return 0;
}
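
/*
 * BTS shares hardware with LBR, so the x86_lbr_exclusive_bts reservation
 * taken in bts_event_init() must be dropped when the event goes away.
 */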
static void bts_event_destroy(struct perf_event *event)
{
	x86_release_hardware();
	x86_del_exclusive(x86_lbr_exclusive_bts);
}

static int bts_event_init(struct perf_event *event)
{
	int ret;

	if (event->attr.type != bts_pmu.type)
		return -ENOENT;

	if (x86_add_exclusive(x86_lbr_exclusive_bts))
		return -EBUSY;

	ret = x86_reserve_hardware();
	if (ret) {
		x86_del_exclusive(x86_lbr_exclusive_bts);
		return ret;
	}

	event->destroy = bts_event_destroy;

	return 0;
}

static void bts_event_read(struct perf_event *event)
{
}
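
/*
 * BTS depends on the 64-bit debug store (DTES64). PERF_PMU_CAP_AUX_NO_SG
 * is set because the debug store targets one linearly-mapped chunk at a
 * time, so the AUX buffer must come from contiguous high-order pages
 * rather than a scatter-gather list.
 */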
static __init int bts_init(void)
{
	if (!boot_cpu_has(X86_FEATURE_DTES64) || !x86_pmu.bts)
		return -ENODEV;

	bts_pmu.capabilities	= PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_ITRACE;
	bts_pmu.task_ctx_nr	= perf_sw_context;
	bts_pmu.event_init	= bts_event_init;
	bts_pmu.add		= bts_event_add;
	bts_pmu.del		= bts_event_del;
	bts_pmu.start		= bts_event_start;
	bts_pmu.stop		= bts_event_stop;
	bts_pmu.read		= bts_event_read;
	bts_pmu.setup_aux	= bts_buffer_setup_aux;
	bts_pmu.free_aux	= bts_buffer_free_aux;

	return perf_pmu_register(&bts_pmu, "intel_bts", -1);
}
arch_initcall(bts_init);