mmap.c

/*
 * Copyright (C) 2011-2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Parts came from evlist.c builtin-{top,stat,record}.c, see those files for further
 * copyright notes.
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */
#include <sys/mman.h>
#include <inttypes.h>
#include <asm/bug.h>
#include "debug.h"
#include "event.h"
#include "mmap.h"
#include "util.h" /* page_size */
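
/*
 * Length handed to mmap(): mask + 1 bytes of data (a power of two) plus the
 * leading perf_event_mmap_page control page.
 */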
size_t perf_mmap__mmap_len(struct perf_mmap *map)
{
        return map->mask + 1 + page_size;
}
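
/*
 * Pull a single event out of the ring buffer at *startp.  If the record
 * wraps around the end of the data area it is reassembled into
 * map->event_copy so the caller always sees one contiguous event.
 */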
/* When check_messup is true, 'end' must point to a good entry */
static union perf_event *perf_mmap__read(struct perf_mmap *map,
                                         u64 *startp, u64 end)
{
        unsigned char *data = map->base + page_size;
        union perf_event *event = NULL;
        int diff = end - *startp;

        if (diff >= (int)sizeof(event->header)) {
                size_t size;

                event = (union perf_event *)&data[*startp & map->mask];
                size = event->header.size;

                if (size < sizeof(event->header) || diff < (int)size)
                        return NULL;

                /*
                 * Event straddles the mmap boundary -- header should always
                 * be inside due to u64 alignment of output.
                 */
                if ((*startp & map->mask) + size != ((*startp + size) & map->mask)) {
                        unsigned int offset = *startp;
                        unsigned int len = min(sizeof(*event), size), cpy;
                        void *dst = map->event_copy;

                        do {
                                cpy = min(map->mask + 1 - (offset & map->mask), len);
                                memcpy(dst, &data[offset & map->mask], cpy);
                                offset += cpy;
                                dst += cpy;
                                len -= cpy;
                        } while (len);

                        event = (union perf_event *)map->event_copy;
                }

                *startp += size;
        }

        return event;
}

/*
 * Read events from the ring buffer one by one.
 * Return one event for each call.
 *
 * Usage:
 * perf_mmap__read_init()
 * while(event = perf_mmap__read_event()) {
 *	//process the event
 *	perf_mmap__consume()
 * }
 * perf_mmap__read_done()
 */
union perf_event *perf_mmap__read_event(struct perf_mmap *map,
                                        bool overwrite __maybe_unused,
                                        u64 *startp __maybe_unused,
                                        u64 end __maybe_unused)
{
        union perf_event *event;

        /*
         * Check if event was unmapped due to a POLLHUP/POLLERR.
         */
        if (!refcount_read(&map->refcnt))
                return NULL;

        /* non-overwrite doesn't pause the ringbuffer */
        if (!map->overwrite)
                map->end = perf_mmap__read_head(map);

        event = perf_mmap__read(map, &map->start, map->end);

        if (!map->overwrite)
                map->prev = map->start;

        return event;
}
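
/*
 * The buffer counts as empty once the reader has caught up with the kernel
 * head and there is no AUX area mapping attached.
 */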
static bool perf_mmap__empty(struct perf_mmap *map)
{
        return perf_mmap__read_head(map) == map->prev && !map->auxtrace_mmap.base;
}
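
/*
 * Reference counting: the map starts out with two references (see
 * perf_mmap__mmap() below); the last perf_mmap__put() unmaps the buffer.
 */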
void perf_mmap__get(struct perf_mmap *map)
{
        refcount_inc(&map->refcnt);
}

void perf_mmap__put(struct perf_mmap *map)
{
        BUG_ON(map->base && refcount_read(&map->refcnt) == 0);

        if (refcount_dec_and_test(&map->refcnt))
                perf_mmap__munmap(map);
}
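
/*
 * For non-overwrite mode, publish the new tail so the kernel can reuse the
 * space that was just read.  Once only the final reference remains and the
 * buffer is empty, drop that reference as well.
 */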
void perf_mmap__consume(struct perf_mmap *map, bool overwrite)
{
        if (!overwrite) {
                u64 old = map->prev;

                perf_mmap__write_tail(map, old);
        }

        if (refcount_read(&map->refcnt) == 1 && perf_mmap__empty(map))
                perf_mmap__put(map);
}
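
/*
 * Weak no-op stubs for AUX area tracing; builds with auxtrace support
 * provide the real implementations and override these.
 */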
int __weak auxtrace_mmap__mmap(struct auxtrace_mmap *mm __maybe_unused,
                               struct auxtrace_mmap_params *mp __maybe_unused,
                               void *userpg __maybe_unused,
                               int fd __maybe_unused)
{
        return 0;
}

void __weak auxtrace_mmap__munmap(struct auxtrace_mmap *mm __maybe_unused)
{
}

void __weak auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp __maybe_unused,
                                       off_t auxtrace_offset __maybe_unused,
                                       unsigned int auxtrace_pages __maybe_unused,
                                       bool auxtrace_overwrite __maybe_unused)
{
}

void __weak auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp __maybe_unused,
                                          struct perf_evlist *evlist __maybe_unused,
                                          int idx __maybe_unused,
                                          bool per_cpu __maybe_unused)
{
}
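
/* Release the event ring buffer mapping and, if present, the AUX area mapping. */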
void perf_mmap__munmap(struct perf_mmap *map)
{
        if (map->base != NULL) {
                munmap(map->base, perf_mmap__mmap_len(map));
                map->base = NULL;
                map->fd = -1;
                refcount_set(&map->refcnt, 0);
        }
        auxtrace_mmap__munmap(&map->auxtrace_mmap);
}

int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd)
{
        /*
         * The last one will be done at perf_mmap__consume(), so that we
         * make sure we don't prevent tools from consuming every last event in
         * the ring buffer.
         *
         * I.e. we can get the POLLHUP meaning that the fd doesn't exist
         * anymore, but the last events for it are still in the ring buffer,
         * waiting to be consumed.
         *
         * Tools can choose to ignore this at their own discretion, but the
         * evlist layer can't just drop it when filtering events in
         * perf_evlist__filter_pollfd().
         */
        refcount_set(&map->refcnt, 2);
        map->prev = 0;
        map->mask = mp->mask;
        map->base = mmap(NULL, perf_mmap__mmap_len(map), mp->prot,
                         MAP_SHARED, fd, 0);
        if (map->base == MAP_FAILED) {
                pr_debug2("failed to mmap perf event ring buffer, error %d\n",
                          errno);
                map->base = NULL;
                return -1;
        }
        map->fd = fd;

        if (auxtrace_mmap__mmap(&map->auxtrace_mmap,
                                &mp->auxtrace_mp, map->base, fd))
                return -1;

        return 0;
}
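
/*
 * Figure out how much readable data an overwrite (backward) ring buffer
 * still holds: starting at 'head', follow record headers until the walk
 * either wraps a full buffer length or reaches a zero-sized header, and
 * report the resulting range through *start/*end.
 */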
static int overwrite_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end)
{
        struct perf_event_header *pheader;
        u64 evt_head = head;
        int size = mask + 1;

        pr_debug2("overwrite_rb_find_range: buf=%p, head=%"PRIx64"\n", buf, head);

        pheader = (struct perf_event_header *)(buf + (head & mask));
        *start = head;
        while (true) {
                if (evt_head - head >= (unsigned int)size) {
                        pr_debug("Finished reading overwrite ring buffer: rewind\n");
                        if (evt_head - head > (unsigned int)size)
                                evt_head -= pheader->size;
                        *end = evt_head;
                        return 0;
                }

                pheader = (struct perf_event_header *)(buf + (evt_head & mask));

                if (pheader->size == 0) {
                        pr_debug("Finished reading overwrite ring buffer: get start\n");
                        *end = evt_head;
                        return 0;
                }

                evt_head += pheader->size;
                pr_debug3("move evt_head: %"PRIx64"\n", evt_head);
        }
        WARN_ONCE(1, "Shouldn't get here\n");
        return -1;
}

/*
 * Report the start and end of the available data in the ring buffer
 */
int perf_mmap__read_init(struct perf_mmap *md, bool overwrite,
                         u64 *startp, u64 *endp)
{
        u64 head = perf_mmap__read_head(md);
        u64 old = md->prev;
        unsigned char *data = md->base + page_size;
        unsigned long size;

        *startp = overwrite ? head : old;
        *endp = overwrite ? old : head;

        md->start = md->overwrite ? head : old;
        md->end = md->overwrite ? old : head;

        if (md->start == md->end)
                return -EAGAIN;

        size = md->end - md->start;
        if (size > (unsigned long)(md->mask) + 1) {
                if (!md->overwrite) {
                        WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");

                        md->prev = head;
                        perf_mmap__consume(md, overwrite);
                        return -EAGAIN;
                }

                /*
                 * Backward ring buffer is full. We still have a chance to read
                 * most of data from it.
                 */
                if (overwrite_rb_find_range(data, md->mask, head, &md->start, &md->end))
                        return -EINVAL;

                *startp = md->start;
                *endp = md->end;
        }

        return 0;
}
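
/*
 * Drain everything currently readable, handing it to push() in at most two
 * chunks: first the part up to the end of the buffer when the range wraps,
 * then the remainder from the beginning of the data area.
 */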
int perf_mmap__push(struct perf_mmap *md, void *to,
                    int push(void *to, void *buf, size_t size))
{
        u64 head = perf_mmap__read_head(md);
        u64 end, start;
        unsigned char *data = md->base + page_size;
        unsigned long size;
        void *buf;
        int rc = 0;

        rc = perf_mmap__read_init(md, md->overwrite, &start, &end);
        if (rc < 0)
                return (rc == -EAGAIN) ? 0 : -1;

        size = md->end - md->start;

        if ((md->start & md->mask) + size != (md->end & md->mask)) {
                buf = &data[md->start & md->mask];
                size = md->mask + 1 - (md->start & md->mask);
                md->start += size;

                if (push(to, buf, size) < 0) {
                        rc = -1;
                        goto out;
                }
        }

        buf = &data[md->start & md->mask];
        size = md->end - md->start;
        md->start += size;

        if (push(to, buf, size) < 0) {
                rc = -1;
                goto out;
        }

        md->prev = head;
        perf_mmap__consume(md, md->overwrite);
out:
        return rc;
}
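
/*
 * A minimal sketch of a push() callback, assuming a caller that just wants
 * to append the chunks to a file descriptor.  'my_push' and the fd holder
 * are hypothetical, and writen() is assumed to be perf's write-all helper;
 * in-tree callers such as builtin-record.c supply their own callback:
 *
 *	static int my_push(void *to, void *buf, size_t size)
 *	{
 *		int fd = *(int *)to;
 *
 *		return writen(fd, buf, size) == (ssize_t)size ? 0 : -1;
 *	}
 *
 *	...
 *	int fd = output_fd;
 *	perf_mmap__push(md, &fd, my_push);
 */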

/*
 * Mandatory for overwrite mode.
 * The direction of overwrite mode is backward.
 * The last perf_mmap__read() will set the tail to map->prev.
 * Need to correct map->prev to head, which is the end of the next read.
 */
void perf_mmap__read_done(struct perf_mmap *map)
{
        map->prev = perf_mmap__read_head(map);
}