s390-cpumsf.c 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Copyright IBM Corp. 2018
  4. * Auxtrace support for s390 CPU-Measurement Sampling Facility
  5. *
  6. * Author(s): Thomas Richter <tmricht@linux.ibm.com>
  7. *
  8. * Auxiliary traces are collected during 'perf record' using rbd000 event.
  9. * Several PERF_RECORD_XXX are generated during recording:
  10. *
  11. * PERF_RECORD_AUX:
  12. * Records that new data landed in the AUX buffer part.
  13. * PERF_RECORD_AUXTRACE:
  14. * Defines auxtrace data. Followed by the actual data. The contents of
  15. * the auxtrace data is dependent on the event and the CPU.
  16. * This record is generated by perf record command. For details
  17. * see Documentation/perf.data-file-format.txt.
  18. * PERF_RECORD_AUXTRACE_INFO:
  19. * Defines a table of contents for PERF_RECORD_AUXTRACE records. This
  20. * record is generated during 'perf record' command. Each record contains up
  21. * to 256 entries describing offset and size of the AUXTRACE data in the
  22. * perf.data file.
  23. * PERF_RECORD_AUXTRACE_ERROR:
  24. * Indicates an error during AUXTRACE collection such as buffer overflow.
  25. * PERF_RECORD_FINISHED_ROUND:
  26. * Perf events are not necessarily in time stamp order, as they can be
  27. * collected in parallel on different CPUs. If the events should be
  28. * processed in time order they need to be sorted first.
  29. * Perf report guarantees that there is no reordering over a
  30. * PERF_RECORD_FINISHED_ROUND boundary event. All perf records with a
  31. * time stamp lower than this record are processed (and displayed) before
  32. * the succeeding perf record are processed.
  33. *
  34. * These records are evaluated during perf report command.
  35. *
  36. * 1. PERF_RECORD_AUXTRACE_INFO is used to set up the infrastructure for
  37. * auxiliary trace data processing. See s390_cpumsf_process_auxtrace_info()
  38. * below.
  39. * Auxiliary trace data is collected per CPU. To merge the data into the report
  40. * an auxtrace_queue is created for each CPU. It is assumed that the auxtrace
  41. * data is in ascending order.
  42. *
  43. * Each queue has a double linked list of auxtrace_buffers. This list contains
  44. * the offset and size of a CPU's auxtrace data. During auxtrace processing
  45. * the data portion is mmap()'ed.
  46. *
  47. * To sort the queues in chronological order, all queue access is controlled
  48. * by the auxtrace_heap. This is basically a stack, each stack element has two
  49. * entries, the queue number and a time stamp. However the stack is sorted by
  50. * the time stamps. The highest time stamp is at the bottom the lowest
  51. * (nearest) time stamp is at the top. That sort order is maintained at all
  52. * times!
  53. *
  54. * After the auxtrace infrastructure has been setup, the auxtrace queues are
  55. * filled with data (offset/size pairs) and the auxtrace_heap is populated.
  56. *
  57. * 2. PERF_RECORD_XXX processing triggers access to the auxtrace_queues.
  58. * Each record is handled by s390_cpumsf_process_event(). The time stamp of
  59. * the perf record is compared with the time stamp located on the auxtrace_heap
  60. * top element. If that time stamp is lower than the time stamp from the
  61. * record sample, the auxtrace queues will be processed. As auxtrace queues
  62. * control many auxtrace_buffers and each buffer can be quite large, the
  63. * auxtrace buffer might be processed only partially. In this case the
  64. * position in the auxtrace_buffer of that queue is remembered and the time
  65. * stamp of the last processed entry of the auxtrace_buffer replaces the
  66. * current auxtrace_heap top.
  67. *
  68. * 3. Auxtrace_queues might run out of data and are fed by the
  69. * PERF_RECORD_AUXTRACE handling, see s390_cpumsf_process_auxtrace_event().
  70. *
  71. * Event Generation
  72. * Each sampling-data entry in the auxiliary trace data generates a perf sample.
  73. * This sample is filled
  74. * with data from the auxtrace such as PID/TID, instruction address, CPU state,
  75. * etc. This sample is processed with perf_session__deliver_synth_event() to
  76. * be included into the GUI.
  77. *
  78. * 4. PERF_RECORD_FINISHED_ROUND event is used to process all the remaining
  79. * auxiliary traces entries until the time stamp of this record is reached
  80. * auxtrace_heap top. This is triggered by ordered_event->deliver().
  81. *
  82. *
  83. * Perf event processing.
  84. * Event processing of PERF_RECORD_XXX entries relies on time stamp entries.
  85. * This is the function call sequence:
  86. *
  87. * __cmd_report()
  88. * |
  89. * perf_session__process_events()
  90. * |
  91. * __perf_session__process_events()
  92. * |
  93. * perf_session__process_event()
  94. * | This functions splits the PERF_RECORD_XXX records.
  95. * | - Those generated by perf record command (type number equal or higher
  96. * | than PERF_RECORD_USER_TYPE_START) are handled by
  97. * | perf_session__process_user_event(see below)
  98. * | - Those generated by the kernel are handled by
  99. * | perf_evlist__parse_sample_timestamp()
  100. * |
  101. * perf_evlist__parse_sample_timestamp()
  102. * | Extract time stamp from sample data.
  103. * |
  104. * perf_session__queue_event()
  105. * | If timestamp is positive the sample is entered into an ordered_event
  106. * | list, sort order is the timestamp. The event processing is deferred until
  107. * | later (see perf_session__process_user_event()).
  108. * | Other timestamps (0 or -1) are handled immediately by
  109. * | perf_session__deliver_event(). These are events generated at start up
  110. * | of command perf record. They create PERF_RECORD_COMM and PERF_RECORD_MMAP*
  111. * | records. They are needed to create a list of running processes and its
  112. * | memory mappings and layout. They are needed at the beginning to enable
  113. * | command perf report to create process trees and memory mappings.
  114. * |
  115. * perf_session__deliver_event()
  116. * | Delivers a PERF_RECORD_XXX entry for handling.
  117. * |
  118. * auxtrace__process_event()
  119. * | The timestamp of the PERF_RECORD_XXX entry is taken to correlate with
  120. * | time stamps from the auxiliary trace buffers. This enables
  121. * | synchronization between auxiliary trace data and the events on the
  122. * | perf.data file.
  123. * |
  124. * machine__deliver_event()
  125. * | Handles the PERF_RECORD_XXX event. This depends on the record type.
  126. * It might update the process tree, update a process memory map or enter
  127. * a sample with IP and call back chain data into GUI data pool.
  128. *
  129. *
  130. * Deferred processing determined by perf_session__process_user_event() is
  131. * finally processed when a PERF_RECORD_FINISHED_ROUND is encountered. These
  132. * are generated during command perf record.
  133. * The timestamp of PERF_RECORD_FINISHED_ROUND event is taken to process all
  134. * PERF_RECORD_XXX entries stored in the ordered_event list. This list was
  135. * built up while reading the perf.data file.
  136. * Each event is now processed by calling perf_session__deliver_event().
  137. * This enables time synchronization between the data in the perf.data file and
  138. * the data in the auxiliary trace buffers.
  139. */
  140. #include <endian.h>
  141. #include <errno.h>
  142. #include <byteswap.h>
  143. #include <inttypes.h>
  144. #include <linux/kernel.h>
  145. #include <linux/types.h>
  146. #include <linux/bitops.h>
  147. #include <linux/log2.h>
  148. #include <sys/stat.h>
  149. #include <sys/types.h>
  150. #include "cpumap.h"
  151. #include "color.h"
  152. #include "evsel.h"
  153. #include "evlist.h"
  154. #include "machine.h"
  155. #include "session.h"
  156. #include "util.h"
  157. #include "thread.h"
  158. #include "debug.h"
  159. #include "auxtrace.h"
  160. #include "s390-cpumsf.h"
  161. #include "s390-cpumsf-kernel.h"
  162. #include "config.h"
  163. struct s390_cpumsf {
  164. struct auxtrace auxtrace;
  165. struct auxtrace_queues queues;
  166. struct auxtrace_heap heap;
  167. struct perf_session *session;
  168. struct machine *machine;
  169. u32 auxtrace_type;
  170. u32 pmu_type;
  171. u16 machine_type;
  172. bool data_queued;
  173. bool use_logfile;
  174. char *logdir;
  175. };
  /* Private per-queue decoder state, stored in auxtrace_queue->priv. */
  struct s390_cpumsf_queue {
  	/* Back pointer to the session-wide decoder state */
  	struct s390_cpumsf	*sf;
  	/* Index of this queue in sf->queues.queue_array */
  	unsigned int		queue_nr;
  	/* Buffer currently being decoded, NULL when a new one must be fetched */
  	struct auxtrace_buffer	*buffer;
  	/* CPU number reported in generated samples, -1 if not known */
  	int			cpu;
  	/* Optional log file receiving the raw buffer contents, or NULL */
  	FILE			*logfile;
  };
  183. /* Display s390 CPU measurement facility basic-sampling data entry */
  184. static bool s390_cpumsf_basic_show(const char *color, size_t pos,
  185. struct hws_basic_entry *basic)
  186. {
  187. if (basic->def != 1) {
  188. pr_err("Invalid AUX trace basic entry [%#08zx]\n", pos);
  189. return false;
  190. }
  191. color_fprintf(stdout, color, " [%#08zx] Basic Def:%04x Inst:%#04x"
  192. " %c%c%c%c AS:%d ASN:%#04x IA:%#018llx\n"
  193. "\t\tCL:%d HPP:%#018llx GPP:%#018llx\n",
  194. pos, basic->def, basic->U,
  195. basic->T ? 'T' : ' ',
  196. basic->W ? 'W' : ' ',
  197. basic->P ? 'P' : ' ',
  198. basic->I ? 'I' : ' ',
  199. basic->AS, basic->prim_asn, basic->ia, basic->CL,
  200. basic->hpp, basic->gpp);
  201. return true;
  202. }
  203. /* Display s390 CPU measurement facility diagnostic-sampling data entry */
  204. static bool s390_cpumsf_diag_show(const char *color, size_t pos,
  205. struct hws_diag_entry *diag)
  206. {
  207. if (diag->def < S390_CPUMSF_DIAG_DEF_FIRST) {
  208. pr_err("Invalid AUX trace diagnostic entry [%#08zx]\n", pos);
  209. return false;
  210. }
  211. color_fprintf(stdout, color, " [%#08zx] Diag Def:%04x %c\n",
  212. pos, diag->def, diag->I ? 'I' : ' ');
  213. return true;
  214. }
  215. /* Return TOD timestamp contained in an trailer entry */
  216. static unsigned long long trailer_timestamp(struct hws_trailer_entry *te)
  217. {
  218. /* te->t set: TOD in STCKE format, bytes 8-15
  219. * to->t not set: TOD in STCK format, bytes 0-7
  220. */
  221. unsigned long long ts;
  222. memcpy(&ts, &te->timestamp[te->t], sizeof(ts));
  223. return ts;
  224. }
  225. /* Display s390 CPU measurement facility trailer entry */
  226. static bool s390_cpumsf_trailer_show(const char *color, size_t pos,
  227. struct hws_trailer_entry *te)
  228. {
  229. if (te->bsdes != sizeof(struct hws_basic_entry)) {
  230. pr_err("Invalid AUX trace trailer entry [%#08zx]\n", pos);
  231. return false;
  232. }
  233. color_fprintf(stdout, color, " [%#08zx] Trailer %c%c%c bsdes:%d"
  234. " dsdes:%d Overflow:%lld Time:%#llx\n"
  235. "\t\tC:%d TOD:%#lx 1:%#llx 2:%#llx\n",
  236. pos,
  237. te->f ? 'F' : ' ',
  238. te->a ? 'A' : ' ',
  239. te->t ? 'T' : ' ',
  240. te->bsdes, te->dsdes, te->overflow,
  241. trailer_timestamp(te), te->clock_base, te->progusage2,
  242. te->progusage[0], te->progusage[1]);
  243. return true;
  244. }
  245. /* Test a sample data block. It must be 4KB or a multiple thereof in size and
  246. * 4KB page aligned. Each sample data page has a trailer entry at the
  247. * end which contains the sample entry data sizes.
  248. *
  249. * Return true if the sample data block passes the checks and set the
  250. * basic set entry size and diagnostic set entry size.
  251. *
  252. * Return false on failure.
  253. *
  254. * Note: Old hardware does not set the basic or diagnostic entry sizes
  255. * in the trailer entry. Use the type number instead.
  256. */
  257. static bool s390_cpumsf_validate(int machine_type,
  258. unsigned char *buf, size_t len,
  259. unsigned short *bsdes,
  260. unsigned short *dsdes)
  261. {
  262. struct hws_basic_entry *basic = (struct hws_basic_entry *)buf;
  263. struct hws_trailer_entry *te;
  264. *dsdes = *bsdes = 0;
  265. if (len & (S390_CPUMSF_PAGESZ - 1)) /* Illegal size */
  266. return false;
  267. if (basic->def != 1) /* No basic set entry, must be first */
  268. return false;
  269. /* Check for trailer entry at end of SDB */
  270. te = (struct hws_trailer_entry *)(buf + S390_CPUMSF_PAGESZ
  271. - sizeof(*te));
  272. *bsdes = te->bsdes;
  273. *dsdes = te->dsdes;
  274. if (!te->bsdes && !te->dsdes) {
  275. /* Very old hardware, use CPUID */
  276. switch (machine_type) {
  277. case 2097:
  278. case 2098:
  279. *dsdes = 64;
  280. *bsdes = 32;
  281. break;
  282. case 2817:
  283. case 2818:
  284. *dsdes = 74;
  285. *bsdes = 32;
  286. break;
  287. case 2827:
  288. case 2828:
  289. *dsdes = 85;
  290. *bsdes = 32;
  291. break;
  292. default:
  293. /* Illegal trailer entry */
  294. return false;
  295. }
  296. }
  297. return true;
  298. }
  299. /* Return true if there is room for another entry */
  300. static bool s390_cpumsf_reached_trailer(size_t entry_sz, size_t pos)
  301. {
  302. size_t payload = S390_CPUMSF_PAGESZ - sizeof(struct hws_trailer_entry);
  303. if (payload - (pos & (S390_CPUMSF_PAGESZ - 1)) < entry_sz)
  304. return false;
  305. return true;
  306. }
  307. /* Dump an auxiliary buffer. These buffers are multiple of
  308. * 4KB SDB pages.
  309. */
  310. static void s390_cpumsf_dump(struct s390_cpumsf *sf,
  311. unsigned char *buf, size_t len)
  312. {
  313. const char *color = PERF_COLOR_BLUE;
  314. struct hws_basic_entry *basic;
  315. struct hws_diag_entry *diag;
  316. unsigned short bsdes, dsdes;
  317. size_t pos = 0;
  318. color_fprintf(stdout, color,
  319. ". ... s390 AUX data: size %zu bytes\n",
  320. len);
  321. if (!s390_cpumsf_validate(sf->machine_type, buf, len, &bsdes,
  322. &dsdes)) {
  323. pr_err("Invalid AUX trace data block size:%zu"
  324. " (type:%d bsdes:%hd dsdes:%hd)\n",
  325. len, sf->machine_type, bsdes, dsdes);
  326. return;
  327. }
  328. /* s390 kernel always returns 4KB blocks fully occupied,
  329. * no partially filled SDBs.
  330. */
  331. while (pos < len) {
  332. /* Handle Basic entry */
  333. basic = (struct hws_basic_entry *)(buf + pos);
  334. if (s390_cpumsf_basic_show(color, pos, basic))
  335. pos += bsdes;
  336. else
  337. return;
  338. /* Handle Diagnostic entry */
  339. diag = (struct hws_diag_entry *)(buf + pos);
  340. if (s390_cpumsf_diag_show(color, pos, diag))
  341. pos += dsdes;
  342. else
  343. return;
  344. /* Check for trailer entry */
  345. if (!s390_cpumsf_reached_trailer(bsdes + dsdes, pos)) {
  346. /* Show trailer entry */
  347. struct hws_trailer_entry te;
  348. pos = (pos + S390_CPUMSF_PAGESZ)
  349. & ~(S390_CPUMSF_PAGESZ - 1);
  350. pos -= sizeof(te);
  351. memcpy(&te, buf + pos, sizeof(te));
  352. /* Set descriptor sizes in case of old hardware
  353. * where these values are not set.
  354. */
  355. te.bsdes = bsdes;
  356. te.dsdes = dsdes;
  357. if (s390_cpumsf_trailer_show(color, pos, &te))
  358. pos += sizeof(te);
  359. else
  360. return;
  361. }
  362. }
  363. }
  /* Print a separator line consisting of a single dot, then dump the buffer. */
  static void s390_cpumsf_dump_event(struct s390_cpumsf *sf, unsigned char *buf,
  				     size_t len)
  {
  	fputs(".\n", stdout);
  	s390_cpumsf_dump(sf, buf, len);
  }
  370. #define S390_LPP_PID_MASK 0xffffffff
  371. static bool s390_cpumsf_make_event(size_t pos,
  372. struct hws_basic_entry *basic,
  373. struct s390_cpumsf_queue *sfq)
  374. {
  375. struct perf_sample sample = {
  376. .ip = basic->ia,
  377. .pid = basic->hpp & S390_LPP_PID_MASK,
  378. .tid = basic->hpp & S390_LPP_PID_MASK,
  379. .cpumode = PERF_RECORD_MISC_CPUMODE_UNKNOWN,
  380. .cpu = sfq->cpu,
  381. .period = 1
  382. };
  383. union perf_event event;
  384. memset(&event, 0, sizeof(event));
  385. if (basic->CL == 1) /* Native LPAR mode */
  386. sample.cpumode = basic->P ? PERF_RECORD_MISC_USER
  387. : PERF_RECORD_MISC_KERNEL;
  388. else if (basic->CL == 2) /* Guest kernel/user space */
  389. sample.cpumode = basic->P ? PERF_RECORD_MISC_GUEST_USER
  390. : PERF_RECORD_MISC_GUEST_KERNEL;
  391. else if (basic->gpp || basic->prim_asn != 0xffff)
  392. /* Use heuristics on old hardware */
  393. sample.cpumode = basic->P ? PERF_RECORD_MISC_GUEST_USER
  394. : PERF_RECORD_MISC_GUEST_KERNEL;
  395. else
  396. sample.cpumode = basic->P ? PERF_RECORD_MISC_USER
  397. : PERF_RECORD_MISC_KERNEL;
  398. event.sample.header.type = PERF_RECORD_SAMPLE;
  399. event.sample.header.misc = sample.cpumode;
  400. event.sample.header.size = sizeof(struct perf_event_header);
  401. pr_debug4("%s pos:%#zx ip:%#" PRIx64 " P:%d CL:%d pid:%d.%d cpumode:%d cpu:%d\n",
  402. __func__, pos, sample.ip, basic->P, basic->CL, sample.pid,
  403. sample.tid, sample.cpumode, sample.cpu);
  404. if (perf_session__deliver_synth_event(sfq->sf->session, &event,
  405. &sample)) {
  406. pr_err("s390 Auxiliary Trace: failed to deliver event\n");
  407. return false;
  408. }
  409. return true;
  410. }
  411. static unsigned long long get_trailer_time(const unsigned char *buf)
  412. {
  413. struct hws_trailer_entry *te;
  414. unsigned long long aux_time;
  415. te = (struct hws_trailer_entry *)(buf + S390_CPUMSF_PAGESZ
  416. - sizeof(*te));
  417. if (!te->clock_base) /* TOD_CLOCK_BASE value missing */
  418. return 0;
  419. /* Correct calculation to convert time stamp in trailer entry to
  420. * nano seconds (taken from arch/s390 function tod_to_ns()).
  421. * TOD_CLOCK_BASE is stored in trailer entry member progusage2.
  422. */
  423. aux_time = trailer_timestamp(te) - te->progusage2;
  424. aux_time = (aux_time >> 9) * 125 + (((aux_time & 0x1ff) * 125) >> 9);
  425. return aux_time;
  426. }
  427. /* Process the data samples of a single queue. The first parameter is a
  428. * pointer to the queue, the second parameter is the time stamp. This
  429. * is the time stamp:
  430. * - of the event that triggered this processing.
  431. * - or the time stamp when the last proccesing of this queue stopped.
  432. * In this case it stopped at a 4KB page boundary and record the
  433. * position on where to continue processing on the next invocation
  434. * (see buffer->use_data and buffer->use_size).
  435. *
  436. * When this function returns the second parameter is updated to
  437. * reflect the time stamp of the last processed auxiliary data entry
  438. * (taken from the trailer entry of that page). The caller uses this
  439. * returned time stamp to record the last processed entry in this
  440. * queue.
  441. *
  442. * The function returns:
  443. * 0: Processing successful. The second parameter returns the
  444. * time stamp from the trailer entry until which position
  445. * processing took place. Subsequent calls resume from this
  446. * position.
  447. * <0: An error occurred during processing. The second parameter
  448. * returns the maximum time stamp.
  449. * >0: Done on this queue. The second parameter returns the
  450. * maximum time stamp.
  451. */
  452. static int s390_cpumsf_samples(struct s390_cpumsf_queue *sfq, u64 *ts)
  453. {
  454. struct s390_cpumsf *sf = sfq->sf;
  455. unsigned char *buf = sfq->buffer->use_data;
  456. size_t len = sfq->buffer->use_size;
  457. struct hws_basic_entry *basic;
  458. unsigned short bsdes, dsdes;
  459. size_t pos = 0;
  460. int err = 1;
  461. u64 aux_ts;
  462. if (!s390_cpumsf_validate(sf->machine_type, buf, len, &bsdes,
  463. &dsdes)) {
  464. *ts = ~0ULL;
  465. return -1;
  466. }
  467. /* Get trailer entry time stamp and check if entries in
  468. * this auxiliary page are ready for processing. If the
  469. * time stamp of the first entry is too high, whole buffer
  470. * can be skipped. In this case return time stamp.
  471. */
  472. aux_ts = get_trailer_time(buf);
  473. if (!aux_ts) {
  474. pr_err("[%#08" PRIx64 "] Invalid AUX trailer entry TOD clock base\n",
  475. sfq->buffer->data_offset);
  476. aux_ts = ~0ULL;
  477. goto out;
  478. }
  479. if (aux_ts > *ts) {
  480. *ts = aux_ts;
  481. return 0;
  482. }
  483. while (pos < len) {
  484. /* Handle Basic entry */
  485. basic = (struct hws_basic_entry *)(buf + pos);
  486. if (s390_cpumsf_make_event(pos, basic, sfq))
  487. pos += bsdes;
  488. else {
  489. err = -EBADF;
  490. goto out;
  491. }
  492. pos += dsdes; /* Skip diagnositic entry */
  493. /* Check for trailer entry */
  494. if (!s390_cpumsf_reached_trailer(bsdes + dsdes, pos)) {
  495. pos = (pos + S390_CPUMSF_PAGESZ)
  496. & ~(S390_CPUMSF_PAGESZ - 1);
  497. /* Check existence of next page */
  498. if (pos >= len)
  499. break;
  500. aux_ts = get_trailer_time(buf + pos);
  501. if (!aux_ts) {
  502. aux_ts = ~0ULL;
  503. goto out;
  504. }
  505. if (aux_ts > *ts) {
  506. *ts = aux_ts;
  507. sfq->buffer->use_data += pos;
  508. sfq->buffer->use_size -= pos;
  509. return 0;
  510. }
  511. }
  512. }
  513. out:
  514. *ts = aux_ts;
  515. sfq->buffer->use_size = 0;
  516. sfq->buffer->use_data = NULL;
  517. return err; /* Buffer completely scanned or error */
  518. }
  519. /* Run the s390 auxiliary trace decoder.
  520. * Select the queue buffer to operate on, the caller already selected
  521. * the proper queue, depending on second parameter 'ts'.
  522. * This is the time stamp until which the auxiliary entries should
  523. * be processed. This value is updated by called functions and
  524. * returned to the caller.
  525. *
  526. * Resume processing in the current buffer. If there is no buffer
  527. * get a new buffer from the queue and setup start position for
  528. * processing.
  529. * When a buffer is completely processed remove it from the queue
  530. * before returning.
  531. *
  532. * This function returns
  533. * 1: When the queue is empty. Second parameter will be set to
  534. * maximum time stamp.
  535. * 0: Normal processing done.
  536. * <0: Error during queue buffer setup. This causes the caller
  537. * to stop processing completely.
  538. */
  539. static int s390_cpumsf_run_decoder(struct s390_cpumsf_queue *sfq,
  540. u64 *ts)
  541. {
  542. struct auxtrace_buffer *buffer;
  543. struct auxtrace_queue *queue;
  544. int err;
  545. queue = &sfq->sf->queues.queue_array[sfq->queue_nr];
  546. /* Get buffer and last position in buffer to resume
  547. * decoding the auxiliary entries. One buffer might be large
  548. * and decoding might stop in between. This depends on the time
  549. * stamp of the trailer entry in each page of the auxiliary
  550. * data and the time stamp of the event triggering the decoding.
  551. */
  552. if (sfq->buffer == NULL) {
  553. sfq->buffer = buffer = auxtrace_buffer__next(queue,
  554. sfq->buffer);
  555. if (!buffer) {
  556. *ts = ~0ULL;
  557. return 1; /* Processing done on this queue */
  558. }
  559. /* Start with a new buffer on this queue */
  560. if (buffer->data) {
  561. buffer->use_size = buffer->size;
  562. buffer->use_data = buffer->data;
  563. }
  564. if (sfq->logfile) { /* Write into log file */
  565. size_t rc = fwrite(buffer->data, buffer->size, 1,
  566. sfq->logfile);
  567. if (rc != 1)
  568. pr_err("Failed to write auxiliary data\n");
  569. }
  570. } else
  571. buffer = sfq->buffer;
  572. if (!buffer->data) {
  573. int fd = perf_data__fd(sfq->sf->session->data);
  574. buffer->data = auxtrace_buffer__get_data(buffer, fd);
  575. if (!buffer->data)
  576. return -ENOMEM;
  577. buffer->use_size = buffer->size;
  578. buffer->use_data = buffer->data;
  579. if (sfq->logfile) { /* Write into log file */
  580. size_t rc = fwrite(buffer->data, buffer->size, 1,
  581. sfq->logfile);
  582. if (rc != 1)
  583. pr_err("Failed to write auxiliary data\n");
  584. }
  585. }
  586. pr_debug4("%s queue_nr:%d buffer:%" PRId64 " offset:%#" PRIx64 " size:%#zx rest:%#zx\n",
  587. __func__, sfq->queue_nr, buffer->buffer_nr, buffer->offset,
  588. buffer->size, buffer->use_size);
  589. err = s390_cpumsf_samples(sfq, ts);
  590. /* If non-zero, there is either an error (err < 0) or the buffer is
  591. * completely done (err > 0). The error is unrecoverable, usually
  592. * some descriptors could not be read successfully, so continue with
  593. * the next buffer.
  594. * In both cases the parameter 'ts' has been updated.
  595. */
  596. if (err) {
  597. sfq->buffer = NULL;
  598. list_del(&buffer->list);
  599. auxtrace_buffer__free(buffer);
  600. if (err > 0) /* Buffer done, no error */
  601. err = 0;
  602. }
  603. return err;
  604. }
  605. static struct s390_cpumsf_queue *
  606. s390_cpumsf_alloc_queue(struct s390_cpumsf *sf, unsigned int queue_nr)
  607. {
  608. struct s390_cpumsf_queue *sfq;
  609. sfq = zalloc(sizeof(struct s390_cpumsf_queue));
  610. if (sfq == NULL)
  611. return NULL;
  612. sfq->sf = sf;
  613. sfq->queue_nr = queue_nr;
  614. sfq->cpu = -1;
  615. if (sf->use_logfile) {
  616. char *name;
  617. int rc;
  618. rc = (sf->logdir)
  619. ? asprintf(&name, "%s/aux.smp.%02x",
  620. sf->logdir, queue_nr)
  621. : asprintf(&name, "aux.smp.%02x", queue_nr);
  622. if (rc > 0)
  623. sfq->logfile = fopen(name, "w");
  624. if (sfq->logfile == NULL) {
  625. pr_err("Failed to open auxiliary log file %s,"
  626. "continue...\n", name);
  627. sf->use_logfile = false;
  628. }
  629. free(name);
  630. }
  631. return sfq;
  632. }
  633. static int s390_cpumsf_setup_queue(struct s390_cpumsf *sf,
  634. struct auxtrace_queue *queue,
  635. unsigned int queue_nr, u64 ts)
  636. {
  637. struct s390_cpumsf_queue *sfq = queue->priv;
  638. if (list_empty(&queue->head))
  639. return 0;
  640. if (sfq == NULL) {
  641. sfq = s390_cpumsf_alloc_queue(sf, queue_nr);
  642. if (!sfq)
  643. return -ENOMEM;
  644. queue->priv = sfq;
  645. if (queue->cpu != -1)
  646. sfq->cpu = queue->cpu;
  647. }
  648. return auxtrace_heap__add(&sf->heap, queue_nr, ts);
  649. }
  650. static int s390_cpumsf_setup_queues(struct s390_cpumsf *sf, u64 ts)
  651. {
  652. unsigned int i;
  653. int ret = 0;
  654. for (i = 0; i < sf->queues.nr_queues; i++) {
  655. ret = s390_cpumsf_setup_queue(sf, &sf->queues.queue_array[i],
  656. i, ts);
  657. if (ret)
  658. break;
  659. }
  660. return ret;
  661. }
  662. static int s390_cpumsf_update_queues(struct s390_cpumsf *sf, u64 ts)
  663. {
  664. if (!sf->queues.new_data)
  665. return 0;
  666. sf->queues.new_data = false;
  667. return s390_cpumsf_setup_queues(sf, ts);
  668. }
  669. static int s390_cpumsf_process_queues(struct s390_cpumsf *sf, u64 timestamp)
  670. {
  671. unsigned int queue_nr;
  672. u64 ts;
  673. int ret;
  674. while (1) {
  675. struct auxtrace_queue *queue;
  676. struct s390_cpumsf_queue *sfq;
  677. if (!sf->heap.heap_cnt)
  678. return 0;
  679. if (sf->heap.heap_array[0].ordinal >= timestamp)
  680. return 0;
  681. queue_nr = sf->heap.heap_array[0].queue_nr;
  682. queue = &sf->queues.queue_array[queue_nr];
  683. sfq = queue->priv;
  684. auxtrace_heap__pop(&sf->heap);
  685. if (sf->heap.heap_cnt) {
  686. ts = sf->heap.heap_array[0].ordinal + 1;
  687. if (ts > timestamp)
  688. ts = timestamp;
  689. } else {
  690. ts = timestamp;
  691. }
  692. ret = s390_cpumsf_run_decoder(sfq, &ts);
  693. if (ret < 0) {
  694. auxtrace_heap__add(&sf->heap, queue_nr, ts);
  695. return ret;
  696. }
  697. if (!ret) {
  698. ret = auxtrace_heap__add(&sf->heap, queue_nr, ts);
  699. if (ret < 0)
  700. return ret;
  701. }
  702. }
  703. return 0;
  704. }
  705. static int s390_cpumsf_synth_error(struct s390_cpumsf *sf, int code, int cpu,
  706. pid_t pid, pid_t tid, u64 ip)
  707. {
  708. char msg[MAX_AUXTRACE_ERROR_MSG];
  709. union perf_event event;
  710. int err;
  711. strncpy(msg, "Lost Auxiliary Trace Buffer", sizeof(msg) - 1);
  712. auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
  713. code, cpu, pid, tid, ip, msg);
  714. err = perf_session__deliver_synth_event(sf->session, &event, NULL);
  715. if (err)
  716. pr_err("s390 Auxiliary Trace: failed to deliver error event,"
  717. "error %d\n", err);
  718. return err;
  719. }
  720. static int s390_cpumsf_lost(struct s390_cpumsf *sf, struct perf_sample *sample)
  721. {
  722. return s390_cpumsf_synth_error(sf, 1, sample->cpu,
  723. sample->pid, sample->tid, 0);
  724. }
  725. static int
  726. s390_cpumsf_process_event(struct perf_session *session __maybe_unused,
  727. union perf_event *event,
  728. struct perf_sample *sample,
  729. struct perf_tool *tool)
  730. {
  731. struct s390_cpumsf *sf = container_of(session->auxtrace,
  732. struct s390_cpumsf,
  733. auxtrace);
  734. u64 timestamp = sample->time;
  735. int err = 0;
  736. if (dump_trace)
  737. return 0;
  738. if (!tool->ordered_events) {
  739. pr_err("s390 Auxiliary Trace requires ordered events\n");
  740. return -EINVAL;
  741. }
  742. if (event->header.type == PERF_RECORD_AUX &&
  743. event->aux.flags & PERF_AUX_FLAG_TRUNCATED)
  744. return s390_cpumsf_lost(sf, sample);
  745. if (timestamp) {
  746. err = s390_cpumsf_update_queues(sf, timestamp);
  747. if (!err)
  748. err = s390_cpumsf_process_queues(sf, timestamp);
  749. }
  750. return err;
  751. }
  752. struct s390_cpumsf_synth {
  753. struct perf_tool cpumsf_tool;
  754. struct perf_session *session;
  755. };
  756. static int
  757. s390_cpumsf_process_auxtrace_event(struct perf_session *session,
  758. union perf_event *event __maybe_unused,
  759. struct perf_tool *tool __maybe_unused)
  760. {
  761. struct s390_cpumsf *sf = container_of(session->auxtrace,
  762. struct s390_cpumsf,
  763. auxtrace);
  764. int fd = perf_data__fd(session->data);
  765. struct auxtrace_buffer *buffer;
  766. off_t data_offset;
  767. int err;
  768. if (sf->data_queued)
  769. return 0;
  770. if (perf_data__is_pipe(session->data)) {
  771. data_offset = 0;
  772. } else {
  773. data_offset = lseek(fd, 0, SEEK_CUR);
  774. if (data_offset == -1)
  775. return -errno;
  776. }
  777. err = auxtrace_queues__add_event(&sf->queues, session, event,
  778. data_offset, &buffer);
  779. if (err)
  780. return err;
  781. /* Dump here after copying piped trace out of the pipe */
  782. if (dump_trace) {
  783. if (auxtrace_buffer__get_data(buffer, fd)) {
  784. s390_cpumsf_dump_event(sf, buffer->data,
  785. buffer->size);
  786. auxtrace_buffer__put_data(buffer);
  787. }
  788. }
  789. return 0;
  790. }
  791. static void s390_cpumsf_free_events(struct perf_session *session __maybe_unused)
  792. {
  793. }
  794. static int s390_cpumsf_flush(struct perf_session *session __maybe_unused,
  795. struct perf_tool *tool __maybe_unused)
  796. {
  797. return 0;
  798. }
  799. static void s390_cpumsf_free_queues(struct perf_session *session)
  800. {
  801. struct s390_cpumsf *sf = container_of(session->auxtrace,
  802. struct s390_cpumsf,
  803. auxtrace);
  804. struct auxtrace_queues *queues = &sf->queues;
  805. unsigned int i;
  806. for (i = 0; i < queues->nr_queues; i++) {
  807. struct s390_cpumsf_queue *sfq = (struct s390_cpumsf_queue *)
  808. queues->queue_array[i].priv;
  809. if (sfq != NULL && sfq->logfile) {
  810. fclose(sfq->logfile);
  811. sfq->logfile = NULL;
  812. }
  813. zfree(&queues->queue_array[i].priv);
  814. }
  815. auxtrace_queues__free(queues);
  816. }
  817. static void s390_cpumsf_free(struct perf_session *session)
  818. {
  819. struct s390_cpumsf *sf = container_of(session->auxtrace,
  820. struct s390_cpumsf,
  821. auxtrace);
  822. auxtrace_heap__free(&sf->heap);
  823. s390_cpumsf_free_queues(session);
  824. session->auxtrace = NULL;
  825. free(sf->logdir);
  826. free(sf);
  827. }
  /*
   * Extract the machine type from a CPU identification string.
   *
   * The string is scanned as "<text>,<number>...": everything up to the
   * first comma is skipped and the unsigned decimal number directly after
   * that comma is returned.
   *
   * Returns 0 when @cpuid is NULL or does not match that layout.
   */
  static int s390_cpumsf_get_type(const char *cpuid)
  {
  	/* %u stores through an unsigned int pointer, so this must be unsigned. */
  	unsigned int family = 0;

  	/* Passing NULL to sscanf() is undefined; treat it as "unknown". */
  	if (cpuid == NULL)
  		return 0;

  	if (sscanf(cpuid, "%*[^,],%u", &family) != 1)
  		return 0;

  	return (int)family;
  }
  834. /* Check itrace options set on perf report command.
  835. * Return true, if none are set or all options specified can be
  836. * handled on s390 (currently only option 'd' for logging.
  837. * Return false otherwise.
  838. */
  839. static bool check_auxtrace_itrace(struct itrace_synth_opts *itops)
  840. {
  841. bool ison = false;
  842. if (!itops || !itops->set)
  843. return true;
  844. ison = itops->inject || itops->instructions || itops->branches ||
  845. itops->transactions || itops->ptwrites ||
  846. itops->pwr_events || itops->errors ||
  847. itops->dont_decode || itops->calls || itops->returns ||
  848. itops->callchain || itops->thread_stack ||
  849. itops->last_branch;
  850. if (!ison)
  851. return true;
  852. pr_err("Unsupported --itrace options specified\n");
  853. return false;
  854. }
  855. /* Check for AUXTRACE dump directory if it is needed.
  856. * On failure print an error message but continue.
  857. * Return 0 on wrong keyword in config file and 1 otherwise.
  858. */
  859. static int s390_cpumsf__config(const char *var, const char *value, void *cb)
  860. {
  861. struct s390_cpumsf *sf = cb;
  862. struct stat stbuf;
  863. int rc;
  864. if (strcmp(var, "auxtrace.dumpdir"))
  865. return 0;
  866. sf->logdir = strdup(value);
  867. if (sf->logdir == NULL) {
  868. pr_err("Failed to find auxtrace log directory %s,"
  869. " continue with current directory...\n", value);
  870. return 1;
  871. }
  872. rc = stat(sf->logdir, &stbuf);
  873. if (rc == -1 || !S_ISDIR(stbuf.st_mode)) {
  874. pr_err("Missing auxtrace log directory %s,"
  875. " continue with current directory...\n", value);
  876. free(sf->logdir);
  877. sf->logdir = NULL;
  878. }
  879. return 1;
  880. }
  881. int s390_cpumsf_process_auxtrace_info(union perf_event *event,
  882. struct perf_session *session)
  883. {
  884. struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info;
  885. struct s390_cpumsf *sf;
  886. int err;
  887. if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event))
  888. return -EINVAL;
  889. sf = zalloc(sizeof(struct s390_cpumsf));
  890. if (sf == NULL)
  891. return -ENOMEM;
  892. if (!check_auxtrace_itrace(session->itrace_synth_opts)) {
  893. err = -EINVAL;
  894. goto err_free;
  895. }
  896. sf->use_logfile = session->itrace_synth_opts->log;
  897. if (sf->use_logfile)
  898. perf_config(s390_cpumsf__config, sf);
  899. err = auxtrace_queues__init(&sf->queues);
  900. if (err)
  901. goto err_free;
  902. sf->session = session;
  903. sf->machine = &session->machines.host; /* No kvm support */
  904. sf->auxtrace_type = auxtrace_info->type;
  905. sf->pmu_type = PERF_TYPE_RAW;
  906. sf->machine_type = s390_cpumsf_get_type(session->evlist->env->cpuid);
  907. sf->auxtrace.process_event = s390_cpumsf_process_event;
  908. sf->auxtrace.process_auxtrace_event = s390_cpumsf_process_auxtrace_event;
  909. sf->auxtrace.flush_events = s390_cpumsf_flush;
  910. sf->auxtrace.free_events = s390_cpumsf_free_events;
  911. sf->auxtrace.free = s390_cpumsf_free;
  912. session->auxtrace = &sf->auxtrace;
  913. if (dump_trace)
  914. return 0;
  915. err = auxtrace_queues__process_index(&sf->queues, session);
  916. if (err)
  917. goto err_free_queues;
  918. if (sf->queues.populated)
  919. sf->data_queued = true;
  920. return 0;
  921. err_free_queues:
  922. auxtrace_queues__free(&sf->queues);
  923. session->auxtrace = NULL;
  924. err_free:
  925. free(sf->logdir);
  926. free(sf);
  927. return err;
  928. }