builtin-stat.c 63 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566
  1. /*
  2. * builtin-stat.c
  3. *
  4. * Builtin stat command: Give a precise performance counters summary
  5. * overview about any workload, CPU or specific PID.
  6. *
  7. * Sample output:
  8. $ perf stat ./hackbench 10
  9. Time: 0.118
  10. Performance counter stats for './hackbench 10':
  11. 1708.761321 task-clock # 11.037 CPUs utilized
  12. 41,190 context-switches # 0.024 M/sec
  13. 6,735 CPU-migrations # 0.004 M/sec
  14. 17,318 page-faults # 0.010 M/sec
  15. 5,205,202,243 cycles # 3.046 GHz
  16. 3,856,436,920 stalled-cycles-frontend # 74.09% frontend cycles idle
  17. 1,600,790,871 stalled-cycles-backend # 30.75% backend cycles idle
  18. 2,603,501,247 instructions # 0.50 insns per cycle
  19. # 1.48 stalled cycles per insn
  20. 484,357,498 branches # 283.455 M/sec
  21. 6,388,934 branch-misses # 1.32% of all branches
  22. 0.154822978 seconds time elapsed
  23. *
  24. * Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
  25. *
  26. * Improvements and fixes by:
  27. *
  28. * Arjan van de Ven <arjan@linux.intel.com>
  29. * Yanmin Zhang <yanmin.zhang@intel.com>
  30. * Wu Fengguang <fengguang.wu@intel.com>
  31. * Mike Galbraith <efault@gmx.de>
  32. * Paul Mackerras <paulus@samba.org>
  33. * Jaswinder Singh Rajput <jaswinder@kernel.org>
  34. *
  35. * Released under the GPL v2. (and only v2, not any later version)
  36. */
  37. #include "perf.h"
  38. #include "builtin.h"
  39. #include "util/cgroup.h"
  40. #include "util/util.h"
  41. #include <subcmd/parse-options.h>
  42. #include "util/parse-events.h"
  43. #include "util/pmu.h"
  44. #include "util/event.h"
  45. #include "util/evlist.h"
  46. #include "util/evsel.h"
  47. #include "util/debug.h"
  48. #include "util/color.h"
  49. #include "util/stat.h"
  50. #include "util/header.h"
  51. #include "util/cpumap.h"
  52. #include "util/thread.h"
  53. #include "util/thread_map.h"
  54. #include "util/counts.h"
  55. #include "util/group.h"
  56. #include "util/session.h"
  57. #include "util/tool.h"
  58. #include "util/group.h"
  59. #include "asm/bug.h"
  60. #include <api/fs/fs.h>
  61. #include <stdlib.h>
  62. #include <sys/prctl.h>
  63. #include <locale.h>
  64. #include <math.h>
  /* String constants; the code that consumes them lies outside this part of the file. */
  #define DEFAULT_SEPARATOR    " "
  #define CNTR_NOT_SUPPORTED   "<not supported>"
  #define CNTR_NOT_COUNTED     "<not counted>"

  /* Forward declaration: process_interval() calls this before its definition appears. */
  static void print_counters(struct timespec *ts, int argc, const char **argv);
  /*
   * Event specification used by default for "perf stat -T": task-clock
   * followed by one brace-delimited list of transaction-related events.
   * The adjacent literals concatenate into a single string.
   */
  static const char *transaction_attrs =
      "task-clock,"
      "{"
      "instructions,"
      "cycles,"
      "cpu/cycles-t/,"
      "cpu/tx-start/,"
      "cpu/el-start/,"
      "cpu/cycles-ct/"
      "}";
  /*
   * Reduced variant of the -T event specification for CPUs that do not
   * provide every event: the el-start and cycles-ct events are left out.
   */
  static const char *transaction_limited_attrs =
      "task-clock,"
      "{"
      "instructions,"
      "cycles,"
      "cpu/cycles-t/,"
      "cpu/tx-start/"
      "}";
  /* NULL-terminated list of topdown event names. */
  static const char *topdown_attrs[] = {
      "topdown-total-slots",
      "topdown-slots-retired",
      "topdown-recovery-bubbles",
      "topdown-fetch-bubbles",
      "topdown-slots-issued",
      NULL, /* terminator */
  };
  99. static struct perf_evlist *evsel_list;
  100. static struct target target = {
  101. .uid = UINT_MAX,
  102. };
  103. typedef int (*aggr_get_id_t)(struct cpu_map *m, int cpu);
  104. static int run_count = 1;
  105. static bool no_inherit = false;
  106. static volatile pid_t child_pid = -1;
  107. static bool null_run = false;
  108. static int detailed_run = 0;
  109. static bool transaction_run;
  110. static bool topdown_run = false;
  111. static bool big_num = true;
  112. static int big_num_opt = -1;
  113. static const char *csv_sep = NULL;
  114. static bool csv_output = false;
  115. static bool group = false;
  116. static const char *pre_cmd = NULL;
  117. static const char *post_cmd = NULL;
  118. static bool sync_run = false;
  119. static unsigned int initial_delay = 0;
  120. static unsigned int unit_width = 4; /* strlen("unit") */
  121. static bool forever = false;
  122. static bool metric_only = false;
  123. static bool force_metric_only = false;
  124. static struct timespec ref_time;
  125. static struct cpu_map *aggr_map;
  126. static aggr_get_id_t aggr_get_id;
  127. static bool append_file;
  128. static const char *output_name;
  129. static int output_fd;
  130. struct perf_stat {
  131. bool record;
  132. struct perf_data_file file;
  133. struct perf_session *session;
  134. u64 bytes_written;
  135. struct perf_tool tool;
  136. bool maps_allocated;
  137. struct cpu_map *cpus;
  138. struct thread_map *threads;
  139. enum aggr_mode aggr_mode;
  140. };
  141. static struct perf_stat perf_stat;
  142. #define STAT_RECORD perf_stat.record
  143. static volatile int done = 0;
  144. static struct perf_stat_config stat_config = {
  145. .aggr_mode = AGGR_GLOBAL,
  146. .scale = true,
  147. };
  /*
   * Compute *r = *a - *b.
   *
   * When a's nanosecond field is smaller than b's, one second is borrowed
   * so that the resulting nanosecond field is not negative.
   */
  static inline void diff_timespec(struct timespec *r, struct timespec *a,
                                   struct timespec *b)
  {
      long nsec = a->tv_nsec - b->tv_nsec;

      r->tv_sec = a->tv_sec - b->tv_sec;
      if (nsec < 0) {
          nsec += 1000000000L;
          r->tv_sec--;
      }
      r->tv_nsec = nsec;
  }
  159. static void perf_stat__reset_stats(void)
  160. {
  161. perf_evlist__reset_stats(evsel_list);
  162. perf_stat__reset_shadow_stats();
  163. }
  164. static int create_perf_stat_counter(struct perf_evsel *evsel)
  165. {
  166. struct perf_event_attr *attr = &evsel->attr;
  167. if (stat_config.scale)
  168. attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
  169. PERF_FORMAT_TOTAL_TIME_RUNNING;
  170. attr->inherit = !no_inherit;
  171. /*
  172. * Some events get initialized with sample_(period/type) set,
  173. * like tracepoints. Clear it up for counting.
  174. */
  175. attr->sample_period = 0;
  176. /*
  177. * But set sample_type to PERF_SAMPLE_IDENTIFIER, which should be harmless
  178. * while avoiding that older tools show confusing messages.
  179. *
  180. * However for pipe sessions we need to keep it zero,
  181. * because script's perf_evsel__check_attr is triggered
  182. * by attr->sample_type != 0, and we can't run it on
  183. * stat sessions.
  184. */
  185. if (!(STAT_RECORD && perf_stat.file.is_pipe))
  186. attr->sample_type = PERF_SAMPLE_IDENTIFIER;
  187. /*
  188. * Disabling all counters initially, they will be enabled
  189. * either manually by us or by kernel via enable_on_exec
  190. * set later.
  191. */
  192. if (perf_evsel__is_group_leader(evsel)) {
  193. attr->disabled = 1;
  194. /*
  195. * In case of initial_delay we enable tracee
  196. * events manually.
  197. */
  198. if (target__none(&target) && !initial_delay)
  199. attr->enable_on_exec = 1;
  200. }
  201. if (target__has_cpu(&target))
  202. return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel));
  203. return perf_evsel__open_per_thread(evsel, evsel_list->threads);
  204. }
  205. /*
  206. * Does the counter have nsecs as a unit?
  207. */
  208. static inline int nsec_counter(struct perf_evsel *evsel)
  209. {
  210. if (perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) ||
  211. perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
  212. return 1;
  213. return 0;
  214. }
  215. static int process_synthesized_event(struct perf_tool *tool __maybe_unused,
  216. union perf_event *event,
  217. struct perf_sample *sample __maybe_unused,
  218. struct machine *machine __maybe_unused)
  219. {
  220. if (perf_data_file__write(&perf_stat.file, event, event->header.size) < 0) {
  221. pr_err("failed to write perf data, error: %m\n");
  222. return -1;
  223. }
  224. perf_stat.bytes_written += event->header.size;
  225. return 0;
  226. }
  227. static int write_stat_round_event(u64 tm, u64 type)
  228. {
  229. return perf_event__synthesize_stat_round(NULL, tm, type,
  230. process_synthesized_event,
  231. NULL);
  232. }
  233. #define WRITE_STAT_ROUND_EVENT(time, interval) \
  234. write_stat_round_event(time, PERF_STAT_ROUND_TYPE__ ## interval)
  235. #define SID(e, x, y) xyarray__entry(e->sample_id, x, y)
  236. static int
  237. perf_evsel__write_stat_event(struct perf_evsel *counter, u32 cpu, u32 thread,
  238. struct perf_counts_values *count)
  239. {
  240. struct perf_sample_id *sid = SID(counter, cpu, thread);
  241. return perf_event__synthesize_stat(NULL, cpu, thread, sid->id, count,
  242. process_synthesized_event, NULL);
  243. }
  244. /*
  245. * Read out the results of a single counter:
  246. * do not aggregate counts across CPUs in system-wide mode
  247. */
  248. static int read_counter(struct perf_evsel *counter)
  249. {
  250. int nthreads = thread_map__nr(evsel_list->threads);
  251. int ncpus = perf_evsel__nr_cpus(counter);
  252. int cpu, thread;
  253. if (!counter->supported)
  254. return -ENOENT;
  255. if (counter->system_wide)
  256. nthreads = 1;
  257. for (thread = 0; thread < nthreads; thread++) {
  258. for (cpu = 0; cpu < ncpus; cpu++) {
  259. struct perf_counts_values *count;
  260. count = perf_counts(counter->counts, cpu, thread);
  261. if (perf_evsel__read(counter, cpu, thread, count))
  262. return -1;
  263. if (STAT_RECORD) {
  264. if (perf_evsel__write_stat_event(counter, cpu, thread, count)) {
  265. pr_err("failed to write stat event\n");
  266. return -1;
  267. }
  268. }
  269. if (verbose > 1) {
  270. fprintf(stat_config.output,
  271. "%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
  272. perf_evsel__name(counter),
  273. cpu,
  274. count->val, count->ena, count->run);
  275. }
  276. }
  277. }
  278. return 0;
  279. }
  280. static void read_counters(bool close_counters)
  281. {
  282. struct perf_evsel *counter;
  283. evlist__for_each_entry(evsel_list, counter) {
  284. if (read_counter(counter))
  285. pr_debug("failed to read counter %s\n", counter->name);
  286. if (perf_stat_process_counter(&stat_config, counter))
  287. pr_warning("failed to process counter %s\n", counter->name);
  288. if (close_counters) {
  289. perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter),
  290. thread_map__nr(evsel_list->threads));
  291. }
  292. }
  293. }
  294. static void process_interval(void)
  295. {
  296. struct timespec ts, rs;
  297. read_counters(false);
  298. clock_gettime(CLOCK_MONOTONIC, &ts);
  299. diff_timespec(&rs, &ts, &ref_time);
  300. if (STAT_RECORD) {
  301. if (WRITE_STAT_ROUND_EVENT(rs.tv_sec * NSECS_PER_SEC + rs.tv_nsec, INTERVAL))
  302. pr_err("failed to write stat round event\n");
  303. }
  304. print_counters(&rs, 0, NULL);
  305. }
  306. static void enable_counters(void)
  307. {
  308. if (initial_delay)
  309. usleep(initial_delay * 1000);
  310. /*
  311. * We need to enable counters only if:
  312. * - we don't have tracee (attaching to task or cpu)
  313. * - we have initial delay configured
  314. */
  315. if (!target__none(&target) || initial_delay)
  316. perf_evlist__enable(evsel_list);
  317. }
  318. static volatile int workload_exec_errno;
  319. /*
  320. * perf_evlist__prepare_workload will send a SIGUSR1
  321. * if the fork fails, since we asked by setting its
  322. * want_signal to true.
  323. */
  324. static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *info,
  325. void *ucontext __maybe_unused)
  326. {
  327. workload_exec_errno = info->si_value.sival_int;
  328. }
  329. static bool has_unit(struct perf_evsel *counter)
  330. {
  331. return counter->unit && *counter->unit;
  332. }
  333. static bool has_scale(struct perf_evsel *counter)
  334. {
  335. return counter->scale != 1;
  336. }
  337. static int perf_stat_synthesize_config(bool is_pipe)
  338. {
  339. struct perf_evsel *counter;
  340. int err;
  341. if (is_pipe) {
  342. err = perf_event__synthesize_attrs(NULL, perf_stat.session,
  343. process_synthesized_event);
  344. if (err < 0) {
  345. pr_err("Couldn't synthesize attrs.\n");
  346. return err;
  347. }
  348. }
  349. /*
  350. * Synthesize other events stuff not carried within
  351. * attr event - unit, scale, name
  352. */
  353. evlist__for_each_entry(evsel_list, counter) {
  354. if (!counter->supported)
  355. continue;
  356. /*
  357. * Synthesize unit and scale only if it's defined.
  358. */
  359. if (has_unit(counter)) {
  360. err = perf_event__synthesize_event_update_unit(NULL, counter, process_synthesized_event);
  361. if (err < 0) {
  362. pr_err("Couldn't synthesize evsel unit.\n");
  363. return err;
  364. }
  365. }
  366. if (has_scale(counter)) {
  367. err = perf_event__synthesize_event_update_scale(NULL, counter, process_synthesized_event);
  368. if (err < 0) {
  369. pr_err("Couldn't synthesize evsel scale.\n");
  370. return err;
  371. }
  372. }
  373. if (counter->own_cpus) {
  374. err = perf_event__synthesize_event_update_cpus(NULL, counter, process_synthesized_event);
  375. if (err < 0) {
  376. pr_err("Couldn't synthesize evsel scale.\n");
  377. return err;
  378. }
  379. }
  380. /*
  381. * Name is needed only for pipe output,
  382. * perf.data carries event names.
  383. */
  384. if (is_pipe) {
  385. err = perf_event__synthesize_event_update_name(NULL, counter, process_synthesized_event);
  386. if (err < 0) {
  387. pr_err("Couldn't synthesize evsel name.\n");
  388. return err;
  389. }
  390. }
  391. }
  392. err = perf_event__synthesize_thread_map2(NULL, evsel_list->threads,
  393. process_synthesized_event,
  394. NULL);
  395. if (err < 0) {
  396. pr_err("Couldn't synthesize thread map.\n");
  397. return err;
  398. }
  399. err = perf_event__synthesize_cpu_map(NULL, evsel_list->cpus,
  400. process_synthesized_event, NULL);
  401. if (err < 0) {
  402. pr_err("Couldn't synthesize thread map.\n");
  403. return err;
  404. }
  405. err = perf_event__synthesize_stat_config(NULL, &stat_config,
  406. process_synthesized_event, NULL);
  407. if (err < 0) {
  408. pr_err("Couldn't synthesize config.\n");
  409. return err;
  410. }
  411. return 0;
  412. }
  413. #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
  414. static int __store_counter_ids(struct perf_evsel *counter,
  415. struct cpu_map *cpus,
  416. struct thread_map *threads)
  417. {
  418. int cpu, thread;
  419. for (cpu = 0; cpu < cpus->nr; cpu++) {
  420. for (thread = 0; thread < threads->nr; thread++) {
  421. int fd = FD(counter, cpu, thread);
  422. if (perf_evlist__id_add_fd(evsel_list, counter,
  423. cpu, thread, fd) < 0)
  424. return -1;
  425. }
  426. }
  427. return 0;
  428. }
  429. static int store_counter_ids(struct perf_evsel *counter)
  430. {
  431. struct cpu_map *cpus = counter->cpus;
  432. struct thread_map *threads = counter->threads;
  433. if (perf_evsel__alloc_id(counter, cpus->nr, threads->nr))
  434. return -ENOMEM;
  435. return __store_counter_ids(counter, cpus, threads);
  436. }
  437. static int __run_perf_stat(int argc, const char **argv)
  438. {
  439. int interval = stat_config.interval;
  440. char msg[512];
  441. unsigned long long t0, t1;
  442. struct perf_evsel *counter;
  443. struct timespec ts;
  444. size_t l;
  445. int status = 0;
  446. const bool forks = (argc > 0);
  447. bool is_pipe = STAT_RECORD ? perf_stat.file.is_pipe : false;
  448. if (interval) {
  449. ts.tv_sec = interval / 1000;
  450. ts.tv_nsec = (interval % 1000) * 1000000;
  451. } else {
  452. ts.tv_sec = 1;
  453. ts.tv_nsec = 0;
  454. }
  455. if (forks) {
  456. if (perf_evlist__prepare_workload(evsel_list, &target, argv, is_pipe,
  457. workload_exec_failed_signal) < 0) {
  458. perror("failed to prepare workload");
  459. return -1;
  460. }
  461. child_pid = evsel_list->workload.pid;
  462. }
  463. if (group)
  464. perf_evlist__set_leader(evsel_list);
  465. evlist__for_each_entry(evsel_list, counter) {
  466. try_again:
  467. if (create_perf_stat_counter(counter) < 0) {
  468. /*
  469. * PPC returns ENXIO for HW counters until 2.6.37
  470. * (behavior changed with commit b0a873e).
  471. */
  472. if (errno == EINVAL || errno == ENOSYS ||
  473. errno == ENOENT || errno == EOPNOTSUPP ||
  474. errno == ENXIO) {
  475. if (verbose)
  476. ui__warning("%s event is not supported by the kernel.\n",
  477. perf_evsel__name(counter));
  478. counter->supported = false;
  479. if ((counter->leader != counter) ||
  480. !(counter->leader->nr_members > 1))
  481. continue;
  482. } else if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) {
  483. if (verbose)
  484. ui__warning("%s\n", msg);
  485. goto try_again;
  486. }
  487. perf_evsel__open_strerror(counter, &target,
  488. errno, msg, sizeof(msg));
  489. ui__error("%s\n", msg);
  490. if (child_pid != -1)
  491. kill(child_pid, SIGTERM);
  492. return -1;
  493. }
  494. counter->supported = true;
  495. l = strlen(counter->unit);
  496. if (l > unit_width)
  497. unit_width = l;
  498. if (STAT_RECORD && store_counter_ids(counter))
  499. return -1;
  500. }
  501. if (perf_evlist__apply_filters(evsel_list, &counter)) {
  502. error("failed to set filter \"%s\" on event %s with %d (%s)\n",
  503. counter->filter, perf_evsel__name(counter), errno,
  504. str_error_r(errno, msg, sizeof(msg)));
  505. return -1;
  506. }
  507. if (STAT_RECORD) {
  508. int err, fd = perf_data_file__fd(&perf_stat.file);
  509. if (is_pipe) {
  510. err = perf_header__write_pipe(perf_data_file__fd(&perf_stat.file));
  511. } else {
  512. err = perf_session__write_header(perf_stat.session, evsel_list,
  513. fd, false);
  514. }
  515. if (err < 0)
  516. return err;
  517. err = perf_stat_synthesize_config(is_pipe);
  518. if (err < 0)
  519. return err;
  520. }
  521. /*
  522. * Enable counters and exec the command:
  523. */
  524. t0 = rdclock();
  525. clock_gettime(CLOCK_MONOTONIC, &ref_time);
  526. if (forks) {
  527. perf_evlist__start_workload(evsel_list);
  528. enable_counters();
  529. if (interval) {
  530. while (!waitpid(child_pid, &status, WNOHANG)) {
  531. nanosleep(&ts, NULL);
  532. process_interval();
  533. }
  534. }
  535. wait(&status);
  536. if (workload_exec_errno) {
  537. const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
  538. pr_err("Workload failed: %s\n", emsg);
  539. return -1;
  540. }
  541. if (WIFSIGNALED(status))
  542. psignal(WTERMSIG(status), argv[0]);
  543. } else {
  544. enable_counters();
  545. while (!done) {
  546. nanosleep(&ts, NULL);
  547. if (interval)
  548. process_interval();
  549. }
  550. }
  551. t1 = rdclock();
  552. update_stats(&walltime_nsecs_stats, t1 - t0);
  553. read_counters(true);
  554. return WEXITSTATUS(status);
  555. }
  556. static int run_perf_stat(int argc, const char **argv)
  557. {
  558. int ret;
  559. if (pre_cmd) {
  560. ret = system(pre_cmd);
  561. if (ret)
  562. return ret;
  563. }
  564. if (sync_run)
  565. sync();
  566. ret = __run_perf_stat(argc, argv);
  567. if (ret)
  568. return ret;
  569. if (post_cmd) {
  570. ret = system(post_cmd);
  571. if (ret)
  572. return ret;
  573. }
  574. return ret;
  575. }
  576. static void print_running(u64 run, u64 ena)
  577. {
  578. if (csv_output) {
  579. fprintf(stat_config.output, "%s%" PRIu64 "%s%.2f",
  580. csv_sep,
  581. run,
  582. csv_sep,
  583. ena ? 100.0 * run / ena : 100.0);
  584. } else if (run != ena) {
  585. fprintf(stat_config.output, " (%.2f%%)", 100.0 * run / ena);
  586. }
  587. }
  588. static void print_noise_pct(double total, double avg)
  589. {
  590. double pct = rel_stddev_stats(total, avg);
  591. if (csv_output)
  592. fprintf(stat_config.output, "%s%.2f%%", csv_sep, pct);
  593. else if (pct)
  594. fprintf(stat_config.output, " ( +-%6.2f%% )", pct);
  595. }
  596. static void print_noise(struct perf_evsel *evsel, double avg)
  597. {
  598. struct perf_stat_evsel *ps;
  599. if (run_count == 1)
  600. return;
  601. ps = evsel->priv;
  602. print_noise_pct(stddev_stats(&ps->res_stats[0]), avg);
  603. }
  604. static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
  605. {
  606. switch (stat_config.aggr_mode) {
  607. case AGGR_CORE:
  608. fprintf(stat_config.output, "S%d-C%*d%s%*d%s",
  609. cpu_map__id_to_socket(id),
  610. csv_output ? 0 : -8,
  611. cpu_map__id_to_cpu(id),
  612. csv_sep,
  613. csv_output ? 0 : 4,
  614. nr,
  615. csv_sep);
  616. break;
  617. case AGGR_SOCKET:
  618. fprintf(stat_config.output, "S%*d%s%*d%s",
  619. csv_output ? 0 : -5,
  620. id,
  621. csv_sep,
  622. csv_output ? 0 : 4,
  623. nr,
  624. csv_sep);
  625. break;
  626. case AGGR_NONE:
  627. fprintf(stat_config.output, "CPU%*d%s",
  628. csv_output ? 0 : -4,
  629. perf_evsel__cpus(evsel)->map[id], csv_sep);
  630. break;
  631. case AGGR_THREAD:
  632. fprintf(stat_config.output, "%*s-%*d%s",
  633. csv_output ? 0 : 16,
  634. thread_map__comm(evsel->threads, id),
  635. csv_output ? 0 : -8,
  636. thread_map__pid(evsel->threads, id),
  637. csv_sep);
  638. break;
  639. case AGGR_GLOBAL:
  640. case AGGR_UNSET:
  641. default:
  642. break;
  643. }
  644. }
  /* Context handed (as void *ctx) to the metric-printing callbacks. */
  struct outstate {
      FILE *fh;                 /* stream the callbacks write to */
      bool newline;             /* a line break is pending (standard output) */
      const char *prefix;       /* text repeated at the start of continuation lines */
      int nfields;              /* separators emitted after the prefix on CSV continuation lines */
      int id, nr;               /* forwarded to aggr_printout() */
      struct perf_evsel *evsel; /* event being printed */
  };

  /* Width of the metric column in standard output. */
  #define METRIC_LEN 35
  654. static void new_line_std(void *ctx)
  655. {
  656. struct outstate *os = ctx;
  657. os->newline = true;
  658. }
  659. static void do_new_line_std(struct outstate *os)
  660. {
  661. fputc('\n', os->fh);
  662. fputs(os->prefix, os->fh);
  663. aggr_printout(os->evsel, os->id, os->nr);
  664. if (stat_config.aggr_mode == AGGR_NONE)
  665. fprintf(os->fh, " ");
  666. fprintf(os->fh, " ");
  667. }
  668. static void print_metric_std(void *ctx, const char *color, const char *fmt,
  669. const char *unit, double val)
  670. {
  671. struct outstate *os = ctx;
  672. FILE *out = os->fh;
  673. int n;
  674. bool newline = os->newline;
  675. os->newline = false;
  676. if (unit == NULL || fmt == NULL) {
  677. fprintf(out, "%-*s", METRIC_LEN, "");
  678. return;
  679. }
  680. if (newline)
  681. do_new_line_std(os);
  682. n = fprintf(out, " # ");
  683. if (color)
  684. n += color_fprintf(out, color, fmt, val);
  685. else
  686. n += fprintf(out, fmt, val);
  687. fprintf(out, " %-*s", METRIC_LEN - n - 1, unit);
  688. }
  689. static void new_line_csv(void *ctx)
  690. {
  691. struct outstate *os = ctx;
  692. int i;
  693. fputc('\n', os->fh);
  694. if (os->prefix)
  695. fprintf(os->fh, "%s%s", os->prefix, csv_sep);
  696. aggr_printout(os->evsel, os->id, os->nr);
  697. for (i = 0; i < os->nfields; i++)
  698. fputs(csv_sep, os->fh);
  699. }
  700. static void print_metric_csv(void *ctx,
  701. const char *color __maybe_unused,
  702. const char *fmt, const char *unit, double val)
  703. {
  704. struct outstate *os = ctx;
  705. FILE *out = os->fh;
  706. char buf[64], *vals, *ends;
  707. if (unit == NULL || fmt == NULL) {
  708. fprintf(out, "%s%s%s%s", csv_sep, csv_sep, csv_sep, csv_sep);
  709. return;
  710. }
  711. snprintf(buf, sizeof(buf), fmt, val);
  712. vals = buf;
  713. while (isspace(*vals))
  714. vals++;
  715. ends = vals;
  716. while (isdigit(*ends) || *ends == '.')
  717. ends++;
  718. *ends = 0;
  719. while (isspace(*unit))
  720. unit++;
  721. fprintf(out, "%s%s%s%s", csv_sep, vals, csv_sep, unit);
  722. }
  723. #define METRIC_ONLY_LEN 20
  724. /* Filter out some columns that don't work well in metrics only mode */
  725. static bool valid_only_metric(const char *unit)
  726. {
  727. if (!unit)
  728. return false;
  729. if (strstr(unit, "/sec") ||
  730. strstr(unit, "hz") ||
  731. strstr(unit, "Hz") ||
  732. strstr(unit, "CPUs utilized"))
  733. return false;
  734. return true;
  735. }
  736. static const char *fixunit(char *buf, struct perf_evsel *evsel,
  737. const char *unit)
  738. {
  739. if (!strncmp(unit, "of all", 6)) {
  740. snprintf(buf, 1024, "%s %s", perf_evsel__name(evsel),
  741. unit);
  742. return buf;
  743. }
  744. return unit;
  745. }
  746. static void print_metric_only(void *ctx, const char *color, const char *fmt,
  747. const char *unit, double val)
  748. {
  749. struct outstate *os = ctx;
  750. FILE *out = os->fh;
  751. int n;
  752. char buf[1024];
  753. unsigned mlen = METRIC_ONLY_LEN;
  754. if (!valid_only_metric(unit))
  755. return;
  756. unit = fixunit(buf, os->evsel, unit);
  757. if (color)
  758. n = color_fprintf(out, color, fmt, val);
  759. else
  760. n = fprintf(out, fmt, val);
  761. if (n > METRIC_ONLY_LEN)
  762. n = METRIC_ONLY_LEN;
  763. if (mlen < strlen(unit))
  764. mlen = strlen(unit) + 1;
  765. fprintf(out, "%*s", mlen - n, "");
  766. }
  767. static void print_metric_only_csv(void *ctx, const char *color __maybe_unused,
  768. const char *fmt,
  769. const char *unit, double val)
  770. {
  771. struct outstate *os = ctx;
  772. FILE *out = os->fh;
  773. char buf[64], *vals, *ends;
  774. char tbuf[1024];
  775. if (!valid_only_metric(unit))
  776. return;
  777. unit = fixunit(tbuf, os->evsel, unit);
  778. snprintf(buf, sizeof buf, fmt, val);
  779. vals = buf;
  780. while (isspace(*vals))
  781. vals++;
  782. ends = vals;
  783. while (isdigit(*ends) || *ends == '.')
  784. ends++;
  785. *ends = 0;
  786. fprintf(out, "%s%s", vals, csv_sep);
  787. }
  788. static void new_line_metric(void *ctx __maybe_unused)
  789. {
  790. }
  791. static void print_metric_header(void *ctx, const char *color __maybe_unused,
  792. const char *fmt __maybe_unused,
  793. const char *unit, double val __maybe_unused)
  794. {
  795. struct outstate *os = ctx;
  796. char tbuf[1024];
  797. if (!valid_only_metric(unit))
  798. return;
  799. unit = fixunit(tbuf, os->evsel, unit);
  800. if (csv_output)
  801. fprintf(os->fh, "%s%s", unit, csv_sep);
  802. else
  803. fprintf(os->fh, "%-*s ", METRIC_ONLY_LEN, unit);
  804. }
  805. static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
  806. {
  807. FILE *output = stat_config.output;
  808. double msecs = avg / 1e6;
  809. const char *fmt_v, *fmt_n;
  810. char name[25];
  811. fmt_v = csv_output ? "%.6f%s" : "%18.6f%s";
  812. fmt_n = csv_output ? "%s" : "%-25s";
  813. aggr_printout(evsel, id, nr);
  814. scnprintf(name, sizeof(name), "%s%s",
  815. perf_evsel__name(evsel), csv_output ? "" : " (msec)");
  816. fprintf(output, fmt_v, msecs, csv_sep);
  817. if (csv_output)
  818. fprintf(output, "%s%s", evsel->unit, csv_sep);
  819. else
  820. fprintf(output, "%-*s%s", unit_width, evsel->unit, csv_sep);
  821. fprintf(output, fmt_n, name);
  822. if (evsel->cgrp)
  823. fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
  824. }
  825. static int first_shadow_cpu(struct perf_evsel *evsel, int id)
  826. {
  827. int i;
  828. if (!aggr_get_id)
  829. return 0;
  830. if (stat_config.aggr_mode == AGGR_NONE)
  831. return id;
  832. if (stat_config.aggr_mode == AGGR_GLOBAL)
  833. return 0;
  834. for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) {
  835. int cpu2 = perf_evsel__cpus(evsel)->map[i];
  836. if (aggr_get_id(evsel_list->cpus, cpu2) == id)
  837. return cpu2;
  838. }
  839. return 0;
  840. }
  841. static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
  842. {
  843. FILE *output = stat_config.output;
  844. double sc = evsel->scale;
  845. const char *fmt;
  846. if (csv_output) {
  847. fmt = floor(sc) != sc ? "%.2f%s" : "%.0f%s";
  848. } else {
  849. if (big_num)
  850. fmt = floor(sc) != sc ? "%'18.2f%s" : "%'18.0f%s";
  851. else
  852. fmt = floor(sc) != sc ? "%18.2f%s" : "%18.0f%s";
  853. }
  854. aggr_printout(evsel, id, nr);
  855. fprintf(output, fmt, avg, csv_sep);
  856. if (evsel->unit)
  857. fprintf(output, "%-*s%s",
  858. csv_output ? 0 : unit_width,
  859. evsel->unit, csv_sep);
  860. fprintf(output, "%-*s", csv_output ? 0 : 25, perf_evsel__name(evsel));
  861. if (evsel->cgrp)
  862. fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
  863. }
  864. static void printout(int id, int nr, struct perf_evsel *counter, double uval,
  865. char *prefix, u64 run, u64 ena, double noise)
  866. {
  867. struct perf_stat_output_ctx out;
  868. struct outstate os = {
  869. .fh = stat_config.output,
  870. .prefix = prefix ? prefix : "",
  871. .id = id,
  872. .nr = nr,
  873. .evsel = counter,
  874. };
  875. print_metric_t pm = print_metric_std;
  876. void (*nl)(void *);
  877. if (metric_only) {
  878. nl = new_line_metric;
  879. if (csv_output)
  880. pm = print_metric_only_csv;
  881. else
  882. pm = print_metric_only;
  883. } else
  884. nl = new_line_std;
  885. if (csv_output && !metric_only) {
  886. static int aggr_fields[] = {
  887. [AGGR_GLOBAL] = 0,
  888. [AGGR_THREAD] = 1,
  889. [AGGR_NONE] = 1,
  890. [AGGR_SOCKET] = 2,
  891. [AGGR_CORE] = 2,
  892. };
  893. pm = print_metric_csv;
  894. nl = new_line_csv;
  895. os.nfields = 3;
  896. os.nfields += aggr_fields[stat_config.aggr_mode];
  897. if (counter->cgrp)
  898. os.nfields++;
  899. }
  900. if (run == 0 || ena == 0 || counter->counts->scaled == -1) {
  901. if (metric_only) {
  902. pm(&os, NULL, "", "", 0);
  903. return;
  904. }
  905. aggr_printout(counter, id, nr);
  906. fprintf(stat_config.output, "%*s%s",
  907. csv_output ? 0 : 18,
  908. counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
  909. csv_sep);
  910. fprintf(stat_config.output, "%-*s%s",
  911. csv_output ? 0 : unit_width,
  912. counter->unit, csv_sep);
  913. fprintf(stat_config.output, "%*s",
  914. csv_output ? 0 : -25,
  915. perf_evsel__name(counter));
  916. if (counter->cgrp)
  917. fprintf(stat_config.output, "%s%s",
  918. csv_sep, counter->cgrp->name);
  919. if (!csv_output)
  920. pm(&os, NULL, NULL, "", 0);
  921. print_noise(counter, noise);
  922. print_running(run, ena);
  923. if (csv_output)
  924. pm(&os, NULL, NULL, "", 0);
  925. return;
  926. }
  927. if (metric_only)
  928. /* nothing */;
  929. else if (nsec_counter(counter))
  930. nsec_printout(id, nr, counter, uval);
  931. else
  932. abs_printout(id, nr, counter, uval);
  933. out.print_metric = pm;
  934. out.new_line = nl;
  935. out.ctx = &os;
  936. if (csv_output && !metric_only) {
  937. print_noise(counter, noise);
  938. print_running(run, ena);
  939. }
  940. perf_stat__print_shadow_stats(counter, uval,
  941. first_shadow_cpu(counter, id),
  942. &out);
  943. if (!csv_output && !metric_only) {
  944. print_noise(counter, noise);
  945. print_running(run, ena);
  946. }
  947. }
  948. static void aggr_update_shadow(void)
  949. {
  950. int cpu, s2, id, s;
  951. u64 val;
  952. struct perf_evsel *counter;
  953. for (s = 0; s < aggr_map->nr; s++) {
  954. id = aggr_map->map[s];
  955. evlist__for_each_entry(evsel_list, counter) {
  956. val = 0;
  957. for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
  958. s2 = aggr_get_id(evsel_list->cpus, cpu);
  959. if (s2 != id)
  960. continue;
  961. val += perf_counts(counter->counts, cpu, 0)->val;
  962. }
  963. val = val * counter->scale;
  964. perf_stat__update_shadow_stats(counter, &val,
  965. first_shadow_cpu(counter, id));
  966. }
  967. }
  968. }
  969. static void print_aggr(char *prefix)
  970. {
  971. FILE *output = stat_config.output;
  972. struct perf_evsel *counter;
  973. int cpu, s, s2, id, nr;
  974. double uval;
  975. u64 ena, run, val;
  976. bool first;
  977. if (!(aggr_map || aggr_get_id))
  978. return;
  979. aggr_update_shadow();
  980. /*
  981. * With metric_only everything is on a single line.
  982. * Without each counter has its own line.
  983. */
  984. for (s = 0; s < aggr_map->nr; s++) {
  985. if (prefix && metric_only)
  986. fprintf(output, "%s", prefix);
  987. id = aggr_map->map[s];
  988. first = true;
  989. evlist__for_each_entry(evsel_list, counter) {
  990. val = ena = run = 0;
  991. nr = 0;
  992. for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
  993. s2 = aggr_get_id(perf_evsel__cpus(counter), cpu);
  994. if (s2 != id)
  995. continue;
  996. val += perf_counts(counter->counts, cpu, 0)->val;
  997. ena += perf_counts(counter->counts, cpu, 0)->ena;
  998. run += perf_counts(counter->counts, cpu, 0)->run;
  999. nr++;
  1000. }
  1001. if (first && metric_only) {
  1002. first = false;
  1003. aggr_printout(counter, id, nr);
  1004. }
  1005. if (prefix && !metric_only)
  1006. fprintf(output, "%s", prefix);
  1007. uval = val * counter->scale;
  1008. printout(id, nr, counter, uval, prefix, run, ena, 1.0);
  1009. if (!metric_only)
  1010. fputc('\n', output);
  1011. }
  1012. if (metric_only)
  1013. fputc('\n', output);
  1014. }
  1015. }
  1016. static void print_aggr_thread(struct perf_evsel *counter, char *prefix)
  1017. {
  1018. FILE *output = stat_config.output;
  1019. int nthreads = thread_map__nr(counter->threads);
  1020. int ncpus = cpu_map__nr(counter->cpus);
  1021. int cpu, thread;
  1022. double uval;
  1023. for (thread = 0; thread < nthreads; thread++) {
  1024. u64 ena = 0, run = 0, val = 0;
  1025. for (cpu = 0; cpu < ncpus; cpu++) {
  1026. val += perf_counts(counter->counts, cpu, thread)->val;
  1027. ena += perf_counts(counter->counts, cpu, thread)->ena;
  1028. run += perf_counts(counter->counts, cpu, thread)->run;
  1029. }
  1030. if (prefix)
  1031. fprintf(output, "%s", prefix);
  1032. uval = val * counter->scale;
  1033. printout(thread, 0, counter, uval, prefix, run, ena, 1.0);
  1034. fputc('\n', output);
  1035. }
  1036. }
  1037. /*
  1038. * Print out the results of a single counter:
  1039. * aggregated counts in system-wide mode
  1040. */
  1041. static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
  1042. {
  1043. FILE *output = stat_config.output;
  1044. struct perf_stat_evsel *ps = counter->priv;
  1045. double avg = avg_stats(&ps->res_stats[0]);
  1046. double uval;
  1047. double avg_enabled, avg_running;
  1048. avg_enabled = avg_stats(&ps->res_stats[1]);
  1049. avg_running = avg_stats(&ps->res_stats[2]);
  1050. if (prefix && !metric_only)
  1051. fprintf(output, "%s", prefix);
  1052. uval = avg * counter->scale;
  1053. printout(-1, 0, counter, uval, prefix, avg_running, avg_enabled, avg);
  1054. if (!metric_only)
  1055. fprintf(output, "\n");
  1056. }
  1057. /*
  1058. * Print out the results of a single counter:
  1059. * does not use aggregated count in system-wide
  1060. */
  1061. static void print_counter(struct perf_evsel *counter, char *prefix)
  1062. {
  1063. FILE *output = stat_config.output;
  1064. u64 ena, run, val;
  1065. double uval;
  1066. int cpu;
  1067. for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
  1068. val = perf_counts(counter->counts, cpu, 0)->val;
  1069. ena = perf_counts(counter->counts, cpu, 0)->ena;
  1070. run = perf_counts(counter->counts, cpu, 0)->run;
  1071. if (prefix)
  1072. fprintf(output, "%s", prefix);
  1073. uval = val * counter->scale;
  1074. printout(cpu, 0, counter, uval, prefix, run, ena, 1.0);
  1075. fputc('\n', output);
  1076. }
  1077. }
  1078. static void print_no_aggr_metric(char *prefix)
  1079. {
  1080. int cpu;
  1081. int nrcpus = 0;
  1082. struct perf_evsel *counter;
  1083. u64 ena, run, val;
  1084. double uval;
  1085. nrcpus = evsel_list->cpus->nr;
  1086. for (cpu = 0; cpu < nrcpus; cpu++) {
  1087. bool first = true;
  1088. if (prefix)
  1089. fputs(prefix, stat_config.output);
  1090. evlist__for_each_entry(evsel_list, counter) {
  1091. if (first) {
  1092. aggr_printout(counter, cpu, 0);
  1093. first = false;
  1094. }
  1095. val = perf_counts(counter->counts, cpu, 0)->val;
  1096. ena = perf_counts(counter->counts, cpu, 0)->ena;
  1097. run = perf_counts(counter->counts, cpu, 0)->run;
  1098. uval = val * counter->scale;
  1099. printout(cpu, 0, counter, uval, prefix, run, ena, 1.0);
  1100. }
  1101. fputc('\n', stat_config.output);
  1102. }
  1103. }
  1104. static int aggr_header_lens[] = {
  1105. [AGGR_CORE] = 18,
  1106. [AGGR_SOCKET] = 12,
  1107. [AGGR_NONE] = 6,
  1108. [AGGR_THREAD] = 24,
  1109. [AGGR_GLOBAL] = 0,
  1110. };
  1111. static const char *aggr_header_csv[] = {
  1112. [AGGR_CORE] = "core,cpus,",
  1113. [AGGR_SOCKET] = "socket,cpus",
  1114. [AGGR_NONE] = "cpu,",
  1115. [AGGR_THREAD] = "comm-pid,",
  1116. [AGGR_GLOBAL] = ""
  1117. };
  1118. static void print_metric_headers(const char *prefix, bool no_indent)
  1119. {
  1120. struct perf_stat_output_ctx out;
  1121. struct perf_evsel *counter;
  1122. struct outstate os = {
  1123. .fh = stat_config.output
  1124. };
  1125. if (prefix)
  1126. fprintf(stat_config.output, "%s", prefix);
  1127. if (!csv_output && !no_indent)
  1128. fprintf(stat_config.output, "%*s",
  1129. aggr_header_lens[stat_config.aggr_mode], "");
  1130. if (csv_output) {
  1131. if (stat_config.interval)
  1132. fputs("time,", stat_config.output);
  1133. fputs(aggr_header_csv[stat_config.aggr_mode],
  1134. stat_config.output);
  1135. }
  1136. /* Print metrics headers only */
  1137. evlist__for_each_entry(evsel_list, counter) {
  1138. os.evsel = counter;
  1139. out.ctx = &os;
  1140. out.print_metric = print_metric_header;
  1141. out.new_line = new_line_metric;
  1142. os.evsel = counter;
  1143. perf_stat__print_shadow_stats(counter, 0,
  1144. 0,
  1145. &out);
  1146. }
  1147. fputc('\n', stat_config.output);
  1148. }
  1149. static void print_interval(char *prefix, struct timespec *ts)
  1150. {
  1151. FILE *output = stat_config.output;
  1152. static int num_print_interval;
  1153. sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep);
  1154. if (num_print_interval == 0 && !csv_output) {
  1155. switch (stat_config.aggr_mode) {
  1156. case AGGR_SOCKET:
  1157. fprintf(output, "# time socket cpus");
  1158. if (!metric_only)
  1159. fprintf(output, " counts %*s events\n", unit_width, "unit");
  1160. break;
  1161. case AGGR_CORE:
  1162. fprintf(output, "# time core cpus");
  1163. if (!metric_only)
  1164. fprintf(output, " counts %*s events\n", unit_width, "unit");
  1165. break;
  1166. case AGGR_NONE:
  1167. fprintf(output, "# time CPU");
  1168. if (!metric_only)
  1169. fprintf(output, " counts %*s events\n", unit_width, "unit");
  1170. break;
  1171. case AGGR_THREAD:
  1172. fprintf(output, "# time comm-pid");
  1173. if (!metric_only)
  1174. fprintf(output, " counts %*s events\n", unit_width, "unit");
  1175. break;
  1176. case AGGR_GLOBAL:
  1177. default:
  1178. fprintf(output, "# time");
  1179. if (!metric_only)
  1180. fprintf(output, " counts %*s events\n", unit_width, "unit");
  1181. case AGGR_UNSET:
  1182. break;
  1183. }
  1184. }
  1185. if (num_print_interval == 0 && metric_only)
  1186. print_metric_headers(" ", true);
  1187. if (++num_print_interval == 25)
  1188. num_print_interval = 0;
  1189. }
  1190. static void print_header(int argc, const char **argv)
  1191. {
  1192. FILE *output = stat_config.output;
  1193. int i;
  1194. fflush(stdout);
  1195. if (!csv_output) {
  1196. fprintf(output, "\n");
  1197. fprintf(output, " Performance counter stats for ");
  1198. if (target.system_wide)
  1199. fprintf(output, "\'system wide");
  1200. else if (target.cpu_list)
  1201. fprintf(output, "\'CPU(s) %s", target.cpu_list);
  1202. else if (!target__has_task(&target)) {
  1203. fprintf(output, "\'%s", argv ? argv[0] : "pipe");
  1204. for (i = 1; argv && (i < argc); i++)
  1205. fprintf(output, " %s", argv[i]);
  1206. } else if (target.pid)
  1207. fprintf(output, "process id \'%s", target.pid);
  1208. else
  1209. fprintf(output, "thread id \'%s", target.tid);
  1210. fprintf(output, "\'");
  1211. if (run_count > 1)
  1212. fprintf(output, " (%d runs)", run_count);
  1213. fprintf(output, ":\n\n");
  1214. }
  1215. }
  1216. static void print_footer(void)
  1217. {
  1218. FILE *output = stat_config.output;
  1219. if (!null_run)
  1220. fprintf(output, "\n");
  1221. fprintf(output, " %17.9f seconds time elapsed",
  1222. avg_stats(&walltime_nsecs_stats)/1e9);
  1223. if (run_count > 1) {
  1224. fprintf(output, " ");
  1225. print_noise_pct(stddev_stats(&walltime_nsecs_stats),
  1226. avg_stats(&walltime_nsecs_stats));
  1227. }
  1228. fprintf(output, "\n\n");
  1229. }
  1230. static void print_counters(struct timespec *ts, int argc, const char **argv)
  1231. {
  1232. int interval = stat_config.interval;
  1233. struct perf_evsel *counter;
  1234. char buf[64], *prefix = NULL;
  1235. /* Do not print anything if we record to the pipe. */
  1236. if (STAT_RECORD && perf_stat.file.is_pipe)
  1237. return;
  1238. if (interval)
  1239. print_interval(prefix = buf, ts);
  1240. else
  1241. print_header(argc, argv);
  1242. if (metric_only) {
  1243. static int num_print_iv;
  1244. if (num_print_iv == 0 && !interval)
  1245. print_metric_headers(prefix, false);
  1246. if (num_print_iv++ == 25)
  1247. num_print_iv = 0;
  1248. if (stat_config.aggr_mode == AGGR_GLOBAL && prefix)
  1249. fprintf(stat_config.output, "%s", prefix);
  1250. }
  1251. switch (stat_config.aggr_mode) {
  1252. case AGGR_CORE:
  1253. case AGGR_SOCKET:
  1254. print_aggr(prefix);
  1255. break;
  1256. case AGGR_THREAD:
  1257. evlist__for_each_entry(evsel_list, counter)
  1258. print_aggr_thread(counter, prefix);
  1259. break;
  1260. case AGGR_GLOBAL:
  1261. evlist__for_each_entry(evsel_list, counter)
  1262. print_counter_aggr(counter, prefix);
  1263. if (metric_only)
  1264. fputc('\n', stat_config.output);
  1265. break;
  1266. case AGGR_NONE:
  1267. if (metric_only)
  1268. print_no_aggr_metric(prefix);
  1269. else {
  1270. evlist__for_each_entry(evsel_list, counter)
  1271. print_counter(counter, prefix);
  1272. }
  1273. break;
  1274. case AGGR_UNSET:
  1275. default:
  1276. break;
  1277. }
  1278. if (!interval && !csv_output)
  1279. print_footer();
  1280. fflush(stat_config.output);
  1281. }
  1282. static volatile int signr = -1;
  1283. static void skip_signal(int signo)
  1284. {
  1285. if ((child_pid == -1) || stat_config.interval)
  1286. done = 1;
  1287. signr = signo;
  1288. /*
  1289. * render child_pid harmless
  1290. * won't send SIGTERM to a random
  1291. * process in case of race condition
  1292. * and fast PID recycling
  1293. */
  1294. child_pid = -1;
  1295. }
  1296. static void sig_atexit(void)
  1297. {
  1298. sigset_t set, oset;
  1299. /*
  1300. * avoid race condition with SIGCHLD handler
  1301. * in skip_signal() which is modifying child_pid
  1302. * goal is to avoid send SIGTERM to a random
  1303. * process
  1304. */
  1305. sigemptyset(&set);
  1306. sigaddset(&set, SIGCHLD);
  1307. sigprocmask(SIG_BLOCK, &set, &oset);
  1308. if (child_pid != -1)
  1309. kill(child_pid, SIGTERM);
  1310. sigprocmask(SIG_SETMASK, &oset, NULL);
  1311. if (signr == -1)
  1312. return;
  1313. signal(signr, SIG_DFL);
  1314. kill(getpid(), signr);
  1315. }
  1316. static int stat__set_big_num(const struct option *opt __maybe_unused,
  1317. const char *s __maybe_unused, int unset)
  1318. {
  1319. big_num_opt = unset ? 0 : 1;
  1320. return 0;
  1321. }
  1322. static int enable_metric_only(const struct option *opt __maybe_unused,
  1323. const char *s __maybe_unused, int unset)
  1324. {
  1325. force_metric_only = true;
  1326. metric_only = !unset;
  1327. return 0;
  1328. }
  1329. static const struct option stat_options[] = {
  1330. OPT_BOOLEAN('T', "transaction", &transaction_run,
  1331. "hardware transaction statistics"),
  1332. OPT_CALLBACK('e', "event", &evsel_list, "event",
  1333. "event selector. use 'perf list' to list available events",
  1334. parse_events_option),
  1335. OPT_CALLBACK(0, "filter", &evsel_list, "filter",
  1336. "event filter", parse_filter),
  1337. OPT_BOOLEAN('i', "no-inherit", &no_inherit,
  1338. "child tasks do not inherit counters"),
  1339. OPT_STRING('p', "pid", &target.pid, "pid",
  1340. "stat events on existing process id"),
  1341. OPT_STRING('t', "tid", &target.tid, "tid",
  1342. "stat events on existing thread id"),
  1343. OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
  1344. "system-wide collection from all CPUs"),
  1345. OPT_BOOLEAN('g', "group", &group,
  1346. "put the counters into a counter group"),
  1347. OPT_BOOLEAN('c', "scale", &stat_config.scale, "scale/normalize counters"),
  1348. OPT_INCR('v', "verbose", &verbose,
  1349. "be more verbose (show counter open errors, etc)"),
  1350. OPT_INTEGER('r', "repeat", &run_count,
  1351. "repeat command and print average + stddev (max: 100, forever: 0)"),
  1352. OPT_BOOLEAN('n', "null", &null_run,
  1353. "null run - dont start any counters"),
  1354. OPT_INCR('d', "detailed", &detailed_run,
  1355. "detailed run - start a lot of events"),
  1356. OPT_BOOLEAN('S', "sync", &sync_run,
  1357. "call sync() before starting a run"),
  1358. OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL,
  1359. "print large numbers with thousands\' separators",
  1360. stat__set_big_num),
  1361. OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
  1362. "list of cpus to monitor in system-wide"),
  1363. OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode,
  1364. "disable CPU count aggregation", AGGR_NONE),
  1365. OPT_STRING('x', "field-separator", &csv_sep, "separator",
  1366. "print counts with custom separator"),
  1367. OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
  1368. "monitor event in cgroup name only", parse_cgroups),
  1369. OPT_STRING('o', "output", &output_name, "file", "output file name"),
  1370. OPT_BOOLEAN(0, "append", &append_file, "append to the output file"),
  1371. OPT_INTEGER(0, "log-fd", &output_fd,
  1372. "log output to fd, instead of stderr"),
  1373. OPT_STRING(0, "pre", &pre_cmd, "command",
  1374. "command to run prior to the measured command"),
  1375. OPT_STRING(0, "post", &post_cmd, "command",
  1376. "command to run after to the measured command"),
  1377. OPT_UINTEGER('I', "interval-print", &stat_config.interval,
  1378. "print counts at regular interval in ms (>= 10)"),
  1379. OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode,
  1380. "aggregate counts per processor socket", AGGR_SOCKET),
  1381. OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode,
  1382. "aggregate counts per physical processor core", AGGR_CORE),
  1383. OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode,
  1384. "aggregate counts per thread", AGGR_THREAD),
  1385. OPT_UINTEGER('D', "delay", &initial_delay,
  1386. "ms to wait before starting measurement after program start"),
  1387. OPT_CALLBACK_NOOPT(0, "metric-only", &metric_only, NULL,
  1388. "Only print computed metrics. No raw values", enable_metric_only),
  1389. OPT_BOOLEAN(0, "topdown", &topdown_run,
  1390. "measure topdown level 1 statistics"),
  1391. OPT_END()
  1392. };
  1393. static int perf_stat__get_socket(struct cpu_map *map, int cpu)
  1394. {
  1395. return cpu_map__get_socket(map, cpu, NULL);
  1396. }
  1397. static int perf_stat__get_core(struct cpu_map *map, int cpu)
  1398. {
  1399. return cpu_map__get_core(map, cpu, NULL);
  1400. }
  1401. static int cpu_map__get_max(struct cpu_map *map)
  1402. {
  1403. int i, max = -1;
  1404. for (i = 0; i < map->nr; i++) {
  1405. if (map->map[i] > max)
  1406. max = map->map[i];
  1407. }
  1408. return max;
  1409. }
  1410. static struct cpu_map *cpus_aggr_map;
  1411. static int perf_stat__get_aggr(aggr_get_id_t get_id, struct cpu_map *map, int idx)
  1412. {
  1413. int cpu;
  1414. if (idx >= map->nr)
  1415. return -1;
  1416. cpu = map->map[idx];
  1417. if (cpus_aggr_map->map[cpu] == -1)
  1418. cpus_aggr_map->map[cpu] = get_id(map, idx);
  1419. return cpus_aggr_map->map[cpu];
  1420. }
  1421. static int perf_stat__get_socket_cached(struct cpu_map *map, int idx)
  1422. {
  1423. return perf_stat__get_aggr(perf_stat__get_socket, map, idx);
  1424. }
  1425. static int perf_stat__get_core_cached(struct cpu_map *map, int idx)
  1426. {
  1427. return perf_stat__get_aggr(perf_stat__get_core, map, idx);
  1428. }
  1429. static int perf_stat_init_aggr_mode(void)
  1430. {
  1431. int nr;
  1432. switch (stat_config.aggr_mode) {
  1433. case AGGR_SOCKET:
  1434. if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) {
  1435. perror("cannot build socket map");
  1436. return -1;
  1437. }
  1438. aggr_get_id = perf_stat__get_socket_cached;
  1439. break;
  1440. case AGGR_CORE:
  1441. if (cpu_map__build_core_map(evsel_list->cpus, &aggr_map)) {
  1442. perror("cannot build core map");
  1443. return -1;
  1444. }
  1445. aggr_get_id = perf_stat__get_core_cached;
  1446. break;
  1447. case AGGR_NONE:
  1448. case AGGR_GLOBAL:
  1449. case AGGR_THREAD:
  1450. case AGGR_UNSET:
  1451. default:
  1452. break;
  1453. }
  1454. /*
  1455. * The evsel_list->cpus is the base we operate on,
  1456. * taking the highest cpu number to be the size of
  1457. * the aggregation translate cpumap.
  1458. */
  1459. nr = cpu_map__get_max(evsel_list->cpus);
  1460. cpus_aggr_map = cpu_map__empty_new(nr + 1);
  1461. return cpus_aggr_map ? 0 : -ENOMEM;
  1462. }
  1463. static void perf_stat__exit_aggr_mode(void)
  1464. {
  1465. cpu_map__put(aggr_map);
  1466. cpu_map__put(cpus_aggr_map);
  1467. aggr_map = NULL;
  1468. cpus_aggr_map = NULL;
  1469. }
  1470. static inline int perf_env__get_cpu(struct perf_env *env, struct cpu_map *map, int idx)
  1471. {
  1472. int cpu;
  1473. if (idx > map->nr)
  1474. return -1;
  1475. cpu = map->map[idx];
  1476. if (cpu >= env->nr_cpus_online)
  1477. return -1;
  1478. return cpu;
  1479. }
  1480. static int perf_env__get_socket(struct cpu_map *map, int idx, void *data)
  1481. {
  1482. struct perf_env *env = data;
  1483. int cpu = perf_env__get_cpu(env, map, idx);
  1484. return cpu == -1 ? -1 : env->cpu[cpu].socket_id;
  1485. }
  1486. static int perf_env__get_core(struct cpu_map *map, int idx, void *data)
  1487. {
  1488. struct perf_env *env = data;
  1489. int core = -1, cpu = perf_env__get_cpu(env, map, idx);
  1490. if (cpu != -1) {
  1491. int socket_id = env->cpu[cpu].socket_id;
  1492. /*
  1493. * Encode socket in upper 16 bits
  1494. * core_id is relative to socket, and
  1495. * we need a global id. So we combine
  1496. * socket + core id.
  1497. */
  1498. core = (socket_id << 16) | (env->cpu[cpu].core_id & 0xffff);
  1499. }
  1500. return core;
  1501. }
  1502. static int perf_env__build_socket_map(struct perf_env *env, struct cpu_map *cpus,
  1503. struct cpu_map **sockp)
  1504. {
  1505. return cpu_map__build_map(cpus, sockp, perf_env__get_socket, env);
  1506. }
  1507. static int perf_env__build_core_map(struct perf_env *env, struct cpu_map *cpus,
  1508. struct cpu_map **corep)
  1509. {
  1510. return cpu_map__build_map(cpus, corep, perf_env__get_core, env);
  1511. }
  1512. static int perf_stat__get_socket_file(struct cpu_map *map, int idx)
  1513. {
  1514. return perf_env__get_socket(map, idx, &perf_stat.session->header.env);
  1515. }
  1516. static int perf_stat__get_core_file(struct cpu_map *map, int idx)
  1517. {
  1518. return perf_env__get_core(map, idx, &perf_stat.session->header.env);
  1519. }
  1520. static int perf_stat_init_aggr_mode_file(struct perf_stat *st)
  1521. {
  1522. struct perf_env *env = &st->session->header.env;
  1523. switch (stat_config.aggr_mode) {
  1524. case AGGR_SOCKET:
  1525. if (perf_env__build_socket_map(env, evsel_list->cpus, &aggr_map)) {
  1526. perror("cannot build socket map");
  1527. return -1;
  1528. }
  1529. aggr_get_id = perf_stat__get_socket_file;
  1530. break;
  1531. case AGGR_CORE:
  1532. if (perf_env__build_core_map(env, evsel_list->cpus, &aggr_map)) {
  1533. perror("cannot build core map");
  1534. return -1;
  1535. }
  1536. aggr_get_id = perf_stat__get_core_file;
  1537. break;
  1538. case AGGR_NONE:
  1539. case AGGR_GLOBAL:
  1540. case AGGR_THREAD:
  1541. case AGGR_UNSET:
  1542. default:
  1543. break;
  1544. }
  1545. return 0;
  1546. }
  1547. static int topdown_filter_events(const char **attr, char **str, bool use_group)
  1548. {
  1549. int off = 0;
  1550. int i;
  1551. int len = 0;
  1552. char *s;
  1553. for (i = 0; attr[i]; i++) {
  1554. if (pmu_have_event("cpu", attr[i])) {
  1555. len += strlen(attr[i]) + 1;
  1556. attr[i - off] = attr[i];
  1557. } else
  1558. off++;
  1559. }
  1560. attr[i - off] = NULL;
  1561. *str = malloc(len + 1 + 2);
  1562. if (!*str)
  1563. return -1;
  1564. s = *str;
  1565. if (i - off == 0) {
  1566. *s = 0;
  1567. return 0;
  1568. }
  1569. if (use_group)
  1570. *s++ = '{';
  1571. for (i = 0; attr[i]; i++) {
  1572. strcpy(s, attr[i]);
  1573. s += strlen(s);
  1574. *s++ = ',';
  1575. }
  1576. if (use_group) {
  1577. s[-1] = '}';
  1578. *s = 0;
  1579. } else
  1580. s[-1] = 0;
  1581. return 0;
  1582. }
  1583. __weak bool arch_topdown_check_group(bool *warn)
  1584. {
  1585. *warn = false;
  1586. return false;
  1587. }
  1588. __weak void arch_topdown_group_warn(void)
  1589. {
  1590. }
  1591. /*
  1592. * Add default attributes, if there were no attributes specified or
  1593. * if -d/--detailed, -d -d or -d -d -d is used:
  1594. */
  1595. static int add_default_attributes(void)
  1596. {
  1597. int err;
  1598. struct perf_event_attr default_attrs0[] = {
  1599. { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK },
  1600. { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES },
  1601. { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS },
  1602. { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS },
  1603. { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES },
  1604. };
  1605. struct perf_event_attr frontend_attrs[] = {
  1606. { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND },
  1607. };
  1608. struct perf_event_attr backend_attrs[] = {
  1609. { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND },
  1610. };
  1611. struct perf_event_attr default_attrs1[] = {
  1612. { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS },
  1613. { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
  1614. { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES },
  1615. };
  1616. /*
  1617. * Detailed stats (-d), covering the L1 and last level data caches:
  1618. */
  1619. struct perf_event_attr detailed_attrs[] = {
  1620. { .type = PERF_TYPE_HW_CACHE,
  1621. .config =
  1622. PERF_COUNT_HW_CACHE_L1D << 0 |
  1623. (PERF_COUNT_HW_CACHE_OP_READ << 8) |
  1624. (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
  1625. { .type = PERF_TYPE_HW_CACHE,
  1626. .config =
  1627. PERF_COUNT_HW_CACHE_L1D << 0 |
  1628. (PERF_COUNT_HW_CACHE_OP_READ << 8) |
  1629. (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
  1630. { .type = PERF_TYPE_HW_CACHE,
  1631. .config =
  1632. PERF_COUNT_HW_CACHE_LL << 0 |
  1633. (PERF_COUNT_HW_CACHE_OP_READ << 8) |
  1634. (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
  1635. { .type = PERF_TYPE_HW_CACHE,
  1636. .config =
  1637. PERF_COUNT_HW_CACHE_LL << 0 |
  1638. (PERF_COUNT_HW_CACHE_OP_READ << 8) |
  1639. (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
  1640. };
  1641. /*
  1642. * Very detailed stats (-d -d), covering the instruction cache and the TLB caches:
  1643. */
  1644. struct perf_event_attr very_detailed_attrs[] = {
  1645. { .type = PERF_TYPE_HW_CACHE,
  1646. .config =
  1647. PERF_COUNT_HW_CACHE_L1I << 0 |
  1648. (PERF_COUNT_HW_CACHE_OP_READ << 8) |
  1649. (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
  1650. { .type = PERF_TYPE_HW_CACHE,
  1651. .config =
  1652. PERF_COUNT_HW_CACHE_L1I << 0 |
  1653. (PERF_COUNT_HW_CACHE_OP_READ << 8) |
  1654. (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
  1655. { .type = PERF_TYPE_HW_CACHE,
  1656. .config =
  1657. PERF_COUNT_HW_CACHE_DTLB << 0 |
  1658. (PERF_COUNT_HW_CACHE_OP_READ << 8) |
  1659. (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
  1660. { .type = PERF_TYPE_HW_CACHE,
  1661. .config =
  1662. PERF_COUNT_HW_CACHE_DTLB << 0 |
  1663. (PERF_COUNT_HW_CACHE_OP_READ << 8) |
  1664. (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
  1665. { .type = PERF_TYPE_HW_CACHE,
  1666. .config =
  1667. PERF_COUNT_HW_CACHE_ITLB << 0 |
  1668. (PERF_COUNT_HW_CACHE_OP_READ << 8) |
  1669. (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
  1670. { .type = PERF_TYPE_HW_CACHE,
  1671. .config =
  1672. PERF_COUNT_HW_CACHE_ITLB << 0 |
  1673. (PERF_COUNT_HW_CACHE_OP_READ << 8) |
  1674. (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
  1675. };
  1676. /*
  1677. * Very, very detailed stats (-d -d -d), adding prefetch events:
  1678. */
  1679. struct perf_event_attr very_very_detailed_attrs[] = {
  1680. { .type = PERF_TYPE_HW_CACHE,
  1681. .config =
  1682. PERF_COUNT_HW_CACHE_L1D << 0 |
  1683. (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) |
  1684. (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
  1685. { .type = PERF_TYPE_HW_CACHE,
  1686. .config =
  1687. PERF_COUNT_HW_CACHE_L1D << 0 |
  1688. (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) |
  1689. (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
  1690. };
  1691. /* Set attrs if no event is selected and !null_run: */
  1692. if (null_run)
  1693. return 0;
  1694. if (transaction_run) {
  1695. if (pmu_have_event("cpu", "cycles-ct") &&
  1696. pmu_have_event("cpu", "el-start"))
  1697. err = parse_events(evsel_list, transaction_attrs, NULL);
  1698. else
  1699. err = parse_events(evsel_list, transaction_limited_attrs, NULL);
  1700. if (err) {
  1701. fprintf(stderr, "Cannot set up transaction events\n");
  1702. return -1;
  1703. }
  1704. return 0;
  1705. }
  1706. if (topdown_run) {
  1707. char *str = NULL;
  1708. bool warn = false;
  1709. if (stat_config.aggr_mode != AGGR_GLOBAL &&
  1710. stat_config.aggr_mode != AGGR_CORE) {
  1711. pr_err("top down event configuration requires --per-core mode\n");
  1712. return -1;
  1713. }
  1714. stat_config.aggr_mode = AGGR_CORE;
  1715. if (nr_cgroups || !target__has_cpu(&target)) {
  1716. pr_err("top down event configuration requires system-wide mode (-a)\n");
  1717. return -1;
  1718. }
  1719. if (!force_metric_only)
  1720. metric_only = true;
  1721. if (topdown_filter_events(topdown_attrs, &str,
  1722. arch_topdown_check_group(&warn)) < 0) {
  1723. pr_err("Out of memory\n");
  1724. return -1;
  1725. }
  1726. if (topdown_attrs[0] && str) {
  1727. if (warn)
  1728. arch_topdown_group_warn();
  1729. err = parse_events(evsel_list, str, NULL);
  1730. if (err) {
  1731. fprintf(stderr,
  1732. "Cannot set up top down events %s: %d\n",
  1733. str, err);
  1734. free(str);
  1735. return -1;
  1736. }
  1737. } else {
  1738. fprintf(stderr, "System does not support topdown\n");
  1739. return -1;
  1740. }
  1741. free(str);
  1742. }
  1743. if (!evsel_list->nr_entries) {
  1744. if (target__has_cpu(&target))
  1745. default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK;
  1746. if (perf_evlist__add_default_attrs(evsel_list, default_attrs0) < 0)
  1747. return -1;
  1748. if (pmu_have_event("cpu", "stalled-cycles-frontend")) {
  1749. if (perf_evlist__add_default_attrs(evsel_list,
  1750. frontend_attrs) < 0)
  1751. return -1;
  1752. }
  1753. if (pmu_have_event("cpu", "stalled-cycles-backend")) {
  1754. if (perf_evlist__add_default_attrs(evsel_list,
  1755. backend_attrs) < 0)
  1756. return -1;
  1757. }
  1758. if (perf_evlist__add_default_attrs(evsel_list, default_attrs1) < 0)
  1759. return -1;
  1760. }
  1761. /* Detailed events get appended to the event list: */
  1762. if (detailed_run < 1)
  1763. return 0;
  1764. /* Append detailed run extra attributes: */
  1765. if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0)
  1766. return -1;
  1767. if (detailed_run < 2)
  1768. return 0;
  1769. /* Append very detailed run extra attributes: */
  1770. if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0)
  1771. return -1;
  1772. if (detailed_run < 3)
  1773. return 0;
  1774. /* Append very, very detailed run extra attributes: */
  1775. return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs);
  1776. }
  /* NULL-terminated usage text handed to parse_options() by __cmd_record(). */
  static const char * const stat_record_usage[] = { "perf stat record [<options>]", NULL };
  1781. static void init_features(struct perf_session *session)
  1782. {
  1783. int feat;
  1784. for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
  1785. perf_header__set_feat(&session->header, feat);
  1786. perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
  1787. perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
  1788. perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
  1789. perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
  1790. }
  1791. static int __cmd_record(int argc, const char **argv)
  1792. {
  1793. struct perf_session *session;
  1794. struct perf_data_file *file = &perf_stat.file;
  1795. argc = parse_options(argc, argv, stat_options, stat_record_usage,
  1796. PARSE_OPT_STOP_AT_NON_OPTION);
  1797. if (output_name)
  1798. file->path = output_name;
  1799. if (run_count != 1 || forever) {
  1800. pr_err("Cannot use -r option with perf stat record.\n");
  1801. return -1;
  1802. }
  1803. session = perf_session__new(file, false, NULL);
  1804. if (session == NULL) {
  1805. pr_err("Perf session creation failed.\n");
  1806. return -1;
  1807. }
  1808. init_features(session);
  1809. session->evlist = evsel_list;
  1810. perf_stat.session = session;
  1811. perf_stat.record = true;
  1812. return argc;
  1813. }
  1814. static int process_stat_round_event(struct perf_tool *tool __maybe_unused,
  1815. union perf_event *event,
  1816. struct perf_session *session)
  1817. {
  1818. struct stat_round_event *stat_round = &event->stat_round;
  1819. struct perf_evsel *counter;
  1820. struct timespec tsh, *ts = NULL;
  1821. const char **argv = session->header.env.cmdline_argv;
  1822. int argc = session->header.env.nr_cmdline;
  1823. evlist__for_each_entry(evsel_list, counter)
  1824. perf_stat_process_counter(&stat_config, counter);
  1825. if (stat_round->type == PERF_STAT_ROUND_TYPE__FINAL)
  1826. update_stats(&walltime_nsecs_stats, stat_round->time);
  1827. if (stat_config.interval && stat_round->time) {
  1828. tsh.tv_sec = stat_round->time / NSECS_PER_SEC;
  1829. tsh.tv_nsec = stat_round->time % NSECS_PER_SEC;
  1830. ts = &tsh;
  1831. }
  1832. print_counters(ts, argc, argv);
  1833. return 0;
  1834. }
  1835. static
  1836. int process_stat_config_event(struct perf_tool *tool __maybe_unused,
  1837. union perf_event *event,
  1838. struct perf_session *session __maybe_unused)
  1839. {
  1840. struct perf_stat *st = container_of(tool, struct perf_stat, tool);
  1841. perf_event__read_stat_config(&stat_config, &event->stat_config);
  1842. if (cpu_map__empty(st->cpus)) {
  1843. if (st->aggr_mode != AGGR_UNSET)
  1844. pr_warning("warning: processing task data, aggregation mode not set\n");
  1845. return 0;
  1846. }
  1847. if (st->aggr_mode != AGGR_UNSET)
  1848. stat_config.aggr_mode = st->aggr_mode;
  1849. if (perf_stat.file.is_pipe)
  1850. perf_stat_init_aggr_mode();
  1851. else
  1852. perf_stat_init_aggr_mode_file(st);
  1853. return 0;
  1854. }
  1855. static int set_maps(struct perf_stat *st)
  1856. {
  1857. if (!st->cpus || !st->threads)
  1858. return 0;
  1859. if (WARN_ONCE(st->maps_allocated, "stats double allocation\n"))
  1860. return -EINVAL;
  1861. perf_evlist__set_maps(evsel_list, st->cpus, st->threads);
  1862. if (perf_evlist__alloc_stats(evsel_list, true))
  1863. return -ENOMEM;
  1864. st->maps_allocated = true;
  1865. return 0;
  1866. }
  1867. static
  1868. int process_thread_map_event(struct perf_tool *tool __maybe_unused,
  1869. union perf_event *event,
  1870. struct perf_session *session __maybe_unused)
  1871. {
  1872. struct perf_stat *st = container_of(tool, struct perf_stat, tool);
  1873. if (st->threads) {
  1874. pr_warning("Extra thread map event, ignoring.\n");
  1875. return 0;
  1876. }
  1877. st->threads = thread_map__new_event(&event->thread_map);
  1878. if (!st->threads)
  1879. return -ENOMEM;
  1880. return set_maps(st);
  1881. }
  1882. static
  1883. int process_cpu_map_event(struct perf_tool *tool __maybe_unused,
  1884. union perf_event *event,
  1885. struct perf_session *session __maybe_unused)
  1886. {
  1887. struct perf_stat *st = container_of(tool, struct perf_stat, tool);
  1888. struct cpu_map *cpus;
  1889. if (st->cpus) {
  1890. pr_warning("Extra cpu map event, ignoring.\n");
  1891. return 0;
  1892. }
  1893. cpus = cpu_map__new_data(&event->cpu_map.data);
  1894. if (!cpus)
  1895. return -ENOMEM;
  1896. st->cpus = cpus;
  1897. return set_maps(st);
  1898. }
  /* NULL-terminated usage text handed to parse_options() by __cmd_report(). */
  static const char * const stat_report_usage[] = { "perf stat report [<options>]", NULL };
  1903. static struct perf_stat perf_stat = {
  1904. .tool = {
  1905. .attr = perf_event__process_attr,
  1906. .event_update = perf_event__process_event_update,
  1907. .thread_map = process_thread_map_event,
  1908. .cpu_map = process_cpu_map_event,
  1909. .stat_config = process_stat_config_event,
  1910. .stat = perf_event__process_stat_event,
  1911. .stat_round = process_stat_round_event,
  1912. },
  1913. .aggr_mode = AGGR_UNSET,
  1914. };
  1915. static int __cmd_report(int argc, const char **argv)
  1916. {
  1917. struct perf_session *session;
  1918. const struct option options[] = {
  1919. OPT_STRING('i', "input", &input_name, "file", "input file name"),
  1920. OPT_SET_UINT(0, "per-socket", &perf_stat.aggr_mode,
  1921. "aggregate counts per processor socket", AGGR_SOCKET),
  1922. OPT_SET_UINT(0, "per-core", &perf_stat.aggr_mode,
  1923. "aggregate counts per physical processor core", AGGR_CORE),
  1924. OPT_SET_UINT('A', "no-aggr", &perf_stat.aggr_mode,
  1925. "disable CPU count aggregation", AGGR_NONE),
  1926. OPT_END()
  1927. };
  1928. struct stat st;
  1929. int ret;
  1930. argc = parse_options(argc, argv, options, stat_report_usage, 0);
  1931. if (!input_name || !strlen(input_name)) {
  1932. if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode))
  1933. input_name = "-";
  1934. else
  1935. input_name = "perf.data";
  1936. }
  1937. perf_stat.file.path = input_name;
  1938. perf_stat.file.mode = PERF_DATA_MODE_READ;
  1939. session = perf_session__new(&perf_stat.file, false, &perf_stat.tool);
  1940. if (session == NULL)
  1941. return -1;
  1942. perf_stat.session = session;
  1943. stat_config.output = stderr;
  1944. evsel_list = session->evlist;
  1945. ret = perf_session__process_events(session);
  1946. if (ret)
  1947. return ret;
  1948. perf_session__delete(session);
  1949. return 0;
  1950. }
  1951. int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
  1952. {
  1953. const char * const stat_usage[] = {
  1954. "perf stat [<options>] [<command>]",
  1955. NULL
  1956. };
  1957. int status = -EINVAL, run_idx;
  1958. const char *mode;
  1959. FILE *output = stderr;
  1960. unsigned int interval;
  1961. const char * const stat_subcommands[] = { "record", "report" };
  1962. setlocale(LC_ALL, "");
  1963. evsel_list = perf_evlist__new();
  1964. if (evsel_list == NULL)
  1965. return -ENOMEM;
  1966. parse_events__shrink_config_terms();
  1967. argc = parse_options_subcommand(argc, argv, stat_options, stat_subcommands,
  1968. (const char **) stat_usage,
  1969. PARSE_OPT_STOP_AT_NON_OPTION);
  1970. perf_stat__init_shadow_stats();
  1971. if (csv_sep) {
  1972. csv_output = true;
  1973. if (!strcmp(csv_sep, "\\t"))
  1974. csv_sep = "\t";
  1975. } else
  1976. csv_sep = DEFAULT_SEPARATOR;
  1977. if (argc && !strncmp(argv[0], "rec", 3)) {
  1978. argc = __cmd_record(argc, argv);
  1979. if (argc < 0)
  1980. return -1;
  1981. } else if (argc && !strncmp(argv[0], "rep", 3))
  1982. return __cmd_report(argc, argv);
  1983. interval = stat_config.interval;
  1984. /*
  1985. * For record command the -o is already taken care of.
  1986. */
  1987. if (!STAT_RECORD && output_name && strcmp(output_name, "-"))
  1988. output = NULL;
  1989. if (output_name && output_fd) {
  1990. fprintf(stderr, "cannot use both --output and --log-fd\n");
  1991. parse_options_usage(stat_usage, stat_options, "o", 1);
  1992. parse_options_usage(NULL, stat_options, "log-fd", 0);
  1993. goto out;
  1994. }
  1995. if (metric_only && stat_config.aggr_mode == AGGR_THREAD) {
  1996. fprintf(stderr, "--metric-only is not supported with --per-thread\n");
  1997. goto out;
  1998. }
  1999. if (metric_only && run_count > 1) {
  2000. fprintf(stderr, "--metric-only is not supported with -r\n");
  2001. goto out;
  2002. }
  2003. if (output_fd < 0) {
  2004. fprintf(stderr, "argument to --log-fd must be a > 0\n");
  2005. parse_options_usage(stat_usage, stat_options, "log-fd", 0);
  2006. goto out;
  2007. }
  2008. if (!output) {
  2009. struct timespec tm;
  2010. mode = append_file ? "a" : "w";
  2011. output = fopen(output_name, mode);
  2012. if (!output) {
  2013. perror("failed to create output file");
  2014. return -1;
  2015. }
  2016. clock_gettime(CLOCK_REALTIME, &tm);
  2017. fprintf(output, "# started on %s\n", ctime(&tm.tv_sec));
  2018. } else if (output_fd > 0) {
  2019. mode = append_file ? "a" : "w";
  2020. output = fdopen(output_fd, mode);
  2021. if (!output) {
  2022. perror("Failed opening logfd");
  2023. return -errno;
  2024. }
  2025. }
  2026. stat_config.output = output;
  2027. /*
  2028. * let the spreadsheet do the pretty-printing
  2029. */
  2030. if (csv_output) {
  2031. /* User explicitly passed -B? */
  2032. if (big_num_opt == 1) {
  2033. fprintf(stderr, "-B option not supported with -x\n");
  2034. parse_options_usage(stat_usage, stat_options, "B", 1);
  2035. parse_options_usage(NULL, stat_options, "x", 1);
  2036. goto out;
  2037. } else /* Nope, so disable big number formatting */
  2038. big_num = false;
  2039. } else if (big_num_opt == 0) /* User passed --no-big-num */
  2040. big_num = false;
  2041. if (!argc && target__none(&target))
  2042. usage_with_options(stat_usage, stat_options);
  2043. if (run_count < 0) {
  2044. pr_err("Run count must be a positive number\n");
  2045. parse_options_usage(stat_usage, stat_options, "r", 1);
  2046. goto out;
  2047. } else if (run_count == 0) {
  2048. forever = true;
  2049. run_count = 1;
  2050. }
  2051. if ((stat_config.aggr_mode == AGGR_THREAD) && !target__has_task(&target)) {
  2052. fprintf(stderr, "The --per-thread option is only available "
  2053. "when monitoring via -p -t options.\n");
  2054. parse_options_usage(NULL, stat_options, "p", 1);
  2055. parse_options_usage(NULL, stat_options, "t", 1);
  2056. goto out;
  2057. }
  2058. /*
  2059. * no_aggr, cgroup are for system-wide only
  2060. * --per-thread is aggregated per thread, we dont mix it with cpu mode
  2061. */
  2062. if (((stat_config.aggr_mode != AGGR_GLOBAL &&
  2063. stat_config.aggr_mode != AGGR_THREAD) || nr_cgroups) &&
  2064. !target__has_cpu(&target)) {
  2065. fprintf(stderr, "both cgroup and no-aggregation "
  2066. "modes only available in system-wide mode\n");
  2067. parse_options_usage(stat_usage, stat_options, "G", 1);
  2068. parse_options_usage(NULL, stat_options, "A", 1);
  2069. parse_options_usage(NULL, stat_options, "a", 1);
  2070. goto out;
  2071. }
  2072. if (add_default_attributes())
  2073. goto out;
  2074. target__validate(&target);
  2075. if (perf_evlist__create_maps(evsel_list, &target) < 0) {
  2076. if (target__has_task(&target)) {
  2077. pr_err("Problems finding threads of monitor\n");
  2078. parse_options_usage(stat_usage, stat_options, "p", 1);
  2079. parse_options_usage(NULL, stat_options, "t", 1);
  2080. } else if (target__has_cpu(&target)) {
  2081. perror("failed to parse CPUs map");
  2082. parse_options_usage(stat_usage, stat_options, "C", 1);
  2083. parse_options_usage(NULL, stat_options, "a", 1);
  2084. }
  2085. goto out;
  2086. }
  2087. /*
  2088. * Initialize thread_map with comm names,
  2089. * so we could print it out on output.
  2090. */
  2091. if (stat_config.aggr_mode == AGGR_THREAD)
  2092. thread_map__read_comms(evsel_list->threads);
  2093. if (interval && interval < 100) {
  2094. if (interval < 10) {
  2095. pr_err("print interval must be >= 10ms\n");
  2096. parse_options_usage(stat_usage, stat_options, "I", 1);
  2097. goto out;
  2098. } else
  2099. pr_warning("print interval < 100ms. "
  2100. "The overhead percentage could be high in some cases. "
  2101. "Please proceed with caution.\n");
  2102. }
  2103. if (perf_evlist__alloc_stats(evsel_list, interval))
  2104. goto out;
  2105. if (perf_stat_init_aggr_mode())
  2106. goto out;
  2107. /*
  2108. * We dont want to block the signals - that would cause
  2109. * child tasks to inherit that and Ctrl-C would not work.
  2110. * What we want is for Ctrl-C to work in the exec()-ed
  2111. * task, but being ignored by perf stat itself:
  2112. */
  2113. atexit(sig_atexit);
  2114. if (!forever)
  2115. signal(SIGINT, skip_signal);
  2116. signal(SIGCHLD, skip_signal);
  2117. signal(SIGALRM, skip_signal);
  2118. signal(SIGABRT, skip_signal);
  2119. status = 0;
  2120. for (run_idx = 0; forever || run_idx < run_count; run_idx++) {
  2121. if (run_count != 1 && verbose)
  2122. fprintf(output, "[ perf stat: executing run #%d ... ]\n",
  2123. run_idx + 1);
  2124. status = run_perf_stat(argc, argv);
  2125. if (forever && status != -1) {
  2126. print_counters(NULL, argc, argv);
  2127. perf_stat__reset_stats();
  2128. }
  2129. }
  2130. if (!forever && status != -1 && !interval)
  2131. print_counters(NULL, argc, argv);
  2132. if (STAT_RECORD) {
  2133. /*
  2134. * We synthesize the kernel mmap record just so that older tools
  2135. * don't emit warnings about not being able to resolve symbols
  2136. * due to /proc/sys/kernel/kptr_restrict settings and instear provide
  2137. * a saner message about no samples being in the perf.data file.
  2138. *
  2139. * This also serves to suppress a warning about f_header.data.size == 0
  2140. * in header.c at the moment 'perf stat record' gets introduced, which
  2141. * is not really needed once we start adding the stat specific PERF_RECORD_
  2142. * records, but the need to suppress the kptr_restrict messages in older
  2143. * tools remain -acme
  2144. */
  2145. int fd = perf_data_file__fd(&perf_stat.file);
  2146. int err = perf_event__synthesize_kernel_mmap((void *)&perf_stat,
  2147. process_synthesized_event,
  2148. &perf_stat.session->machines.host);
  2149. if (err) {
  2150. pr_warning("Couldn't synthesize the kernel mmap record, harmless, "
  2151. "older tools may produce warnings about this file\n.");
  2152. }
  2153. if (!interval) {
  2154. if (WRITE_STAT_ROUND_EVENT(walltime_nsecs_stats.max, FINAL))
  2155. pr_err("failed to write stat round event\n");
  2156. }
  2157. if (!perf_stat.file.is_pipe) {
  2158. perf_stat.session->header.data_size += perf_stat.bytes_written;
  2159. perf_session__write_header(perf_stat.session, evsel_list, fd, true);
  2160. }
  2161. perf_session__delete(perf_stat.session);
  2162. }
  2163. perf_stat__exit_aggr_mode();
  2164. perf_evlist__free_stats(evsel_list);
  2165. out:
  2166. perf_evlist__delete(evsel_list);
  2167. return status;
  2168. }