builtin-stat.c 77 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971297229732974297529762977297829792980298129822983298429852986298729882989299029912992299329942995299629972998299930003001300230033004300530063007300830093010301130123013301430153016301730183019302030213022302330243025302630273028302930303031303230333034303530363037303830393040304130423043304430453046304730483049305030513052305330543055305630573058305930603061306230633064306530663067306830693070307130723073307430753076307730783079308030813082308330843085308630873088308930903091309230933094309530963097309830993100310131023103310431053106310731083109311031113112311331143115311631173118311931203121312231233124312531263127312831293130313131323133313431353136313731383139314031413142314331443145314631473148314931503151315231533154315531563157315831593160316131623163316431653166316731683169
  1. /*
  2. * builtin-stat.c
  3. *
  4. * Builtin stat command: Give a precise performance counters summary
  5. * overview about any workload, CPU or specific PID.
  6. *
  7. * Sample output:
  8. $ perf stat ./hackbench 10
  9. Time: 0.118
  10. Performance counter stats for './hackbench 10':
  11. 1708.761321 task-clock # 11.037 CPUs utilized
  12. 41,190 context-switches # 0.024 M/sec
  13. 6,735 CPU-migrations # 0.004 M/sec
  14. 17,318 page-faults # 0.010 M/sec
  15. 5,205,202,243 cycles # 3.046 GHz
  16. 3,856,436,920 stalled-cycles-frontend # 74.09% frontend cycles idle
  17. 1,600,790,871 stalled-cycles-backend # 30.75% backend cycles idle
  18. 2,603,501,247 instructions # 0.50 insns per cycle
  19. # 1.48 stalled cycles per insn
  20. 484,357,498 branches # 283.455 M/sec
  21. 6,388,934 branch-misses # 1.32% of all branches
  22. 0.154822978 seconds time elapsed
  23. *
  24. * Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
  25. *
  26. * Improvements and fixes by:
  27. *
  28. * Arjan van de Ven <arjan@linux.intel.com>
  29. * Yanmin Zhang <yanmin.zhang@intel.com>
  30. * Wu Fengguang <fengguang.wu@intel.com>
  31. * Mike Galbraith <efault@gmx.de>
  32. * Paul Mackerras <paulus@samba.org>
  33. * Jaswinder Singh Rajput <jaswinder@kernel.org>
  34. *
  35. * Released under the GPL v2. (and only v2, not any later version)
  36. */
  37. #include "perf.h"
  38. #include "builtin.h"
  39. #include "util/cgroup.h"
  40. #include "util/util.h"
  41. #include <subcmd/parse-options.h>
  42. #include "util/parse-events.h"
  43. #include "util/pmu.h"
  44. #include "util/event.h"
  45. #include "util/evlist.h"
  46. #include "util/evsel.h"
  47. #include "util/debug.h"
  48. #include "util/drv_configs.h"
  49. #include "util/color.h"
  50. #include "util/stat.h"
  51. #include "util/header.h"
  52. #include "util/cpumap.h"
  53. #include "util/thread.h"
  54. #include "util/thread_map.h"
  55. #include "util/counts.h"
  56. #include "util/group.h"
  57. #include "util/session.h"
  58. #include "util/tool.h"
  59. #include "util/string2.h"
  60. #include "util/metricgroup.h"
  61. #include "asm/bug.h"
  62. #include <linux/time64.h>
  63. #include <api/fs/fs.h>
  64. #include <errno.h>
  65. #include <signal.h>
  66. #include <stdlib.h>
  67. #include <sys/prctl.h>
  68. #include <inttypes.h>
  69. #include <locale.h>
  70. #include <math.h>
  71. #include <sys/types.h>
  72. #include <sys/stat.h>
  73. #include <sys/wait.h>
  74. #include <unistd.h>
  75. #include <sys/time.h>
  76. #include <sys/resource.h>
  77. #include <sys/wait.h>
  78. #include "sane_ctype.h"
  /* Default separator string: a single space. */
  #define DEFAULT_SEPARATOR       " "

  /* Placeholder texts for a counter that is unsupported or was not counted. */
  #define CNTR_NOT_SUPPORTED      "<not supported>"
  #define CNTR_NOT_COUNTED        "<not counted>"

  /* Path fragment of the freeze_on_smi attribute (base directory not visible here). */
  #define FREEZE_ON_SMI_PATH      "devices/cpu/freeze_on_smi"

  /* Forward declaration: process_interval() calls this before its definition. */
  static void print_counters(struct timespec *ts, int argc, const char **argv);
  /* Default events used for perf stat -T */
  static const char *transaction_attrs =
      "task-clock,"
      "{instructions,cycles,cpu/cycles-t/,cpu/tx-start/,cpu/el-start/,cpu/cycles-ct/}";

  /* More limited version when the CPU does not have all events. */
  static const char *transaction_limited_attrs =
      "task-clock,"
      "{instructions,cycles,cpu/cycles-t/,cpu/tx-start/}";

  /* NULL-terminated list of topdown event names. */
  static const char *topdown_attrs[] = {
      "topdown-total-slots",
      "topdown-slots-retired",
      "topdown-recovery-bubbles",
      "topdown-fetch-bubbles",
      "topdown-slots-issued",
      NULL,
  };

  /* Event group string associated with the smi_cost option (by name). */
  static const char *smi_cost_attrs = "{msr/aperf/,msr/smi/,cycles}";
  121. static struct perf_evlist *evsel_list;
  122. static struct rblist metric_events;
  123. static struct target target = {
  124. .uid = UINT_MAX,
  125. };
  126. typedef int (*aggr_get_id_t)(struct cpu_map *m, int cpu);
  127. static int run_count = 1;
  128. static bool no_inherit = false;
  129. static volatile pid_t child_pid = -1;
  130. static bool null_run = false;
  131. static int detailed_run = 0;
  132. static bool transaction_run;
  133. static bool topdown_run = false;
  134. static bool smi_cost = false;
  135. static bool smi_reset = false;
  136. static bool big_num = true;
  137. static int big_num_opt = -1;
  138. static const char *csv_sep = NULL;
  139. static bool csv_output = false;
  140. static bool group = false;
  141. static const char *pre_cmd = NULL;
  142. static const char *post_cmd = NULL;
  143. static bool sync_run = false;
  144. static unsigned int initial_delay = 0;
  145. static unsigned int unit_width = 4; /* strlen("unit") */
  146. static bool forever = false;
  147. static bool metric_only = false;
  148. static bool force_metric_only = false;
  149. static bool no_merge = false;
  150. static bool walltime_run_table = false;
  151. static struct timespec ref_time;
  152. static struct cpu_map *aggr_map;
  153. static aggr_get_id_t aggr_get_id;
  154. static bool append_file;
  155. static bool interval_count;
  156. static const char *output_name;
  157. static int output_fd;
  158. static int print_free_counters_hint;
  159. static int print_mixed_hw_group_error;
  160. static u64 *walltime_run;
  161. static bool ru_display = false;
  162. static struct rusage ru_data;
  163. struct perf_stat {
  164. bool record;
  165. struct perf_data data;
  166. struct perf_session *session;
  167. u64 bytes_written;
  168. struct perf_tool tool;
  169. bool maps_allocated;
  170. struct cpu_map *cpus;
  171. struct thread_map *threads;
  172. enum aggr_mode aggr_mode;
  173. };
  174. static struct perf_stat perf_stat;
  175. #define STAT_RECORD perf_stat.record
  176. static volatile int done = 0;
  177. static struct perf_stat_config stat_config = {
  178. .aggr_mode = AGGR_GLOBAL,
  179. .scale = true,
  180. };
  181. static bool is_duration_time(struct perf_evsel *evsel)
  182. {
  183. return !strcmp(evsel->name, "duration_time");
  184. }
  185. static inline void diff_timespec(struct timespec *r, struct timespec *a,
  186. struct timespec *b)
  187. {
  188. r->tv_sec = a->tv_sec - b->tv_sec;
  189. if (a->tv_nsec < b->tv_nsec) {
  190. r->tv_nsec = a->tv_nsec + NSEC_PER_SEC - b->tv_nsec;
  191. r->tv_sec--;
  192. } else {
  193. r->tv_nsec = a->tv_nsec - b->tv_nsec ;
  194. }
  195. }
  196. static void perf_stat__reset_stats(void)
  197. {
  198. int i;
  199. perf_evlist__reset_stats(evsel_list);
  200. perf_stat__reset_shadow_stats();
  201. for (i = 0; i < stat_config.stats_num; i++)
  202. perf_stat__reset_shadow_per_stat(&stat_config.stats[i]);
  203. }
  204. static int create_perf_stat_counter(struct perf_evsel *evsel)
  205. {
  206. struct perf_event_attr *attr = &evsel->attr;
  207. struct perf_evsel *leader = evsel->leader;
  208. if (stat_config.scale) {
  209. attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
  210. PERF_FORMAT_TOTAL_TIME_RUNNING;
  211. }
  212. /*
  213. * The event is part of non trivial group, let's enable
  214. * the group read (for leader) and ID retrieval for all
  215. * members.
  216. */
  217. if (leader->nr_members > 1)
  218. attr->read_format |= PERF_FORMAT_ID|PERF_FORMAT_GROUP;
  219. attr->inherit = !no_inherit;
  220. /*
  221. * Some events get initialized with sample_(period/type) set,
  222. * like tracepoints. Clear it up for counting.
  223. */
  224. attr->sample_period = 0;
  225. /*
  226. * But set sample_type to PERF_SAMPLE_IDENTIFIER, which should be harmless
  227. * while avoiding that older tools show confusing messages.
  228. *
  229. * However for pipe sessions we need to keep it zero,
  230. * because script's perf_evsel__check_attr is triggered
  231. * by attr->sample_type != 0, and we can't run it on
  232. * stat sessions.
  233. */
  234. if (!(STAT_RECORD && perf_stat.data.is_pipe))
  235. attr->sample_type = PERF_SAMPLE_IDENTIFIER;
  236. /*
  237. * Disabling all counters initially, they will be enabled
  238. * either manually by us or by kernel via enable_on_exec
  239. * set later.
  240. */
  241. if (perf_evsel__is_group_leader(evsel)) {
  242. attr->disabled = 1;
  243. /*
  244. * In case of initial_delay we enable tracee
  245. * events manually.
  246. */
  247. if (target__none(&target) && !initial_delay)
  248. attr->enable_on_exec = 1;
  249. }
  250. if (target__has_cpu(&target) && !target__has_per_thread(&target))
  251. return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel));
  252. return perf_evsel__open_per_thread(evsel, evsel_list->threads);
  253. }
  254. /*
  255. * Does the counter have nsecs as a unit?
  256. */
  257. static inline int nsec_counter(struct perf_evsel *evsel)
  258. {
  259. if (perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) ||
  260. perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
  261. return 1;
  262. return 0;
  263. }
  264. static int process_synthesized_event(struct perf_tool *tool __maybe_unused,
  265. union perf_event *event,
  266. struct perf_sample *sample __maybe_unused,
  267. struct machine *machine __maybe_unused)
  268. {
  269. if (perf_data__write(&perf_stat.data, event, event->header.size) < 0) {
  270. pr_err("failed to write perf data, error: %m\n");
  271. return -1;
  272. }
  273. perf_stat.bytes_written += event->header.size;
  274. return 0;
  275. }
  276. static int write_stat_round_event(u64 tm, u64 type)
  277. {
  278. return perf_event__synthesize_stat_round(NULL, tm, type,
  279. process_synthesized_event,
  280. NULL);
  281. }
  282. #define WRITE_STAT_ROUND_EVENT(time, interval) \
  283. write_stat_round_event(time, PERF_STAT_ROUND_TYPE__ ## interval)
  284. #define SID(e, x, y) xyarray__entry(e->sample_id, x, y)
  285. static int
  286. perf_evsel__write_stat_event(struct perf_evsel *counter, u32 cpu, u32 thread,
  287. struct perf_counts_values *count)
  288. {
  289. struct perf_sample_id *sid = SID(counter, cpu, thread);
  290. return perf_event__synthesize_stat(NULL, cpu, thread, sid->id, count,
  291. process_synthesized_event, NULL);
  292. }
  293. /*
  294. * Read out the results of a single counter:
  295. * do not aggregate counts across CPUs in system-wide mode
  296. */
  297. static int read_counter(struct perf_evsel *counter)
  298. {
  299. int nthreads = thread_map__nr(evsel_list->threads);
  300. int ncpus, cpu, thread;
  301. if (target__has_cpu(&target) && !target__has_per_thread(&target))
  302. ncpus = perf_evsel__nr_cpus(counter);
  303. else
  304. ncpus = 1;
  305. if (!counter->supported)
  306. return -ENOENT;
  307. if (counter->system_wide)
  308. nthreads = 1;
  309. for (thread = 0; thread < nthreads; thread++) {
  310. for (cpu = 0; cpu < ncpus; cpu++) {
  311. struct perf_counts_values *count;
  312. count = perf_counts(counter->counts, cpu, thread);
  313. /*
  314. * The leader's group read loads data into its group members
  315. * (via perf_evsel__read_counter) and sets threir count->loaded.
  316. */
  317. if (!count->loaded &&
  318. perf_evsel__read_counter(counter, cpu, thread)) {
  319. counter->counts->scaled = -1;
  320. perf_counts(counter->counts, cpu, thread)->ena = 0;
  321. perf_counts(counter->counts, cpu, thread)->run = 0;
  322. return -1;
  323. }
  324. count->loaded = false;
  325. if (STAT_RECORD) {
  326. if (perf_evsel__write_stat_event(counter, cpu, thread, count)) {
  327. pr_err("failed to write stat event\n");
  328. return -1;
  329. }
  330. }
  331. if (verbose > 1) {
  332. fprintf(stat_config.output,
  333. "%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
  334. perf_evsel__name(counter),
  335. cpu,
  336. count->val, count->ena, count->run);
  337. }
  338. }
  339. }
  340. return 0;
  341. }
  342. static void read_counters(void)
  343. {
  344. struct perf_evsel *counter;
  345. int ret;
  346. evlist__for_each_entry(evsel_list, counter) {
  347. ret = read_counter(counter);
  348. if (ret)
  349. pr_debug("failed to read counter %s\n", counter->name);
  350. if (ret == 0 && perf_stat_process_counter(&stat_config, counter))
  351. pr_warning("failed to process counter %s\n", counter->name);
  352. }
  353. }
  354. static void process_interval(void)
  355. {
  356. struct timespec ts, rs;
  357. read_counters();
  358. clock_gettime(CLOCK_MONOTONIC, &ts);
  359. diff_timespec(&rs, &ts, &ref_time);
  360. if (STAT_RECORD) {
  361. if (WRITE_STAT_ROUND_EVENT(rs.tv_sec * NSEC_PER_SEC + rs.tv_nsec, INTERVAL))
  362. pr_err("failed to write stat round event\n");
  363. }
  364. init_stats(&walltime_nsecs_stats);
  365. update_stats(&walltime_nsecs_stats, stat_config.interval * 1000000);
  366. print_counters(&rs, 0, NULL);
  367. }
  368. static void enable_counters(void)
  369. {
  370. if (initial_delay)
  371. usleep(initial_delay * USEC_PER_MSEC);
  372. /*
  373. * We need to enable counters only if:
  374. * - we don't have tracee (attaching to task or cpu)
  375. * - we have initial delay configured
  376. */
  377. if (!target__none(&target) || initial_delay)
  378. perf_evlist__enable(evsel_list);
  379. }
  380. static void disable_counters(void)
  381. {
  382. /*
  383. * If we don't have tracee (attaching to task or cpu), counters may
  384. * still be running. To get accurate group ratios, we must stop groups
  385. * from counting before reading their constituent counters.
  386. */
  387. if (!target__none(&target))
  388. perf_evlist__disable(evsel_list);
  389. }
  390. static volatile int workload_exec_errno;
  391. /*
  392. * perf_evlist__prepare_workload will send a SIGUSR1
  393. * if the fork fails, since we asked by setting its
  394. * want_signal to true.
  395. */
  396. static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *info,
  397. void *ucontext __maybe_unused)
  398. {
  399. workload_exec_errno = info->si_value.sival_int;
  400. }
  401. static int perf_stat_synthesize_config(bool is_pipe)
  402. {
  403. int err;
  404. if (is_pipe) {
  405. err = perf_event__synthesize_attrs(NULL, perf_stat.session,
  406. process_synthesized_event);
  407. if (err < 0) {
  408. pr_err("Couldn't synthesize attrs.\n");
  409. return err;
  410. }
  411. }
  412. err = perf_event__synthesize_extra_attr(NULL,
  413. evsel_list,
  414. process_synthesized_event,
  415. is_pipe);
  416. err = perf_event__synthesize_thread_map2(NULL, evsel_list->threads,
  417. process_synthesized_event,
  418. NULL);
  419. if (err < 0) {
  420. pr_err("Couldn't synthesize thread map.\n");
  421. return err;
  422. }
  423. err = perf_event__synthesize_cpu_map(NULL, evsel_list->cpus,
  424. process_synthesized_event, NULL);
  425. if (err < 0) {
  426. pr_err("Couldn't synthesize thread map.\n");
  427. return err;
  428. }
  429. err = perf_event__synthesize_stat_config(NULL, &stat_config,
  430. process_synthesized_event, NULL);
  431. if (err < 0) {
  432. pr_err("Couldn't synthesize config.\n");
  433. return err;
  434. }
  435. return 0;
  436. }
  437. #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
  438. static int __store_counter_ids(struct perf_evsel *counter)
  439. {
  440. int cpu, thread;
  441. for (cpu = 0; cpu < xyarray__max_x(counter->fd); cpu++) {
  442. for (thread = 0; thread < xyarray__max_y(counter->fd);
  443. thread++) {
  444. int fd = FD(counter, cpu, thread);
  445. if (perf_evlist__id_add_fd(evsel_list, counter,
  446. cpu, thread, fd) < 0)
  447. return -1;
  448. }
  449. }
  450. return 0;
  451. }
  452. static int store_counter_ids(struct perf_evsel *counter)
  453. {
  454. struct cpu_map *cpus = counter->cpus;
  455. struct thread_map *threads = counter->threads;
  456. if (perf_evsel__alloc_id(counter, cpus->nr, threads->nr))
  457. return -ENOMEM;
  458. return __store_counter_ids(counter);
  459. }
  460. static bool perf_evsel__should_store_id(struct perf_evsel *counter)
  461. {
  462. return STAT_RECORD || counter->attr.read_format & PERF_FORMAT_ID;
  463. }
  464. static struct perf_evsel *perf_evsel__reset_weak_group(struct perf_evsel *evsel)
  465. {
  466. struct perf_evsel *c2, *leader;
  467. bool is_open = true;
  468. leader = evsel->leader;
  469. pr_debug("Weak group for %s/%d failed\n",
  470. leader->name, leader->nr_members);
  471. /*
  472. * for_each_group_member doesn't work here because it doesn't
  473. * include the first entry.
  474. */
  475. evlist__for_each_entry(evsel_list, c2) {
  476. if (c2 == evsel)
  477. is_open = false;
  478. if (c2->leader == leader) {
  479. if (is_open)
  480. perf_evsel__close(c2);
  481. c2->leader = c2;
  482. c2->nr_members = 0;
  483. }
  484. }
  485. return leader;
  486. }
  487. static int __run_perf_stat(int argc, const char **argv, int run_idx)
  488. {
  489. int interval = stat_config.interval;
  490. int times = stat_config.times;
  491. int timeout = stat_config.timeout;
  492. char msg[BUFSIZ];
  493. unsigned long long t0, t1;
  494. struct perf_evsel *counter;
  495. struct timespec ts;
  496. size_t l;
  497. int status = 0;
  498. const bool forks = (argc > 0);
  499. bool is_pipe = STAT_RECORD ? perf_stat.data.is_pipe : false;
  500. struct perf_evsel_config_term *err_term;
  501. if (interval) {
  502. ts.tv_sec = interval / USEC_PER_MSEC;
  503. ts.tv_nsec = (interval % USEC_PER_MSEC) * NSEC_PER_MSEC;
  504. } else if (timeout) {
  505. ts.tv_sec = timeout / USEC_PER_MSEC;
  506. ts.tv_nsec = (timeout % USEC_PER_MSEC) * NSEC_PER_MSEC;
  507. } else {
  508. ts.tv_sec = 1;
  509. ts.tv_nsec = 0;
  510. }
  511. if (forks) {
  512. if (perf_evlist__prepare_workload(evsel_list, &target, argv, is_pipe,
  513. workload_exec_failed_signal) < 0) {
  514. perror("failed to prepare workload");
  515. return -1;
  516. }
  517. child_pid = evsel_list->workload.pid;
  518. }
  519. if (group)
  520. perf_evlist__set_leader(evsel_list);
  521. evlist__for_each_entry(evsel_list, counter) {
  522. try_again:
  523. if (create_perf_stat_counter(counter) < 0) {
  524. /* Weak group failed. Reset the group. */
  525. if ((errno == EINVAL || errno == EBADF) &&
  526. counter->leader != counter &&
  527. counter->weak_group) {
  528. counter = perf_evsel__reset_weak_group(counter);
  529. goto try_again;
  530. }
  531. /*
  532. * PPC returns ENXIO for HW counters until 2.6.37
  533. * (behavior changed with commit b0a873e).
  534. */
  535. if (errno == EINVAL || errno == ENOSYS ||
  536. errno == ENOENT || errno == EOPNOTSUPP ||
  537. errno == ENXIO) {
  538. if (verbose > 0)
  539. ui__warning("%s event is not supported by the kernel.\n",
  540. perf_evsel__name(counter));
  541. counter->supported = false;
  542. if ((counter->leader != counter) ||
  543. !(counter->leader->nr_members > 1))
  544. continue;
  545. } else if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) {
  546. if (verbose > 0)
  547. ui__warning("%s\n", msg);
  548. goto try_again;
  549. } else if (target__has_per_thread(&target) &&
  550. evsel_list->threads &&
  551. evsel_list->threads->err_thread != -1) {
  552. /*
  553. * For global --per-thread case, skip current
  554. * error thread.
  555. */
  556. if (!thread_map__remove(evsel_list->threads,
  557. evsel_list->threads->err_thread)) {
  558. evsel_list->threads->err_thread = -1;
  559. goto try_again;
  560. }
  561. }
  562. perf_evsel__open_strerror(counter, &target,
  563. errno, msg, sizeof(msg));
  564. ui__error("%s\n", msg);
  565. if (child_pid != -1)
  566. kill(child_pid, SIGTERM);
  567. return -1;
  568. }
  569. counter->supported = true;
  570. l = strlen(counter->unit);
  571. if (l > unit_width)
  572. unit_width = l;
  573. if (perf_evsel__should_store_id(counter) &&
  574. store_counter_ids(counter))
  575. return -1;
  576. }
  577. if (perf_evlist__apply_filters(evsel_list, &counter)) {
  578. pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
  579. counter->filter, perf_evsel__name(counter), errno,
  580. str_error_r(errno, msg, sizeof(msg)));
  581. return -1;
  582. }
  583. if (perf_evlist__apply_drv_configs(evsel_list, &counter, &err_term)) {
  584. pr_err("failed to set config \"%s\" on event %s with %d (%s)\n",
  585. err_term->val.drv_cfg, perf_evsel__name(counter), errno,
  586. str_error_r(errno, msg, sizeof(msg)));
  587. return -1;
  588. }
  589. if (STAT_RECORD) {
  590. int err, fd = perf_data__fd(&perf_stat.data);
  591. if (is_pipe) {
  592. err = perf_header__write_pipe(perf_data__fd(&perf_stat.data));
  593. } else {
  594. err = perf_session__write_header(perf_stat.session, evsel_list,
  595. fd, false);
  596. }
  597. if (err < 0)
  598. return err;
  599. err = perf_stat_synthesize_config(is_pipe);
  600. if (err < 0)
  601. return err;
  602. }
  603. /*
  604. * Enable counters and exec the command:
  605. */
  606. t0 = rdclock();
  607. clock_gettime(CLOCK_MONOTONIC, &ref_time);
  608. if (forks) {
  609. perf_evlist__start_workload(evsel_list);
  610. enable_counters();
  611. if (interval || timeout) {
  612. while (!waitpid(child_pid, &status, WNOHANG)) {
  613. nanosleep(&ts, NULL);
  614. if (timeout)
  615. break;
  616. process_interval();
  617. if (interval_count && !(--times))
  618. break;
  619. }
  620. }
  621. wait4(child_pid, &status, 0, &ru_data);
  622. if (workload_exec_errno) {
  623. const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
  624. pr_err("Workload failed: %s\n", emsg);
  625. return -1;
  626. }
  627. if (WIFSIGNALED(status))
  628. psignal(WTERMSIG(status), argv[0]);
  629. } else {
  630. enable_counters();
  631. while (!done) {
  632. nanosleep(&ts, NULL);
  633. if (timeout)
  634. break;
  635. if (interval) {
  636. process_interval();
  637. if (interval_count && !(--times))
  638. break;
  639. }
  640. }
  641. }
  642. disable_counters();
  643. t1 = rdclock();
  644. if (walltime_run_table)
  645. walltime_run[run_idx] = t1 - t0;
  646. update_stats(&walltime_nsecs_stats, t1 - t0);
  647. /*
  648. * Closing a group leader splits the group, and as we only disable
  649. * group leaders, results in remaining events becoming enabled. To
  650. * avoid arbitrary skew, we must read all counters before closing any
  651. * group leaders.
  652. */
  653. read_counters();
  654. perf_evlist__close(evsel_list);
  655. return WEXITSTATUS(status);
  656. }
  657. static int run_perf_stat(int argc, const char **argv, int run_idx)
  658. {
  659. int ret;
  660. if (pre_cmd) {
  661. ret = system(pre_cmd);
  662. if (ret)
  663. return ret;
  664. }
  665. if (sync_run)
  666. sync();
  667. ret = __run_perf_stat(argc, argv, run_idx);
  668. if (ret)
  669. return ret;
  670. if (post_cmd) {
  671. ret = system(post_cmd);
  672. if (ret)
  673. return ret;
  674. }
  675. return ret;
  676. }
  677. static void print_running(u64 run, u64 ena)
  678. {
  679. if (csv_output) {
  680. fprintf(stat_config.output, "%s%" PRIu64 "%s%.2f",
  681. csv_sep,
  682. run,
  683. csv_sep,
  684. ena ? 100.0 * run / ena : 100.0);
  685. } else if (run != ena) {
  686. fprintf(stat_config.output, " (%.2f%%)", 100.0 * run / ena);
  687. }
  688. }
  689. static void print_noise_pct(double total, double avg)
  690. {
  691. double pct = rel_stddev_stats(total, avg);
  692. if (csv_output)
  693. fprintf(stat_config.output, "%s%.2f%%", csv_sep, pct);
  694. else if (pct)
  695. fprintf(stat_config.output, " ( +-%6.2f%% )", pct);
  696. }
  697. static void print_noise(struct perf_evsel *evsel, double avg)
  698. {
  699. struct perf_stat_evsel *ps;
  700. if (run_count == 1)
  701. return;
  702. ps = evsel->stats;
  703. print_noise_pct(stddev_stats(&ps->res_stats[0]), avg);
  704. }
  705. static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
  706. {
  707. switch (stat_config.aggr_mode) {
  708. case AGGR_CORE:
  709. fprintf(stat_config.output, "S%d-C%*d%s%*d%s",
  710. cpu_map__id_to_socket(id),
  711. csv_output ? 0 : -8,
  712. cpu_map__id_to_cpu(id),
  713. csv_sep,
  714. csv_output ? 0 : 4,
  715. nr,
  716. csv_sep);
  717. break;
  718. case AGGR_SOCKET:
  719. fprintf(stat_config.output, "S%*d%s%*d%s",
  720. csv_output ? 0 : -5,
  721. id,
  722. csv_sep,
  723. csv_output ? 0 : 4,
  724. nr,
  725. csv_sep);
  726. break;
  727. case AGGR_NONE:
  728. fprintf(stat_config.output, "CPU%*d%s",
  729. csv_output ? 0 : -4,
  730. perf_evsel__cpus(evsel)->map[id], csv_sep);
  731. break;
  732. case AGGR_THREAD:
  733. fprintf(stat_config.output, "%*s-%*d%s",
  734. csv_output ? 0 : 16,
  735. thread_map__comm(evsel->threads, id),
  736. csv_output ? 0 : -8,
  737. thread_map__pid(evsel->threads, id),
  738. csv_sep);
  739. break;
  740. case AGGR_GLOBAL:
  741. case AGGR_UNSET:
  742. default:
  743. break;
  744. }
  745. }
  746. struct outstate {
  747. FILE *fh;
  748. bool newline;
  749. const char *prefix;
  750. int nfields;
  751. int id, nr;
  752. struct perf_evsel *evsel;
  753. };
  754. #define METRIC_LEN 35
  755. static void new_line_std(void *ctx)
  756. {
  757. struct outstate *os = ctx;
  758. os->newline = true;
  759. }
  760. static void do_new_line_std(struct outstate *os)
  761. {
  762. fputc('\n', os->fh);
  763. fputs(os->prefix, os->fh);
  764. aggr_printout(os->evsel, os->id, os->nr);
  765. if (stat_config.aggr_mode == AGGR_NONE)
  766. fprintf(os->fh, " ");
  767. fprintf(os->fh, " ");
  768. }
  769. static void print_metric_std(void *ctx, const char *color, const char *fmt,
  770. const char *unit, double val)
  771. {
  772. struct outstate *os = ctx;
  773. FILE *out = os->fh;
  774. int n;
  775. bool newline = os->newline;
  776. os->newline = false;
  777. if (unit == NULL || fmt == NULL) {
  778. fprintf(out, "%-*s", METRIC_LEN, "");
  779. return;
  780. }
  781. if (newline)
  782. do_new_line_std(os);
  783. n = fprintf(out, " # ");
  784. if (color)
  785. n += color_fprintf(out, color, fmt, val);
  786. else
  787. n += fprintf(out, fmt, val);
  788. fprintf(out, " %-*s", METRIC_LEN - n - 1, unit);
  789. }
  790. static void new_line_csv(void *ctx)
  791. {
  792. struct outstate *os = ctx;
  793. int i;
  794. fputc('\n', os->fh);
  795. if (os->prefix)
  796. fprintf(os->fh, "%s%s", os->prefix, csv_sep);
  797. aggr_printout(os->evsel, os->id, os->nr);
  798. for (i = 0; i < os->nfields; i++)
  799. fputs(csv_sep, os->fh);
  800. }
  801. static void print_metric_csv(void *ctx,
  802. const char *color __maybe_unused,
  803. const char *fmt, const char *unit, double val)
  804. {
  805. struct outstate *os = ctx;
  806. FILE *out = os->fh;
  807. char buf[64], *vals, *ends;
  808. if (unit == NULL || fmt == NULL) {
  809. fprintf(out, "%s%s", csv_sep, csv_sep);
  810. return;
  811. }
  812. snprintf(buf, sizeof(buf), fmt, val);
  813. ends = vals = ltrim(buf);
  814. while (isdigit(*ends) || *ends == '.')
  815. ends++;
  816. *ends = 0;
  817. while (isspace(*unit))
  818. unit++;
  819. fprintf(out, "%s%s%s%s", csv_sep, vals, csv_sep, unit);
  820. }
  821. #define METRIC_ONLY_LEN 20
  822. /* Filter out some columns that don't work well in metrics only mode */
  823. static bool valid_only_metric(const char *unit)
  824. {
  825. if (!unit)
  826. return false;
  827. if (strstr(unit, "/sec") ||
  828. strstr(unit, "hz") ||
  829. strstr(unit, "Hz") ||
  830. strstr(unit, "CPUs utilized"))
  831. return false;
  832. return true;
  833. }
  834. static const char *fixunit(char *buf, struct perf_evsel *evsel,
  835. const char *unit)
  836. {
  837. if (!strncmp(unit, "of all", 6)) {
  838. snprintf(buf, 1024, "%s %s", perf_evsel__name(evsel),
  839. unit);
  840. return buf;
  841. }
  842. return unit;
  843. }
  844. static void print_metric_only(void *ctx, const char *color, const char *fmt,
  845. const char *unit, double val)
  846. {
  847. struct outstate *os = ctx;
  848. FILE *out = os->fh;
  849. int n;
  850. char buf[1024];
  851. unsigned mlen = METRIC_ONLY_LEN;
  852. if (!valid_only_metric(unit))
  853. return;
  854. unit = fixunit(buf, os->evsel, unit);
  855. if (color)
  856. n = color_fprintf(out, color, fmt, val);
  857. else
  858. n = fprintf(out, fmt, val);
  859. if (n > METRIC_ONLY_LEN)
  860. n = METRIC_ONLY_LEN;
  861. if (mlen < strlen(unit))
  862. mlen = strlen(unit) + 1;
  863. fprintf(out, "%*s", mlen - n, "");
  864. }
  865. static void print_metric_only_csv(void *ctx, const char *color __maybe_unused,
  866. const char *fmt,
  867. const char *unit, double val)
  868. {
  869. struct outstate *os = ctx;
  870. FILE *out = os->fh;
  871. char buf[64], *vals, *ends;
  872. char tbuf[1024];
  873. if (!valid_only_metric(unit))
  874. return;
  875. unit = fixunit(tbuf, os->evsel, unit);
  876. snprintf(buf, sizeof buf, fmt, val);
  877. ends = vals = ltrim(buf);
  878. while (isdigit(*ends) || *ends == '.')
  879. ends++;
  880. *ends = 0;
  881. fprintf(out, "%s%s", vals, csv_sep);
  882. }
  883. static void new_line_metric(void *ctx __maybe_unused)
  884. {
  885. }
  886. static void print_metric_header(void *ctx, const char *color __maybe_unused,
  887. const char *fmt __maybe_unused,
  888. const char *unit, double val __maybe_unused)
  889. {
  890. struct outstate *os = ctx;
  891. char tbuf[1024];
  892. if (!valid_only_metric(unit))
  893. return;
  894. unit = fixunit(tbuf, os->evsel, unit);
  895. if (csv_output)
  896. fprintf(os->fh, "%s%s", unit, csv_sep);
  897. else
  898. fprintf(os->fh, "%-*s ", METRIC_ONLY_LEN, unit);
  899. }
  900. static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
  901. {
  902. FILE *output = stat_config.output;
  903. double msecs = avg / NSEC_PER_MSEC;
  904. const char *fmt_v, *fmt_n;
  905. char name[25];
  906. fmt_v = csv_output ? "%.6f%s" : "%18.6f%s";
  907. fmt_n = csv_output ? "%s" : "%-25s";
  908. aggr_printout(evsel, id, nr);
  909. scnprintf(name, sizeof(name), "%s%s",
  910. perf_evsel__name(evsel), csv_output ? "" : " (msec)");
  911. fprintf(output, fmt_v, msecs, csv_sep);
  912. if (csv_output)
  913. fprintf(output, "%s%s", evsel->unit, csv_sep);
  914. else
  915. fprintf(output, "%-*s%s", unit_width, evsel->unit, csv_sep);
  916. fprintf(output, fmt_n, name);
  917. if (evsel->cgrp)
  918. fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
  919. }
  920. static int first_shadow_cpu(struct perf_evsel *evsel, int id)
  921. {
  922. int i;
  923. if (!aggr_get_id)
  924. return 0;
  925. if (stat_config.aggr_mode == AGGR_NONE)
  926. return id;
  927. if (stat_config.aggr_mode == AGGR_GLOBAL)
  928. return 0;
  929. for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) {
  930. int cpu2 = perf_evsel__cpus(evsel)->map[i];
  931. if (aggr_get_id(evsel_list->cpus, cpu2) == id)
  932. return cpu2;
  933. }
  934. return 0;
  935. }
  936. static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
  937. {
  938. FILE *output = stat_config.output;
  939. double sc = evsel->scale;
  940. const char *fmt;
  941. if (csv_output) {
  942. fmt = floor(sc) != sc ? "%.2f%s" : "%.0f%s";
  943. } else {
  944. if (big_num)
  945. fmt = floor(sc) != sc ? "%'18.2f%s" : "%'18.0f%s";
  946. else
  947. fmt = floor(sc) != sc ? "%18.2f%s" : "%18.0f%s";
  948. }
  949. aggr_printout(evsel, id, nr);
  950. fprintf(output, fmt, avg, csv_sep);
  951. if (evsel->unit)
  952. fprintf(output, "%-*s%s",
  953. csv_output ? 0 : unit_width,
  954. evsel->unit, csv_sep);
  955. fprintf(output, "%-*s", csv_output ? 0 : 25, perf_evsel__name(evsel));
  956. if (evsel->cgrp)
  957. fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
  958. }
  959. static bool is_mixed_hw_group(struct perf_evsel *counter)
  960. {
  961. struct perf_evlist *evlist = counter->evlist;
  962. u32 pmu_type = counter->attr.type;
  963. struct perf_evsel *pos;
  964. if (counter->nr_members < 2)
  965. return false;
  966. evlist__for_each_entry(evlist, pos) {
  967. /* software events can be part of any hardware group */
  968. if (pos->attr.type == PERF_TYPE_SOFTWARE)
  969. continue;
  970. if (pmu_type == PERF_TYPE_SOFTWARE) {
  971. pmu_type = pos->attr.type;
  972. continue;
  973. }
  974. if (pmu_type != pos->attr.type)
  975. return true;
  976. }
  977. return false;
  978. }
  979. static void printout(int id, int nr, struct perf_evsel *counter, double uval,
  980. char *prefix, u64 run, u64 ena, double noise,
  981. struct runtime_stat *st)
  982. {
  983. struct perf_stat_output_ctx out;
  984. struct outstate os = {
  985. .fh = stat_config.output,
  986. .prefix = prefix ? prefix : "",
  987. .id = id,
  988. .nr = nr,
  989. .evsel = counter,
  990. };
  991. print_metric_t pm = print_metric_std;
  992. void (*nl)(void *);
  993. if (metric_only) {
  994. nl = new_line_metric;
  995. if (csv_output)
  996. pm = print_metric_only_csv;
  997. else
  998. pm = print_metric_only;
  999. } else
  1000. nl = new_line_std;
  1001. if (csv_output && !metric_only) {
  1002. static int aggr_fields[] = {
  1003. [AGGR_GLOBAL] = 0,
  1004. [AGGR_THREAD] = 1,
  1005. [AGGR_NONE] = 1,
  1006. [AGGR_SOCKET] = 2,
  1007. [AGGR_CORE] = 2,
  1008. };
  1009. pm = print_metric_csv;
  1010. nl = new_line_csv;
  1011. os.nfields = 3;
  1012. os.nfields += aggr_fields[stat_config.aggr_mode];
  1013. if (counter->cgrp)
  1014. os.nfields++;
  1015. }
  1016. if (run == 0 || ena == 0 || counter->counts->scaled == -1) {
  1017. if (metric_only) {
  1018. pm(&os, NULL, "", "", 0);
  1019. return;
  1020. }
  1021. aggr_printout(counter, id, nr);
  1022. fprintf(stat_config.output, "%*s%s",
  1023. csv_output ? 0 : 18,
  1024. counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
  1025. csv_sep);
  1026. if (counter->supported) {
  1027. print_free_counters_hint = 1;
  1028. if (is_mixed_hw_group(counter))
  1029. print_mixed_hw_group_error = 1;
  1030. }
  1031. fprintf(stat_config.output, "%-*s%s",
  1032. csv_output ? 0 : unit_width,
  1033. counter->unit, csv_sep);
  1034. fprintf(stat_config.output, "%*s",
  1035. csv_output ? 0 : -25,
  1036. perf_evsel__name(counter));
  1037. if (counter->cgrp)
  1038. fprintf(stat_config.output, "%s%s",
  1039. csv_sep, counter->cgrp->name);
  1040. if (!csv_output)
  1041. pm(&os, NULL, NULL, "", 0);
  1042. print_noise(counter, noise);
  1043. print_running(run, ena);
  1044. if (csv_output)
  1045. pm(&os, NULL, NULL, "", 0);
  1046. return;
  1047. }
  1048. if (metric_only)
  1049. /* nothing */;
  1050. else if (nsec_counter(counter))
  1051. nsec_printout(id, nr, counter, uval);
  1052. else
  1053. abs_printout(id, nr, counter, uval);
  1054. out.print_metric = pm;
  1055. out.new_line = nl;
  1056. out.ctx = &os;
  1057. out.force_header = false;
  1058. if (csv_output && !metric_only) {
  1059. print_noise(counter, noise);
  1060. print_running(run, ena);
  1061. }
  1062. perf_stat__print_shadow_stats(counter, uval,
  1063. first_shadow_cpu(counter, id),
  1064. &out, &metric_events, st);
  1065. if (!csv_output && !metric_only) {
  1066. print_noise(counter, noise);
  1067. print_running(run, ena);
  1068. }
  1069. }
  1070. static void aggr_update_shadow(void)
  1071. {
  1072. int cpu, s2, id, s;
  1073. u64 val;
  1074. struct perf_evsel *counter;
  1075. for (s = 0; s < aggr_map->nr; s++) {
  1076. id = aggr_map->map[s];
  1077. evlist__for_each_entry(evsel_list, counter) {
  1078. val = 0;
  1079. for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
  1080. s2 = aggr_get_id(evsel_list->cpus, cpu);
  1081. if (s2 != id)
  1082. continue;
  1083. val += perf_counts(counter->counts, cpu, 0)->val;
  1084. }
  1085. perf_stat__update_shadow_stats(counter, val,
  1086. first_shadow_cpu(counter, id),
  1087. &rt_stat);
  1088. }
  1089. }
  1090. }
  1091. static void uniquify_event_name(struct perf_evsel *counter)
  1092. {
  1093. char *new_name;
  1094. char *config;
  1095. if (counter->uniquified_name ||
  1096. !counter->pmu_name || !strncmp(counter->name, counter->pmu_name,
  1097. strlen(counter->pmu_name)))
  1098. return;
  1099. config = strchr(counter->name, '/');
  1100. if (config) {
  1101. if (asprintf(&new_name,
  1102. "%s%s", counter->pmu_name, config) > 0) {
  1103. free(counter->name);
  1104. counter->name = new_name;
  1105. }
  1106. } else {
  1107. if (asprintf(&new_name,
  1108. "%s [%s]", counter->name, counter->pmu_name) > 0) {
  1109. free(counter->name);
  1110. counter->name = new_name;
  1111. }
  1112. }
  1113. counter->uniquified_name = true;
  1114. }
  1115. static void collect_all_aliases(struct perf_evsel *counter,
  1116. void (*cb)(struct perf_evsel *counter, void *data,
  1117. bool first),
  1118. void *data)
  1119. {
  1120. struct perf_evsel *alias;
  1121. alias = list_prepare_entry(counter, &(evsel_list->entries), node);
  1122. list_for_each_entry_continue (alias, &evsel_list->entries, node) {
  1123. if (strcmp(perf_evsel__name(alias), perf_evsel__name(counter)) ||
  1124. alias->scale != counter->scale ||
  1125. alias->cgrp != counter->cgrp ||
  1126. strcmp(alias->unit, counter->unit) ||
  1127. nsec_counter(alias) != nsec_counter(counter))
  1128. break;
  1129. alias->merged_stat = true;
  1130. cb(alias, data, false);
  1131. }
  1132. }
  1133. static bool collect_data(struct perf_evsel *counter,
  1134. void (*cb)(struct perf_evsel *counter, void *data,
  1135. bool first),
  1136. void *data)
  1137. {
  1138. if (counter->merged_stat)
  1139. return false;
  1140. cb(counter, data, true);
  1141. if (no_merge)
  1142. uniquify_event_name(counter);
  1143. else if (counter->auto_merge_stats)
  1144. collect_all_aliases(counter, cb, data);
  1145. return true;
  1146. }
  1147. struct aggr_data {
  1148. u64 ena, run, val;
  1149. int id;
  1150. int nr;
  1151. int cpu;
  1152. };
  1153. static void aggr_cb(struct perf_evsel *counter, void *data, bool first)
  1154. {
  1155. struct aggr_data *ad = data;
  1156. int cpu, s2;
  1157. for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
  1158. struct perf_counts_values *counts;
  1159. s2 = aggr_get_id(perf_evsel__cpus(counter), cpu);
  1160. if (s2 != ad->id)
  1161. continue;
  1162. if (first)
  1163. ad->nr++;
  1164. counts = perf_counts(counter->counts, cpu, 0);
  1165. /*
  1166. * When any result is bad, make them all to give
  1167. * consistent output in interval mode.
  1168. */
  1169. if (counts->ena == 0 || counts->run == 0 ||
  1170. counter->counts->scaled == -1) {
  1171. ad->ena = 0;
  1172. ad->run = 0;
  1173. break;
  1174. }
  1175. ad->val += counts->val;
  1176. ad->ena += counts->ena;
  1177. ad->run += counts->run;
  1178. }
  1179. }
  1180. static void print_aggr(char *prefix)
  1181. {
  1182. FILE *output = stat_config.output;
  1183. struct perf_evsel *counter;
  1184. int s, id, nr;
  1185. double uval;
  1186. u64 ena, run, val;
  1187. bool first;
  1188. if (!(aggr_map || aggr_get_id))
  1189. return;
  1190. aggr_update_shadow();
  1191. /*
  1192. * With metric_only everything is on a single line.
  1193. * Without each counter has its own line.
  1194. */
  1195. for (s = 0; s < aggr_map->nr; s++) {
  1196. struct aggr_data ad;
  1197. if (prefix && metric_only)
  1198. fprintf(output, "%s", prefix);
  1199. ad.id = id = aggr_map->map[s];
  1200. first = true;
  1201. evlist__for_each_entry(evsel_list, counter) {
  1202. if (is_duration_time(counter))
  1203. continue;
  1204. ad.val = ad.ena = ad.run = 0;
  1205. ad.nr = 0;
  1206. if (!collect_data(counter, aggr_cb, &ad))
  1207. continue;
  1208. nr = ad.nr;
  1209. ena = ad.ena;
  1210. run = ad.run;
  1211. val = ad.val;
  1212. if (first && metric_only) {
  1213. first = false;
  1214. aggr_printout(counter, id, nr);
  1215. }
  1216. if (prefix && !metric_only)
  1217. fprintf(output, "%s", prefix);
  1218. uval = val * counter->scale;
  1219. printout(id, nr, counter, uval, prefix, run, ena, 1.0,
  1220. &rt_stat);
  1221. if (!metric_only)
  1222. fputc('\n', output);
  1223. }
  1224. if (metric_only)
  1225. fputc('\n', output);
  1226. }
  1227. }
  1228. static int cmp_val(const void *a, const void *b)
  1229. {
  1230. return ((struct perf_aggr_thread_value *)b)->val -
  1231. ((struct perf_aggr_thread_value *)a)->val;
  1232. }
  1233. static struct perf_aggr_thread_value *sort_aggr_thread(
  1234. struct perf_evsel *counter,
  1235. int nthreads, int ncpus,
  1236. int *ret)
  1237. {
  1238. int cpu, thread, i = 0;
  1239. double uval;
  1240. struct perf_aggr_thread_value *buf;
  1241. buf = calloc(nthreads, sizeof(struct perf_aggr_thread_value));
  1242. if (!buf)
  1243. return NULL;
  1244. for (thread = 0; thread < nthreads; thread++) {
  1245. u64 ena = 0, run = 0, val = 0;
  1246. for (cpu = 0; cpu < ncpus; cpu++) {
  1247. val += perf_counts(counter->counts, cpu, thread)->val;
  1248. ena += perf_counts(counter->counts, cpu, thread)->ena;
  1249. run += perf_counts(counter->counts, cpu, thread)->run;
  1250. }
  1251. uval = val * counter->scale;
  1252. /*
  1253. * Skip value 0 when enabling --per-thread globally,
  1254. * otherwise too many 0 output.
  1255. */
  1256. if (uval == 0.0 && target__has_per_thread(&target))
  1257. continue;
  1258. buf[i].counter = counter;
  1259. buf[i].id = thread;
  1260. buf[i].uval = uval;
  1261. buf[i].val = val;
  1262. buf[i].run = run;
  1263. buf[i].ena = ena;
  1264. i++;
  1265. }
  1266. qsort(buf, i, sizeof(struct perf_aggr_thread_value), cmp_val);
  1267. if (ret)
  1268. *ret = i;
  1269. return buf;
  1270. }
  1271. static void print_aggr_thread(struct perf_evsel *counter, char *prefix)
  1272. {
  1273. FILE *output = stat_config.output;
  1274. int nthreads = thread_map__nr(counter->threads);
  1275. int ncpus = cpu_map__nr(counter->cpus);
  1276. int thread, sorted_threads, id;
  1277. struct perf_aggr_thread_value *buf;
  1278. buf = sort_aggr_thread(counter, nthreads, ncpus, &sorted_threads);
  1279. if (!buf) {
  1280. perror("cannot sort aggr thread");
  1281. return;
  1282. }
  1283. for (thread = 0; thread < sorted_threads; thread++) {
  1284. if (prefix)
  1285. fprintf(output, "%s", prefix);
  1286. id = buf[thread].id;
  1287. if (stat_config.stats)
  1288. printout(id, 0, buf[thread].counter, buf[thread].uval,
  1289. prefix, buf[thread].run, buf[thread].ena, 1.0,
  1290. &stat_config.stats[id]);
  1291. else
  1292. printout(id, 0, buf[thread].counter, buf[thread].uval,
  1293. prefix, buf[thread].run, buf[thread].ena, 1.0,
  1294. &rt_stat);
  1295. fputc('\n', output);
  1296. }
  1297. free(buf);
  1298. }
  1299. struct caggr_data {
  1300. double avg, avg_enabled, avg_running;
  1301. };
  1302. static void counter_aggr_cb(struct perf_evsel *counter, void *data,
  1303. bool first __maybe_unused)
  1304. {
  1305. struct caggr_data *cd = data;
  1306. struct perf_stat_evsel *ps = counter->stats;
  1307. cd->avg += avg_stats(&ps->res_stats[0]);
  1308. cd->avg_enabled += avg_stats(&ps->res_stats[1]);
  1309. cd->avg_running += avg_stats(&ps->res_stats[2]);
  1310. }
  1311. /*
  1312. * Print out the results of a single counter:
  1313. * aggregated counts in system-wide mode
  1314. */
  1315. static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
  1316. {
  1317. FILE *output = stat_config.output;
  1318. double uval;
  1319. struct caggr_data cd = { .avg = 0.0 };
  1320. if (!collect_data(counter, counter_aggr_cb, &cd))
  1321. return;
  1322. if (prefix && !metric_only)
  1323. fprintf(output, "%s", prefix);
  1324. uval = cd.avg * counter->scale;
  1325. printout(-1, 0, counter, uval, prefix, cd.avg_running, cd.avg_enabled,
  1326. cd.avg, &rt_stat);
  1327. if (!metric_only)
  1328. fprintf(output, "\n");
  1329. }
  1330. static void counter_cb(struct perf_evsel *counter, void *data,
  1331. bool first __maybe_unused)
  1332. {
  1333. struct aggr_data *ad = data;
  1334. ad->val += perf_counts(counter->counts, ad->cpu, 0)->val;
  1335. ad->ena += perf_counts(counter->counts, ad->cpu, 0)->ena;
  1336. ad->run += perf_counts(counter->counts, ad->cpu, 0)->run;
  1337. }
  1338. /*
  1339. * Print out the results of a single counter:
  1340. * does not use aggregated count in system-wide
  1341. */
  1342. static void print_counter(struct perf_evsel *counter, char *prefix)
  1343. {
  1344. FILE *output = stat_config.output;
  1345. u64 ena, run, val;
  1346. double uval;
  1347. int cpu;
  1348. for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
  1349. struct aggr_data ad = { .cpu = cpu };
  1350. if (!collect_data(counter, counter_cb, &ad))
  1351. return;
  1352. val = ad.val;
  1353. ena = ad.ena;
  1354. run = ad.run;
  1355. if (prefix)
  1356. fprintf(output, "%s", prefix);
  1357. uval = val * counter->scale;
  1358. printout(cpu, 0, counter, uval, prefix, run, ena, 1.0,
  1359. &rt_stat);
  1360. fputc('\n', output);
  1361. }
  1362. }
  1363. static void print_no_aggr_metric(char *prefix)
  1364. {
  1365. int cpu;
  1366. int nrcpus = 0;
  1367. struct perf_evsel *counter;
  1368. u64 ena, run, val;
  1369. double uval;
  1370. nrcpus = evsel_list->cpus->nr;
  1371. for (cpu = 0; cpu < nrcpus; cpu++) {
  1372. bool first = true;
  1373. if (prefix)
  1374. fputs(prefix, stat_config.output);
  1375. evlist__for_each_entry(evsel_list, counter) {
  1376. if (is_duration_time(counter))
  1377. continue;
  1378. if (first) {
  1379. aggr_printout(counter, cpu, 0);
  1380. first = false;
  1381. }
  1382. val = perf_counts(counter->counts, cpu, 0)->val;
  1383. ena = perf_counts(counter->counts, cpu, 0)->ena;
  1384. run = perf_counts(counter->counts, cpu, 0)->run;
  1385. uval = val * counter->scale;
  1386. printout(cpu, 0, counter, uval, prefix, run, ena, 1.0,
  1387. &rt_stat);
  1388. }
  1389. fputc('\n', stat_config.output);
  1390. }
  1391. }
  1392. static int aggr_header_lens[] = {
  1393. [AGGR_CORE] = 18,
  1394. [AGGR_SOCKET] = 12,
  1395. [AGGR_NONE] = 6,
  1396. [AGGR_THREAD] = 24,
  1397. [AGGR_GLOBAL] = 0,
  1398. };
  1399. static const char *aggr_header_csv[] = {
  1400. [AGGR_CORE] = "core,cpus,",
  1401. [AGGR_SOCKET] = "socket,cpus",
  1402. [AGGR_NONE] = "cpu,",
  1403. [AGGR_THREAD] = "comm-pid,",
  1404. [AGGR_GLOBAL] = ""
  1405. };
  1406. static void print_metric_headers(const char *prefix, bool no_indent)
  1407. {
  1408. struct perf_stat_output_ctx out;
  1409. struct perf_evsel *counter;
  1410. struct outstate os = {
  1411. .fh = stat_config.output
  1412. };
  1413. if (prefix)
  1414. fprintf(stat_config.output, "%s", prefix);
  1415. if (!csv_output && !no_indent)
  1416. fprintf(stat_config.output, "%*s",
  1417. aggr_header_lens[stat_config.aggr_mode], "");
  1418. if (csv_output) {
  1419. if (stat_config.interval)
  1420. fputs("time,", stat_config.output);
  1421. fputs(aggr_header_csv[stat_config.aggr_mode],
  1422. stat_config.output);
  1423. }
  1424. /* Print metrics headers only */
  1425. evlist__for_each_entry(evsel_list, counter) {
  1426. if (is_duration_time(counter))
  1427. continue;
  1428. os.evsel = counter;
  1429. out.ctx = &os;
  1430. out.print_metric = print_metric_header;
  1431. out.new_line = new_line_metric;
  1432. out.force_header = true;
  1433. os.evsel = counter;
  1434. perf_stat__print_shadow_stats(counter, 0,
  1435. 0,
  1436. &out,
  1437. &metric_events,
  1438. &rt_stat);
  1439. }
  1440. fputc('\n', stat_config.output);
  1441. }
  1442. static void print_interval(char *prefix, struct timespec *ts)
  1443. {
  1444. FILE *output = stat_config.output;
  1445. static int num_print_interval;
  1446. sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep);
  1447. if (num_print_interval == 0 && !csv_output) {
  1448. switch (stat_config.aggr_mode) {
  1449. case AGGR_SOCKET:
  1450. fprintf(output, "# time socket cpus");
  1451. if (!metric_only)
  1452. fprintf(output, " counts %*s events\n", unit_width, "unit");
  1453. break;
  1454. case AGGR_CORE:
  1455. fprintf(output, "# time core cpus");
  1456. if (!metric_only)
  1457. fprintf(output, " counts %*s events\n", unit_width, "unit");
  1458. break;
  1459. case AGGR_NONE:
  1460. fprintf(output, "# time CPU");
  1461. if (!metric_only)
  1462. fprintf(output, " counts %*s events\n", unit_width, "unit");
  1463. break;
  1464. case AGGR_THREAD:
  1465. fprintf(output, "# time comm-pid");
  1466. if (!metric_only)
  1467. fprintf(output, " counts %*s events\n", unit_width, "unit");
  1468. break;
  1469. case AGGR_GLOBAL:
  1470. default:
  1471. fprintf(output, "# time");
  1472. if (!metric_only)
  1473. fprintf(output, " counts %*s events\n", unit_width, "unit");
  1474. case AGGR_UNSET:
  1475. break;
  1476. }
  1477. }
  1478. if (num_print_interval == 0 && metric_only)
  1479. print_metric_headers(" ", true);
  1480. if (++num_print_interval == 25)
  1481. num_print_interval = 0;
  1482. }
  1483. static void print_header(int argc, const char **argv)
  1484. {
  1485. FILE *output = stat_config.output;
  1486. int i;
  1487. fflush(stdout);
  1488. if (!csv_output) {
  1489. fprintf(output, "\n");
  1490. fprintf(output, " Performance counter stats for ");
  1491. if (target.system_wide)
  1492. fprintf(output, "\'system wide");
  1493. else if (target.cpu_list)
  1494. fprintf(output, "\'CPU(s) %s", target.cpu_list);
  1495. else if (!target__has_task(&target)) {
  1496. fprintf(output, "\'%s", argv ? argv[0] : "pipe");
  1497. for (i = 1; argv && (i < argc); i++)
  1498. fprintf(output, " %s", argv[i]);
  1499. } else if (target.pid)
  1500. fprintf(output, "process id \'%s", target.pid);
  1501. else
  1502. fprintf(output, "thread id \'%s", target.tid);
  1503. fprintf(output, "\'");
  1504. if (run_count > 1)
  1505. fprintf(output, " (%d runs)", run_count);
  1506. fprintf(output, ":\n\n");
  1507. }
  1508. }
  1509. static int get_precision(double num)
  1510. {
  1511. if (num > 1)
  1512. return 0;
  1513. return lround(ceil(-log10(num)));
  1514. }
  1515. static void print_table(FILE *output, int precision, double avg)
  1516. {
  1517. char tmp[64];
  1518. int idx, indent = 0;
  1519. scnprintf(tmp, 64, " %17.*f", precision, avg);
  1520. while (tmp[indent] == ' ')
  1521. indent++;
  1522. fprintf(output, "%*s# Table of individual measurements:\n", indent, "");
  1523. for (idx = 0; idx < run_count; idx++) {
  1524. double run = (double) walltime_run[idx] / NSEC_PER_SEC;
  1525. int h, n = 1 + abs((int) (100.0 * (run - avg)/run) / 5);
  1526. fprintf(output, " %17.*f (%+.*f) ",
  1527. precision, run, precision, run - avg);
  1528. for (h = 0; h < n; h++)
  1529. fprintf(output, "#");
  1530. fprintf(output, "\n");
  1531. }
  1532. fprintf(output, "\n%*s# Final result:\n", indent, "");
  1533. }
  1534. static double timeval2double(struct timeval *t)
  1535. {
  1536. return t->tv_sec + (double) t->tv_usec/USEC_PER_SEC;
  1537. }
  1538. static void print_footer(void)
  1539. {
  1540. double avg = avg_stats(&walltime_nsecs_stats) / NSEC_PER_SEC;
  1541. FILE *output = stat_config.output;
  1542. int n;
  1543. if (!null_run)
  1544. fprintf(output, "\n");
  1545. if (run_count == 1) {
  1546. fprintf(output, " %17.9f seconds time elapsed", avg);
  1547. if (ru_display) {
  1548. double ru_utime = timeval2double(&ru_data.ru_utime);
  1549. double ru_stime = timeval2double(&ru_data.ru_stime);
  1550. fprintf(output, "\n\n");
  1551. fprintf(output, " %17.9f seconds user\n", ru_utime);
  1552. fprintf(output, " %17.9f seconds sys\n", ru_stime);
  1553. }
  1554. } else {
  1555. double sd = stddev_stats(&walltime_nsecs_stats) / NSEC_PER_SEC;
  1556. /*
  1557. * Display at most 2 more significant
  1558. * digits than the stddev inaccuracy.
  1559. */
  1560. int precision = get_precision(sd) + 2;
  1561. if (walltime_run_table)
  1562. print_table(output, precision, avg);
  1563. fprintf(output, " %17.*f +- %.*f seconds time elapsed",
  1564. precision, avg, precision, sd);
  1565. print_noise_pct(sd, avg);
  1566. }
  1567. fprintf(output, "\n\n");
  1568. if (print_free_counters_hint &&
  1569. sysctl__read_int("kernel/nmi_watchdog", &n) >= 0 &&
  1570. n > 0)
  1571. fprintf(output,
  1572. "Some events weren't counted. Try disabling the NMI watchdog:\n"
  1573. " echo 0 > /proc/sys/kernel/nmi_watchdog\n"
  1574. " perf stat ...\n"
  1575. " echo 1 > /proc/sys/kernel/nmi_watchdog\n");
  1576. if (print_mixed_hw_group_error)
  1577. fprintf(output,
  1578. "The events in group usually have to be from "
  1579. "the same PMU. Try reorganizing the group.\n");
  1580. }
  1581. static void print_counters(struct timespec *ts, int argc, const char **argv)
  1582. {
  1583. int interval = stat_config.interval;
  1584. struct perf_evsel *counter;
  1585. char buf[64], *prefix = NULL;
  1586. /* Do not print anything if we record to the pipe. */
  1587. if (STAT_RECORD && perf_stat.data.is_pipe)
  1588. return;
  1589. if (interval)
  1590. print_interval(prefix = buf, ts);
  1591. else
  1592. print_header(argc, argv);
  1593. if (metric_only) {
  1594. static int num_print_iv;
  1595. if (num_print_iv == 0 && !interval)
  1596. print_metric_headers(prefix, false);
  1597. if (num_print_iv++ == 25)
  1598. num_print_iv = 0;
  1599. if (stat_config.aggr_mode == AGGR_GLOBAL && prefix)
  1600. fprintf(stat_config.output, "%s", prefix);
  1601. }
  1602. switch (stat_config.aggr_mode) {
  1603. case AGGR_CORE:
  1604. case AGGR_SOCKET:
  1605. print_aggr(prefix);
  1606. break;
  1607. case AGGR_THREAD:
  1608. evlist__for_each_entry(evsel_list, counter) {
  1609. if (is_duration_time(counter))
  1610. continue;
  1611. print_aggr_thread(counter, prefix);
  1612. }
  1613. break;
  1614. case AGGR_GLOBAL:
  1615. evlist__for_each_entry(evsel_list, counter) {
  1616. if (is_duration_time(counter))
  1617. continue;
  1618. print_counter_aggr(counter, prefix);
  1619. }
  1620. if (metric_only)
  1621. fputc('\n', stat_config.output);
  1622. break;
  1623. case AGGR_NONE:
  1624. if (metric_only)
  1625. print_no_aggr_metric(prefix);
  1626. else {
  1627. evlist__for_each_entry(evsel_list, counter) {
  1628. if (is_duration_time(counter))
  1629. continue;
  1630. print_counter(counter, prefix);
  1631. }
  1632. }
  1633. break;
  1634. case AGGR_UNSET:
  1635. default:
  1636. break;
  1637. }
  1638. if (!interval && !csv_output)
  1639. print_footer();
  1640. fflush(stat_config.output);
  1641. }
  1642. static volatile int signr = -1;
  1643. static void skip_signal(int signo)
  1644. {
  1645. if ((child_pid == -1) || stat_config.interval)
  1646. done = 1;
  1647. signr = signo;
  1648. /*
  1649. * render child_pid harmless
  1650. * won't send SIGTERM to a random
  1651. * process in case of race condition
  1652. * and fast PID recycling
  1653. */
  1654. child_pid = -1;
  1655. }
  1656. static void sig_atexit(void)
  1657. {
  1658. sigset_t set, oset;
  1659. /*
  1660. * avoid race condition with SIGCHLD handler
  1661. * in skip_signal() which is modifying child_pid
  1662. * goal is to avoid send SIGTERM to a random
  1663. * process
  1664. */
  1665. sigemptyset(&set);
  1666. sigaddset(&set, SIGCHLD);
  1667. sigprocmask(SIG_BLOCK, &set, &oset);
  1668. if (child_pid != -1)
  1669. kill(child_pid, SIGTERM);
  1670. sigprocmask(SIG_SETMASK, &oset, NULL);
  1671. if (signr == -1)
  1672. return;
  1673. signal(signr, SIG_DFL);
  1674. kill(getpid(), signr);
  1675. }
  1676. static int stat__set_big_num(const struct option *opt __maybe_unused,
  1677. const char *s __maybe_unused, int unset)
  1678. {
  1679. big_num_opt = unset ? 0 : 1;
  1680. return 0;
  1681. }
  1682. static int enable_metric_only(const struct option *opt __maybe_unused,
  1683. const char *s __maybe_unused, int unset)
  1684. {
  1685. force_metric_only = true;
  1686. metric_only = !unset;
  1687. return 0;
  1688. }
  1689. static int parse_metric_groups(const struct option *opt,
  1690. const char *str,
  1691. int unset __maybe_unused)
  1692. {
  1693. return metricgroup__parse_groups(opt, str, &metric_events);
  1694. }
/*
 * Command-line option table for 'perf stat'.  It is handed to
 * parse_options_subcommand() in cmd_stat() and to parse_options() in
 * __cmd_record(), and to parse_options_usage() when reporting usage errors.
 */
static const struct option stat_options[] = {
	/* Event selection and target (what/whom to count). */
	OPT_BOOLEAN('T', "transaction", &transaction_run,
		    "hardware transaction statistics"),
	OPT_CALLBACK('e', "event", &evsel_list, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &evsel_list, "filter",
		     "event filter", parse_filter),
	OPT_BOOLEAN('i', "no-inherit", &no_inherit,
		    "child tasks do not inherit counters"),
	OPT_STRING('p', "pid", &target.pid, "pid",
		   "stat events on existing process id"),
	OPT_STRING('t', "tid", &target.tid, "tid",
		   "stat events on existing thread id"),
	OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
		    "system-wide collection from all CPUs"),
	OPT_BOOLEAN('g', "group", &group,
		    "put the counters into a counter group"),
	OPT_BOOLEAN('c', "scale", &stat_config.scale, "scale/normalize counters"),
	OPT_INCR('v', "verbose", &verbose,
		 "be more verbose (show counter open errors, etc)"),
	/* Run control. */
	OPT_INTEGER('r', "repeat", &run_count,
		    "repeat command and print average + stddev (max: 100, forever: 0)"),
	OPT_BOOLEAN(0, "table", &walltime_run_table,
		    "display details about each run (only with -r option)"),
	OPT_BOOLEAN('n', "null", &null_run,
		    "null run - dont start any counters"),
	OPT_INCR('d', "detailed", &detailed_run,
		 "detailed run - start a lot of events"),
	OPT_BOOLEAN('S', "sync", &sync_run,
		    "call sync() before starting a run"),
	/* Output formatting and destination. */
	OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL,
			   "print large numbers with thousands\' separators",
			   stat__set_big_num),
	OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
		   "list of cpus to monitor in system-wide"),
	OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode,
		     "disable CPU count aggregation", AGGR_NONE),
	OPT_BOOLEAN(0, "no-merge", &no_merge, "Do not merge identical named events"),
	OPT_STRING('x', "field-separator", &csv_sep, "separator",
		   "print counts with custom separator"),
	OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
		     "monitor event in cgroup name only", parse_cgroups),
	OPT_STRING('o', "output", &output_name, "file", "output file name"),
	OPT_BOOLEAN(0, "append", &append_file, "append to the output file"),
	OPT_INTEGER(0, "log-fd", &output_fd,
		    "log output to fd, instead of stderr"),
	OPT_STRING(0, "pre", &pre_cmd, "command",
		   "command to run prior to the measured command"),
	OPT_STRING(0, "post", &post_cmd, "command",
		   "command to run after to the measured command"),
	/* Interval printing and timeouts. */
	OPT_UINTEGER('I', "interval-print", &stat_config.interval,
		     "print counts at regular interval in ms "
		     "(overhead is possible for values <= 100ms)"),
	OPT_INTEGER(0, "interval-count", &stat_config.times,
		    "print counts for fixed number of times"),
	OPT_UINTEGER(0, "timeout", &stat_config.timeout,
		     "stop workload and print counts after a timeout period in ms (>= 10ms)"),
	/* Aggregation modes; each one overwrites stat_config.aggr_mode. */
	OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode,
		     "aggregate counts per processor socket", AGGR_SOCKET),
	OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode,
		     "aggregate counts per physical processor core", AGGR_CORE),
	OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode,
		     "aggregate counts per thread", AGGR_THREAD),
	OPT_UINTEGER('D', "delay", &initial_delay,
		     "ms to wait before starting measurement after program start"),
	/* Metric-oriented modes. */
	OPT_CALLBACK_NOOPT(0, "metric-only", &metric_only, NULL,
			   "Only print computed metrics. No raw values", enable_metric_only),
	OPT_BOOLEAN(0, "topdown", &topdown_run,
		    "measure topdown level 1 statistics"),
	OPT_BOOLEAN(0, "smi-cost", &smi_cost,
		    "measure SMI cost"),
	OPT_CALLBACK('M', "metrics", &evsel_list, "metric/metric group list",
		     "monitor specified metrics or metric groups (separated by ,)",
		     parse_metric_groups),
	OPT_END()
};
  1772. static int perf_stat__get_socket(struct cpu_map *map, int cpu)
  1773. {
  1774. return cpu_map__get_socket(map, cpu, NULL);
  1775. }
  1776. static int perf_stat__get_core(struct cpu_map *map, int cpu)
  1777. {
  1778. return cpu_map__get_core(map, cpu, NULL);
  1779. }
  1780. static int cpu_map__get_max(struct cpu_map *map)
  1781. {
  1782. int i, max = -1;
  1783. for (i = 0; i < map->nr; i++) {
  1784. if (map->map[i] > max)
  1785. max = map->map[i];
  1786. }
  1787. return max;
  1788. }
  1789. static struct cpu_map *cpus_aggr_map;
  1790. static int perf_stat__get_aggr(aggr_get_id_t get_id, struct cpu_map *map, int idx)
  1791. {
  1792. int cpu;
  1793. if (idx >= map->nr)
  1794. return -1;
  1795. cpu = map->map[idx];
  1796. if (cpus_aggr_map->map[cpu] == -1)
  1797. cpus_aggr_map->map[cpu] = get_id(map, idx);
  1798. return cpus_aggr_map->map[cpu];
  1799. }
  1800. static int perf_stat__get_socket_cached(struct cpu_map *map, int idx)
  1801. {
  1802. return perf_stat__get_aggr(perf_stat__get_socket, map, idx);
  1803. }
  1804. static int perf_stat__get_core_cached(struct cpu_map *map, int idx)
  1805. {
  1806. return perf_stat__get_aggr(perf_stat__get_core, map, idx);
  1807. }
  1808. static int perf_stat_init_aggr_mode(void)
  1809. {
  1810. int nr;
  1811. switch (stat_config.aggr_mode) {
  1812. case AGGR_SOCKET:
  1813. if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) {
  1814. perror("cannot build socket map");
  1815. return -1;
  1816. }
  1817. aggr_get_id = perf_stat__get_socket_cached;
  1818. break;
  1819. case AGGR_CORE:
  1820. if (cpu_map__build_core_map(evsel_list->cpus, &aggr_map)) {
  1821. perror("cannot build core map");
  1822. return -1;
  1823. }
  1824. aggr_get_id = perf_stat__get_core_cached;
  1825. break;
  1826. case AGGR_NONE:
  1827. case AGGR_GLOBAL:
  1828. case AGGR_THREAD:
  1829. case AGGR_UNSET:
  1830. default:
  1831. break;
  1832. }
  1833. /*
  1834. * The evsel_list->cpus is the base we operate on,
  1835. * taking the highest cpu number to be the size of
  1836. * the aggregation translate cpumap.
  1837. */
  1838. nr = cpu_map__get_max(evsel_list->cpus);
  1839. cpus_aggr_map = cpu_map__empty_new(nr + 1);
  1840. return cpus_aggr_map ? 0 : -ENOMEM;
  1841. }
  1842. static void perf_stat__exit_aggr_mode(void)
  1843. {
  1844. cpu_map__put(aggr_map);
  1845. cpu_map__put(cpus_aggr_map);
  1846. aggr_map = NULL;
  1847. cpus_aggr_map = NULL;
  1848. }
  1849. static inline int perf_env__get_cpu(struct perf_env *env, struct cpu_map *map, int idx)
  1850. {
  1851. int cpu;
  1852. if (idx > map->nr)
  1853. return -1;
  1854. cpu = map->map[idx];
  1855. if (cpu >= env->nr_cpus_avail)
  1856. return -1;
  1857. return cpu;
  1858. }
  1859. static int perf_env__get_socket(struct cpu_map *map, int idx, void *data)
  1860. {
  1861. struct perf_env *env = data;
  1862. int cpu = perf_env__get_cpu(env, map, idx);
  1863. return cpu == -1 ? -1 : env->cpu[cpu].socket_id;
  1864. }
  1865. static int perf_env__get_core(struct cpu_map *map, int idx, void *data)
  1866. {
  1867. struct perf_env *env = data;
  1868. int core = -1, cpu = perf_env__get_cpu(env, map, idx);
  1869. if (cpu != -1) {
  1870. int socket_id = env->cpu[cpu].socket_id;
  1871. /*
  1872. * Encode socket in upper 16 bits
  1873. * core_id is relative to socket, and
  1874. * we need a global id. So we combine
  1875. * socket + core id.
  1876. */
  1877. core = (socket_id << 16) | (env->cpu[cpu].core_id & 0xffff);
  1878. }
  1879. return core;
  1880. }
  1881. static int perf_env__build_socket_map(struct perf_env *env, struct cpu_map *cpus,
  1882. struct cpu_map **sockp)
  1883. {
  1884. return cpu_map__build_map(cpus, sockp, perf_env__get_socket, env);
  1885. }
  1886. static int perf_env__build_core_map(struct perf_env *env, struct cpu_map *cpus,
  1887. struct cpu_map **corep)
  1888. {
  1889. return cpu_map__build_map(cpus, corep, perf_env__get_core, env);
  1890. }
  1891. static int perf_stat__get_socket_file(struct cpu_map *map, int idx)
  1892. {
  1893. return perf_env__get_socket(map, idx, &perf_stat.session->header.env);
  1894. }
  1895. static int perf_stat__get_core_file(struct cpu_map *map, int idx)
  1896. {
  1897. return perf_env__get_core(map, idx, &perf_stat.session->header.env);
  1898. }
  1899. static int perf_stat_init_aggr_mode_file(struct perf_stat *st)
  1900. {
  1901. struct perf_env *env = &st->session->header.env;
  1902. switch (stat_config.aggr_mode) {
  1903. case AGGR_SOCKET:
  1904. if (perf_env__build_socket_map(env, evsel_list->cpus, &aggr_map)) {
  1905. perror("cannot build socket map");
  1906. return -1;
  1907. }
  1908. aggr_get_id = perf_stat__get_socket_file;
  1909. break;
  1910. case AGGR_CORE:
  1911. if (perf_env__build_core_map(env, evsel_list->cpus, &aggr_map)) {
  1912. perror("cannot build core map");
  1913. return -1;
  1914. }
  1915. aggr_get_id = perf_stat__get_core_file;
  1916. break;
  1917. case AGGR_NONE:
  1918. case AGGR_GLOBAL:
  1919. case AGGR_THREAD:
  1920. case AGGR_UNSET:
  1921. default:
  1922. break;
  1923. }
  1924. return 0;
  1925. }
  1926. static int topdown_filter_events(const char **attr, char **str, bool use_group)
  1927. {
  1928. int off = 0;
  1929. int i;
  1930. int len = 0;
  1931. char *s;
  1932. for (i = 0; attr[i]; i++) {
  1933. if (pmu_have_event("cpu", attr[i])) {
  1934. len += strlen(attr[i]) + 1;
  1935. attr[i - off] = attr[i];
  1936. } else
  1937. off++;
  1938. }
  1939. attr[i - off] = NULL;
  1940. *str = malloc(len + 1 + 2);
  1941. if (!*str)
  1942. return -1;
  1943. s = *str;
  1944. if (i - off == 0) {
  1945. *s = 0;
  1946. return 0;
  1947. }
  1948. if (use_group)
  1949. *s++ = '{';
  1950. for (i = 0; attr[i]; i++) {
  1951. strcpy(s, attr[i]);
  1952. s += strlen(s);
  1953. *s++ = ',';
  1954. }
  1955. if (use_group) {
  1956. s[-1] = '}';
  1957. *s = 0;
  1958. } else
  1959. s[-1] = 0;
  1960. return 0;
  1961. }
  1962. __weak bool arch_topdown_check_group(bool *warn)
  1963. {
  1964. *warn = false;
  1965. return false;
  1966. }
/*
 * Weak default for the topdown group warning hook.  It is called from
 * add_default_attributes() when arch_topdown_check_group() sets its warn
 * flag; this default does nothing.
 * NOTE(review): presumably overridden by architecture code -- confirm.
 */
__weak void arch_topdown_group_warn(void)
{
}
  1970. /*
  1971. * Add default attributes, if there were no attributes specified or
  1972. * if -d/--detailed, -d -d or -d -d -d is used:
  1973. */
  1974. static int add_default_attributes(void)
  1975. {
  1976. int err;
  1977. struct perf_event_attr default_attrs0[] = {
  1978. { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK },
  1979. { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES },
  1980. { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS },
  1981. { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS },
  1982. { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES },
  1983. };
  1984. struct perf_event_attr frontend_attrs[] = {
  1985. { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND },
  1986. };
  1987. struct perf_event_attr backend_attrs[] = {
  1988. { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND },
  1989. };
  1990. struct perf_event_attr default_attrs1[] = {
  1991. { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS },
  1992. { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
  1993. { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES },
  1994. };
  1995. /*
  1996. * Detailed stats (-d), covering the L1 and last level data caches:
  1997. */
  1998. struct perf_event_attr detailed_attrs[] = {
  1999. { .type = PERF_TYPE_HW_CACHE,
  2000. .config =
  2001. PERF_COUNT_HW_CACHE_L1D << 0 |
  2002. (PERF_COUNT_HW_CACHE_OP_READ << 8) |
  2003. (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
  2004. { .type = PERF_TYPE_HW_CACHE,
  2005. .config =
  2006. PERF_COUNT_HW_CACHE_L1D << 0 |
  2007. (PERF_COUNT_HW_CACHE_OP_READ << 8) |
  2008. (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
  2009. { .type = PERF_TYPE_HW_CACHE,
  2010. .config =
  2011. PERF_COUNT_HW_CACHE_LL << 0 |
  2012. (PERF_COUNT_HW_CACHE_OP_READ << 8) |
  2013. (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
  2014. { .type = PERF_TYPE_HW_CACHE,
  2015. .config =
  2016. PERF_COUNT_HW_CACHE_LL << 0 |
  2017. (PERF_COUNT_HW_CACHE_OP_READ << 8) |
  2018. (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
  2019. };
  2020. /*
  2021. * Very detailed stats (-d -d), covering the instruction cache and the TLB caches:
  2022. */
  2023. struct perf_event_attr very_detailed_attrs[] = {
  2024. { .type = PERF_TYPE_HW_CACHE,
  2025. .config =
  2026. PERF_COUNT_HW_CACHE_L1I << 0 |
  2027. (PERF_COUNT_HW_CACHE_OP_READ << 8) |
  2028. (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
  2029. { .type = PERF_TYPE_HW_CACHE,
  2030. .config =
  2031. PERF_COUNT_HW_CACHE_L1I << 0 |
  2032. (PERF_COUNT_HW_CACHE_OP_READ << 8) |
  2033. (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
  2034. { .type = PERF_TYPE_HW_CACHE,
  2035. .config =
  2036. PERF_COUNT_HW_CACHE_DTLB << 0 |
  2037. (PERF_COUNT_HW_CACHE_OP_READ << 8) |
  2038. (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
  2039. { .type = PERF_TYPE_HW_CACHE,
  2040. .config =
  2041. PERF_COUNT_HW_CACHE_DTLB << 0 |
  2042. (PERF_COUNT_HW_CACHE_OP_READ << 8) |
  2043. (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
  2044. { .type = PERF_TYPE_HW_CACHE,
  2045. .config =
  2046. PERF_COUNT_HW_CACHE_ITLB << 0 |
  2047. (PERF_COUNT_HW_CACHE_OP_READ << 8) |
  2048. (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
  2049. { .type = PERF_TYPE_HW_CACHE,
  2050. .config =
  2051. PERF_COUNT_HW_CACHE_ITLB << 0 |
  2052. (PERF_COUNT_HW_CACHE_OP_READ << 8) |
  2053. (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
  2054. };
  2055. /*
  2056. * Very, very detailed stats (-d -d -d), adding prefetch events:
  2057. */
  2058. struct perf_event_attr very_very_detailed_attrs[] = {
  2059. { .type = PERF_TYPE_HW_CACHE,
  2060. .config =
  2061. PERF_COUNT_HW_CACHE_L1D << 0 |
  2062. (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) |
  2063. (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
  2064. { .type = PERF_TYPE_HW_CACHE,
  2065. .config =
  2066. PERF_COUNT_HW_CACHE_L1D << 0 |
  2067. (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) |
  2068. (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
  2069. };
  2070. /* Set attrs if no event is selected and !null_run: */
  2071. if (null_run)
  2072. return 0;
  2073. if (transaction_run) {
  2074. struct parse_events_error errinfo;
  2075. if (pmu_have_event("cpu", "cycles-ct") &&
  2076. pmu_have_event("cpu", "el-start"))
  2077. err = parse_events(evsel_list, transaction_attrs,
  2078. &errinfo);
  2079. else
  2080. err = parse_events(evsel_list,
  2081. transaction_limited_attrs,
  2082. &errinfo);
  2083. if (err) {
  2084. fprintf(stderr, "Cannot set up transaction events\n");
  2085. return -1;
  2086. }
  2087. return 0;
  2088. }
  2089. if (smi_cost) {
  2090. int smi;
  2091. if (sysfs__read_int(FREEZE_ON_SMI_PATH, &smi) < 0) {
  2092. fprintf(stderr, "freeze_on_smi is not supported.\n");
  2093. return -1;
  2094. }
  2095. if (!smi) {
  2096. if (sysfs__write_int(FREEZE_ON_SMI_PATH, 1) < 0) {
  2097. fprintf(stderr, "Failed to set freeze_on_smi.\n");
  2098. return -1;
  2099. }
  2100. smi_reset = true;
  2101. }
  2102. if (pmu_have_event("msr", "aperf") &&
  2103. pmu_have_event("msr", "smi")) {
  2104. if (!force_metric_only)
  2105. metric_only = true;
  2106. err = parse_events(evsel_list, smi_cost_attrs, NULL);
  2107. } else {
  2108. fprintf(stderr, "To measure SMI cost, it needs "
  2109. "msr/aperf/, msr/smi/ and cpu/cycles/ support\n");
  2110. return -1;
  2111. }
  2112. if (err) {
  2113. fprintf(stderr, "Cannot set up SMI cost events\n");
  2114. return -1;
  2115. }
  2116. return 0;
  2117. }
  2118. if (topdown_run) {
  2119. char *str = NULL;
  2120. bool warn = false;
  2121. if (stat_config.aggr_mode != AGGR_GLOBAL &&
  2122. stat_config.aggr_mode != AGGR_CORE) {
  2123. pr_err("top down event configuration requires --per-core mode\n");
  2124. return -1;
  2125. }
  2126. stat_config.aggr_mode = AGGR_CORE;
  2127. if (nr_cgroups || !target__has_cpu(&target)) {
  2128. pr_err("top down event configuration requires system-wide mode (-a)\n");
  2129. return -1;
  2130. }
  2131. if (!force_metric_only)
  2132. metric_only = true;
  2133. if (topdown_filter_events(topdown_attrs, &str,
  2134. arch_topdown_check_group(&warn)) < 0) {
  2135. pr_err("Out of memory\n");
  2136. return -1;
  2137. }
  2138. if (topdown_attrs[0] && str) {
  2139. if (warn)
  2140. arch_topdown_group_warn();
  2141. err = parse_events(evsel_list, str, NULL);
  2142. if (err) {
  2143. fprintf(stderr,
  2144. "Cannot set up top down events %s: %d\n",
  2145. str, err);
  2146. free(str);
  2147. return -1;
  2148. }
  2149. } else {
  2150. fprintf(stderr, "System does not support topdown\n");
  2151. return -1;
  2152. }
  2153. free(str);
  2154. }
  2155. if (!evsel_list->nr_entries) {
  2156. if (target__has_cpu(&target))
  2157. default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK;
  2158. if (perf_evlist__add_default_attrs(evsel_list, default_attrs0) < 0)
  2159. return -1;
  2160. if (pmu_have_event("cpu", "stalled-cycles-frontend")) {
  2161. if (perf_evlist__add_default_attrs(evsel_list,
  2162. frontend_attrs) < 0)
  2163. return -1;
  2164. }
  2165. if (pmu_have_event("cpu", "stalled-cycles-backend")) {
  2166. if (perf_evlist__add_default_attrs(evsel_list,
  2167. backend_attrs) < 0)
  2168. return -1;
  2169. }
  2170. if (perf_evlist__add_default_attrs(evsel_list, default_attrs1) < 0)
  2171. return -1;
  2172. }
  2173. /* Detailed events get appended to the event list: */
  2174. if (detailed_run < 1)
  2175. return 0;
  2176. /* Append detailed run extra attributes: */
  2177. if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0)
  2178. return -1;
  2179. if (detailed_run < 2)
  2180. return 0;
  2181. /* Append very detailed run extra attributes: */
  2182. if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0)
  2183. return -1;
  2184. if (detailed_run < 3)
  2185. return 0;
  2186. /* Append very, very detailed run extra attributes: */
  2187. return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs);
  2188. }
/* Usage strings for 'perf stat record', passed to parse_options() in __cmd_record(). */
static const char * const stat_record_usage[] = {
	"perf stat record [<options>]",
	NULL,
};
  2193. static void init_features(struct perf_session *session)
  2194. {
  2195. int feat;
  2196. for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
  2197. perf_header__set_feat(&session->header, feat);
  2198. perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
  2199. perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
  2200. perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
  2201. perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
  2202. }
  2203. static int __cmd_record(int argc, const char **argv)
  2204. {
  2205. struct perf_session *session;
  2206. struct perf_data *data = &perf_stat.data;
  2207. argc = parse_options(argc, argv, stat_options, stat_record_usage,
  2208. PARSE_OPT_STOP_AT_NON_OPTION);
  2209. if (output_name)
  2210. data->file.path = output_name;
  2211. if (run_count != 1 || forever) {
  2212. pr_err("Cannot use -r option with perf stat record.\n");
  2213. return -1;
  2214. }
  2215. session = perf_session__new(data, false, NULL);
  2216. if (session == NULL) {
  2217. pr_err("Perf session creation failed.\n");
  2218. return -1;
  2219. }
  2220. init_features(session);
  2221. session->evlist = evsel_list;
  2222. perf_stat.session = session;
  2223. perf_stat.record = true;
  2224. return argc;
  2225. }
  2226. static int process_stat_round_event(struct perf_tool *tool __maybe_unused,
  2227. union perf_event *event,
  2228. struct perf_session *session)
  2229. {
  2230. struct stat_round_event *stat_round = &event->stat_round;
  2231. struct perf_evsel *counter;
  2232. struct timespec tsh, *ts = NULL;
  2233. const char **argv = session->header.env.cmdline_argv;
  2234. int argc = session->header.env.nr_cmdline;
  2235. evlist__for_each_entry(evsel_list, counter)
  2236. perf_stat_process_counter(&stat_config, counter);
  2237. if (stat_round->type == PERF_STAT_ROUND_TYPE__FINAL)
  2238. update_stats(&walltime_nsecs_stats, stat_round->time);
  2239. if (stat_config.interval && stat_round->time) {
  2240. tsh.tv_sec = stat_round->time / NSEC_PER_SEC;
  2241. tsh.tv_nsec = stat_round->time % NSEC_PER_SEC;
  2242. ts = &tsh;
  2243. }
  2244. print_counters(ts, argc, argv);
  2245. return 0;
  2246. }
  2247. static
  2248. int process_stat_config_event(struct perf_tool *tool,
  2249. union perf_event *event,
  2250. struct perf_session *session __maybe_unused)
  2251. {
  2252. struct perf_stat *st = container_of(tool, struct perf_stat, tool);
  2253. perf_event__read_stat_config(&stat_config, &event->stat_config);
  2254. if (cpu_map__empty(st->cpus)) {
  2255. if (st->aggr_mode != AGGR_UNSET)
  2256. pr_warning("warning: processing task data, aggregation mode not set\n");
  2257. return 0;
  2258. }
  2259. if (st->aggr_mode != AGGR_UNSET)
  2260. stat_config.aggr_mode = st->aggr_mode;
  2261. if (perf_stat.data.is_pipe)
  2262. perf_stat_init_aggr_mode();
  2263. else
  2264. perf_stat_init_aggr_mode_file(st);
  2265. return 0;
  2266. }
  2267. static int set_maps(struct perf_stat *st)
  2268. {
  2269. if (!st->cpus || !st->threads)
  2270. return 0;
  2271. if (WARN_ONCE(st->maps_allocated, "stats double allocation\n"))
  2272. return -EINVAL;
  2273. perf_evlist__set_maps(evsel_list, st->cpus, st->threads);
  2274. if (perf_evlist__alloc_stats(evsel_list, true))
  2275. return -ENOMEM;
  2276. st->maps_allocated = true;
  2277. return 0;
  2278. }
  2279. static
  2280. int process_thread_map_event(struct perf_tool *tool,
  2281. union perf_event *event,
  2282. struct perf_session *session __maybe_unused)
  2283. {
  2284. struct perf_stat *st = container_of(tool, struct perf_stat, tool);
  2285. if (st->threads) {
  2286. pr_warning("Extra thread map event, ignoring.\n");
  2287. return 0;
  2288. }
  2289. st->threads = thread_map__new_event(&event->thread_map);
  2290. if (!st->threads)
  2291. return -ENOMEM;
  2292. return set_maps(st);
  2293. }
  2294. static
  2295. int process_cpu_map_event(struct perf_tool *tool,
  2296. union perf_event *event,
  2297. struct perf_session *session __maybe_unused)
  2298. {
  2299. struct perf_stat *st = container_of(tool, struct perf_stat, tool);
  2300. struct cpu_map *cpus;
  2301. if (st->cpus) {
  2302. pr_warning("Extra cpu map event, ignoring.\n");
  2303. return 0;
  2304. }
  2305. cpus = cpu_map__new_data(&event->cpu_map.data);
  2306. if (!cpus)
  2307. return -ENOMEM;
  2308. st->cpus = cpus;
  2309. return set_maps(st);
  2310. }
  2311. static int runtime_stat_new(struct perf_stat_config *config, int nthreads)
  2312. {
  2313. int i;
  2314. config->stats = calloc(nthreads, sizeof(struct runtime_stat));
  2315. if (!config->stats)
  2316. return -1;
  2317. config->stats_num = nthreads;
  2318. for (i = 0; i < nthreads; i++)
  2319. runtime_stat__init(&config->stats[i]);
  2320. return 0;
  2321. }
  2322. static void runtime_stat_delete(struct perf_stat_config *config)
  2323. {
  2324. int i;
  2325. if (!config->stats)
  2326. return;
  2327. for (i = 0; i < config->stats_num; i++)
  2328. runtime_stat__exit(&config->stats[i]);
  2329. free(config->stats);
  2330. }
/* Usage strings for 'perf stat report', passed to parse_options() in __cmd_report(). */
static const char * const stat_report_usage[] = {
	"perf stat report [<options>]",
	NULL,
};
/*
 * State shared by 'perf stat record' and 'perf stat report'.  The tool
 * member holds the event callbacks; __cmd_report() passes it to
 * perf_session__new().
 */
static struct perf_stat perf_stat = {
	.tool = {
		.attr		= perf_event__process_attr,
		.event_update	= perf_event__process_event_update,
		/* The map handlers below feed set_maps(). */
		.thread_map	= process_thread_map_event,
		.cpu_map	= process_cpu_map_event,
		.stat_config	= process_stat_config_event,
		.stat		= perf_event__process_stat_event,
		.stat_round	= process_stat_round_event,
	},
	/* No aggregation mode requested until a report option sets one. */
	.aggr_mode = AGGR_UNSET,
};
  2347. static int __cmd_report(int argc, const char **argv)
  2348. {
  2349. struct perf_session *session;
  2350. const struct option options[] = {
  2351. OPT_STRING('i', "input", &input_name, "file", "input file name"),
  2352. OPT_SET_UINT(0, "per-socket", &perf_stat.aggr_mode,
  2353. "aggregate counts per processor socket", AGGR_SOCKET),
  2354. OPT_SET_UINT(0, "per-core", &perf_stat.aggr_mode,
  2355. "aggregate counts per physical processor core", AGGR_CORE),
  2356. OPT_SET_UINT('A', "no-aggr", &perf_stat.aggr_mode,
  2357. "disable CPU count aggregation", AGGR_NONE),
  2358. OPT_END()
  2359. };
  2360. struct stat st;
  2361. int ret;
  2362. argc = parse_options(argc, argv, options, stat_report_usage, 0);
  2363. if (!input_name || !strlen(input_name)) {
  2364. if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode))
  2365. input_name = "-";
  2366. else
  2367. input_name = "perf.data";
  2368. }
  2369. perf_stat.data.file.path = input_name;
  2370. perf_stat.data.mode = PERF_DATA_MODE_READ;
  2371. session = perf_session__new(&perf_stat.data, false, &perf_stat.tool);
  2372. if (session == NULL)
  2373. return -1;
  2374. perf_stat.session = session;
  2375. stat_config.output = stderr;
  2376. evsel_list = session->evlist;
  2377. ret = perf_session__process_events(session);
  2378. if (ret)
  2379. return ret;
  2380. perf_session__delete(session);
  2381. return 0;
  2382. }
  2383. static void setup_system_wide(int forks)
  2384. {
  2385. /*
  2386. * Make system wide (-a) the default target if
  2387. * no target was specified and one of following
  2388. * conditions is met:
  2389. *
  2390. * - there's no workload specified
  2391. * - there is workload specified but all requested
  2392. * events are system wide events
  2393. */
  2394. if (!target__none(&target))
  2395. return;
  2396. if (!forks)
  2397. target.system_wide = true;
  2398. else {
  2399. struct perf_evsel *counter;
  2400. evlist__for_each_entry(evsel_list, counter) {
  2401. if (!counter->system_wide)
  2402. return;
  2403. }
  2404. if (evsel_list->nr_entries)
  2405. target.system_wide = true;
  2406. }
  2407. }
  2408. int cmd_stat(int argc, const char **argv)
  2409. {
  2410. const char * const stat_usage[] = {
  2411. "perf stat [<options>] [<command>]",
  2412. NULL
  2413. };
  2414. int status = -EINVAL, run_idx;
  2415. const char *mode;
  2416. FILE *output = stderr;
  2417. unsigned int interval, timeout;
  2418. const char * const stat_subcommands[] = { "record", "report" };
  2419. setlocale(LC_ALL, "");
  2420. evsel_list = perf_evlist__new();
  2421. if (evsel_list == NULL)
  2422. return -ENOMEM;
  2423. parse_events__shrink_config_terms();
  2424. argc = parse_options_subcommand(argc, argv, stat_options, stat_subcommands,
  2425. (const char **) stat_usage,
  2426. PARSE_OPT_STOP_AT_NON_OPTION);
  2427. perf_stat__collect_metric_expr(evsel_list);
  2428. perf_stat__init_shadow_stats();
  2429. if (csv_sep) {
  2430. csv_output = true;
  2431. if (!strcmp(csv_sep, "\\t"))
  2432. csv_sep = "\t";
  2433. } else
  2434. csv_sep = DEFAULT_SEPARATOR;
  2435. if (argc && !strncmp(argv[0], "rec", 3)) {
  2436. argc = __cmd_record(argc, argv);
  2437. if (argc < 0)
  2438. return -1;
  2439. } else if (argc && !strncmp(argv[0], "rep", 3))
  2440. return __cmd_report(argc, argv);
  2441. interval = stat_config.interval;
  2442. timeout = stat_config.timeout;
  2443. /*
  2444. * For record command the -o is already taken care of.
  2445. */
  2446. if (!STAT_RECORD && output_name && strcmp(output_name, "-"))
  2447. output = NULL;
  2448. if (output_name && output_fd) {
  2449. fprintf(stderr, "cannot use both --output and --log-fd\n");
  2450. parse_options_usage(stat_usage, stat_options, "o", 1);
  2451. parse_options_usage(NULL, stat_options, "log-fd", 0);
  2452. goto out;
  2453. }
  2454. if (metric_only && stat_config.aggr_mode == AGGR_THREAD) {
  2455. fprintf(stderr, "--metric-only is not supported with --per-thread\n");
  2456. goto out;
  2457. }
  2458. if (metric_only && run_count > 1) {
  2459. fprintf(stderr, "--metric-only is not supported with -r\n");
  2460. goto out;
  2461. }
  2462. if (walltime_run_table && run_count <= 1) {
  2463. fprintf(stderr, "--table is only supported with -r\n");
  2464. parse_options_usage(stat_usage, stat_options, "r", 1);
  2465. parse_options_usage(NULL, stat_options, "table", 0);
  2466. goto out;
  2467. }
  2468. if (output_fd < 0) {
  2469. fprintf(stderr, "argument to --log-fd must be a > 0\n");
  2470. parse_options_usage(stat_usage, stat_options, "log-fd", 0);
  2471. goto out;
  2472. }
  2473. if (!output) {
  2474. struct timespec tm;
  2475. mode = append_file ? "a" : "w";
  2476. output = fopen(output_name, mode);
  2477. if (!output) {
  2478. perror("failed to create output file");
  2479. return -1;
  2480. }
  2481. clock_gettime(CLOCK_REALTIME, &tm);
  2482. fprintf(output, "# started on %s\n", ctime(&tm.tv_sec));
  2483. } else if (output_fd > 0) {
  2484. mode = append_file ? "a" : "w";
  2485. output = fdopen(output_fd, mode);
  2486. if (!output) {
  2487. perror("Failed opening logfd");
  2488. return -errno;
  2489. }
  2490. }
  2491. stat_config.output = output;
  2492. /*
  2493. * let the spreadsheet do the pretty-printing
  2494. */
  2495. if (csv_output) {
  2496. /* User explicitly passed -B? */
  2497. if (big_num_opt == 1) {
  2498. fprintf(stderr, "-B option not supported with -x\n");
  2499. parse_options_usage(stat_usage, stat_options, "B", 1);
  2500. parse_options_usage(NULL, stat_options, "x", 1);
  2501. goto out;
  2502. } else /* Nope, so disable big number formatting */
  2503. big_num = false;
  2504. } else if (big_num_opt == 0) /* User passed --no-big-num */
  2505. big_num = false;
  2506. setup_system_wide(argc);
  2507. /*
  2508. * Display user/system times only for single
  2509. * run and when there's specified tracee.
  2510. */
  2511. if ((run_count == 1) && target__none(&target))
  2512. ru_display = true;
  2513. if (run_count < 0) {
  2514. pr_err("Run count must be a positive number\n");
  2515. parse_options_usage(stat_usage, stat_options, "r", 1);
  2516. goto out;
  2517. } else if (run_count == 0) {
  2518. forever = true;
  2519. run_count = 1;
  2520. }
  2521. if (walltime_run_table) {
  2522. walltime_run = zalloc(run_count * sizeof(walltime_run[0]));
  2523. if (!walltime_run) {
  2524. pr_err("failed to setup -r option");
  2525. goto out;
  2526. }
  2527. }
  2528. if ((stat_config.aggr_mode == AGGR_THREAD) &&
  2529. !target__has_task(&target)) {
  2530. if (!target.system_wide || target.cpu_list) {
  2531. fprintf(stderr, "The --per-thread option is only "
  2532. "available when monitoring via -p -t -a "
  2533. "options or only --per-thread.\n");
  2534. parse_options_usage(NULL, stat_options, "p", 1);
  2535. parse_options_usage(NULL, stat_options, "t", 1);
  2536. goto out;
  2537. }
  2538. }
  2539. /*
  2540. * no_aggr, cgroup are for system-wide only
  2541. * --per-thread is aggregated per thread, we dont mix it with cpu mode
  2542. */
  2543. if (((stat_config.aggr_mode != AGGR_GLOBAL &&
  2544. stat_config.aggr_mode != AGGR_THREAD) || nr_cgroups) &&
  2545. !target__has_cpu(&target)) {
  2546. fprintf(stderr, "both cgroup and no-aggregation "
  2547. "modes only available in system-wide mode\n");
  2548. parse_options_usage(stat_usage, stat_options, "G", 1);
  2549. parse_options_usage(NULL, stat_options, "A", 1);
  2550. parse_options_usage(NULL, stat_options, "a", 1);
  2551. goto out;
  2552. }
  2553. if (add_default_attributes())
  2554. goto out;
  2555. target__validate(&target);
  2556. if ((stat_config.aggr_mode == AGGR_THREAD) && (target.system_wide))
  2557. target.per_thread = true;
  2558. if (perf_evlist__create_maps(evsel_list, &target) < 0) {
  2559. if (target__has_task(&target)) {
  2560. pr_err("Problems finding threads of monitor\n");
  2561. parse_options_usage(stat_usage, stat_options, "p", 1);
  2562. parse_options_usage(NULL, stat_options, "t", 1);
  2563. } else if (target__has_cpu(&target)) {
  2564. perror("failed to parse CPUs map");
  2565. parse_options_usage(stat_usage, stat_options, "C", 1);
  2566. parse_options_usage(NULL, stat_options, "a", 1);
  2567. }
  2568. goto out;
  2569. }
  2570. /*
  2571. * Initialize thread_map with comm names,
  2572. * so we could print it out on output.
  2573. */
  2574. if (stat_config.aggr_mode == AGGR_THREAD) {
  2575. thread_map__read_comms(evsel_list->threads);
  2576. if (target.system_wide) {
  2577. if (runtime_stat_new(&stat_config,
  2578. thread_map__nr(evsel_list->threads))) {
  2579. goto out;
  2580. }
  2581. }
  2582. }
  2583. if (stat_config.times && interval)
  2584. interval_count = true;
  2585. else if (stat_config.times && !interval) {
  2586. pr_err("interval-count option should be used together with "
  2587. "interval-print.\n");
  2588. parse_options_usage(stat_usage, stat_options, "interval-count", 0);
  2589. parse_options_usage(stat_usage, stat_options, "I", 1);
  2590. goto out;
  2591. }
  2592. if (timeout && timeout < 100) {
  2593. if (timeout < 10) {
  2594. pr_err("timeout must be >= 10ms.\n");
  2595. parse_options_usage(stat_usage, stat_options, "timeout", 0);
  2596. goto out;
  2597. } else
  2598. pr_warning("timeout < 100ms. "
  2599. "The overhead percentage could be high in some cases. "
  2600. "Please proceed with caution.\n");
  2601. }
  2602. if (timeout && interval) {
  2603. pr_err("timeout option is not supported with interval-print.\n");
  2604. parse_options_usage(stat_usage, stat_options, "timeout", 0);
  2605. parse_options_usage(stat_usage, stat_options, "I", 1);
  2606. goto out;
  2607. }
  2608. if (perf_evlist__alloc_stats(evsel_list, interval))
  2609. goto out;
  2610. if (perf_stat_init_aggr_mode())
  2611. goto out;
  2612. /*
  2613. * We dont want to block the signals - that would cause
  2614. * child tasks to inherit that and Ctrl-C would not work.
  2615. * What we want is for Ctrl-C to work in the exec()-ed
  2616. * task, but being ignored by perf stat itself:
  2617. */
  2618. atexit(sig_atexit);
  2619. if (!forever)
  2620. signal(SIGINT, skip_signal);
  2621. signal(SIGCHLD, skip_signal);
  2622. signal(SIGALRM, skip_signal);
  2623. signal(SIGABRT, skip_signal);
  2624. status = 0;
  2625. for (run_idx = 0; forever || run_idx < run_count; run_idx++) {
  2626. if (run_count != 1 && verbose > 0)
  2627. fprintf(output, "[ perf stat: executing run #%d ... ]\n",
  2628. run_idx + 1);
  2629. status = run_perf_stat(argc, argv, run_idx);
  2630. if (forever && status != -1) {
  2631. print_counters(NULL, argc, argv);
  2632. perf_stat__reset_stats();
  2633. }
  2634. }
  2635. if (!forever && status != -1 && !interval)
  2636. print_counters(NULL, argc, argv);
  2637. if (STAT_RECORD) {
  2638. /*
  2639. * We synthesize the kernel mmap record just so that older tools
  2640. * don't emit warnings about not being able to resolve symbols
  2641. * due to /proc/sys/kernel/kptr_restrict settings and instear provide
  2642. * a saner message about no samples being in the perf.data file.
  2643. *
  2644. * This also serves to suppress a warning about f_header.data.size == 0
  2645. * in header.c at the moment 'perf stat record' gets introduced, which
  2646. * is not really needed once we start adding the stat specific PERF_RECORD_
  2647. * records, but the need to suppress the kptr_restrict messages in older
  2648. * tools remain -acme
  2649. */
  2650. int fd = perf_data__fd(&perf_stat.data);
  2651. int err = perf_event__synthesize_kernel_mmap((void *)&perf_stat,
  2652. process_synthesized_event,
  2653. &perf_stat.session->machines.host);
  2654. if (err) {
  2655. pr_warning("Couldn't synthesize the kernel mmap record, harmless, "
  2656. "older tools may produce warnings about this file\n.");
  2657. }
  2658. if (!interval) {
  2659. if (WRITE_STAT_ROUND_EVENT(walltime_nsecs_stats.max, FINAL))
  2660. pr_err("failed to write stat round event\n");
  2661. }
  2662. if (!perf_stat.data.is_pipe) {
  2663. perf_stat.session->header.data_size += perf_stat.bytes_written;
  2664. perf_session__write_header(perf_stat.session, evsel_list, fd, true);
  2665. }
  2666. perf_session__delete(perf_stat.session);
  2667. }
  2668. perf_stat__exit_aggr_mode();
  2669. perf_evlist__free_stats(evsel_list);
  2670. out:
  2671. free(walltime_run);
  2672. if (smi_cost && smi_reset)
  2673. sysfs__write_int(FREEZE_ON_SMI_PATH, 0);
  2674. perf_evlist__delete(evsel_list);
  2675. runtime_stat_delete(&stat_config);
  2676. return status;
  2677. }