/*
 * builtin-stat.c
 *
 * Builtin stat command: Give a precise performance counters summary
 * overview about any workload, CPU or specific PID.
 *
 * Sample output:

   $ perf stat ./hackbench 10

  Time: 0.118

  Performance counter stats for './hackbench 10':

       1708.761321 task-clock                #   11.037 CPUs utilized
            41,190 context-switches          #    0.024 M/sec
             6,735 CPU-migrations            #    0.004 M/sec
            17,318 page-faults               #    0.010 M/sec
     5,205,202,243 cycles                    #    3.046 GHz
     3,856,436,920 stalled-cycles-frontend   #   74.09% frontend cycles idle
     1,600,790,871 stalled-cycles-backend    #   30.75% backend cycles idle
     2,603,501,247 instructions              #    0.50  insns per cycle
                                             #    1.48  stalled cycles per insn
       484,357,498 branches                  #  283.455 M/sec
         6,388,934 branch-misses             #    1.32% of all branches

        0.154822978  seconds time elapsed

 *
 * Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
 *
 * Improvements and fixes by:
 *
 *   Arjan van de Ven <arjan@linux.intel.com>
 *   Yanmin Zhang <yanmin.zhang@intel.com>
 *   Wu Fengguang <fengguang.wu@intel.com>
 *   Mike Galbraith <efault@gmx.de>
 *   Paul Mackerras <paulus@samba.org>
 *   Jaswinder Singh Rajput <jaswinder@kernel.org>
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */
#include "perf.h"
#include "builtin.h"
#include "util/cgroup.h"
#include "util/util.h"
#include <subcmd/parse-options.h>
#include "util/parse-events.h"
#include "util/pmu.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/drv_configs.h"
#include "util/color.h"
#include "util/stat.h"
#include "util/header.h"
#include "util/cpumap.h"
#include "util/thread.h"
#include "util/thread_map.h"
#include "util/counts.h"
#include "util/group.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/string2.h"
#include "util/metricgroup.h"
#include "util/top.h"
#include "asm/bug.h"

#include <linux/time64.h>
#include <api/fs/fs.h>
#include <errno.h>
#include <signal.h>
#include <stdlib.h>
#include <sys/prctl.h>
#include <inttypes.h>
#include <locale.h>
#include <math.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <unistd.h>
#include <sys/time.h>
#include <sys/resource.h>

#include "sane_ctype.h"

#define DEFAULT_SEPARATOR	" "
#define FREEZE_ON_SMI_PATH	"devices/cpu/freeze_on_smi"
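/*
 * Note: FREEZE_ON_SMI_PATH is relative to the sysfs mount point; when passed
 * to sysfs__read_int()/sysfs__write_int() it typically resolves to
 * /sys/devices/cpu/freeze_on_smi.
 */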
static void print_counters(struct timespec *ts, int argc, const char **argv);

/* Default events used for perf stat -T */
static const char *transaction_attrs = {
	"task-clock,"
	"{"
	"instructions,"
	"cycles,"
	"cpu/cycles-t/,"
	"cpu/tx-start/,"
	"cpu/el-start/,"
	"cpu/cycles-ct/"
	"}"
};
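/*
 * In parse-events syntax, "{...}" groups the enclosed events so they get
 * scheduled on the PMU together: here task-clock stays outside the group,
 * while the transactional events are grouped so their ratios stay consistent.
 */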
/* More limited version when the CPU does not have all events. */
static const char *transaction_limited_attrs = {
	"task-clock,"
	"{"
	"instructions,"
	"cycles,"
	"cpu/cycles-t/,"
	"cpu/tx-start/"
	"}"
};

static const char *topdown_attrs[] = {
	"topdown-total-slots",
	"topdown-slots-retired",
	"topdown-recovery-bubbles",
	"topdown-fetch-bubbles",
	"topdown-slots-issued",
	NULL,
};

static const char *smi_cost_attrs = {
	"{"
	"msr/aperf/,"
	"msr/smi/,"
	"cycles"
	"}"
};

static struct perf_evlist	*evsel_list;

static struct target target = {
	.uid = UINT_MAX,
};

#define METRIC_ONLY_LEN 20

static volatile pid_t		child_pid		= -1;
static int			detailed_run		= 0;
static bool			transaction_run;
static bool			topdown_run		= false;
static bool			smi_cost		= false;
static bool			smi_reset		= false;
static int			big_num_opt		= -1;
static bool			group			= false;
static const char		*pre_cmd		= NULL;
static const char		*post_cmd		= NULL;
static bool			sync_run		= false;
static bool			forever			= false;
static bool			force_metric_only	= false;
static struct timespec		ref_time;
static bool			append_file;
static bool			interval_count;
static const char		*output_name;
static int			output_fd;

struct perf_stat {
	bool			 record;
	struct perf_data	 data;
	struct perf_session	*session;
	u64			 bytes_written;
	struct perf_tool	 tool;
	bool			 maps_allocated;
	struct cpu_map		*cpus;
	struct thread_map	*threads;
	enum aggr_mode		 aggr_mode;
};

static struct perf_stat		perf_stat;
#define STAT_RECORD		perf_stat.record

static volatile int done = 0;

static struct perf_stat_config stat_config = {
	.aggr_mode		= AGGR_GLOBAL,
	.scale			= true,
	.unit_width		= 4, /* strlen("unit") */
	.run_count		= 1,
	.metric_only_len	= METRIC_ONLY_LEN,
	.walltime_nsecs_stats	= &walltime_nsecs_stats,
	.big_num		= true,
};
static inline void diff_timespec(struct timespec *r, struct timespec *a,
				 struct timespec *b)
{
	r->tv_sec = a->tv_sec - b->tv_sec;
	if (a->tv_nsec < b->tv_nsec) {
		r->tv_nsec = a->tv_nsec + NSEC_PER_SEC - b->tv_nsec;
		r->tv_sec--;
	} else {
		r->tv_nsec = a->tv_nsec - b->tv_nsec;
	}
}
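/*
 * Worked example (illustrative): a = {2, 100000000}, b = {1, 900000000}.
 * tv_nsec would underflow, so one second is borrowed:
 * r = {2 - 1 - 1, 100000000 + NSEC_PER_SEC - 900000000} = {0, 200000000},
 * i.e. 0.2s, as expected.
 */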
static void perf_stat__reset_stats(void)
{
	int i;

	perf_evlist__reset_stats(evsel_list);
	perf_stat__reset_shadow_stats();

	for (i = 0; i < stat_config.stats_num; i++)
		perf_stat__reset_shadow_per_stat(&stat_config.stats[i]);
}

static int process_synthesized_event(struct perf_tool *tool __maybe_unused,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	if (perf_data__write(&perf_stat.data, event, event->header.size) < 0) {
		pr_err("failed to write perf data, error: %m\n");
		return -1;
	}

	perf_stat.bytes_written += event->header.size;
	return 0;
}

static int write_stat_round_event(u64 tm, u64 type)
{
	return perf_event__synthesize_stat_round(NULL, tm, type,
						 process_synthesized_event,
						 NULL);
}

#define WRITE_STAT_ROUND_EVENT(time, interval) \
	write_stat_round_event(time, PERF_STAT_ROUND_TYPE__ ## interval)
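/*
 * The ## token paste means WRITE_STAT_ROUND_EVENT(t, INTERVAL) expands to
 * write_stat_round_event(t, PERF_STAT_ROUND_TYPE__INTERVAL), and
 * WRITE_STAT_ROUND_EVENT(t, FINAL) to the ...__FINAL variant.
 */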
#define SID(e, x, y) xyarray__entry(e->sample_id, x, y)

static int
perf_evsel__write_stat_event(struct perf_evsel *counter, u32 cpu, u32 thread,
			     struct perf_counts_values *count)
{
	struct perf_sample_id *sid = SID(counter, cpu, thread);

	return perf_event__synthesize_stat(NULL, cpu, thread, sid->id, count,
					   process_synthesized_event, NULL);
}

/*
 * Read out the results of a single counter:
 * do not aggregate counts across CPUs in system-wide mode
 */
static int read_counter(struct perf_evsel *counter)
{
	int nthreads = thread_map__nr(evsel_list->threads);
	int ncpus, cpu, thread;

	if (target__has_cpu(&target) && !target__has_per_thread(&target))
		ncpus = perf_evsel__nr_cpus(counter);
	else
		ncpus = 1;

	if (!counter->supported)
		return -ENOENT;

	if (counter->system_wide)
		nthreads = 1;

	for (thread = 0; thread < nthreads; thread++) {
		for (cpu = 0; cpu < ncpus; cpu++) {
			struct perf_counts_values *count;

			count = perf_counts(counter->counts, cpu, thread);

			/*
			 * The leader's group read loads data into its group
			 * members (via perf_evsel__read_counter) and sets
			 * their count->loaded.
			 */
			if (!count->loaded &&
			    perf_evsel__read_counter(counter, cpu, thread)) {
				counter->counts->scaled = -1;
				perf_counts(counter->counts, cpu, thread)->ena = 0;
				perf_counts(counter->counts, cpu, thread)->run = 0;
				return -1;
			}

			count->loaded = false;

			if (STAT_RECORD) {
				if (perf_evsel__write_stat_event(counter, cpu, thread, count)) {
					pr_err("failed to write stat event\n");
					return -1;
				}
			}

			if (verbose > 1) {
				fprintf(stat_config.output,
					"%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
					perf_evsel__name(counter),
					cpu,
					count->val, count->ena, count->run);
			}
		}
	}

	return 0;
}
static void read_counters(void)
{
	struct perf_evsel *counter;
	int ret;

	evlist__for_each_entry(evsel_list, counter) {
		ret = read_counter(counter);
		if (ret)
			pr_debug("failed to read counter %s\n", counter->name);

		if (ret == 0 && perf_stat_process_counter(&stat_config, counter))
			pr_warning("failed to process counter %s\n", counter->name);
	}
}

static void process_interval(void)
{
	struct timespec ts, rs;

	read_counters();

	clock_gettime(CLOCK_MONOTONIC, &ts);
	diff_timespec(&rs, &ts, &ref_time);

	if (STAT_RECORD) {
		if (WRITE_STAT_ROUND_EVENT(rs.tv_sec * NSEC_PER_SEC + rs.tv_nsec, INTERVAL))
			pr_err("failed to write stat round event\n");
	}

	init_stats(&walltime_nsecs_stats);
	update_stats(&walltime_nsecs_stats, stat_config.interval * 1000000);
	print_counters(&rs, 0, NULL);
}
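/*
 * Note on process_interval(): walltime_nsecs_stats is re-initialized to the
 * interval length on every round, so time-based metrics printed by
 * print_counters() (e.g. M/sec rates) reflect only the last interval, not
 * the whole run.
 */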
static void enable_counters(void)
{
	if (stat_config.initial_delay)
		usleep(stat_config.initial_delay * USEC_PER_MSEC);

	/*
	 * We need to enable counters only if:
	 * - we don't have tracee (attaching to task or cpu)
	 * - we have initial delay configured
	 */
	if (!target__none(&target) || stat_config.initial_delay)
		perf_evlist__enable(evsel_list);
}

static void disable_counters(void)
{
	/*
	 * If we don't have tracee (attaching to task or cpu), counters may
	 * still be running. To get accurate group ratios, we must stop groups
	 * from counting before reading their constituent counters.
	 */
	if (!target__none(&target))
		perf_evlist__disable(evsel_list);
}

static volatile int workload_exec_errno;

/*
 * perf_evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked by setting its
 * want_signal to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *info,
					void *ucontext __maybe_unused)
{
	workload_exec_errno = info->si_value.sival_int;
}

static bool perf_evsel__should_store_id(struct perf_evsel *counter)
{
	return STAT_RECORD || counter->attr.read_format & PERF_FORMAT_ID;
}

static bool is_target_alive(struct target *_target,
			    struct thread_map *threads)
{
	struct stat st;
	int i;

	if (!target__has_task(_target))
		return true;

	for (i = 0; i < threads->nr; i++) {
		char path[PATH_MAX];

		scnprintf(path, PATH_MAX, "%s/%d", procfs__mountpoint(),
			  threads->map[i].pid);

		if (!stat(path, &st))
			return true;
	}

	return false;
}
static int __run_perf_stat(int argc, const char **argv, int run_idx)
{
	int interval = stat_config.interval;
	int times = stat_config.times;
	int timeout = stat_config.timeout;
	char msg[BUFSIZ];
	unsigned long long t0, t1;
	struct perf_evsel *counter;
	struct timespec ts;
	size_t l;
	int status = 0;
	const bool forks = (argc > 0);
	bool is_pipe = STAT_RECORD ? perf_stat.data.is_pipe : false;
	struct perf_evsel_config_term *err_term;

	if (interval) {
		ts.tv_sec  = interval / USEC_PER_MSEC;
		ts.tv_nsec = (interval % USEC_PER_MSEC) * NSEC_PER_MSEC;
	} else if (timeout) {
		ts.tv_sec  = timeout / USEC_PER_MSEC;
		ts.tv_nsec = (timeout % USEC_PER_MSEC) * NSEC_PER_MSEC;
	} else {
		ts.tv_sec  = 1;
		ts.tv_nsec = 0;
	}

	if (forks) {
		if (perf_evlist__prepare_workload(evsel_list, &target, argv, is_pipe,
						  workload_exec_failed_signal) < 0) {
			perror("failed to prepare workload");
			return -1;
		}
		child_pid = evsel_list->workload.pid;
	}

	if (group)
		perf_evlist__set_leader(evsel_list);

	evlist__for_each_entry(evsel_list, counter) {
try_again:
		if (create_perf_stat_counter(counter, &stat_config, &target) < 0) {

			/* Weak group failed. Reset the group. */
			if ((errno == EINVAL || errno == EBADF) &&
			    counter->leader != counter &&
			    counter->weak_group) {
				counter = perf_evlist__reset_weak_group(evsel_list, counter);
				goto try_again;
			}

			/*
			 * PPC returns ENXIO for HW counters until 2.6.37
			 * (behavior changed with commit b0a873e).
			 */
			if (errno == EINVAL || errno == ENOSYS ||
			    errno == ENOENT || errno == EOPNOTSUPP ||
			    errno == ENXIO) {
				if (verbose > 0)
					ui__warning("%s event is not supported by the kernel.\n",
						    perf_evsel__name(counter));
				counter->supported = false;

				if ((counter->leader != counter) ||
				    !(counter->leader->nr_members > 1))
					continue;
			} else if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) {
				if (verbose > 0)
					ui__warning("%s\n", msg);
				goto try_again;
			} else if (target__has_per_thread(&target) &&
				   evsel_list->threads &&
				   evsel_list->threads->err_thread != -1) {
				/*
				 * For global --per-thread case, skip current
				 * error thread.
				 */
				if (!thread_map__remove(evsel_list->threads,
							evsel_list->threads->err_thread)) {
					evsel_list->threads->err_thread = -1;
					goto try_again;
				}
			}

			perf_evsel__open_strerror(counter, &target,
						  errno, msg, sizeof(msg));
			ui__error("%s\n", msg);

			if (child_pid != -1)
				kill(child_pid, SIGTERM);

			return -1;
		}
		counter->supported = true;

		l = strlen(counter->unit);
		if (l > stat_config.unit_width)
			stat_config.unit_width = l;

		if (perf_evsel__should_store_id(counter) &&
		    perf_evsel__store_ids(counter, evsel_list))
			return -1;
	}
	if (perf_evlist__apply_filters(evsel_list, &counter)) {
		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
			counter->filter, perf_evsel__name(counter), errno,
			str_error_r(errno, msg, sizeof(msg)));
		return -1;
	}

	if (perf_evlist__apply_drv_configs(evsel_list, &counter, &err_term)) {
		pr_err("failed to set config \"%s\" on event %s with %d (%s)\n",
			err_term->val.drv_cfg, perf_evsel__name(counter), errno,
			str_error_r(errno, msg, sizeof(msg)));
		return -1;
	}

	if (STAT_RECORD) {
		int err, fd = perf_data__fd(&perf_stat.data);

		if (is_pipe) {
			err = perf_header__write_pipe(perf_data__fd(&perf_stat.data));
		} else {
			err = perf_session__write_header(perf_stat.session, evsel_list,
							 fd, false);
		}

		if (err < 0)
			return err;

		err = perf_stat_synthesize_config(&stat_config, NULL, evsel_list,
						  process_synthesized_event, is_pipe);
		if (err < 0)
			return err;
	}

	/*
	 * Enable counters and exec the command:
	 */
	t0 = rdclock();
	clock_gettime(CLOCK_MONOTONIC, &ref_time);

	if (forks) {
		perf_evlist__start_workload(evsel_list);
		enable_counters();

		if (interval || timeout) {
			while (!waitpid(child_pid, &status, WNOHANG)) {
				nanosleep(&ts, NULL);
				if (timeout)
					break;
				process_interval();
				if (interval_count && !(--times))
					break;
			}
		}
		wait4(child_pid, &status, 0, &stat_config.ru_data);

		if (workload_exec_errno) {
			const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
			pr_err("Workload failed: %s\n", emsg);
			return -1;
		}

		if (WIFSIGNALED(status))
			psignal(WTERMSIG(status), argv[0]);
	} else {
		enable_counters();
		while (!done) {
			nanosleep(&ts, NULL);
			if (!is_target_alive(&target, evsel_list->threads))
				break;
			if (timeout)
				break;
			if (interval) {
				process_interval();
				if (interval_count && !(--times))
					break;
			}
		}
	}

	disable_counters();

	t1 = rdclock();

	if (stat_config.walltime_run_table)
		stat_config.walltime_run[run_idx] = t1 - t0;

	update_stats(&walltime_nsecs_stats, t1 - t0);

	/*
	 * Closing a group leader splits the group, and as we only disable
	 * group leaders, results in remaining events becoming enabled. To
	 * avoid arbitrary skew, we must read all counters before closing any
	 * group leaders.
	 */
	read_counters();
	perf_evlist__close(evsel_list);

	return WEXITSTATUS(status);
}
static int run_perf_stat(int argc, const char **argv, int run_idx)
{
	int ret;

	if (pre_cmd) {
		ret = system(pre_cmd);
		if (ret)
			return ret;
	}

	if (sync_run)
		sync();

	ret = __run_perf_stat(argc, argv, run_idx);
	if (ret)
		return ret;

	if (post_cmd) {
		ret = system(post_cmd);
		if (ret)
			return ret;
	}

	return ret;
}

static void print_counters(struct timespec *ts, int argc, const char **argv)
{
	/* Do not print anything if we record to the pipe. */
	if (STAT_RECORD && perf_stat.data.is_pipe)
		return;

	perf_evlist__print_counters(evsel_list, &stat_config, &target,
				    ts, argc, argv);
}

static volatile int signr = -1;

static void skip_signal(int signo)
{
	if ((child_pid == -1) || stat_config.interval)
		done = 1;

	signr = signo;
	/*
	 * render child_pid harmless
	 * won't send SIGTERM to a random
	 * process in case of race condition
	 * and fast PID recycling
	 */
	child_pid = -1;
}
static void sig_atexit(void)
{
	sigset_t set, oset;

	/*
	 * avoid race condition with SIGCHLD handler
	 * in skip_signal() which is modifying child_pid
	 * goal is to avoid sending SIGTERM to a random
	 * process
	 */
	sigemptyset(&set);
	sigaddset(&set, SIGCHLD);
	sigprocmask(SIG_BLOCK, &set, &oset);

	if (child_pid != -1)
		kill(child_pid, SIGTERM);

	sigprocmask(SIG_SETMASK, &oset, NULL);

	if (signr == -1)
		return;

	signal(signr, SIG_DFL);
	kill(getpid(), signr);
}

static int stat__set_big_num(const struct option *opt __maybe_unused,
			     const char *s __maybe_unused, int unset)
{
	big_num_opt = unset ? 0 : 1;
	return 0;
}

static int enable_metric_only(const struct option *opt __maybe_unused,
			      const char *s __maybe_unused, int unset)
{
	force_metric_only = true;
	stat_config.metric_only = !unset;
	return 0;
}

static int parse_metric_groups(const struct option *opt,
			       const char *str,
			       int unset __maybe_unused)
{
	return metricgroup__parse_groups(opt, str, &stat_config.metric_events);
}
static const struct option stat_options[] = {
	OPT_BOOLEAN('T', "transaction", &transaction_run,
		    "hardware transaction statistics"),
	OPT_CALLBACK('e', "event", &evsel_list, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &evsel_list, "filter",
		     "event filter", parse_filter),
	OPT_BOOLEAN('i', "no-inherit", &stat_config.no_inherit,
		    "child tasks do not inherit counters"),
	OPT_STRING('p', "pid", &target.pid, "pid",
		   "stat events on existing process id"),
	OPT_STRING('t', "tid", &target.tid, "tid",
		   "stat events on existing thread id"),
	OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
		    "system-wide collection from all CPUs"),
	OPT_BOOLEAN('g', "group", &group,
		    "put the counters into a counter group"),
	OPT_BOOLEAN('c', "scale", &stat_config.scale, "scale/normalize counters"),
	OPT_INCR('v', "verbose", &verbose,
		 "be more verbose (show counter open errors, etc)"),
	OPT_INTEGER('r', "repeat", &stat_config.run_count,
		    "repeat command and print average + stddev (max: 100, forever: 0)"),
	OPT_BOOLEAN(0, "table", &stat_config.walltime_run_table,
		    "display details about each run (only with -r option)"),
	OPT_BOOLEAN('n', "null", &stat_config.null_run,
		    "null run - don't start any counters"),
	OPT_INCR('d', "detailed", &detailed_run,
		 "detailed run - start a lot of events"),
	OPT_BOOLEAN('S', "sync", &sync_run,
		    "call sync() before starting a run"),
	OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL,
			   "print large numbers with thousands' separators",
			   stat__set_big_num),
	OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
		   "list of cpus to monitor in system-wide"),
	OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode,
		     "disable CPU count aggregation", AGGR_NONE),
	OPT_BOOLEAN(0, "no-merge", &stat_config.no_merge, "Do not merge identical named events"),
	OPT_STRING('x', "field-separator", &stat_config.csv_sep, "separator",
		   "print counts with custom separator"),
	OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
		     "monitor event in cgroup name only", parse_cgroups),
	OPT_STRING('o', "output", &output_name, "file", "output file name"),
	OPT_BOOLEAN(0, "append", &append_file, "append to the output file"),
	OPT_INTEGER(0, "log-fd", &output_fd,
		    "log output to fd, instead of stderr"),
	OPT_STRING(0, "pre", &pre_cmd, "command",
		   "command to run prior to the measured command"),
	OPT_STRING(0, "post", &post_cmd, "command",
		   "command to run after the measured command"),
	OPT_UINTEGER('I', "interval-print", &stat_config.interval,
		     "print counts at regular interval in ms "
		     "(overhead is possible for values <= 100ms)"),
	OPT_INTEGER(0, "interval-count", &stat_config.times,
		    "print counts for fixed number of times"),
	OPT_BOOLEAN(0, "interval-clear", &stat_config.interval_clear,
		    "clear screen in between new interval"),
	OPT_UINTEGER(0, "timeout", &stat_config.timeout,
		     "stop workload and print counts after a timeout period in ms (>= 10ms)"),
	OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode,
		     "aggregate counts per processor socket", AGGR_SOCKET),
	OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode,
		     "aggregate counts per physical processor core", AGGR_CORE),
	OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode,
		     "aggregate counts per thread", AGGR_THREAD),
	OPT_UINTEGER('D', "delay", &stat_config.initial_delay,
		     "ms to wait before starting measurement after program start"),
	OPT_CALLBACK_NOOPT(0, "metric-only", &stat_config.metric_only, NULL,
			   "Only print computed metrics. No raw values", enable_metric_only),
	OPT_BOOLEAN(0, "topdown", &topdown_run,
		    "measure topdown level 1 statistics"),
	OPT_BOOLEAN(0, "smi-cost", &smi_cost,
		    "measure SMI cost"),
	OPT_CALLBACK('M', "metrics", &evsel_list, "metric/metric group list",
		     "monitor specified metrics or metric groups (separated by ,)",
		     parse_metric_groups),
	OPT_END()
};
static int perf_stat__get_socket(struct perf_stat_config *config __maybe_unused,
				 struct cpu_map *map, int cpu)
{
	return cpu_map__get_socket(map, cpu, NULL);
}

static int perf_stat__get_core(struct perf_stat_config *config __maybe_unused,
			       struct cpu_map *map, int cpu)
{
	return cpu_map__get_core(map, cpu, NULL);
}

static int cpu_map__get_max(struct cpu_map *map)
{
	int i, max = -1;

	for (i = 0; i < map->nr; i++) {
		if (map->map[i] > max)
			max = map->map[i];
	}

	return max;
}

static int perf_stat__get_aggr(struct perf_stat_config *config,
			       aggr_get_id_t get_id, struct cpu_map *map, int idx)
{
	int cpu;

	if (idx >= map->nr)
		return -1;

	cpu = map->map[idx];

	if (config->cpus_aggr_map->map[cpu] == -1)
		config->cpus_aggr_map->map[cpu] = get_id(config, map, idx);

	return config->cpus_aggr_map->map[cpu];
}
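/*
 * cpus_aggr_map acts as a memoization cache: the first lookup for a given
 * cpu calls get_id() and stores the result; later lookups return the cached
 * aggregation id directly.
 */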
static int perf_stat__get_socket_cached(struct perf_stat_config *config,
					struct cpu_map *map, int idx)
{
	return perf_stat__get_aggr(config, perf_stat__get_socket, map, idx);
}

static int perf_stat__get_core_cached(struct perf_stat_config *config,
				      struct cpu_map *map, int idx)
{
	return perf_stat__get_aggr(config, perf_stat__get_core, map, idx);
}

static int perf_stat_init_aggr_mode(void)
{
	int nr;

	switch (stat_config.aggr_mode) {
	case AGGR_SOCKET:
		if (cpu_map__build_socket_map(evsel_list->cpus, &stat_config.aggr_map)) {
			perror("cannot build socket map");
			return -1;
		}
		stat_config.aggr_get_id = perf_stat__get_socket_cached;
		break;
	case AGGR_CORE:
		if (cpu_map__build_core_map(evsel_list->cpus, &stat_config.aggr_map)) {
			perror("cannot build core map");
			return -1;
		}
		stat_config.aggr_get_id = perf_stat__get_core_cached;
		break;
	case AGGR_NONE:
	case AGGR_GLOBAL:
	case AGGR_THREAD:
	case AGGR_UNSET:
	default:
		break;
	}

	/*
	 * The evsel_list->cpus is the base we operate on,
	 * taking the highest cpu number to be the size of
	 * the aggregation translate cpumap.
	 */
	nr = cpu_map__get_max(evsel_list->cpus);
	stat_config.cpus_aggr_map = cpu_map__empty_new(nr + 1);
	return stat_config.cpus_aggr_map ? 0 : -ENOMEM;
}

static void perf_stat__exit_aggr_mode(void)
{
	cpu_map__put(stat_config.aggr_map);
	cpu_map__put(stat_config.cpus_aggr_map);
	stat_config.aggr_map = NULL;
	stat_config.cpus_aggr_map = NULL;
}
static inline int perf_env__get_cpu(struct perf_env *env, struct cpu_map *map, int idx)
{
	int cpu;

	if (idx > map->nr)
		return -1;

	cpu = map->map[idx];

	if (cpu >= env->nr_cpus_avail)
		return -1;

	return cpu;
}

static int perf_env__get_socket(struct cpu_map *map, int idx, void *data)
{
	struct perf_env *env = data;
	int cpu = perf_env__get_cpu(env, map, idx);

	return cpu == -1 ? -1 : env->cpu[cpu].socket_id;
}

static int perf_env__get_core(struct cpu_map *map, int idx, void *data)
{
	struct perf_env *env = data;
	int core = -1, cpu = perf_env__get_cpu(env, map, idx);

	if (cpu != -1) {
		int socket_id = env->cpu[cpu].socket_id;

		/*
		 * Encode socket in upper 16 bits
		 * core_id is relative to socket, and
		 * we need a global id. So we combine
		 * socket + core id.
		 */
		core = (socket_id << 16) | (env->cpu[cpu].core_id & 0xffff);
	}

	return core;
}
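/*
 * Worked example (illustrative): socket 1, core 2 encodes to
 * (1 << 16) | 2 = 0x10002, which stays unique across sockets even though
 * each socket numbers its cores from 0.
 */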
static int perf_env__build_socket_map(struct perf_env *env, struct cpu_map *cpus,
				      struct cpu_map **sockp)
{
	return cpu_map__build_map(cpus, sockp, perf_env__get_socket, env);
}

static int perf_env__build_core_map(struct perf_env *env, struct cpu_map *cpus,
				    struct cpu_map **corep)
{
	return cpu_map__build_map(cpus, corep, perf_env__get_core, env);
}

static int perf_stat__get_socket_file(struct perf_stat_config *config __maybe_unused,
				      struct cpu_map *map, int idx)
{
	return perf_env__get_socket(map, idx, &perf_stat.session->header.env);
}

static int perf_stat__get_core_file(struct perf_stat_config *config __maybe_unused,
				    struct cpu_map *map, int idx)
{
	return perf_env__get_core(map, idx, &perf_stat.session->header.env);
}

static int perf_stat_init_aggr_mode_file(struct perf_stat *st)
{
	struct perf_env *env = &st->session->header.env;

	switch (stat_config.aggr_mode) {
	case AGGR_SOCKET:
		if (perf_env__build_socket_map(env, evsel_list->cpus, &stat_config.aggr_map)) {
			perror("cannot build socket map");
			return -1;
		}
		stat_config.aggr_get_id = perf_stat__get_socket_file;
		break;
	case AGGR_CORE:
		if (perf_env__build_core_map(env, evsel_list->cpus, &stat_config.aggr_map)) {
			perror("cannot build core map");
			return -1;
		}
		stat_config.aggr_get_id = perf_stat__get_core_file;
		break;
	case AGGR_NONE:
	case AGGR_GLOBAL:
	case AGGR_THREAD:
	case AGGR_UNSET:
	default:
		break;
	}

	return 0;
}
static int topdown_filter_events(const char **attr, char **str, bool use_group)
{
	int off = 0;
	int i;
	int len = 0;
	char *s;

	for (i = 0; attr[i]; i++) {
		if (pmu_have_event("cpu", attr[i])) {
			len += strlen(attr[i]) + 1;
			attr[i - off] = attr[i];
		} else
			off++;
	}
	attr[i - off] = NULL;

	*str = malloc(len + 1 + 2);
	if (!*str)
		return -1;
	s = *str;
	if (i - off == 0) {
		*s = 0;
		return 0;
	}
	if (use_group)
		*s++ = '{';
	for (i = 0; attr[i]; i++) {
		strcpy(s, attr[i]);
		s += strlen(s);
		*s++ = ',';
	}
	if (use_group) {
		s[-1] = '}';
		*s = 0;
	} else
		s[-1] = 0;
	return 0;
}
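/*
 * Example (illustrative): if only topdown-total-slots and
 * topdown-slots-retired exist in the cpu PMU, this yields
 * "{topdown-total-slots,topdown-slots-retired}" when use_group is true,
 * or "topdown-total-slots,topdown-slots-retired" without grouping.
 */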
__weak bool arch_topdown_check_group(bool *warn)
{
	*warn = false;
	return false;
}

__weak void arch_topdown_group_warn(void)
{
}
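/*
 * These __weak stubs are the generic fallback; an architecture can provide
 * strong definitions (x86 does, for its topdown grouping constraints) that
 * override them at link time.
 */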
/*
 * Add default attributes, if there were no attributes specified or
 * if -d/--detailed, -d -d or -d -d -d is used:
 */
static int add_default_attributes(void)
{
	int err;
	struct perf_event_attr default_attrs0[] = {
		{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK },
		{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES },
		{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS },
		{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS },
		{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES },
	};
	struct perf_event_attr frontend_attrs[] = {
		{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND },
	};
	struct perf_event_attr backend_attrs[] = {
		{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND },
	};
	struct perf_event_attr default_attrs1[] = {
		{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS },
		{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
		{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES },
	};

	/*
	 * Detailed stats (-d), covering the L1 and last level data caches:
	 */
	struct perf_event_attr detailed_attrs[] = {
		{ .type = PERF_TYPE_HW_CACHE,
		  .config = PERF_COUNT_HW_CACHE_L1D << 0 |
			   (PERF_COUNT_HW_CACHE_OP_READ << 8) |
			   (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
		{ .type = PERF_TYPE_HW_CACHE,
		  .config = PERF_COUNT_HW_CACHE_L1D << 0 |
			   (PERF_COUNT_HW_CACHE_OP_READ << 8) |
			   (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
		{ .type = PERF_TYPE_HW_CACHE,
		  .config = PERF_COUNT_HW_CACHE_LL << 0 |
			   (PERF_COUNT_HW_CACHE_OP_READ << 8) |
			   (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
		{ .type = PERF_TYPE_HW_CACHE,
		  .config = PERF_COUNT_HW_CACHE_LL << 0 |
			   (PERF_COUNT_HW_CACHE_OP_READ << 8) |
			   (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
	};

	/*
	 * Very detailed stats (-d -d), covering the instruction cache and the
	 * TLB caches:
	 */
	struct perf_event_attr very_detailed_attrs[] = {
		{ .type = PERF_TYPE_HW_CACHE,
		  .config = PERF_COUNT_HW_CACHE_L1I << 0 |
			   (PERF_COUNT_HW_CACHE_OP_READ << 8) |
			   (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
		{ .type = PERF_TYPE_HW_CACHE,
		  .config = PERF_COUNT_HW_CACHE_L1I << 0 |
			   (PERF_COUNT_HW_CACHE_OP_READ << 8) |
			   (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
		{ .type = PERF_TYPE_HW_CACHE,
		  .config = PERF_COUNT_HW_CACHE_DTLB << 0 |
			   (PERF_COUNT_HW_CACHE_OP_READ << 8) |
			   (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
		{ .type = PERF_TYPE_HW_CACHE,
		  .config = PERF_COUNT_HW_CACHE_DTLB << 0 |
			   (PERF_COUNT_HW_CACHE_OP_READ << 8) |
			   (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
		{ .type = PERF_TYPE_HW_CACHE,
		  .config = PERF_COUNT_HW_CACHE_ITLB << 0 |
			   (PERF_COUNT_HW_CACHE_OP_READ << 8) |
			   (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
		{ .type = PERF_TYPE_HW_CACHE,
		  .config = PERF_COUNT_HW_CACHE_ITLB << 0 |
			   (PERF_COUNT_HW_CACHE_OP_READ << 8) |
			   (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
	};

	/*
	 * Very, very detailed stats (-d -d -d), adding prefetch events:
	 */
	struct perf_event_attr very_very_detailed_attrs[] = {
		{ .type = PERF_TYPE_HW_CACHE,
		  .config = PERF_COUNT_HW_CACHE_L1D << 0 |
			   (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) |
			   (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
		{ .type = PERF_TYPE_HW_CACHE,
		  .config = PERF_COUNT_HW_CACHE_L1D << 0 |
			   (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) |
			   (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
	};
	struct parse_events_error errinfo;

	/* Set attrs if no event is selected and !null_run: */
	if (stat_config.null_run)
		return 0;

	if (transaction_run) {
		/* Handle -T as -M transaction. Once platform specific metrics
		 * support has been added to the json files, all architectures
		 * will use this approach. To determine transaction support
		 * on an architecture test for such a metric name.
		 */
		if (metricgroup__has_metric("transaction")) {
			struct option opt = { .value = &evsel_list };

			return metricgroup__parse_groups(&opt, "transaction",
							 &stat_config.metric_events);
		}

		if (pmu_have_event("cpu", "cycles-ct") &&
		    pmu_have_event("cpu", "el-start"))
			err = parse_events(evsel_list, transaction_attrs,
					   &errinfo);
		else
			err = parse_events(evsel_list,
					   transaction_limited_attrs,
					   &errinfo);
		if (err) {
			fprintf(stderr, "Cannot set up transaction events\n");
			parse_events_print_error(&errinfo, transaction_attrs);
			return -1;
		}
		return 0;
	}

	if (smi_cost) {
		int smi;

		if (sysfs__read_int(FREEZE_ON_SMI_PATH, &smi) < 0) {
			fprintf(stderr, "freeze_on_smi is not supported.\n");
			return -1;
		}

		if (!smi) {
			if (sysfs__write_int(FREEZE_ON_SMI_PATH, 1) < 0) {
				fprintf(stderr, "Failed to set freeze_on_smi.\n");
				return -1;
			}
			smi_reset = true;
		}

		if (pmu_have_event("msr", "aperf") &&
		    pmu_have_event("msr", "smi")) {
			if (!force_metric_only)
				stat_config.metric_only = true;
			err = parse_events(evsel_list, smi_cost_attrs, &errinfo);
		} else {
			fprintf(stderr, "Measuring SMI cost needs "
				"msr/aperf/, msr/smi/ and cpu/cycles/ support\n");
			parse_events_print_error(&errinfo, smi_cost_attrs);
			return -1;
		}
		if (err) {
			fprintf(stderr, "Cannot set up SMI cost events\n");
			return -1;
		}
		return 0;
	}

	if (topdown_run) {
		char *str = NULL;
		bool warn = false;

		if (stat_config.aggr_mode != AGGR_GLOBAL &&
		    stat_config.aggr_mode != AGGR_CORE) {
			pr_err("top down event configuration requires --per-core mode\n");
			return -1;
		}
		stat_config.aggr_mode = AGGR_CORE;
		if (nr_cgroups || !target__has_cpu(&target)) {
			pr_err("top down event configuration requires system-wide mode (-a)\n");
			return -1;
		}

		if (!force_metric_only)
			stat_config.metric_only = true;
		if (topdown_filter_events(topdown_attrs, &str,
					  arch_topdown_check_group(&warn)) < 0) {
			pr_err("Out of memory\n");
			return -1;
		}
		if (topdown_attrs[0] && str) {
			if (warn)
				arch_topdown_group_warn();
			err = parse_events(evsel_list, str, &errinfo);
			if (err) {
				fprintf(stderr,
					"Cannot set up top down events %s: %d\n",
					str, err);
				free(str);
				parse_events_print_error(&errinfo, str);
				return -1;
			}
		} else {
			fprintf(stderr, "System does not support topdown\n");
			return -1;
		}
		free(str);
	}

	if (!evsel_list->nr_entries) {
		if (target__has_cpu(&target))
			default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK;

		if (perf_evlist__add_default_attrs(evsel_list, default_attrs0) < 0)
			return -1;
		if (pmu_have_event("cpu", "stalled-cycles-frontend")) {
			if (perf_evlist__add_default_attrs(evsel_list,
							   frontend_attrs) < 0)
				return -1;
		}
		if (pmu_have_event("cpu", "stalled-cycles-backend")) {
			if (perf_evlist__add_default_attrs(evsel_list,
							   backend_attrs) < 0)
				return -1;
		}
		if (perf_evlist__add_default_attrs(evsel_list, default_attrs1) < 0)
			return -1;
	}

	/* Detailed events get appended to the event list: */

	if (detailed_run < 1)
		return 0;

	/* Append detailed run extra attributes: */
	if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0)
		return -1;

	if (detailed_run < 2)
		return 0;

	/* Append very detailed run extra attributes: */
	if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0)
		return -1;

	if (detailed_run < 3)
		return 0;

	/* Append very, very detailed run extra attributes: */
	return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs);
}
static const char * const stat_record_usage[] = {
	"perf stat record [<options>]",
	NULL,
};

static void init_features(struct perf_session *session)
{
	int feat;

	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
	perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
	perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
	perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
}
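/*
 * init_features() first enables every header feature, then clears the ones
 * that make no sense for counting mode: perf stat records no samples, so
 * build-ids, tracing data, branch stacks and auxtrace have nothing to
 * describe.
 */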
static int __cmd_record(int argc, const char **argv)
{
	struct perf_session *session;
	struct perf_data *data = &perf_stat.data;

	argc = parse_options(argc, argv, stat_options, stat_record_usage,
			     PARSE_OPT_STOP_AT_NON_OPTION);

	if (output_name)
		data->file.path = output_name;

	if (stat_config.run_count != 1 || forever) {
		pr_err("Cannot use -r option with perf stat record.\n");
		return -1;
	}

	session = perf_session__new(data, false, NULL);
	if (session == NULL) {
		pr_err("Perf session creation failed.\n");
		return -1;
	}

	init_features(session);

	session->evlist   = evsel_list;
	perf_stat.session = session;
	perf_stat.record  = true;
	return argc;
}

static int process_stat_round_event(struct perf_session *session,
				    union perf_event *event)
{
	struct stat_round_event *stat_round = &event->stat_round;
	struct perf_evsel *counter;
	struct timespec tsh, *ts = NULL;
	const char **argv = session->header.env.cmdline_argv;
	int argc = session->header.env.nr_cmdline;

	evlist__for_each_entry(evsel_list, counter)
		perf_stat_process_counter(&stat_config, counter);

	if (stat_round->type == PERF_STAT_ROUND_TYPE__FINAL)
		update_stats(&walltime_nsecs_stats, stat_round->time);

	if (stat_config.interval && stat_round->time) {
		tsh.tv_sec  = stat_round->time / NSEC_PER_SEC;
		tsh.tv_nsec = stat_round->time % NSEC_PER_SEC;
		ts = &tsh;
	}

	print_counters(ts, argc, argv);
	return 0;
}

static
int process_stat_config_event(struct perf_session *session,
			      union perf_event *event)
{
	struct perf_tool *tool = session->tool;
	struct perf_stat *st = container_of(tool, struct perf_stat, tool);

	perf_event__read_stat_config(&stat_config, &event->stat_config);

	if (cpu_map__empty(st->cpus)) {
		if (st->aggr_mode != AGGR_UNSET)
			pr_warning("warning: processing task data, aggregation mode not set\n");
		return 0;
	}

	if (st->aggr_mode != AGGR_UNSET)
		stat_config.aggr_mode = st->aggr_mode;

	if (perf_stat.data.is_pipe)
		perf_stat_init_aggr_mode();
	else
		perf_stat_init_aggr_mode_file(st);

	return 0;
}

static int set_maps(struct perf_stat *st)
{
	if (!st->cpus || !st->threads)
		return 0;

	if (WARN_ONCE(st->maps_allocated, "stats double allocation\n"))
		return -EINVAL;

	perf_evlist__set_maps(evsel_list, st->cpus, st->threads);

	if (perf_evlist__alloc_stats(evsel_list, true))
		return -ENOMEM;

	st->maps_allocated = true;
	return 0;
}

static
int process_thread_map_event(struct perf_session *session,
			     union perf_event *event)
{
	struct perf_tool *tool = session->tool;
	struct perf_stat *st = container_of(tool, struct perf_stat, tool);

	if (st->threads) {
		pr_warning("Extra thread map event, ignoring.\n");
		return 0;
	}

	st->threads = thread_map__new_event(&event->thread_map);
	if (!st->threads)
		return -ENOMEM;

	return set_maps(st);
}

static
int process_cpu_map_event(struct perf_session *session,
			  union perf_event *event)
{
	struct perf_tool *tool = session->tool;
	struct perf_stat *st = container_of(tool, struct perf_stat, tool);
	struct cpu_map *cpus;

	if (st->cpus) {
		pr_warning("Extra cpu map event, ignoring.\n");
		return 0;
	}

	cpus = cpu_map__new_data(&event->cpu_map.data);
	if (!cpus)
		return -ENOMEM;

	st->cpus = cpus;
	return set_maps(st);
}

static int runtime_stat_new(struct perf_stat_config *config, int nthreads)
{
	int i;

	config->stats = calloc(nthreads, sizeof(struct runtime_stat));
	if (!config->stats)
		return -1;

	config->stats_num = nthreads;

	for (i = 0; i < nthreads; i++)
		runtime_stat__init(&config->stats[i]);

	return 0;
}
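/*
 * One runtime_stat per monitored thread keeps the shadow metric state
 * (IPC, cache-miss rates, ...) separate when aggregating per thread
 * (--per-thread), instead of folding every thread into one global state.
 */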
static void runtime_stat_delete(struct perf_stat_config *config)
{
	int i;

	if (!config->stats)
		return;

	for (i = 0; i < config->stats_num; i++)
		runtime_stat__exit(&config->stats[i]);

	free(config->stats);
}

static const char * const stat_report_usage[] = {
	"perf stat report [<options>]",
	NULL,
};

static struct perf_stat perf_stat = {
	.tool = {
		.attr		= perf_event__process_attr,
		.event_update	= perf_event__process_event_update,
		.thread_map	= process_thread_map_event,
		.cpu_map	= process_cpu_map_event,
		.stat_config	= process_stat_config_event,
		.stat		= perf_event__process_stat_event,
		.stat_round	= process_stat_round_event,
	},
	.aggr_mode = AGGR_UNSET,
};

static int __cmd_report(int argc, const char **argv)
{
	struct perf_session *session;
	const struct option options[] = {
	OPT_STRING('i', "input", &input_name, "file", "input file name"),
	OPT_SET_UINT(0, "per-socket", &perf_stat.aggr_mode,
		     "aggregate counts per processor socket", AGGR_SOCKET),
	OPT_SET_UINT(0, "per-core", &perf_stat.aggr_mode,
		     "aggregate counts per physical processor core", AGGR_CORE),
	OPT_SET_UINT('A', "no-aggr", &perf_stat.aggr_mode,
		     "disable CPU count aggregation", AGGR_NONE),
	OPT_END()
	};
	struct stat st;
	int ret;

	argc = parse_options(argc, argv, options, stat_report_usage, 0);

	if (!input_name || !strlen(input_name)) {
		if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode))
			input_name = "-";
		else
			input_name = "perf.data";
	}

	perf_stat.data.file.path = input_name;
	perf_stat.data.mode      = PERF_DATA_MODE_READ;

	session = perf_session__new(&perf_stat.data, false, &perf_stat.tool);
	if (session == NULL)
		return -1;

	perf_stat.session  = session;
	stat_config.output = stderr;
	evsel_list         = session->evlist;

	ret = perf_session__process_events(session);
	if (ret)
		return ret;

	perf_session__delete(session);
	return 0;
}

static void setup_system_wide(int forks)
{
	/*
	 * Make system wide (-a) the default target if
	 * no target was specified and one of the following
	 * conditions is met:
	 *
	 * - there's no workload specified
	 * - there is a workload specified, but all requested
	 *   events are system wide events
	 */
	if (!target__none(&target))
		return;

	if (!forks)
		target.system_wide = true;
	else {
		struct perf_evsel *counter;

		evlist__for_each_entry(evsel_list, counter) {
			if (!counter->system_wide)
				return;
		}

		if (evsel_list->nr_entries)
			target.system_wide = true;
	}
}
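/*
 * Example (illustrative, assuming the requested events carry the
 * system_wide flag, as events from CPU-less PMUs such as uncore typically
 * do): "perf stat -e <such-an-event> -- sleep 1" falls through to the else
 * branch above and implicitly behaves like -a.
 */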
  1329. int cmd_stat(int argc, const char **argv)
  1330. {
  1331. const char * const stat_usage[] = {
  1332. "perf stat [<options>] [<command>]",
  1333. NULL
  1334. };
  1335. int status = -EINVAL, run_idx;
  1336. const char *mode;
  1337. FILE *output = stderr;
  1338. unsigned int interval, timeout;
  1339. const char * const stat_subcommands[] = { "record", "report" };
  1340. setlocale(LC_ALL, "");
  1341. evsel_list = perf_evlist__new();
  1342. if (evsel_list == NULL)
  1343. return -ENOMEM;
  1344. parse_events__shrink_config_terms();
  1345. argc = parse_options_subcommand(argc, argv, stat_options, stat_subcommands,
  1346. (const char **) stat_usage,
  1347. PARSE_OPT_STOP_AT_NON_OPTION);
  1348. perf_stat__collect_metric_expr(evsel_list);
  1349. perf_stat__init_shadow_stats();
  1350. if (stat_config.csv_sep) {
  1351. stat_config.csv_output = true;
  1352. if (!strcmp(stat_config.csv_sep, "\\t"))
  1353. stat_config.csv_sep = "\t";
  1354. } else
  1355. stat_config.csv_sep = DEFAULT_SEPARATOR;
  1356. if (argc && !strncmp(argv[0], "rec", 3)) {
  1357. argc = __cmd_record(argc, argv);
  1358. if (argc < 0)
  1359. return -1;
  1360. } else if (argc && !strncmp(argv[0], "rep", 3))
  1361. return __cmd_report(argc, argv);
  1362. interval = stat_config.interval;
  1363. timeout = stat_config.timeout;
  1364. /*
  1365. * For record command the -o is already taken care of.
  1366. */
  1367. if (!STAT_RECORD && output_name && strcmp(output_name, "-"))
  1368. output = NULL;
  1369. if (output_name && output_fd) {
  1370. fprintf(stderr, "cannot use both --output and --log-fd\n");
  1371. parse_options_usage(stat_usage, stat_options, "o", 1);
  1372. parse_options_usage(NULL, stat_options, "log-fd", 0);
  1373. goto out;
  1374. }
  1375. if (stat_config.metric_only && stat_config.aggr_mode == AGGR_THREAD) {
  1376. fprintf(stderr, "--metric-only is not supported with --per-thread\n");
  1377. goto out;
  1378. }
  1379. if (stat_config.metric_only && stat_config.run_count > 1) {
  1380. fprintf(stderr, "--metric-only is not supported with -r\n");
  1381. goto out;
  1382. }
  1383. if (stat_config.walltime_run_table && stat_config.run_count <= 1) {
  1384. fprintf(stderr, "--table is only supported with -r\n");
  1385. parse_options_usage(stat_usage, stat_options, "r", 1);
  1386. parse_options_usage(NULL, stat_options, "table", 0);
  1387. goto out;
  1388. }
  1389. if (output_fd < 0) {
  1390. fprintf(stderr, "argument to --log-fd must be a > 0\n");
  1391. parse_options_usage(stat_usage, stat_options, "log-fd", 0);
  1392. goto out;
  1393. }

	if (!output) {
		struct timespec tm;

		mode = append_file ? "a" : "w";

		output = fopen(output_name, mode);
		if (!output) {
			perror("failed to create output file");
			return -1;
		}
		clock_gettime(CLOCK_REALTIME, &tm);
		fprintf(output, "# started on %s\n", ctime(&tm.tv_sec));
	} else if (output_fd > 0) {
		mode = append_file ? "a" : "w";
		output = fdopen(output_fd, mode);
		if (!output) {
			perror("Failed opening logfd");
			return -errno;
		}
	}

	stat_config.output = output;
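
	/*
	 * At this point output is stderr by default, a file when -o was given
	 * (stamped with a "# started on <date>" line), or a stream wrapping
	 * the --log-fd descriptor; all subsequent printing goes through
	 * stat_config.output.
	 */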

	/*
	 * let the spreadsheet do the pretty-printing
	 */
	if (stat_config.csv_output) {
		/* User explicitly passed -B? */
		if (big_num_opt == 1) {
			fprintf(stderr, "-B option not supported with -x\n");
			parse_options_usage(stat_usage, stat_options, "B", 1);
			parse_options_usage(NULL, stat_options, "x", 1);
			goto out;
		} else /* Nope, so disable big number formatting */
			stat_config.big_num = false;
	} else if (big_num_opt == 0) /* User passed --no-big-num */
		stat_config.big_num = false;
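
	/*
	 * For example: "perf stat -B -x, ..." is rejected above because the
	 * thousands separators of big-number formatting would collide with
	 * the CSV field separator, whereas "--no-big-num" simply turns the
	 * separators off.
	 */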

	setup_system_wide(argc);

	/*
	 * Display user/system times only for a single run and when there is
	 * a forked workload to measure (i.e. no -p/-t/-C/-a target given).
	 */
	if ((stat_config.run_count == 1) && target__none(&target))
		stat_config.ru_display = true;

	if (stat_config.run_count < 0) {
		pr_err("Run count must be a positive number\n");
		parse_options_usage(stat_usage, stat_options, "r", 1);
		goto out;
	} else if (stat_config.run_count == 0) {
		forever = true;
		stat_config.run_count = 1;
	}
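
	/*
	 * For example: "perf stat -r 0 -- true" loops forever, printing and
	 * resetting the counters after each run until interrupted.
	 */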

	if (stat_config.walltime_run_table) {
		stat_config.walltime_run = zalloc(stat_config.run_count * sizeof(stat_config.walltime_run[0]));
		if (!stat_config.walltime_run) {
			pr_err("failed to setup -r option");
			goto out;
		}
	}

	if ((stat_config.aggr_mode == AGGR_THREAD) &&
	    !target__has_task(&target)) {
		if (!target.system_wide || target.cpu_list) {
			fprintf(stderr, "The --per-thread option is only "
				"available when monitoring via -p, -t or -a "
				"options, or with --per-thread alone.\n");
			parse_options_usage(NULL, stat_options, "p", 1);
			parse_options_usage(NULL, stat_options, "t", 1);
			goto out;
		}
	}

	/*
	 * no_aggr and cgroup are for system-wide only;
	 * --per-thread is aggregated per thread, we don't mix it with CPU mode.
	 */
	if (((stat_config.aggr_mode != AGGR_GLOBAL &&
	      stat_config.aggr_mode != AGGR_THREAD) || nr_cgroups) &&
	    !target__has_cpu(&target)) {
		fprintf(stderr, "both cgroup and no-aggregation "
			"modes are only available in system-wide mode\n");
		parse_options_usage(stat_usage, stat_options, "G", 1);
		parse_options_usage(NULL, stat_options, "A", 1);
		parse_options_usage(NULL, stat_options, "a", 1);
		goto out;
	}
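
	/*
	 * For example: "perf stat -A -a -- sleep 1" (per-CPU counts,
	 * system-wide) passes this check, while "perf stat -A -- sleep 1"
	 * or a -G cgroup run without -a/-C is rejected here.
	 */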

	if (add_default_attributes())
		goto out;

	target__validate(&target);

	if ((stat_config.aggr_mode == AGGR_THREAD) && (target.system_wide))
		target.per_thread = true;

	if (perf_evlist__create_maps(evsel_list, &target) < 0) {
		if (target__has_task(&target)) {
			pr_err("Problems finding threads to monitor\n");
			parse_options_usage(stat_usage, stat_options, "p", 1);
			parse_options_usage(NULL, stat_options, "t", 1);
		} else if (target__has_cpu(&target)) {
			perror("failed to parse CPUs map");
			parse_options_usage(stat_usage, stat_options, "C", 1);
			parse_options_usage(NULL, stat_options, "a", 1);
		}
		goto out;
	}

	/*
	 * Initialize thread_map with comm names, so we can print them out
	 * in the output.
	 */
	if (stat_config.aggr_mode == AGGR_THREAD) {
		thread_map__read_comms(evsel_list->threads);
		if (target.system_wide) {
			if (runtime_stat_new(&stat_config,
					     thread_map__nr(evsel_list->threads))) {
				goto out;
			}
		}
	}

	if (stat_config.times && interval)
		interval_count = true;
	else if (stat_config.times && !interval) {
		pr_err("interval-count option should be used together with "
		       "interval-print.\n");
		parse_options_usage(stat_usage, stat_options, "interval-count", 0);
		parse_options_usage(stat_usage, stat_options, "I", 1);
		goto out;
	}

	if (timeout && timeout < 100) {
		if (timeout < 10) {
			pr_err("timeout must be >= 10ms.\n");
			parse_options_usage(stat_usage, stat_options, "timeout", 0);
			goto out;
		} else
			pr_warning("timeout < 100ms. "
				   "The overhead percentage could be high in some cases. "
				   "Please proceed with caution.\n");
	}
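
	/*
	 * For example: "--timeout 5" is rejected (below the 10ms minimum),
	 * "--timeout 50" runs but warns about measurement overhead, and
	 * "--timeout 500" proceeds silently.
	 */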

	if (timeout && interval) {
		pr_err("timeout option is not supported with interval-print.\n");
		parse_options_usage(stat_usage, stat_options, "timeout", 0);
		parse_options_usage(stat_usage, stat_options, "I", 1);
		goto out;
	}

	if (perf_evlist__alloc_stats(evsel_list, interval))
		goto out;

	if (perf_stat_init_aggr_mode())
		goto out;

	/*
	 * Set sample_type to PERF_SAMPLE_IDENTIFIER, which should be harmless
	 * and keeps older tools from showing confusing messages.
	 *
	 * However, for pipe sessions we need to keep it zero, because
	 * script's perf_evsel__check_attr() is triggered by
	 * attr->sample_type != 0, and we can't run it on stat sessions.
	 */
	stat_config.identifier = !(STAT_RECORD && perf_stat.data.is_pipe);

	/*
	 * We don't want to block the signals - that would cause child tasks
	 * to inherit that and Ctrl-C would not work. What we want is for
	 * Ctrl-C to work in the exec()-ed task, while being ignored by perf
	 * stat itself:
	 */
	atexit(sig_atexit);
	if (!forever)
		signal(SIGINT, skip_signal);
	signal(SIGCHLD, skip_signal);
	signal(SIGALRM, skip_signal);
	signal(SIGABRT, skip_signal);

	status = 0;

	for (run_idx = 0; forever || run_idx < stat_config.run_count; run_idx++) {
		if (stat_config.run_count != 1 && verbose > 0)
			fprintf(output, "[ perf stat: executing run #%d ... ]\n",
				run_idx + 1);

		status = run_perf_stat(argc, argv, run_idx);
		if (forever && status != -1) {
			print_counters(NULL, argc, argv);
			perf_stat__reset_stats();
		}
	}
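
	/*
	 * For example: with "-r 3" the workload runs three times and the
	 * counts aggregated across all runs are printed once after the
	 * loop; in forever mode each iteration prints and then resets its
	 * stats instead.
	 */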

	if (!forever && status != -1 && !interval)
		print_counters(NULL, argc, argv);

	if (STAT_RECORD) {
		/*
		 * We synthesize the kernel mmap record just so that older tools
		 * don't emit warnings about not being able to resolve symbols
		 * due to /proc/sys/kernel/kptr_restrict settings and instead
		 * provide a saner message about no samples being in the
		 * perf.data file.
		 *
		 * This also serves to suppress a warning about f_header.data.size == 0
		 * in header.c at the moment 'perf stat record' gets introduced, which
		 * is not really needed once we start adding the stat specific
		 * PERF_RECORD_ records, but the need to suppress the kptr_restrict
		 * messages in older tools remains. -acme
		 */
		int fd = perf_data__fd(&perf_stat.data);
		int err = perf_event__synthesize_kernel_mmap((void *)&perf_stat,
							     process_synthesized_event,
							     &perf_stat.session->machines.host);
		if (err) {
			pr_warning("Couldn't synthesize the kernel mmap record, harmless, "
				   "older tools may produce warnings about this file.\n");
		}

		if (!interval) {
			if (WRITE_STAT_ROUND_EVENT(walltime_nsecs_stats.max, FINAL))
				pr_err("failed to write stat round event\n");
		}
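
		/*
		 * In file (non-pipe) mode the on-disk header still records a
		 * zero data size, so account for everything written and
		 * rewrite the header before closing the session.
		 */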
		if (!perf_stat.data.is_pipe) {
			perf_stat.session->header.data_size += perf_stat.bytes_written;
			perf_session__write_header(perf_stat.session, evsel_list, fd, true);
		}

		perf_session__delete(perf_stat.session);
	}

	perf_stat__exit_aggr_mode();
	perf_evlist__free_stats(evsel_list);
out:
	free(stat_config.walltime_run);

	if (smi_cost && smi_reset)
		sysfs__write_int(FREEZE_ON_SMI_PATH, 0);

	perf_evlist__delete(evsel_list);

	runtime_stat_delete(&stat_config);

	return status;
}