/*
 * stat-shadow.c - shadow-stat bookkeeping used by 'perf stat' to print
 * derived metrics (IPC, miss ratios, GHz, transaction stats, ...).
 */
  1. #include <stdio.h>
  2. #include "evsel.h"
  3. #include "stat.h"
  4. #include "color.h"
  5. enum {
  6. CTX_BIT_USER = 1 << 0,
  7. CTX_BIT_KERNEL = 1 << 1,
  8. CTX_BIT_HV = 1 << 2,
  9. CTX_BIT_HOST = 1 << 3,
  10. CTX_BIT_IDLE = 1 << 4,
  11. CTX_BIT_MAX = 1 << 5,
  12. };
  13. #define NUM_CTX CTX_BIT_MAX
  14. static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
  15. static struct stats runtime_cycles_stats[NUM_CTX][MAX_NR_CPUS];
  16. static struct stats runtime_stalled_cycles_front_stats[NUM_CTX][MAX_NR_CPUS];
  17. static struct stats runtime_stalled_cycles_back_stats[NUM_CTX][MAX_NR_CPUS];
  18. static struct stats runtime_branches_stats[NUM_CTX][MAX_NR_CPUS];
  19. static struct stats runtime_cacherefs_stats[NUM_CTX][MAX_NR_CPUS];
  20. static struct stats runtime_l1_dcache_stats[NUM_CTX][MAX_NR_CPUS];
  21. static struct stats runtime_l1_icache_stats[NUM_CTX][MAX_NR_CPUS];
  22. static struct stats runtime_ll_cache_stats[NUM_CTX][MAX_NR_CPUS];
  23. static struct stats runtime_itlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
  24. static struct stats runtime_dtlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
  25. static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS];
  26. static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS];
  27. static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS];
  28. struct stats walltime_nsecs_stats;
  29. static int evsel_context(struct perf_evsel *evsel)
  30. {
  31. int ctx = 0;
  32. if (evsel->attr.exclude_kernel)
  33. ctx |= CTX_BIT_KERNEL;
  34. if (evsel->attr.exclude_user)
  35. ctx |= CTX_BIT_USER;
  36. if (evsel->attr.exclude_hv)
  37. ctx |= CTX_BIT_HV;
  38. if (evsel->attr.exclude_host)
  39. ctx |= CTX_BIT_HOST;
  40. if (evsel->attr.exclude_idle)
  41. ctx |= CTX_BIT_IDLE;
  42. return ctx;
  43. }
  44. void perf_stat__reset_shadow_stats(void)
  45. {
  46. memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats));
  47. memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats));
  48. memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats));
  49. memset(runtime_stalled_cycles_back_stats, 0, sizeof(runtime_stalled_cycles_back_stats));
  50. memset(runtime_branches_stats, 0, sizeof(runtime_branches_stats));
  51. memset(runtime_cacherefs_stats, 0, sizeof(runtime_cacherefs_stats));
  52. memset(runtime_l1_dcache_stats, 0, sizeof(runtime_l1_dcache_stats));
  53. memset(runtime_l1_icache_stats, 0, sizeof(runtime_l1_icache_stats));
  54. memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats));
  55. memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats));
  56. memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats));
  57. memset(runtime_cycles_in_tx_stats, 0,
  58. sizeof(runtime_cycles_in_tx_stats));
  59. memset(runtime_transaction_stats, 0,
  60. sizeof(runtime_transaction_stats));
  61. memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats));
  62. memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
  63. }
  64. /*
  65. * Update various tracking values we maintain to print
  66. * more semantic information such as miss/hit ratios,
  67. * instruction rates, etc:
  68. */
  69. void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count,
  70. int cpu)
  71. {
  72. int ctx = evsel_context(counter);
  73. if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
  74. update_stats(&runtime_nsecs_stats[cpu], count[0]);
  75. else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
  76. update_stats(&runtime_cycles_stats[ctx][cpu], count[0]);
  77. else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
  78. update_stats(&runtime_cycles_in_tx_stats[ctx][cpu], count[0]);
  79. else if (perf_stat_evsel__is(counter, TRANSACTION_START))
  80. update_stats(&runtime_transaction_stats[ctx][cpu], count[0]);
  81. else if (perf_stat_evsel__is(counter, ELISION_START))
  82. update_stats(&runtime_elision_stats[ctx][cpu], count[0]);
  83. else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
  84. update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count[0]);
  85. else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
  86. update_stats(&runtime_stalled_cycles_back_stats[ctx][cpu], count[0]);
  87. else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
  88. update_stats(&runtime_branches_stats[ctx][cpu], count[0]);
  89. else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
  90. update_stats(&runtime_cacherefs_stats[ctx][cpu], count[0]);
  91. else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
  92. update_stats(&runtime_l1_dcache_stats[ctx][cpu], count[0]);
  93. else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
  94. update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]);
  95. else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
  96. update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]);
  97. else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
  98. update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]);
  99. else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
  100. update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]);
  101. }
  102. /* used for get_ratio_color() */
  103. enum grc_type {
  104. GRC_STALLED_CYCLES_FE,
  105. GRC_STALLED_CYCLES_BE,
  106. GRC_CACHE_MISSES,
  107. GRC_MAX_NR
  108. };
  109. static const char *get_ratio_color(enum grc_type type, double ratio)
  110. {
  111. static const double grc_table[GRC_MAX_NR][3] = {
  112. [GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
  113. [GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
  114. [GRC_CACHE_MISSES] = { 20.0, 10.0, 5.0 },
  115. };
  116. const char *color = PERF_COLOR_NORMAL;
  117. if (ratio > grc_table[type][0])
  118. color = PERF_COLOR_RED;
  119. else if (ratio > grc_table[type][1])
  120. color = PERF_COLOR_MAGENTA;
  121. else if (ratio > grc_table[type][2])
  122. color = PERF_COLOR_YELLOW;
  123. return color;
  124. }
  125. static void print_stalled_cycles_frontend(int cpu,
  126. struct perf_evsel *evsel
  127. __maybe_unused, double avg,
  128. struct perf_stat_output_ctx *out)
  129. {
  130. double total, ratio = 0.0;
  131. const char *color;
  132. int ctx = evsel_context(evsel);
  133. total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
  134. if (total)
  135. ratio = avg / total * 100.0;
  136. color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);
  137. if (ratio)
  138. out->print_metric(out->ctx, color, "%7.2f%%", "frontend cycles idle",
  139. ratio);
  140. else
  141. out->print_metric(out->ctx, NULL, NULL, "frontend cycles idle", 0);
  142. }
  143. static void print_stalled_cycles_backend(int cpu,
  144. struct perf_evsel *evsel
  145. __maybe_unused, double avg,
  146. struct perf_stat_output_ctx *out)
  147. {
  148. double total, ratio = 0.0;
  149. const char *color;
  150. int ctx = evsel_context(evsel);
  151. total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
  152. if (total)
  153. ratio = avg / total * 100.0;
  154. color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);
  155. out->print_metric(out->ctx, color, "%6.2f%%", "backend cycles idle", ratio);
  156. }
  157. static void print_branch_misses(int cpu,
  158. struct perf_evsel *evsel __maybe_unused,
  159. double avg,
  160. struct perf_stat_output_ctx *out)
  161. {
  162. double total, ratio = 0.0;
  163. const char *color;
  164. int ctx = evsel_context(evsel);
  165. total = avg_stats(&runtime_branches_stats[ctx][cpu]);
  166. if (total)
  167. ratio = avg / total * 100.0;
  168. color = get_ratio_color(GRC_CACHE_MISSES, ratio);
  169. out->print_metric(out->ctx, color, "%7.2f%%", "of all branches", ratio);
  170. }
  171. static void print_l1_dcache_misses(int cpu,
  172. struct perf_evsel *evsel __maybe_unused,
  173. double avg,
  174. struct perf_stat_output_ctx *out)
  175. {
  176. double total, ratio = 0.0;
  177. const char *color;
  178. int ctx = evsel_context(evsel);
  179. total = avg_stats(&runtime_l1_dcache_stats[ctx][cpu]);
  180. if (total)
  181. ratio = avg / total * 100.0;
  182. color = get_ratio_color(GRC_CACHE_MISSES, ratio);
  183. out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-dcache hits", ratio);
  184. }
  185. static void print_l1_icache_misses(int cpu,
  186. struct perf_evsel *evsel __maybe_unused,
  187. double avg,
  188. struct perf_stat_output_ctx *out)
  189. {
  190. double total, ratio = 0.0;
  191. const char *color;
  192. int ctx = evsel_context(evsel);
  193. total = avg_stats(&runtime_l1_icache_stats[ctx][cpu]);
  194. if (total)
  195. ratio = avg / total * 100.0;
  196. color = get_ratio_color(GRC_CACHE_MISSES, ratio);
  197. out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-icache hits", ratio);
  198. }
  199. static void print_dtlb_cache_misses(int cpu,
  200. struct perf_evsel *evsel __maybe_unused,
  201. double avg,
  202. struct perf_stat_output_ctx *out)
  203. {
  204. double total, ratio = 0.0;
  205. const char *color;
  206. int ctx = evsel_context(evsel);
  207. total = avg_stats(&runtime_dtlb_cache_stats[ctx][cpu]);
  208. if (total)
  209. ratio = avg / total * 100.0;
  210. color = get_ratio_color(GRC_CACHE_MISSES, ratio);
  211. out->print_metric(out->ctx, color, "%7.2f%%", "of all dTLB cache hits", ratio);
  212. }
  213. static void print_itlb_cache_misses(int cpu,
  214. struct perf_evsel *evsel __maybe_unused,
  215. double avg,
  216. struct perf_stat_output_ctx *out)
  217. {
  218. double total, ratio = 0.0;
  219. const char *color;
  220. int ctx = evsel_context(evsel);
  221. total = avg_stats(&runtime_itlb_cache_stats[ctx][cpu]);
  222. if (total)
  223. ratio = avg / total * 100.0;
  224. color = get_ratio_color(GRC_CACHE_MISSES, ratio);
  225. out->print_metric(out->ctx, color, "%7.2f%%", "of all iTLB cache hits", ratio);
  226. }
  227. static void print_ll_cache_misses(int cpu,
  228. struct perf_evsel *evsel __maybe_unused,
  229. double avg,
  230. struct perf_stat_output_ctx *out)
  231. {
  232. double total, ratio = 0.0;
  233. const char *color;
  234. int ctx = evsel_context(evsel);
  235. total = avg_stats(&runtime_ll_cache_stats[ctx][cpu]);
  236. if (total)
  237. ratio = avg / total * 100.0;
  238. color = get_ratio_color(GRC_CACHE_MISSES, ratio);
  239. out->print_metric(out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio);
  240. }
  241. void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
  242. double avg, int cpu,
  243. struct perf_stat_output_ctx *out)
  244. {
  245. void *ctxp = out->ctx;
  246. print_metric_t print_metric = out->print_metric;
  247. double total, ratio = 0.0, total2;
  248. int ctx = evsel_context(evsel);
  249. if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
  250. total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
  251. if (total) {
  252. ratio = avg / total;
  253. print_metric(ctxp, NULL, "%7.2f ",
  254. "insn per cycle", ratio);
  255. } else {
  256. print_metric(ctxp, NULL, NULL, "insn per cycle", 0);
  257. }
  258. total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]);
  259. total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu]));
  260. out->new_line(ctxp);
  261. if (total && avg) {
  262. ratio = total / avg;
  263. print_metric(ctxp, NULL, "%7.2f ",
  264. "stalled cycles per insn",
  265. ratio);
  266. } else {
  267. print_metric(ctxp, NULL, NULL,
  268. "stalled cycles per insn", 0);
  269. }
  270. } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
  271. if (runtime_branches_stats[ctx][cpu].n != 0)
  272. print_branch_misses(cpu, evsel, avg, out);
  273. else
  274. print_metric(ctxp, NULL, NULL, "of all branches", 0);
  275. } else if (
  276. evsel->attr.type == PERF_TYPE_HW_CACHE &&
  277. evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D |
  278. ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
  279. ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
  280. if (runtime_l1_dcache_stats[ctx][cpu].n != 0)
  281. print_l1_dcache_misses(cpu, evsel, avg, out);
  282. else
  283. print_metric(ctxp, NULL, NULL, "of all L1-dcache hits", 0);
  284. } else if (
  285. evsel->attr.type == PERF_TYPE_HW_CACHE &&
  286. evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I |
  287. ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
  288. ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
  289. if (runtime_l1_icache_stats[ctx][cpu].n != 0)
  290. print_l1_icache_misses(cpu, evsel, avg, out);
  291. else
  292. print_metric(ctxp, NULL, NULL, "of all L1-icache hits", 0);
  293. } else if (
  294. evsel->attr.type == PERF_TYPE_HW_CACHE &&
  295. evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB |
  296. ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
  297. ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
  298. if (runtime_dtlb_cache_stats[ctx][cpu].n != 0)
  299. print_dtlb_cache_misses(cpu, evsel, avg, out);
  300. else
  301. print_metric(ctxp, NULL, NULL, "of all dTLB cache hits", 0);
  302. } else if (
  303. evsel->attr.type == PERF_TYPE_HW_CACHE &&
  304. evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB |
  305. ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
  306. ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
  307. if (runtime_itlb_cache_stats[ctx][cpu].n != 0)
  308. print_itlb_cache_misses(cpu, evsel, avg, out);
  309. else
  310. print_metric(ctxp, NULL, NULL, "of all iTLB cache hits", 0);
  311. } else if (
  312. evsel->attr.type == PERF_TYPE_HW_CACHE &&
  313. evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL |
  314. ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
  315. ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
  316. if (runtime_ll_cache_stats[ctx][cpu].n != 0)
  317. print_ll_cache_misses(cpu, evsel, avg, out);
  318. else
  319. print_metric(ctxp, NULL, NULL, "of all LL-cache hits", 0);
  320. } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) {
  321. total = avg_stats(&runtime_cacherefs_stats[ctx][cpu]);
  322. if (total)
  323. ratio = avg * 100 / total;
  324. if (runtime_cacherefs_stats[ctx][cpu].n != 0)
  325. print_metric(ctxp, NULL, "%8.3f %%",
  326. "of all cache refs", ratio);
  327. else
  328. print_metric(ctxp, NULL, NULL, "of all cache refs", 0);
  329. } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
  330. print_stalled_cycles_frontend(cpu, evsel, avg, out);
  331. } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
  332. print_stalled_cycles_backend(cpu, evsel, avg, out);
  333. } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
  334. total = avg_stats(&runtime_nsecs_stats[cpu]);
  335. if (total) {
  336. ratio = avg / total;
  337. print_metric(ctxp, NULL, "%8.3f", "GHz", ratio);
  338. } else {
  339. print_metric(ctxp, NULL, NULL, "Ghz", 0);
  340. }
  341. } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
  342. total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
  343. if (total)
  344. print_metric(ctxp, NULL,
  345. "%7.2f%%", "transactional cycles",
  346. 100.0 * (avg / total));
  347. else
  348. print_metric(ctxp, NULL, NULL, "transactional cycles",
  349. 0);
  350. } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
  351. total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
  352. total2 = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
  353. if (total2 < avg)
  354. total2 = avg;
  355. if (total)
  356. print_metric(ctxp, NULL, "%7.2f%%", "aborted cycles",
  357. 100.0 * ((total2-avg) / total));
  358. else
  359. print_metric(ctxp, NULL, NULL, "aborted cycles", 0);
  360. } else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) {
  361. total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
  362. if (avg)
  363. ratio = total / avg;
  364. if (runtime_cycles_in_tx_stats[ctx][cpu].n != 0)
  365. print_metric(ctxp, NULL, "%8.0f",
  366. "cycles / transaction", ratio);
  367. else
  368. print_metric(ctxp, NULL, NULL, "cycles / transaction",
  369. 0);
  370. } else if (perf_stat_evsel__is(evsel, ELISION_START)) {
  371. total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
  372. if (avg)
  373. ratio = total / avg;
  374. print_metric(ctxp, NULL, "%8.0f", "cycles / elision", ratio);
  375. } else if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) {
  376. if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0)
  377. print_metric(ctxp, NULL, "%8.3f", "CPUs utilized",
  378. avg / ratio);
  379. else
  380. print_metric(ctxp, NULL, NULL, "CPUs utilized", 0);
  381. } else if (runtime_nsecs_stats[cpu].n != 0) {
  382. char unit = 'M';
  383. char unit_buf[10];
  384. total = avg_stats(&runtime_nsecs_stats[cpu]);
  385. if (total)
  386. ratio = 1000.0 * avg / total;
  387. if (ratio < 0.001) {
  388. ratio *= 1000;
  389. unit = 'K';
  390. }
  391. snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
  392. print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio);
  393. } else {
  394. print_metric(ctxp, NULL, NULL, NULL, 0);
  395. }
  396. }