9 years ago · 239bd47f83
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -36,6 +36,11 @@ static struct stats runtime_dtlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
 
				 static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS];
			
 
				 static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS];
			
 
				 static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS];
			
 
				+static struct stats runtime_topdown_total_slots[NUM_CTX][MAX_NR_CPUS];
			
 
				+static struct stats runtime_topdown_slots_issued[NUM_CTX][MAX_NR_CPUS];
			
 
				+static struct stats runtime_topdown_slots_retired[NUM_CTX][MAX_NR_CPUS];
			
 
				+static struct stats runtime_topdown_fetch_bubbles[NUM_CTX][MAX_NR_CPUS];
			
 
				+static struct stats runtime_topdown_recovery_bubbles[NUM_CTX][MAX_NR_CPUS];
			
 
				 static bool have_frontend_stalled;
			
 
				 
			
 
				 struct stats walltime_nsecs_stats;
			
@@ -82,6 +87,11 @@ void perf_stat__reset_shadow_stats(void)
 
				 		sizeof(runtime_transaction_stats));
			
 
				 	memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats));
			
 
				 	memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
			
 
				+	memset(runtime_topdown_total_slots, 0, sizeof(runtime_topdown_total_slots));
			
 
				+	memset(runtime_topdown_slots_retired, 0, sizeof(runtime_topdown_slots_retired));
			
 
				+	memset(runtime_topdown_slots_issued, 0, sizeof(runtime_topdown_slots_issued));
			
 
				+	memset(runtime_topdown_fetch_bubbles, 0, sizeof(runtime_topdown_fetch_bubbles));
			
 
				+	memset(runtime_topdown_recovery_bubbles, 0, sizeof(runtime_topdown_recovery_bubbles));
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -105,6 +115,16 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count,
 
				 		update_stats(&runtime_transaction_stats[ctx][cpu], count[0]);
			
 
				 	else if (perf_stat_evsel__is(counter, ELISION_START))
			
 
				 		update_stats(&runtime_elision_stats[ctx][cpu], count[0]);
			
 
				+	else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS))
			
 
				+		update_stats(&runtime_topdown_total_slots[ctx][cpu], count[0]);
			
 
				+	else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED))
			
 
				+		update_stats(&runtime_topdown_slots_issued[ctx][cpu], count[0]);
			
 
				+	else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED))
			
 
				+		update_stats(&runtime_topdown_slots_retired[ctx][cpu], count[0]);
			
 
				+	else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES))
			
 
				+		update_stats(&runtime_topdown_fetch_bubbles[ctx][cpu],count[0]);
			
 
				+	else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES))
			
 
				+		update_stats(&runtime_topdown_recovery_bubbles[ctx][cpu], count[0]);
			
 
				 	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
			
 
				 		update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count[0]);
			
 
				 	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
			
@@ -302,6 +322,107 @@ static void print_ll_cache_misses(int cpu,
 
				 	out->print_metric(out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio);
			
 
				 }
			
 
				 
			
 
				+/*
			
 
				+ * High-level "TopDown" CPU core pipeline bottleneck breakdown.
			
 
				+ *
			
 
				+ * Basic concept following
			
 
				+ * Yasin, "A Top-Down Method for Performance Analysis and Counters Architecture",
			
 
				+ * ISPASS 2014
			
 
				+ *
			
 
				+ * The CPU pipeline is divided into 4 areas that can be bottlenecks:
			
 
				+ *
			
 
				+ * Frontend -> Backend -> Retiring
			
 
				+ * BadSpeculation in addition means out of order execution that is thrown away
			
 
				+ * (for example branch mispredictions)
			
 
				+ * Frontend is instruction decoding.
			
 
				+ * Backend is execution, like computation and accessing data in memory
			
 
				+ * Retiring is good execution that is not directly bottlenecked
			
 
				+ *
			
 
				+ * The formulas are computed in slots.
			
 
				+ * A slot is a pipeline entry; a cycle has as many slots as the pipeline width
			
 
				+ * (for example a 4-wide pipeline has 4 slots for each cycle)
			
 
				+ *
			
 
				+ * Formulas:
			
 
				+ * BadSpeculation = ((SlotsIssued - SlotsRetired) + RecoveryBubbles) /
			
 
				+ *			TotalSlots
			
 
				+ * Retiring = SlotsRetired / TotalSlots
			
 
				+ * FrontendBound = FetchBubbles / TotalSlots
			
 
				+ * BackendBound = 1.0 - BadSpeculation - Retiring - FrontendBound
			
 
				+ *
			
 
				+ * The kernel provides the mapping to the low level CPU events and any scaling
			
 
				+ * needed for the CPU pipeline width, for example:
			
 
				+ *
			
 
				+ * TotalSlots = Cycles * 4
			
 
				+ *
			
 
				+ * The scaling factor is communicated in the sysfs unit.
			
 
				+ *
			
 
				+ * In some cases the CPU may not be able to measure all the formulas due to
			
 
				+ * missing events. In this case multiple formulas are combined where possible.
			
 
				+ *
			
 
				+ * Full TopDown supports more levels to sub-divide each area: for example
			
 
				+ * BackendBound into computing bound and memory bound. For now we only
			
 
				+ * support Level 1 TopDown.
			
 
				+ */
			
 
				+
			
 
				+/*
				+ * Map slightly negative values to zero.
				+ *
				+ * Any x in the half-open range [-0.02, 0) is returned as 0.0; every
				+ * other input, including more strongly negative values, is returned
				+ * unchanged.
				+ */
				+static double sanitize_val(double x)
				+{
				+	const int slightly_negative = (x >= -0.02) && (x < 0);
				+
				+	return slightly_negative ? 0.0 : x;
				+}
			
 
				+
			
 
				+/*
				+ * Return avg_stats() of the total-slots accumulator recorded for the
				+ * given context / CPU pair.
				+ */
				+static double td_total_slots(int ctx, int cpu)
				+{
				+	struct stats *slots = &runtime_topdown_total_slots[ctx][cpu];
				+
				+	return avg_stats(slots);
				+}
			
 
				+
			
 
				+/*
				+ * BadSpeculation = ((SlotsIssued - SlotsRetired) + RecoveryBubbles) /
				+ *			TotalSlots
				+ *
				+ * Yields 0 when no total slots have been recorded. The result is
				+ * passed through sanitize_val() before being returned.
				+ */
				+static double td_bad_spec(int ctx, int cpu)
				+{
				+	double issued = avg_stats(&runtime_topdown_slots_issued[ctx][cpu]);
				+	double retired = avg_stats(&runtime_topdown_slots_retired[ctx][cpu]);
				+	double recovery = avg_stats(&runtime_topdown_recovery_bubbles[ctx][cpu]);
				+	double slots = td_total_slots(ctx, cpu);
				+	double wasted = issued - retired + recovery;
				+
				+	/* Without a slot count there is nothing to normalise against. */
				+	if (!slots)
				+		return sanitize_val(0);
				+
				+	return sanitize_val(wasted / slots);
				+}
			
 
				+
			
 
				+/*
				+ * Retiring = SlotsRetired / TotalSlots
				+ *
				+ * Yields 0 when no total slots have been recorded.
				+ */
				+static double td_retiring(int ctx, int cpu)
				+{
				+	double slots = td_total_slots(ctx, cpu);
				+	double retired = avg_stats(&runtime_topdown_slots_retired[ctx][cpu]);
				+
				+	return slots ? retired / slots : 0;
				+}
			
 
				+
			
 
				+/*
				+ * FrontendBound = FetchBubbles / TotalSlots
				+ *
				+ * Yields 0 when no total slots have been recorded.
				+ */
				+static double td_fe_bound(int ctx, int cpu)
				+{
				+	double slots = td_total_slots(ctx, cpu);
				+	double bubbles = avg_stats(&runtime_topdown_fetch_bubbles[ctx][cpu]);
				+
				+	return slots ? bubbles / slots : 0;
				+}
			
 
				+
			
 
				+/*
				+ * BackendBound = 1.0 - BadSpeculation - Retiring - FrontendBound
				+ *
				+ * When the three other categories add up to exactly zero, 0 is
				+ * returned instead of 1.0. Otherwise the remainder is passed through
				+ * sanitize_val() before being returned.
				+ */
				+static double td_be_bound(int ctx, int cpu)
				+{
				+	double accounted = td_fe_bound(ctx, cpu) +
				+			   td_bad_spec(ctx, cpu) +
				+			   td_retiring(ctx, cpu);
				+
				+	return accounted != 0 ? sanitize_val(1.0 - accounted) : 0;
				+}
			
 
				+
			
 
				 void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
			
 
				 				   double avg, int cpu,
			
 
				 				   struct perf_stat_output_ctx *out)
			
@@ -309,6 +430,7 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 
				 	void *ctxp = out->ctx;
			
 
				 	print_metric_t print_metric = out->print_metric;
			
 
				 	double total, ratio = 0.0, total2;
			
 
				+	const char *color = NULL;
			
 
				 	int ctx = evsel_context(evsel);
			
 
				 
			
 
				 	if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
			
@@ -452,6 +574,46 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 
				 				     avg / ratio);
			
 
				 		else
			
 
				 			print_metric(ctxp, NULL, NULL, "CPUs utilized", 0);
			
 
				+	} else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) {
			
 
				+		double fe_bound = td_fe_bound(ctx, cpu);
			
 
				+
			
 
				+		if (fe_bound > 0.2)
			
 
				+			color = PERF_COLOR_RED;
			
 
				+		print_metric(ctxp, color, "%8.1f%%", "frontend bound",
			
 
				+				fe_bound * 100.);
			
 
				+	} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) {
			
 
				+		double retiring = td_retiring(ctx, cpu);
			
 
				+
			
 
				+		if (retiring > 0.7)
			
 
				+			color = PERF_COLOR_GREEN;
			
 
				+		print_metric(ctxp, color, "%8.1f%%", "retiring",
			
 
				+				retiring * 100.);
			
 
				+	} else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) {
			
 
				+		double bad_spec = td_bad_spec(ctx, cpu);
			
 
				+
			
 
				+		if (bad_spec > 0.1)
			
 
				+			color = PERF_COLOR_RED;
			
 
				+		print_metric(ctxp, color, "%8.1f%%", "bad speculation",
			
 
				+				bad_spec * 100.);
			
 
				+	} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) {
			
 
				+		double be_bound = td_be_bound(ctx, cpu);
			
 
				+		const char *name = "backend bound";
			
 
				+		static int have_recovery_bubbles = -1;
			
 
				+
			
 
				+		/* In case the CPU does not support topdown-recovery-bubbles */
			
 
				+		if (have_recovery_bubbles < 0)
			
 
				+			have_recovery_bubbles = pmu_have_event("cpu",
			
 
				+					"topdown-recovery-bubbles");
			
 
				+		if (!have_recovery_bubbles)
			
 
				+			name = "backend bound/bad spec";
			
 
				+
			
 
				+		if (be_bound > 0.2)
			
 
				+			color = PERF_COLOR_RED;
			
 
				+		if (td_total_slots(ctx, cpu) > 0)
			
 
				+			print_metric(ctxp, color, "%8.1f%%", name,
			
 
				+					be_bound * 100.);
			
 
				+		else
			
 
				+			print_metric(ctxp, NULL, NULL, name, 0);
			
 
				 	} else if (runtime_nsecs_stats[cpu].n != 0) {
			
 
				 		char unit = 'M';
			
 
				 		char unit_buf[10];
			
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -79,6 +79,11 @@ static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = {
 
				 	ID(TRANSACTION_START,	cpu/tx-start/),
			
 
				 	ID(ELISION_START,	cpu/el-start/),
			
 
				 	ID(CYCLES_IN_TX_CP,	cpu/cycles-ct/),
			
 
				+	ID(TOPDOWN_TOTAL_SLOTS, topdown-total-slots),
			
 
				+	ID(TOPDOWN_SLOTS_ISSUED, topdown-slots-issued),
			
 
				+	ID(TOPDOWN_SLOTS_RETIRED, topdown-slots-retired),
			
 
				+	ID(TOPDOWN_FETCH_BUBBLES, topdown-fetch-bubbles),
			
 
				+	ID(TOPDOWN_RECOVERY_BUBBLES, topdown-recovery-bubbles),
			
 
				 };
			
 
				 #undef ID
			
 
				 
			
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -17,6 +17,11 @@ enum perf_stat_evsel_id {
 
				 	PERF_STAT_EVSEL_ID__TRANSACTION_START,
			
 
				 	PERF_STAT_EVSEL_ID__ELISION_START,
			
 
				 	PERF_STAT_EVSEL_ID__CYCLES_IN_TX_CP,
			
 
				+	PERF_STAT_EVSEL_ID__TOPDOWN_TOTAL_SLOTS,
			
 
				+	PERF_STAT_EVSEL_ID__TOPDOWN_SLOTS_ISSUED,
			
 
				+	PERF_STAT_EVSEL_ID__TOPDOWN_SLOTS_RETIRED,
			
 
				+	PERF_STAT_EVSEL_ID__TOPDOWN_FETCH_BUBBLES,
			
 
				+	PERF_STAT_EVSEL_ID__TOPDOWN_RECOVERY_BUBBLES,
			
 
				 	PERF_STAT_EVSEL_ID__MAX,
			
 
				 };