
Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

User visible changes:

  - Fix perf.data size reporting in 'perf record' in no-buildid mode (He Kuang)

Infrastructure changes:

  - Protect accesses to the DSO rbtrees/lists with a rw lock and reference
    count struct dso instances (Arnaldo Carvalho de Melo)

  - Export dynamic symbols used by traceevent plugins (He Kuang)

  - Add libtraceevent-dynamic-list file to libtraceevent's .gitignore (He Kuang)

  - Refactor shadow stats code in 'perf stat', prep work for further
    patch kits (Jiri Olsa)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Ingo Molnar, 10 years ago
commit 028c63b567

+ 1 - 0
tools/lib/traceevent/.gitignore

@@ -1 +1,2 @@
 TRACEEVENT-CFLAGS
+libtraceevent-dynamic-list

+ 13 - 1
tools/lib/traceevent/Makefile

@@ -23,6 +23,7 @@ endef
 # Allow setting CC and AR, or setting CROSS_COMPILE as a prefix.
 $(call allow-override,CC,$(CROSS_COMPILE)gcc)
 $(call allow-override,AR,$(CROSS_COMPILE)ar)
+$(call allow-override,NM,$(CROSS_COMPILE)nm)
 
 EXT = -std=gnu99
 INSTALL = install
@@ -157,8 +158,9 @@ PLUGINS_IN := $(PLUGINS:.so=-in.o)
 
 TE_IN    := $(OUTPUT)libtraceevent-in.o
 LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE))
+DYNAMIC_LIST_FILE := $(OUTPUT)libtraceevent-dynamic-list
 
-CMD_TARGETS = $(LIB_FILE) $(PLUGINS)
+CMD_TARGETS = $(LIB_FILE) $(PLUGINS) $(DYNAMIC_LIST_FILE)
 
 TARGETS = $(CMD_TARGETS)
 
@@ -175,6 +177,9 @@ $(OUTPUT)libtraceevent.so: $(TE_IN)
 $(OUTPUT)libtraceevent.a: $(TE_IN)
 	$(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^
 
+$(OUTPUT)libtraceevent-dynamic-list: $(PLUGINS)
+	$(QUIET_GEN)$(call do_generate_dynamic_list_file, $(PLUGINS), $@)
+
 plugins: $(PLUGINS)
 
 __plugin_obj = $(notdir $@)
@@ -244,6 +249,13 @@ define do_install_plugins
 	done
 endef
 
+define do_generate_dynamic_list_file
+	(echo '{';							\
+	$(NM) -u -D $1 | awk 'NF>1 {print "\t"$$2";"}' | sort -u;	\
+	echo '};';							\
+	) > $2
+endef
+
 install_lib: all_cmd install_plugins
 	$(call QUIET_INSTALL, $(LIB_FILE)) \
 		$(call do_install,$(LIB_FILE),$(libdir_SQ))
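For reference, the do_generate_dynamic_list_file rule above collects the undefined dynamic symbols of every plugin with 'nm -u -D', sorts and deduplicates them, and wraps the result in a linker dynamic-list script. Makefile.perf (next diff) then hands that file to the linker via -Xlinker --dynamic-list=..., so the perf binary exports exactly the symbols the dlopen()ed traceevent plugins resolve against it. A generated libtraceevent-dynamic-list would look roughly like this (the symbol names below are illustrative, not taken from an actual build):

	{
		pevent_get_field_val;
		pevent_register_print_function;
		trace_seq_printf;
	};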

+ 12 - 2
tools/perf/Makefile.perf

@@ -173,6 +173,9 @@ endif
 LIBTRACEEVENT = $(TE_PATH)libtraceevent.a
 export LIBTRACEEVENT
 
+LIBTRACEEVENT_DYNAMIC_LIST = $(TE_PATH)libtraceevent-dynamic-list
+LDFLAGS += -Xlinker --dynamic-list=$(LIBTRACEEVENT_DYNAMIC_LIST)
+
 LIBAPI = $(LIB_PATH)libapi.a
 export LIBAPI
 
@@ -278,7 +281,7 @@ build := -f $(srctree)/tools/build/Makefile.build dir=. obj
 $(PERF_IN): $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h FORCE
 	$(Q)$(MAKE) $(build)=perf
 
-$(OUTPUT)perf: $(PERFLIBS) $(PERF_IN)
+$(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(LIBTRACEEVENT_DYNAMIC_LIST)
 	$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(PERF_IN) $(LIBS) -o $@
 
 $(GTK_IN): FORCE
@@ -373,7 +376,13 @@ $(LIB_FILE): $(LIBPERF_IN)
 LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ)
 
 $(LIBTRACEEVENT): FORCE
-	$(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent.a plugins
+	$(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent.a
+
+libtraceevent_plugins: FORCE
+	$(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) plugins
+
+$(LIBTRACEEVENT_DYNAMIC_LIST): libtraceevent_plugins
+	$(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent-dynamic-list
 
 $(LIBTRACEEVENT)-clean:
 	$(call QUIET_CLEAN, libtraceevent)
@@ -555,4 +564,5 @@ FORCE:
 .PHONY: all install clean config-clean strip install-gtk
 .PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell
 .PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope FORCE single_dep
+.PHONY: libtraceevent_plugins
 

+ 2 - 4
tools/perf/builtin-record.c

@@ -345,12 +345,9 @@ static int process_buildids(struct record *rec)
 	struct perf_data_file *file  = &rec->file;
 	struct perf_session *session = rec->session;
 
-	u64 size = lseek(perf_data_file__fd(file), 0, SEEK_CUR);
-	if (size == 0)
+	if (file->size == 0)
 		return 0;
 
-	file->size = size;
-
 	/*
 	 * During this process, it'll load kernel map and replace the
 	 * dso->long_name to a real pathname it found.  In this case
@@ -719,6 +716,7 @@ out_child:
 
 	if (!err && !file->is_pipe) {
 		rec->session->header.data_size += rec->bytes_written;
+		file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR);
 
 		if (!rec->no_buildid) {
 			process_buildids(rec);

+ 13 - 493
tools/perf/builtin-stat.c

@@ -73,8 +73,8 @@ static void print_counter(struct perf_evsel *counter, char *prefix);
 static void print_aggr(char *prefix);
 
 /* Default events used for perf stat -T */
-static const char * const transaction_attrs[] = {
-	"task-clock",
+static const char *transaction_attrs = {
+	"task-clock,"
 	"{"
 	"instructions,"
 	"cycles,"
@@ -86,8 +86,8 @@ static const char * const transaction_attrs[] = {
 };
 
 /* More limited version when the CPU does not have all events. */
-static const char * const transaction_limited_attrs[] = {
-	"task-clock",
+static const char * transaction_limited_attrs = {
+	"task-clock,"
 	"{"
 	"instructions,"
 	"cycles,"
@@ -96,30 +96,12 @@ static const char * const transaction_limited_attrs[] = {
 	"}"
 };
 
-/* must match transaction_attrs and the beginning limited_attrs */
-enum {
-	T_TASK_CLOCK,
-	T_INSTRUCTIONS,
-	T_CYCLES,
-	T_CYCLES_IN_TX,
-	T_TRANSACTION_START,
-	T_ELISION_START,
-	T_CYCLES_IN_TX_CP,
-};
-
 static struct perf_evlist	*evsel_list;
 
 static struct target target = {
 	.uid	= UINT_MAX,
 };
 
-enum aggr_mode {
-	AGGR_NONE,
-	AGGR_GLOBAL,
-	AGGR_SOCKET,
-	AGGR_CORE,
-};
-
 static int			run_count			=  1;
 static bool			no_inherit			= false;
 static bool			scale				=  true;
@@ -147,10 +129,6 @@ static int			(*aggr_get_id)(struct cpu_map *m, int cpu);
 
 static volatile int done = 0;
 
-struct perf_stat {
-	struct stats	  res_stats[3];
-};
-
 static inline void diff_timespec(struct timespec *r, struct timespec *a,
 				 struct timespec *b)
 {
@@ -180,6 +158,8 @@ static void perf_evsel__reset_stat_priv(struct perf_evsel *evsel)
 
 	for (i = 0; i < 3; i++)
 		init_stats(&ps->res_stats[i]);
+
+	perf_stat_evsel_id_init(evsel);
 }
 
 static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
@@ -247,51 +227,6 @@ out_free:
 	return -1;
 }
 
-enum {
-	CTX_BIT_USER	= 1 << 0,
-	CTX_BIT_KERNEL	= 1 << 1,
-	CTX_BIT_HV	= 1 << 2,
-	CTX_BIT_HOST	= 1 << 3,
-	CTX_BIT_IDLE	= 1 << 4,
-	CTX_BIT_MAX	= 1 << 5,
-};
-
-#define NUM_CTX CTX_BIT_MAX
-
-static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
-static struct stats runtime_cycles_stats[NUM_CTX][MAX_NR_CPUS];
-static struct stats runtime_stalled_cycles_front_stats[NUM_CTX][MAX_NR_CPUS];
-static struct stats runtime_stalled_cycles_back_stats[NUM_CTX][MAX_NR_CPUS];
-static struct stats runtime_branches_stats[NUM_CTX][MAX_NR_CPUS];
-static struct stats runtime_cacherefs_stats[NUM_CTX][MAX_NR_CPUS];
-static struct stats runtime_l1_dcache_stats[NUM_CTX][MAX_NR_CPUS];
-static struct stats runtime_l1_icache_stats[NUM_CTX][MAX_NR_CPUS];
-static struct stats runtime_ll_cache_stats[NUM_CTX][MAX_NR_CPUS];
-static struct stats runtime_itlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
-static struct stats runtime_dtlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
-static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS];
-static struct stats walltime_nsecs_stats;
-static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS];
-static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS];
-
-static int evsel_context(struct perf_evsel *evsel)
-{
-	int ctx = 0;
-
-	if (evsel->attr.exclude_kernel)
-		ctx |= CTX_BIT_KERNEL;
-	if (evsel->attr.exclude_user)
-		ctx |= CTX_BIT_USER;
-	if (evsel->attr.exclude_hv)
-		ctx |= CTX_BIT_HV;
-	if (evsel->attr.exclude_host)
-		ctx |= CTX_BIT_HOST;
-	if (evsel->attr.exclude_idle)
-		ctx |= CTX_BIT_IDLE;
-
-	return ctx;
-}
-
 static void perf_stat__reset_stats(struct perf_evlist *evlist)
 {
 	struct perf_evsel *evsel;
@@ -301,23 +236,7 @@ static void perf_stat__reset_stats(struct perf_evlist *evlist)
 		perf_evsel__reset_counts(evsel, perf_evsel__nr_cpus(evsel));
 	}
 
-	memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats));
-	memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats));
-	memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats));
-	memset(runtime_stalled_cycles_back_stats, 0, sizeof(runtime_stalled_cycles_back_stats));
-	memset(runtime_branches_stats, 0, sizeof(runtime_branches_stats));
-	memset(runtime_cacherefs_stats, 0, sizeof(runtime_cacherefs_stats));
-	memset(runtime_l1_dcache_stats, 0, sizeof(runtime_l1_dcache_stats));
-	memset(runtime_l1_icache_stats, 0, sizeof(runtime_l1_icache_stats));
-	memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats));
-	memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats));
-	memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats));
-	memset(runtime_cycles_in_tx_stats, 0,
-			sizeof(runtime_cycles_in_tx_stats));
-	memset(runtime_transaction_stats, 0,
-		sizeof(runtime_transaction_stats));
-	memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats));
-	memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
+	perf_stat__reset_shadow_stats();
 }
 
 static int create_perf_stat_counter(struct perf_evsel *evsel)
@@ -354,72 +273,6 @@ static inline int nsec_counter(struct perf_evsel *evsel)
 	return 0;
 }
 
-static struct perf_evsel *nth_evsel(int n)
-{
-	static struct perf_evsel **array;
-	static int array_len;
-	struct perf_evsel *ev;
-	int j;
-
-	/* Assumes this only called when evsel_list does not change anymore. */
-	if (!array) {
-		evlist__for_each(evsel_list, ev)
-			array_len++;
-		array = malloc(array_len * sizeof(void *));
-		if (!array)
-			exit(ENOMEM);
-		j = 0;
-		evlist__for_each(evsel_list, ev)
-			array[j++] = ev;
-	}
-	if (n < array_len)
-		return array[n];
-	return NULL;
-}
-
-/*
- * Update various tracking values we maintain to print
- * more semantic information such as miss/hit ratios,
- * instruction rates, etc:
- */
-static void update_shadow_stats(struct perf_evsel *counter, u64 *count,
-				int cpu)
-{
-	int ctx = evsel_context(counter);
-
-	if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
-		update_stats(&runtime_nsecs_stats[cpu], count[0]);
-	else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
-		update_stats(&runtime_cycles_stats[ctx][cpu], count[0]);
-	else if (transaction_run &&
-		 perf_evsel__cmp(counter, nth_evsel(T_CYCLES_IN_TX)))
-		update_stats(&runtime_transaction_stats[ctx][cpu], count[0]);
-	else if (transaction_run &&
-		 perf_evsel__cmp(counter, nth_evsel(T_TRANSACTION_START)))
-		update_stats(&runtime_transaction_stats[ctx][cpu], count[0]);
-	else if (transaction_run &&
-		 perf_evsel__cmp(counter, nth_evsel(T_ELISION_START)))
-		update_stats(&runtime_elision_stats[ctx][cpu], count[0]);
-	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
-		update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count[0]);
-	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
-		update_stats(&runtime_stalled_cycles_back_stats[ctx][cpu], count[0]);
-	else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
-		update_stats(&runtime_branches_stats[ctx][cpu], count[0]);
-	else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
-		update_stats(&runtime_cacherefs_stats[ctx][cpu], count[0]);
-	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
-		update_stats(&runtime_l1_dcache_stats[ctx][cpu], count[0]);
-	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
-		update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]);
-	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
-		update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]);
-	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
-		update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]);
-	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
-		update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]);
-}
-
 static void zero_per_pkg(struct perf_evsel *counter)
 {
 	if (counter->per_pkg_mask)
@@ -480,7 +333,7 @@ static int read_cb(struct perf_evsel *evsel, int cpu, int thread __maybe_unused,
 		perf_counts_values__scale(count, scale, NULL);
 		evsel->counts->cpu[cpu] = *count;
 		if (aggr_mode == AGGR_NONE)
-			update_shadow_stats(evsel, count->values, cpu);
+			perf_stat__update_shadow_stats(evsel, count->values, cpu);
 		break;
 	case AGGR_GLOBAL:
 		aggr->val += count->val;
@@ -528,7 +381,7 @@ static int read_counter_aggr(struct perf_evsel *counter)
 	/*
 	 * Save the full runtime - to allow normalization during printout:
 	 */
-	update_shadow_stats(counter, count, 0);
+	perf_stat__update_shadow_stats(counter, count, 0);
 
 	return 0;
 }
@@ -906,200 +759,11 @@ static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 		fprintf(output, "                                   ");
 }
 
-/* used for get_ratio_color() */
-enum grc_type {
-	GRC_STALLED_CYCLES_FE,
-	GRC_STALLED_CYCLES_BE,
-	GRC_CACHE_MISSES,
-	GRC_MAX_NR
-};
-
-static const char *get_ratio_color(enum grc_type type, double ratio)
-{
-	static const double grc_table[GRC_MAX_NR][3] = {
-		[GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
-		[GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
-		[GRC_CACHE_MISSES] 	= { 20.0, 10.0, 5.0 },
-	};
-	const char *color = PERF_COLOR_NORMAL;
-
-	if (ratio > grc_table[type][0])
-		color = PERF_COLOR_RED;
-	else if (ratio > grc_table[type][1])
-		color = PERF_COLOR_MAGENTA;
-	else if (ratio > grc_table[type][2])
-		color = PERF_COLOR_YELLOW;
-
-	return color;
-}
-
-static void print_stalled_cycles_frontend(int cpu,
-					  struct perf_evsel *evsel
-					  __maybe_unused, double avg)
-{
-	double total, ratio = 0.0;
-	const char *color;
-	int ctx = evsel_context(evsel);
-
-	total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
-
-	if (total)
-		ratio = avg / total * 100.0;
-
-	color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);
-
-	fprintf(output, " #  ");
-	color_fprintf(output, color, "%6.2f%%", ratio);
-	fprintf(output, " frontend cycles idle   ");
-}
-
-static void print_stalled_cycles_backend(int cpu,
-					 struct perf_evsel *evsel
-					 __maybe_unused, double avg)
-{
-	double total, ratio = 0.0;
-	const char *color;
-	int ctx = evsel_context(evsel);
-
-	total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
-
-	if (total)
-		ratio = avg / total * 100.0;
-
-	color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);
-
-	fprintf(output, " #  ");
-	color_fprintf(output, color, "%6.2f%%", ratio);
-	fprintf(output, " backend  cycles idle   ");
-}
-
-static void print_branch_misses(int cpu,
-				struct perf_evsel *evsel __maybe_unused,
-				double avg)
-{
-	double total, ratio = 0.0;
-	const char *color;
-	int ctx = evsel_context(evsel);
-
-	total = avg_stats(&runtime_branches_stats[ctx][cpu]);
-
-	if (total)
-		ratio = avg / total * 100.0;
-
-	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
-
-	fprintf(output, " #  ");
-	color_fprintf(output, color, "%6.2f%%", ratio);
-	fprintf(output, " of all branches        ");
-}
-
-static void print_l1_dcache_misses(int cpu,
-				   struct perf_evsel *evsel __maybe_unused,
-				   double avg)
-{
-	double total, ratio = 0.0;
-	const char *color;
-	int ctx = evsel_context(evsel);
-
-	total = avg_stats(&runtime_l1_dcache_stats[ctx][cpu]);
-
-	if (total)
-		ratio = avg / total * 100.0;
-
-	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
-
-	fprintf(output, " #  ");
-	color_fprintf(output, color, "%6.2f%%", ratio);
-	fprintf(output, " of all L1-dcache hits  ");
-}
-
-static void print_l1_icache_misses(int cpu,
-				   struct perf_evsel *evsel __maybe_unused,
-				   double avg)
-{
-	double total, ratio = 0.0;
-	const char *color;
-	int ctx = evsel_context(evsel);
-
-	total = avg_stats(&runtime_l1_icache_stats[ctx][cpu]);
-
-	if (total)
-		ratio = avg / total * 100.0;
-
-	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
-
-	fprintf(output, " #  ");
-	color_fprintf(output, color, "%6.2f%%", ratio);
-	fprintf(output, " of all L1-icache hits  ");
-}
-
-static void print_dtlb_cache_misses(int cpu,
-				    struct perf_evsel *evsel __maybe_unused,
-				    double avg)
-{
-	double total, ratio = 0.0;
-	const char *color;
-	int ctx = evsel_context(evsel);
-
-	total = avg_stats(&runtime_dtlb_cache_stats[ctx][cpu]);
-
-	if (total)
-		ratio = avg / total * 100.0;
-
-	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
-
-	fprintf(output, " #  ");
-	color_fprintf(output, color, "%6.2f%%", ratio);
-	fprintf(output, " of all dTLB cache hits ");
-}
-
-static void print_itlb_cache_misses(int cpu,
-				    struct perf_evsel *evsel __maybe_unused,
-				    double avg)
-{
-	double total, ratio = 0.0;
-	const char *color;
-	int ctx = evsel_context(evsel);
-
-	total = avg_stats(&runtime_itlb_cache_stats[ctx][cpu]);
-
-	if (total)
-		ratio = avg / total * 100.0;
-
-	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
-
-	fprintf(output, " #  ");
-	color_fprintf(output, color, "%6.2f%%", ratio);
-	fprintf(output, " of all iTLB cache hits ");
-}
-
-static void print_ll_cache_misses(int cpu,
-				  struct perf_evsel *evsel __maybe_unused,
-				  double avg)
-{
-	double total, ratio = 0.0;
-	const char *color;
-	int ctx = evsel_context(evsel);
-
-	total = avg_stats(&runtime_ll_cache_stats[ctx][cpu]);
-
-	if (total)
-		ratio = avg / total * 100.0;
-
-	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
-
-	fprintf(output, " #  ");
-	color_fprintf(output, color, "%6.2f%%", ratio);
-	fprintf(output, " of all LL-cache hits   ");
-}
-
 static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 {
-	double total, ratio = 0.0, total2;
 	double sc =  evsel->scale;
 	const char *fmt;
 	int cpu = cpu_map__id_to_cpu(id);
-	int ctx = evsel_context(evsel);
 
 	if (csv_output) {
 		fmt = sc != 1.0 ?  "%.2f%s" : "%.0f%s";
@@ -1130,138 +794,7 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 	if (csv_output || interval)
 		return;
 
-	if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
-		total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
-		if (total) {
-			ratio = avg / total;
-			fprintf(output, " #   %5.2f  insns per cycle        ", ratio);
-		} else {
-			fprintf(output, "                                   ");
-		}
-		total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]);
-		total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu]));
-
-		if (total && avg) {
-			ratio = total / avg;
-			fprintf(output, "\n");
-			if (aggr_mode == AGGR_NONE)
-				fprintf(output, "        ");
-			fprintf(output, "                                                  #   %5.2f  stalled cycles per insn", ratio);
-		}
-
-	} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) &&
-			runtime_branches_stats[ctx][cpu].n != 0) {
-		print_branch_misses(cpu, evsel, avg);
-	} else if (
-		evsel->attr.type == PERF_TYPE_HW_CACHE &&
-		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1D |
-					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
-					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
-			runtime_l1_dcache_stats[ctx][cpu].n != 0) {
-		print_l1_dcache_misses(cpu, evsel, avg);
-	} else if (
-		evsel->attr.type == PERF_TYPE_HW_CACHE &&
-		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1I |
-					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
-					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
-			runtime_l1_icache_stats[ctx][cpu].n != 0) {
-		print_l1_icache_misses(cpu, evsel, avg);
-	} else if (
-		evsel->attr.type == PERF_TYPE_HW_CACHE &&
-		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_DTLB |
-					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
-					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
-			runtime_dtlb_cache_stats[ctx][cpu].n != 0) {
-		print_dtlb_cache_misses(cpu, evsel, avg);
-	} else if (
-		evsel->attr.type == PERF_TYPE_HW_CACHE &&
-		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_ITLB |
-					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
-					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
-			runtime_itlb_cache_stats[ctx][cpu].n != 0) {
-		print_itlb_cache_misses(cpu, evsel, avg);
-	} else if (
-		evsel->attr.type == PERF_TYPE_HW_CACHE &&
-		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_LL |
-					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
-					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
-			runtime_ll_cache_stats[ctx][cpu].n != 0) {
-		print_ll_cache_misses(cpu, evsel, avg);
-	} else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) &&
-			runtime_cacherefs_stats[ctx][cpu].n != 0) {
-		total = avg_stats(&runtime_cacherefs_stats[ctx][cpu]);
-
-		if (total)
-			ratio = avg * 100 / total;
-
-		fprintf(output, " # %8.3f %% of all cache refs    ", ratio);
-
-	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
-		print_stalled_cycles_frontend(cpu, evsel, avg);
-	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
-		print_stalled_cycles_backend(cpu, evsel, avg);
-	} else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
-		total = avg_stats(&runtime_nsecs_stats[cpu]);
-
-		if (total) {
-			ratio = avg / total;
-			fprintf(output, " # %8.3f GHz                    ", ratio);
-		} else {
-			fprintf(output, "                                   ");
-		}
-	} else if (transaction_run &&
-		   perf_evsel__cmp(evsel, nth_evsel(T_CYCLES_IN_TX))) {
-		total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
-		if (total)
-			fprintf(output,
-				" #   %5.2f%% transactional cycles   ",
-				100.0 * (avg / total));
-	} else if (transaction_run &&
-		   perf_evsel__cmp(evsel, nth_evsel(T_CYCLES_IN_TX_CP))) {
-		total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
-		total2 = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
-		if (total2 < avg)
-			total2 = avg;
-		if (total)
-			fprintf(output,
-				" #   %5.2f%% aborted cycles         ",
-				100.0 * ((total2-avg) / total));
-	} else if (transaction_run &&
-		   perf_evsel__cmp(evsel, nth_evsel(T_TRANSACTION_START)) &&
-		   avg > 0 &&
-		   runtime_cycles_in_tx_stats[ctx][cpu].n != 0) {
-		total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
-
-		if (total)
-			ratio = total / avg;
-
-		fprintf(output, " # %8.0f cycles / transaction   ", ratio);
-	} else if (transaction_run &&
-		   perf_evsel__cmp(evsel, nth_evsel(T_ELISION_START)) &&
-		   avg > 0 &&
-		   runtime_cycles_in_tx_stats[ctx][cpu].n != 0) {
-		total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
-
-		if (total)
-			ratio = total / avg;
-
-		fprintf(output, " # %8.0f cycles / elision       ", ratio);
-	} else if (runtime_nsecs_stats[cpu].n != 0) {
-		char unit = 'M';
-
-		total = avg_stats(&runtime_nsecs_stats[cpu]);
-
-		if (total)
-			ratio = 1000.0 * avg / total;
-		if (ratio < 0.001) {
-			ratio *= 1000;
-			unit = 'K';
-		}
-
-		fprintf(output, " # %8.3f %c/sec                  ", ratio, unit);
-	} else {
-		fprintf(output, "                                   ");
-	}
+	perf_stat__print_shadow_stats(output, evsel, avg, cpu, aggr_mode);
 }
 
 static void print_aggr(char *prefix)
@@ -1576,17 +1109,6 @@ static int perf_stat_init_aggr_mode(void)
 	return 0;
 }
 
-static int setup_events(const char * const *attrs, unsigned len)
-{
-	unsigned i;
-
-	for (i = 0; i < len; i++) {
-		if (parse_events(evsel_list, attrs[i], NULL))
-			return -1;
-	}
-	return 0;
-}
-
 /*
  * Add default attributes, if there were no attributes specified or
  * if -d/--detailed, -d -d or -d -d -d is used:
@@ -1708,12 +1230,10 @@ static int add_default_attributes(void)
 		int err;
 		if (pmu_have_event("cpu", "cycles-ct") &&
 		    pmu_have_event("cpu", "el-start"))
-			err = setup_events(transaction_attrs,
-					ARRAY_SIZE(transaction_attrs));
+			err = parse_events(evsel_list, transaction_attrs, NULL);
 		else
-			err = setup_events(transaction_limited_attrs,
-				 ARRAY_SIZE(transaction_limited_attrs));
-		if (err < 0) {
+			err = parse_events(evsel_list, transaction_limited_attrs, NULL);
+		if (err) {
 			fprintf(stderr, "Cannot set up transaction events\n");
 			return -1;
 		}

+ 2 - 2
tools/perf/tests/dso-data.c

@@ -166,7 +166,7 @@ int test__dso_data(void)
 		free(buf);
 	}
 
-	dso__delete(dso);
+	dso__put(dso);
 	unlink(file);
 	return 0;
 }
@@ -226,7 +226,7 @@ static void dsos__delete(int cnt)
 		struct dso *dso = dsos[i];
 
 		unlink(dso->name);
-		dso__delete(dso);
+		dso__put(dso);
 	}
 
 	free(dsos);

+ 5 - 1
tools/perf/tests/hists_common.c

@@ -134,11 +134,15 @@ struct machine *setup_fake_machine(struct machines *machines)
 
 			sym = symbol__new(fsym->start, fsym->length,
 					  STB_GLOBAL, fsym->name);
-			if (sym == NULL)
+			if (sym == NULL) {
+				dso__put(dso);
 				goto out;
+			}
 
 			symbols__insert(&dso->symbols[MAP__FUNCTION], sym);
 		}
+
+		dso__put(dso);
 	}
 
 	return machine;

+ 1 - 0
tools/perf/util/Build

@@ -68,6 +68,7 @@ libperf-y += rblist.o
 libperf-y += intlist.o
 libperf-y += vdso.o
 libperf-y += stat.o
+libperf-y += stat-shadow.o
 libperf-y += record.o
 libperf-y += srcline.o
 libperf-y += data.o

+ 73 - 14
tools/perf/util/dso.c

@@ -889,8 +889,8 @@ struct dso *machine__findnew_kernel(struct machine *machine, const char *name,
  * Either one of the dso or name parameter must be non-NULL or the
  * function will not work.
  */
-static struct dso *dso__findlink_by_longname(struct rb_root *root,
-					     struct dso *dso, const char *name)
+static struct dso *__dso__findlink_by_longname(struct rb_root *root,
+					       struct dso *dso, const char *name)
 {
 	struct rb_node **p = &root->rb_node;
 	struct rb_node  *parent = NULL;
@@ -937,10 +937,10 @@ static struct dso *dso__findlink_by_longname(struct rb_root *root,
 	return NULL;
 }
 
-static inline struct dso *
-dso__find_by_longname(const struct rb_root *root, const char *name)
+static inline struct dso *__dso__find_by_longname(struct rb_root *root,
+						  const char *name)
 {
-	return dso__findlink_by_longname((struct rb_root *)root, NULL, name);
+	return __dso__findlink_by_longname(root, NULL, name);
 }
 
 void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated)
@@ -1049,6 +1049,7 @@ struct dso *dso__new(const char *name)
 		INIT_LIST_HEAD(&dso->node);
 		INIT_LIST_HEAD(&dso->data.open_entry);
 		pthread_mutex_init(&dso->lock, NULL);
+		atomic_set(&dso->refcnt, 1);
 	}
 
 	return dso;
@@ -1083,6 +1084,19 @@ void dso__delete(struct dso *dso)
 	free(dso);
 }
 
+struct dso *dso__get(struct dso *dso)
+{
+	if (dso)
+		atomic_inc(&dso->refcnt);
+	return dso;
+}
+
+void dso__put(struct dso *dso)
+{
+	if (dso && atomic_dec_and_test(&dso->refcnt))
+		dso__delete(dso);
+}
+
 void dso__set_build_id(struct dso *dso, void *build_id)
 {
 	memcpy(dso->build_id, build_id, sizeof(dso->build_id));
@@ -1149,14 +1163,41 @@ bool __dsos__read_build_ids(struct list_head *head, bool with_hits)
 	return have_build_id;
 }
 
-void dsos__add(struct dsos *dsos, struct dso *dso)
+void __dsos__add(struct dsos *dsos, struct dso *dso)
 {
 	list_add_tail(&dso->node, &dsos->head);
-	dso__findlink_by_longname(&dsos->root, dso, NULL);
+	__dso__findlink_by_longname(&dsos->root, dso, NULL);
+	/*
+	 * It is now in the linked list, grab a reference, then garbage collect
+	 * this when needing memory, by looking at LRU dso instances in the
+	 * list with atomic_read(&dso->refcnt) == 1, i.e. no references
+	 * anywhere besides the one for the list, do, under a lock for the
+	 * list: remove it from the list, then a dso__put(), that probably will
+	 * be the last and will then call dso__delete(), end of life.
+	 *
+	 * That, or at the end of the 'struct machine' lifetime, when all
+	 * 'struct dso' instances will be removed from the list, in
+	 * dsos__exit(), if they have no other reference from some other data
+	 * structure.
+	 *
+	 * E.g.: after processing a 'perf.data' file and storing references
+	 * to objects instantiated while processing events, we will have
+	 * references to the 'thread', 'map', 'dso' structs all from 'struct
+	 * hist_entry' instances, but we may not need anything not referenced,
+	 * so we might as well call machines__exit()/machines__delete() and
+	 * garbage collect it.
+	 */
+	dso__get(dso);
 }
 
-struct dso *dsos__find(const struct dsos *dsos, const char *name,
-		       bool cmp_short)
+void dsos__add(struct dsos *dsos, struct dso *dso)
+{
+	pthread_rwlock_wrlock(&dsos->lock);
+	__dsos__add(dsos, dso);
+	pthread_rwlock_unlock(&dsos->lock);
+}
+
+struct dso *__dsos__find(struct dsos *dsos, const char *name, bool cmp_short)
 {
 	struct dso *pos;
 
@@ -1166,15 +1207,24 @@ struct dso *dsos__find(const struct dsos *dsos, const char *name,
 				return pos;
 		return NULL;
 	}
-	return dso__find_by_longname(&dsos->root, name);
+	return __dso__find_by_longname(&dsos->root, name);
 }
 
-struct dso *dsos__addnew(struct dsos *dsos, const char *name)
+struct dso *dsos__find(struct dsos *dsos, const char *name, bool cmp_short)
+{
+	struct dso *dso;
+	pthread_rwlock_rdlock(&dsos->lock);
+	dso = __dsos__find(dsos, name, cmp_short);
+	pthread_rwlock_unlock(&dsos->lock);
+	return dso;
+}
+
+struct dso *__dsos__addnew(struct dsos *dsos, const char *name)
 {
 	struct dso *dso = dso__new(name);
 
 	if (dso != NULL) {
-		dsos__add(dsos, dso);
+		__dsos__add(dsos, dso);
 		dso__set_basename(dso);
 	}
 	return dso;
@@ -1182,9 +1232,18 @@ struct dso *dsos__addnew(struct dsos *dsos, const char *name)
 
 struct dso *__dsos__findnew(struct dsos *dsos, const char *name)
 {
-	struct dso *dso = dsos__find(dsos, name, false);
+	struct dso *dso = __dsos__find(dsos, name, false);
+
+	return dso ? dso : __dsos__addnew(dsos, name);
+}
 
-	return dso ? dso : dsos__addnew(dsos, name);
+struct dso *dsos__findnew(struct dsos *dsos, const char *name)
+{
+	struct dso *dso;
+	pthread_rwlock_wrlock(&dsos->lock);
+	dso = dso__get(__dsos__findnew(dsos, name));
+	pthread_rwlock_unlock(&dsos->lock);
+	return dso;
 }
 
 size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp,

+ 20 - 4
tools/perf/util/dso.h

@@ -1,9 +1,11 @@
 #ifndef __PERF_DSO
 #define __PERF_DSO
 
+#include <linux/atomic.h>
 #include <linux/types.h>
 #include <linux/rbtree.h>
 #include <stdbool.h>
+#include <pthread.h>
 #include <linux/types.h>
 #include <linux/bitops.h>
 #include "map.h"
@@ -124,6 +126,7 @@ struct dso_cache {
 struct dsos {
 	struct list_head head;
 	struct rb_root	 root;	/* rbtree root sorted by long name */
+	pthread_rwlock_t lock;
 };
 
 struct auxtrace_cache;
@@ -177,7 +180,7 @@ struct dso {
 		void	 *priv;
 		u64	 db_id;
 	};
-
+	atomic_t	 refcnt;
 	char		 name[0];
 };
 
@@ -204,6 +207,17 @@ void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated);
 
 int dso__name_len(const struct dso *dso);
 
+struct dso *dso__get(struct dso *dso);
+void dso__put(struct dso *dso);
+
+static inline void __dso__zput(struct dso **dso)
+{
+	dso__put(*dso);
+	*dso = NULL;
+}
+
+#define dso__zput(dso) __dso__zput(&dso)
+
 bool dso__loaded(const struct dso *dso, enum map_type type);
 
 bool dso__sorted_by_name(const struct dso *dso, enum map_type type);
@@ -297,11 +311,13 @@ struct map *dso__new_map(const char *name);
 struct dso *machine__findnew_kernel(struct machine *machine, const char *name,
 				    const char *short_name, int dso_type);
 
+void __dsos__add(struct dsos *dsos, struct dso *dso);
 void dsos__add(struct dsos *dsos, struct dso *dso);
-struct dso *dsos__addnew(struct dsos *dsos, const char *name);
-struct dso *dsos__find(const struct dsos *dsos, const char *name,
-		       bool cmp_short);
+struct dso *__dsos__addnew(struct dsos *dsos, const char *name);
+struct dso *__dsos__find(struct dsos *dsos, const char *name, bool cmp_short);
+struct dso *dsos__find(struct dsos *dsos, const char *name, bool cmp_short);
 struct dso *__dsos__findnew(struct dsos *dsos, const char *name);
+struct dso *dsos__findnew(struct dsos *dsos, const char *name);
 bool __dsos__read_build_ids(struct list_head *head, bool with_hits);
 
 size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp,
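The reference-counting API declared above follows the usual get/put pattern: dso__new() starts the object at refcount 1, lookups that hand a dso to an outside caller (dsos__findnew(), machine__findnew_module_dso(), machine__findnew_vdso()) return it with an extra reference taken, and the caller drops that reference with dso__put() -- or dso__zput() to also NULL the pointer, as map__exit() now does for map->dso. A minimal sketch of the intended usage, assuming an existing struct machine *machine (the helper function name is hypothetical):

	static int print_dso_name(struct machine *machine, const char *filename)
	{
		/* returns the dso with a reference already taken, or NULL */
		struct dso *dso = machine__findnew_dso(machine, filename);

		if (dso == NULL)
			return -1;

		printf("%s\n", dso->long_name);

		dso__put(dso);	/* drop our reference; frees the dso if it was the last one */
		return 0;
	}

The double-underscore variants (__dsos__find(), __dsos__add(), __dsos__findnew()) assume the caller already holds dsos->lock; the unprefixed wrappers take the rwlock themselves.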

+ 1 - 0
tools/perf/util/header.c

@@ -1277,6 +1277,7 @@ static int __event_process_build_id(struct build_id_event *bev,
 				  sbuild_id);
 		pr_debug("build id event received for %s: %s\n",
 			 dso->long_name, sbuild_id);
+		dso__put(dso);
 	}
 
 	err = 0;

+ 40 - 18
tools/perf/util/machine.c

@@ -20,6 +20,7 @@ static void dsos__init(struct dsos *dsos)
 {
 	INIT_LIST_HEAD(&dsos->head);
 	dsos->root = RB_ROOT;
+	pthread_rwlock_init(&dsos->lock, NULL);
 }
 
 int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
@@ -81,15 +82,25 @@ out_delete:
 	return NULL;
 }
 
-static void dsos__delete(struct dsos *dsos)
+static void dsos__purge(struct dsos *dsos)
 {
 	struct dso *pos, *n;
 
+	pthread_rwlock_wrlock(&dsos->lock);
+
 	list_for_each_entry_safe(pos, n, &dsos->head, node) {
 		RB_CLEAR_NODE(&pos->rb_node);
-		list_del(&pos->node);
-		dso__delete(pos);
+		list_del_init(&pos->node);
+		dso__put(pos);
 	}
+
+	pthread_rwlock_unlock(&dsos->lock);
+}
+
+static void dsos__exit(struct dsos *dsos)
+{
+	dsos__purge(dsos);
+	pthread_rwlock_destroy(&dsos->lock);
 }
 
 void machine__delete_threads(struct machine *machine)
@@ -110,7 +121,7 @@ void machine__delete_threads(struct machine *machine)
 void machine__exit(struct machine *machine)
 {
 	map_groups__exit(&machine->kmaps);
-	dsos__delete(&machine->dsos);
+	dsos__exit(&machine->dsos);
 	machine__exit_vdso(machine);
 	zfree(&machine->root_dir);
 	zfree(&machine->current_tid);
@@ -490,17 +501,19 @@ int machine__process_lost_samples_event(struct machine *machine __maybe_unused,
 	return 0;
 }
 
-static struct dso*
-machine__module_dso(struct machine *machine, struct kmod_path *m,
-		    const char *filename)
+static struct dso *machine__findnew_module_dso(struct machine *machine,
+					       struct kmod_path *m,
+					       const char *filename)
 {
 	struct dso *dso;
 
-	dso = dsos__find(&machine->dsos, m->name, true);
+	pthread_rwlock_wrlock(&machine->dsos.lock);
+
+	dso = __dsos__find(&machine->dsos, m->name, true);
 	if (!dso) {
-		dso = dsos__addnew(&machine->dsos, m->name);
+		dso = __dsos__addnew(&machine->dsos, m->name);
 		if (dso == NULL)
-			return NULL;
+			goto out_unlock;
 
 		if (machine__is_host(machine))
 			dso->symtab_type = DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE;
@@ -515,6 +528,9 @@ machine__module_dso(struct machine *machine, struct kmod_path *m,
 		dso__set_long_name(dso, strdup(filename), true);
 	}
 
+	dso__get(dso);
+out_unlock:
+	pthread_rwlock_unlock(&machine->dsos.lock);
 	return dso;
 }
 
@@ -534,8 +550,8 @@ int machine__process_itrace_start_event(struct machine *machine __maybe_unused,
 	return 0;
 }
 
-struct map *machine__new_module(struct machine *machine, u64 start,
-				const char *filename)
+struct map *machine__findnew_module_map(struct machine *machine, u64 start,
+					const char *filename)
 {
 	struct map *map = NULL;
 	struct dso *dso;
@@ -549,7 +565,7 @@ struct map *machine__new_module(struct machine *machine, u64 start,
 	if (map)
 		goto out;
 
-	dso = machine__module_dso(machine, &m, filename);
+	dso = machine__findnew_module_dso(machine, &m, filename);
 	if (dso == NULL)
 		goto out;
 
@@ -1017,7 +1033,7 @@ static int machine__create_module(void *arg, const char *name, u64 start)
 	struct machine *machine = arg;
 	struct map *map;
 
-	map = machine__new_module(machine, start, name);
+	map = machine__findnew_module_map(machine, start, name);
 	if (map == NULL)
 		return -1;
 
@@ -1140,8 +1156,8 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
 				strlen(kmmap_prefix) - 1) == 0;
 	if (event->mmap.filename[0] == '/' ||
 	    (!is_kernel_mmap && event->mmap.filename[0] == '[')) {
-		map = machine__new_module(machine, event->mmap.start,
-					  event->mmap.filename);
+		map = machine__findnew_module_map(machine, event->mmap.start,
+						  event->mmap.filename);
 		if (map == NULL)
 			goto out_problem;
 
@@ -1156,6 +1172,8 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
 		struct dso *kernel = NULL;
 		struct dso *dso;
 
+		pthread_rwlock_rdlock(&machine->dsos.lock);
+
 		list_for_each_entry(dso, &machine->dsos.head, node) {
 
 			/*
@@ -1184,14 +1202,18 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
 			break;
 		}
 
+		pthread_rwlock_unlock(&machine->dsos.lock);
+
 		if (kernel == NULL)
 			kernel = machine__findnew_dso(machine, kmmap_prefix);
 		if (kernel == NULL)
 			goto out_problem;
 
 		kernel->kernel = kernel_type;
-		if (__machine__create_kernel_maps(machine, kernel) < 0)
+		if (__machine__create_kernel_maps(machine, kernel) < 0) {
+			dso__put(kernel);
 			goto out_problem;
+		}
 
 		if (strstr(kernel->long_name, "vmlinux"))
 			dso__set_short_name(kernel, "[kernel.vmlinux]", false);
@@ -1948,5 +1970,5 @@ int machine__get_kernel_start(struct machine *machine)
 
 struct dso *machine__findnew_dso(struct machine *machine, const char *filename)
 {
-	return __dsos__findnew(&machine->dsos, filename);
+	return dsos__findnew(&machine->dsos, filename);
 }

+ 2 - 2
tools/perf/util/machine.h

@@ -189,8 +189,8 @@ struct symbol *machine__find_kernel_function_by_name(struct machine *machine,
 						 filter);
 }
 
-struct map *machine__new_module(struct machine *machine, u64 start,
-				const char *filename);
+struct map *machine__findnew_module_map(struct machine *machine, u64 start,
+					const char *filename);
 
 int machine__load_kallsyms(struct machine *machine, const char *filename,
 			   enum map_type type, symbol_filter_t filter);

+ 9 - 2
tools/perf/util/map.c

@@ -132,7 +132,7 @@ void map__init(struct map *map, enum map_type type,
 	map->end      = end;
 	map->pgoff    = pgoff;
 	map->reloc    = 0;
-	map->dso      = dso;
+	map->dso      = dso__get(dso);
 	map->map_ip   = map__map_ip;
 	map->unmap_ip = map__unmap_ip;
 	RB_CLEAR_NODE(&map->rb_node);
@@ -198,6 +198,7 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
 			if (type != MAP__FUNCTION)
 				dso__set_loaded(dso, map->type);
 		}
+		dso__put(dso);
 	}
 	return map;
 out_delete:
@@ -224,9 +225,15 @@ struct map *map__new2(u64 start, struct dso *dso, enum map_type type)
 	return map;
 }
 
-void map__delete(struct map *map)
+static void map__exit(struct map *map)
 {
 	BUG_ON(!RB_EMPTY_NODE(&map->rb_node));
+	dso__zput(map->dso);
+}
+
+void map__delete(struct map *map)
+{
+	map__exit(map);
 	free(map);
 }
 

+ 1 - 1
tools/perf/util/probe-event.c

@@ -168,7 +168,7 @@ static struct map *kernel_get_module_map(const char *module)
 
 	/* A file path -- this is an offline module */
 	if (module && strchr(module, '/'))
-		return machine__new_module(host_machine, 0, module);
+		return machine__findnew_module_map(host_machine, 0, module);
 
 	if (!module)
 		module = "kernel";

+ 1 - 1
tools/perf/util/probe-finder.c

@@ -130,7 +130,7 @@ struct debuginfo *debuginfo__new(const char *path)
 			continue;
 		dinfo = __debuginfo__new(buf);
 	}
-	dso__delete(dso);
+	dso__put(dso);
 
 out:
 	/* if failed to open all distro debuginfo, open given binary */

+ 434 - 0
tools/perf/util/stat-shadow.c

@@ -0,0 +1,434 @@
+#include <stdio.h>
+#include "evsel.h"
+#include "stat.h"
+#include "color.h"
+
+enum {
+	CTX_BIT_USER	= 1 << 0,
+	CTX_BIT_KERNEL	= 1 << 1,
+	CTX_BIT_HV	= 1 << 2,
+	CTX_BIT_HOST	= 1 << 3,
+	CTX_BIT_IDLE	= 1 << 4,
+	CTX_BIT_MAX	= 1 << 5,
+};
+
+#define NUM_CTX CTX_BIT_MAX
+
+static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
+static struct stats runtime_cycles_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_stalled_cycles_front_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_stalled_cycles_back_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_branches_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_cacherefs_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_l1_dcache_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_l1_icache_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_ll_cache_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_itlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_dtlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS];
+
+struct stats walltime_nsecs_stats;
+
+static int evsel_context(struct perf_evsel *evsel)
+{
+	int ctx = 0;
+
+	if (evsel->attr.exclude_kernel)
+		ctx |= CTX_BIT_KERNEL;
+	if (evsel->attr.exclude_user)
+		ctx |= CTX_BIT_USER;
+	if (evsel->attr.exclude_hv)
+		ctx |= CTX_BIT_HV;
+	if (evsel->attr.exclude_host)
+		ctx |= CTX_BIT_HOST;
+	if (evsel->attr.exclude_idle)
+		ctx |= CTX_BIT_IDLE;
+
+	return ctx;
+}
+
+void perf_stat__reset_shadow_stats(void)
+{
+	memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats));
+	memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats));
+	memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats));
+	memset(runtime_stalled_cycles_back_stats, 0, sizeof(runtime_stalled_cycles_back_stats));
+	memset(runtime_branches_stats, 0, sizeof(runtime_branches_stats));
+	memset(runtime_cacherefs_stats, 0, sizeof(runtime_cacherefs_stats));
+	memset(runtime_l1_dcache_stats, 0, sizeof(runtime_l1_dcache_stats));
+	memset(runtime_l1_icache_stats, 0, sizeof(runtime_l1_icache_stats));
+	memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats));
+	memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats));
+	memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats));
+	memset(runtime_cycles_in_tx_stats, 0,
+			sizeof(runtime_cycles_in_tx_stats));
+	memset(runtime_transaction_stats, 0,
+		sizeof(runtime_transaction_stats));
+	memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats));
+	memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
+}
+
+/*
+ * Update various tracking values we maintain to print
+ * more semantic information such as miss/hit ratios,
+ * instruction rates, etc:
+ */
+void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count,
+				    int cpu)
+{
+	int ctx = evsel_context(counter);
+
+	if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
+		update_stats(&runtime_nsecs_stats[cpu], count[0]);
+	else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
+		update_stats(&runtime_cycles_stats[ctx][cpu], count[0]);
+	else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
+		update_stats(&runtime_transaction_stats[ctx][cpu], count[0]);
+	else if (perf_stat_evsel__is(counter, TRANSACTION_START))
+		update_stats(&runtime_transaction_stats[ctx][cpu], count[0]);
+	else if (perf_stat_evsel__is(counter, ELISION_START))
+		update_stats(&runtime_elision_stats[ctx][cpu], count[0]);
+	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
+		update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count[0]);
+	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
+		update_stats(&runtime_stalled_cycles_back_stats[ctx][cpu], count[0]);
+	else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
+		update_stats(&runtime_branches_stats[ctx][cpu], count[0]);
+	else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
+		update_stats(&runtime_cacherefs_stats[ctx][cpu], count[0]);
+	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
+		update_stats(&runtime_l1_dcache_stats[ctx][cpu], count[0]);
+	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
+		update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]);
+	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
+		update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]);
+	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
+		update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]);
+	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
+		update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]);
+}
+
+/* used for get_ratio_color() */
+enum grc_type {
+	GRC_STALLED_CYCLES_FE,
+	GRC_STALLED_CYCLES_BE,
+	GRC_CACHE_MISSES,
+	GRC_MAX_NR
+};
+
+static const char *get_ratio_color(enum grc_type type, double ratio)
+{
+	static const double grc_table[GRC_MAX_NR][3] = {
+		[GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
+		[GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
+		[GRC_CACHE_MISSES] 	= { 20.0, 10.0, 5.0 },
+	};
+	const char *color = PERF_COLOR_NORMAL;
+
+	if (ratio > grc_table[type][0])
+		color = PERF_COLOR_RED;
+	else if (ratio > grc_table[type][1])
+		color = PERF_COLOR_MAGENTA;
+	else if (ratio > grc_table[type][2])
+		color = PERF_COLOR_YELLOW;
+
+	return color;
+}
+
+static void print_stalled_cycles_frontend(FILE *out, int cpu,
+					  struct perf_evsel *evsel
+					  __maybe_unused, double avg)
+{
+	double total, ratio = 0.0;
+	const char *color;
+	int ctx = evsel_context(evsel);
+
+	total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
+
+	if (total)
+		ratio = avg / total * 100.0;
+
+	color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);
+
+	fprintf(out, " #  ");
+	color_fprintf(out, color, "%6.2f%%", ratio);
+	fprintf(out, " frontend cycles idle   ");
+}
+
+static void print_stalled_cycles_backend(FILE *out, int cpu,
+					 struct perf_evsel *evsel
+					 __maybe_unused, double avg)
+{
+	double total, ratio = 0.0;
+	const char *color;
+	int ctx = evsel_context(evsel);
+
+	total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
+
+	if (total)
+		ratio = avg / total * 100.0;
+
+	color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);
+
+	fprintf(out, " #  ");
+	color_fprintf(out, color, "%6.2f%%", ratio);
+	fprintf(out, " backend  cycles idle   ");
+}
+
+static void print_branch_misses(FILE *out, int cpu,
+				struct perf_evsel *evsel __maybe_unused,
+				double avg)
+{
+	double total, ratio = 0.0;
+	const char *color;
+	int ctx = evsel_context(evsel);
+
+	total = avg_stats(&runtime_branches_stats[ctx][cpu]);
+
+	if (total)
+		ratio = avg / total * 100.0;
+
+	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
+
+	fprintf(out, " #  ");
+	color_fprintf(out, color, "%6.2f%%", ratio);
+	fprintf(out, " of all branches        ");
+}
+
+static void print_l1_dcache_misses(FILE *out, int cpu,
+				   struct perf_evsel *evsel __maybe_unused,
+				   double avg)
+{
+	double total, ratio = 0.0;
+	const char *color;
+	int ctx = evsel_context(evsel);
+
+	total = avg_stats(&runtime_l1_dcache_stats[ctx][cpu]);
+
+	if (total)
+		ratio = avg / total * 100.0;
+
+	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
+
+	fprintf(out, " #  ");
+	color_fprintf(out, color, "%6.2f%%", ratio);
+	fprintf(out, " of all L1-dcache hits  ");
+}
+
+static void print_l1_icache_misses(FILE *out, int cpu,
+				   struct perf_evsel *evsel __maybe_unused,
+				   double avg)
+{
+	double total, ratio = 0.0;
+	const char *color;
+	int ctx = evsel_context(evsel);
+
+	total = avg_stats(&runtime_l1_icache_stats[ctx][cpu]);
+
+	if (total)
+		ratio = avg / total * 100.0;
+
+	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
+
+	fprintf(out, " #  ");
+	color_fprintf(out, color, "%6.2f%%", ratio);
+	fprintf(out, " of all L1-icache hits  ");
+}
+
+static void print_dtlb_cache_misses(FILE *out, int cpu,
+				    struct perf_evsel *evsel __maybe_unused,
+				    double avg)
+{
+	double total, ratio = 0.0;
+	const char *color;
+	int ctx = evsel_context(evsel);
+
+	total = avg_stats(&runtime_dtlb_cache_stats[ctx][cpu]);
+
+	if (total)
+		ratio = avg / total * 100.0;
+
+	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
+
+	fprintf(out, " #  ");
+	color_fprintf(out, color, "%6.2f%%", ratio);
+	fprintf(out, " of all dTLB cache hits ");
+}
+
+static void print_itlb_cache_misses(FILE *out, int cpu,
+				    struct perf_evsel *evsel __maybe_unused,
+				    double avg)
+{
+	double total, ratio = 0.0;
+	const char *color;
+	int ctx = evsel_context(evsel);
+
+	total = avg_stats(&runtime_itlb_cache_stats[ctx][cpu]);
+
+	if (total)
+		ratio = avg / total * 100.0;
+
+	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
+
+	fprintf(out, " #  ");
+	color_fprintf(out, color, "%6.2f%%", ratio);
+	fprintf(out, " of all iTLB cache hits ");
+}
+
+static void print_ll_cache_misses(FILE *out, int cpu,
+				  struct perf_evsel *evsel __maybe_unused,
+				  double avg)
+{
+	double total, ratio = 0.0;
+	const char *color;
+	int ctx = evsel_context(evsel);
+
+	total = avg_stats(&runtime_ll_cache_stats[ctx][cpu]);
+
+	if (total)
+		ratio = avg / total * 100.0;
+
+	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
+
+	fprintf(out, " #  ");
+	color_fprintf(out, color, "%6.2f%%", ratio);
+	fprintf(out, " of all LL-cache hits   ");
+}
+
+void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel,
+				   double avg, int cpu, enum aggr_mode aggr)
+{
+	double total, ratio = 0.0, total2;
+	int ctx = evsel_context(evsel);
+
+	if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
+		total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
+		if (total) {
+			ratio = avg / total;
+			fprintf(out, " #   %5.2f  insns per cycle        ", ratio);
+		} else {
+			fprintf(out, "                                   ");
+		}
+		total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]);
+		total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu]));
+
+		if (total && avg) {
+			ratio = total / avg;
+			fprintf(out, "\n");
+			if (aggr == AGGR_NONE)
+				fprintf(out, "        ");
+			fprintf(out, "                                                  #   %5.2f  stalled cycles per insn", ratio);
+		}
+
+	} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) &&
+			runtime_branches_stats[ctx][cpu].n != 0) {
+		print_branch_misses(out, cpu, evsel, avg);
+	} else if (
+		evsel->attr.type == PERF_TYPE_HW_CACHE &&
+		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1D |
+					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
+					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
+			runtime_l1_dcache_stats[ctx][cpu].n != 0) {
+		print_l1_dcache_misses(out, cpu, evsel, avg);
+	} else if (
+		evsel->attr.type == PERF_TYPE_HW_CACHE &&
+		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1I |
+					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
+					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
+			runtime_l1_icache_stats[ctx][cpu].n != 0) {
+		print_l1_icache_misses(out, cpu, evsel, avg);
+	} else if (
+		evsel->attr.type == PERF_TYPE_HW_CACHE &&
+		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_DTLB |
+					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
+					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
+			runtime_dtlb_cache_stats[ctx][cpu].n != 0) {
+		print_dtlb_cache_misses(out, cpu, evsel, avg);
+	} else if (
+		evsel->attr.type == PERF_TYPE_HW_CACHE &&
+		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_ITLB |
+					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
+					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
+			runtime_itlb_cache_stats[ctx][cpu].n != 0) {
+		print_itlb_cache_misses(out, cpu, evsel, avg);
+	} else if (
+		evsel->attr.type == PERF_TYPE_HW_CACHE &&
+		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_LL |
+					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
+					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
+			runtime_ll_cache_stats[ctx][cpu].n != 0) {
+		print_ll_cache_misses(out, cpu, evsel, avg);
+	} else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) &&
+			runtime_cacherefs_stats[ctx][cpu].n != 0) {
+		total = avg_stats(&runtime_cacherefs_stats[ctx][cpu]);
+
+		if (total)
+			ratio = avg * 100 / total;
+
+		fprintf(out, " # %8.3f %% of all cache refs    ", ratio);
+
+	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
+		print_stalled_cycles_frontend(out, cpu, evsel, avg);
+	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
+		print_stalled_cycles_backend(out, cpu, evsel, avg);
+	} else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
+		total = avg_stats(&runtime_nsecs_stats[cpu]);
+
+		if (total) {
+			ratio = avg / total;
+			fprintf(out, " # %8.3f GHz                    ", ratio);
+		} else {
+			fprintf(out, "                                   ");
+		}
+	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
+		total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
+		if (total)
+			fprintf(out,
+				" #   %5.2f%% transactional cycles   ",
+				100.0 * (avg / total));
+	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
+		total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
+		total2 = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
+		if (total2 < avg)
+			total2 = avg;
+		if (total)
+			fprintf(out,
+				" #   %5.2f%% aborted cycles         ",
+				100.0 * ((total2-avg) / total));
+	} else if (perf_stat_evsel__is(evsel, TRANSACTION_START) &&
+		   avg > 0 &&
+		   runtime_cycles_in_tx_stats[ctx][cpu].n != 0) {
+		total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
+
+		if (total)
+			ratio = total / avg;
+
+		fprintf(out, " # %8.0f cycles / transaction   ", ratio);
+	} else if (perf_stat_evsel__is(evsel, ELISION_START) &&
+		   avg > 0 &&
+		   runtime_cycles_in_tx_stats[ctx][cpu].n != 0) {
+		total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
+
+		if (total)
+			ratio = total / avg;
+
+		fprintf(out, " # %8.0f cycles / elision       ", ratio);
+	} else if (runtime_nsecs_stats[cpu].n != 0) {
+		char unit = 'M';
+
+		total = avg_stats(&runtime_nsecs_stats[cpu]);
+
+		if (total)
+			ratio = 1000.0 * avg / total;
+		if (ratio < 0.001) {
+			ratio *= 1000;
+			unit = 'K';
+		}
+
+		fprintf(out, " # %8.3f %c/sec                  ", ratio, unit);
+	} else {
+		fprintf(out, "                                   ");
+	}
+}

+ 34 - 1
tools/perf/util/stat.c

@@ -1,6 +1,6 @@
 #include <math.h>
-
 #include "stat.h"
+#include "evsel.h"
 
 void update_stats(struct stats *stats, u64 val)
 {
@@ -61,3 +61,36 @@ double rel_stddev_stats(double stddev, double avg)
 
 	return pct;
 }
+
+bool __perf_evsel_stat__is(struct perf_evsel *evsel,
+			   enum perf_stat_evsel_id id)
+{
+	struct perf_stat *ps = evsel->priv;
+
+	return ps->id == id;
+}
+
+#define ID(id, name) [PERF_STAT_EVSEL_ID__##id] = #name
+static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = {
+	ID(NONE,		x),
+	ID(CYCLES_IN_TX,	cpu/cycles-t/),
+	ID(TRANSACTION_START,	cpu/tx-start/),
+	ID(ELISION_START,	cpu/el-start/),
+	ID(CYCLES_IN_TX_CP,	cpu/cycles-ct/),
+};
+#undef ID
+
+void perf_stat_evsel_id_init(struct perf_evsel *evsel)
+{
+	struct perf_stat *ps = evsel->priv;
+	int i;
+
+	/* ps->id is 0 hence PERF_STAT_EVSEL_ID__NONE by default */
+
+	for (i = 0; i < PERF_STAT_EVSEL_ID__MAX; i++) {
+		if (!strcmp(perf_evsel__name(evsel), id_str[i])) {
+			ps->id = i;
+			break;
+		}
+	}
+}

+ 40 - 0
tools/perf/util/stat.h

@@ -2,6 +2,7 @@
 #define __PERF_STATS_H
 
 #include <linux/types.h>
+#include <stdio.h>
 
 struct stats
 {
@@ -9,6 +10,27 @@ struct stats
 	u64 max, min;
 };
 
+enum perf_stat_evsel_id {
+	PERF_STAT_EVSEL_ID__NONE = 0,
+	PERF_STAT_EVSEL_ID__CYCLES_IN_TX,
+	PERF_STAT_EVSEL_ID__TRANSACTION_START,
+	PERF_STAT_EVSEL_ID__ELISION_START,
+	PERF_STAT_EVSEL_ID__CYCLES_IN_TX_CP,
+	PERF_STAT_EVSEL_ID__MAX,
+};
+
+struct perf_stat {
+	struct stats		res_stats[3];
+	enum perf_stat_evsel_id	id;
+};
+
+enum aggr_mode {
+	AGGR_NONE,
+	AGGR_GLOBAL,
+	AGGR_SOCKET,
+	AGGR_CORE,
+};
+
 void update_stats(struct stats *stats, u64 val);
 double avg_stats(struct stats *stats);
 double stddev_stats(struct stats *stats);
@@ -22,4 +44,22 @@ static inline void init_stats(struct stats *stats)
 	stats->min  = (u64) -1;
 	stats->max  = 0;
 }
+
+struct perf_evsel;
+bool __perf_evsel_stat__is(struct perf_evsel *evsel,
+			   enum perf_stat_evsel_id id);
+
+#define perf_stat_evsel__is(evsel, id) \
+	__perf_evsel_stat__is(evsel, PERF_STAT_EVSEL_ID__ ## id)
+
+void perf_stat_evsel_id_init(struct perf_evsel *evsel);
+
+extern struct stats walltime_nsecs_stats;
+
+void perf_stat__reset_shadow_stats(void);
+void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count,
+				    int cpu);
+void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel,
+				   double avg, int cpu, enum aggr_mode aggr);
+
 #endif
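The id is resolved by name: perf_stat_evsel_id_init() compares perf_evsel__name(evsel) against the id_str table in stat.c and caches the match in the evsel's private struct perf_stat, so the shadow-stats code can test counters with perf_stat_evsel__is() instead of the positional nth_evsel() lookups removed from builtin-stat.c. A rough sketch of the pattern, assuming evsel->priv already points at a zeroed struct perf_stat:

	/* during counter setup, once the event name is known: */
	perf_stat_evsel_id_init(evsel);	/* e.g. "cpu/tx-start/" -> PERF_STAT_EVSEL_ID__TRANSACTION_START */

	/* later, in the shadow-stats update path: */
	if (perf_stat_evsel__is(evsel, TRANSACTION_START))
		update_stats(&runtime_transaction_stats[ctx][cpu], count[0]);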

+ 1 - 1
tools/perf/util/symbol-elf.c

@@ -1016,7 +1016,7 @@ int dso__load_sym(struct dso *dso, struct map *map,
 				curr_map = map__new2(start, curr_dso,
 						     map->type);
 				if (curr_map == NULL) {
-					dso__delete(curr_dso);
+					dso__put(curr_dso);
 					goto out_elf_end;
 				}
 				if (adjust_kernel_syms) {

+ 2 - 2
tools/perf/util/symbol.c

@@ -786,7 +786,7 @@ static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta,
 
 			curr_map = map__new2(pos->start, ndso, map->type);
 			if (curr_map == NULL) {
-				dso__delete(ndso);
+				dso__put(ndso);
 				return -1;
 			}
 
@@ -1364,7 +1364,7 @@ static bool dso__is_compatible_symtab_type(struct dso *dso, bool kmod,
 	case DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP:
 		/*
 		 * kernel modules know their symtab type - it's set when
-		 * creating a module dso in machine__new_module().
+		 * creating a module dso in machine__findnew_module_map().
 		 */
 		return kmod && dso->symtab_type == type;
 

+ 30 - 24
tools/perf/util/vdso.c

@@ -120,14 +120,14 @@ void machine__exit_vdso(struct machine *machine)
 	zfree(&machine->vdso_info);
 }
 
-static struct dso *machine__addnew_vdso(struct machine *machine, const char *short_name,
-					const char *long_name)
+static struct dso *__machine__addnew_vdso(struct machine *machine, const char *short_name,
+					  const char *long_name)
 {
 	struct dso *dso;
 
 	dso = dso__new(short_name);
 	if (dso != NULL) {
-		dsos__add(&machine->dsos, dso);
+		__dsos__add(&machine->dsos, dso);
 		dso__set_long_name(dso, long_name, false);
 	}
 
@@ -230,27 +230,31 @@ static const char *vdso__get_compat_file(struct vdso_file *vdso_file)
 	return vdso_file->temp_file_name;
 }
 
-static struct dso *vdso__findnew_compat(struct machine *machine,
-					struct vdso_file *vdso_file)
+static struct dso *__machine__findnew_compat(struct machine *machine,
+					     struct vdso_file *vdso_file)
 {
 	const char *file_name;
 	struct dso *dso;
 
-	dso = dsos__find(&machine->dsos, vdso_file->dso_name, true);
+	pthread_rwlock_wrlock(&machine->dsos.lock);
+	dso = __dsos__find(&machine->dsos, vdso_file->dso_name, true);
 	if (dso)
-		return dso;
+		goto out_unlock;
 
 	file_name = vdso__get_compat_file(vdso_file);
 	if (!file_name)
-		return NULL;
+		goto out_unlock;
 
-	return machine__addnew_vdso(machine, vdso_file->dso_name, file_name);
+	dso = __machine__addnew_vdso(machine, vdso_file->dso_name, file_name);
+out_unlock:
+	pthread_rwlock_unlock(&machine->dsos.lock);
+	return dso;
 }
 
-static int machine__findnew_vdso_compat(struct machine *machine,
-				    struct thread *thread,
-				    struct vdso_info *vdso_info,
-				    struct dso **dso)
+static int __machine__findnew_vdso_compat(struct machine *machine,
+					  struct thread *thread,
+					  struct vdso_info *vdso_info,
+					  struct dso **dso)
 {
 	enum dso_type dso_type;
 
@@ -267,10 +271,10 @@ static int machine__findnew_vdso_compat(struct machine *machine,
 
 	switch (dso_type) {
 	case DSO__TYPE_32BIT:
-		*dso = vdso__findnew_compat(machine, &vdso_info->vdso32);
+		*dso = __machine__findnew_compat(machine, &vdso_info->vdso32);
 		return 1;
 	case DSO__TYPE_X32BIT:
-		*dso = vdso__findnew_compat(machine, &vdso_info->vdsox32);
+		*dso = __machine__findnew_compat(machine, &vdso_info->vdsox32);
 		return 1;
 	case DSO__TYPE_UNKNOWN:
 	case DSO__TYPE_64BIT:
@@ -285,31 +289,33 @@ struct dso *machine__findnew_vdso(struct machine *machine,
 				  struct thread *thread __maybe_unused)
 {
 	struct vdso_info *vdso_info;
-	struct dso *dso;
+	struct dso *dso = NULL;
 
+	pthread_rwlock_wrlock(&machine->dsos.lock);
 	if (!machine->vdso_info)
 		machine->vdso_info = vdso_info__new();
 
 	vdso_info = machine->vdso_info;
 	if (!vdso_info)
-		return NULL;
+		goto out_unlock;
 
 #if BITS_PER_LONG == 64
-	if (machine__findnew_vdso_compat(machine, thread, vdso_info, &dso))
-		return dso;
+	if (__machine__findnew_vdso_compat(machine, thread, vdso_info, &dso))
+		goto out_unlock;
 #endif
 
-	dso = dsos__find(&machine->dsos, DSO__NAME_VDSO, true);
+	dso = __dsos__find(&machine->dsos, DSO__NAME_VDSO, true);
 	if (!dso) {
 		char *file;
 
 		file = get_file(&vdso_info->vdso);
-		if (!file)
-			return NULL;
-
-		dso = machine__addnew_vdso(machine, DSO__NAME_VDSO, file);
+		if (file)
+			dso = __machine__addnew_vdso(machine, DSO__NAME_VDSO, file);
 	}
 
+out_unlock:
+	dso__get(dso);
+	pthread_rwlock_unlock(&machine->dsos.lock);
 	return dso;
 }