Browse Source

Merge tag 'perf-core-for-mingo-20161114' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

New features:

 - Allow querying and setting .perfconfig variables (Taeung Song)

 - Show branch information in callchains (predicted, TSX aborts, loop
   iteractions, etc) (Jin Yao)

Infrastructure changes:

 - Support kbuild's CFLAGS_REMOVE_ in tools/build (Jiri Olsa)

 - Plug building jvmti to the main perf Makefile (Jiri Olsa)

Documentation changes:

 - Update Intel PT documentation about context switch events (Arnaldo Carvalho de Melo)

 - Fix 'perf record --call-graph dwarf' help/config in builds not linking
   with a unwind library, mentioning that is a possible record option (Rabin Vincent)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Ingo Molnar 8 năm trước cách đây
mục cha
commit
6a6b12e212

+ 3 - 1
tools/build/Build.include

@@ -89,7 +89,9 @@ if_changed = $(if $(strip $(any-prereq) $(arg-check)),             \
 # - per target C flags
 # - per object C flags
 # - BUILD_STR macro to allow '-D"$(variable)"' constructs
-c_flags = -Wp,-MD,$(depfile),-MT,$@ $(CFLAGS) -D"BUILD_STR(s)=\#s" $(CFLAGS_$(basetarget).o) $(CFLAGS_$(obj))
+c_flags_1 = -Wp,-MD,$(depfile),-MT,$@ $(CFLAGS) -D"BUILD_STR(s)=\#s" $(CFLAGS_$(basetarget).o) $(CFLAGS_$(obj))
+c_flags_2 = $(filter-out $(CFLAGS_REMOVE_$(basetarget).o), $(c_flags_1))
+c_flags   = $(filter-out $(CFLAGS_REMOVE_$(obj)), $(c_flags_2))
 cxx_flags = -Wp,-MD,$(depfile),-MT,$@ $(CXXFLAGS) -D"BUILD_STR(s)=\#s" $(CXXFLAGS_$(basetarget).o) $(CXXFLAGS_$(obj))
 
 ###

+ 4 - 2
tools/build/Documentation/Build.txt

@@ -135,8 +135,10 @@ CFLAGS
 
 It's possible to alter the standard object C flags in the following way:
 
-  CFLAGS_perf.o += '...' - alters CFLAGS for perf.o object
-  CFLAGS_gtk += '...'    - alters CFLAGS for gtk build object
+  CFLAGS_perf.o        += '...'  - adds CFLAGS for perf.o object
+  CFLAGS_gtk           += '...'  - adds CFLAGS for gtk build object
+  CFLAGS_REMOVE_perf.o += '...'  - removes CFLAGS for perf.o object
+  CFLAGS_REMOVE_gtk    += '...'  - removes CFLAGS for gtk build object
 
 This C flags changes has the scope of the Build makefile they are defined in.
 

+ 5 - 1
tools/build/feature/Makefile

@@ -47,7 +47,8 @@ FILES=					\
 	test-bpf.bin			\
 	test-get_cpuid.bin		\
 	test-sdt.bin			\
-	test-cxx.bin
+	test-cxx.bin			\
+	test-jvmti.bin
 
 FILES := $(addprefix $(OUTPUT),$(FILES))
 
@@ -225,6 +226,9 @@ $(OUTPUT)test-sdt.bin:
 $(OUTPUT)test-cxx.bin:
 	$(BUILDXX) -std=gnu++11
 
+$(OUTPUT)test-jvmti.bin:
+	$(BUILD)
+
 -include $(OUTPUT)*.d
 
 ###############################

+ 13 - 0
tools/build/feature/test-jvmti.c

@@ -0,0 +1,13 @@
+#include <jvmti.h>
+#include <jvmticmlr.h>
+
+int main(void)
+{
+	JavaVM			jvm	__attribute__((unused));
+	jvmtiEventCallbacks	cb	__attribute__((unused));
+	jvmtiCapabilities	caps	__attribute__((unused));
+	jvmtiJlocationFormat	format	__attribute__((unused));
+	jvmtiEnv		jvmti	__attribute__((unused));
+
+	return 0;
+}

+ 17 - 2
tools/perf/Documentation/intel-pt.txt

@@ -550,6 +550,18 @@ Unless /proc/sys/kernel/perf_event_paranoid is set to -1, unprivileged users
 have memory limits imposed upon them.  That affects what buffer sizes they can
 have as outlined above.
 
+The v4.2 kernel introduced support for a context switch metadata event,
+PERF_RECORD_SWITCH, which allows unprivileged users to see when their processes
+are scheduled out and in, just not by whom, which is left for the
+PERF_RECORD_SWITCH_CPU_WIDE, that is only accessible in system wide context,
+which in turn requires CAP_SYS_ADMIN.
+
+Please see the 45ac1403f564 ("perf: Add PERF_RECORD_SWITCH to indicate context
+switches") commit, that introduces these metadata events for further info.
+
+When working with kernels < v4.2, the following considerations must be taken,
+as the sched:sched_switch tracepoints will be used to receive such information:
+
 Unless /proc/sys/kernel/perf_event_paranoid is set to -1, unprivileged users are
 not permitted to use tracepoints which means there is insufficient side-band
 information to decode Intel PT in per-cpu mode, and potentially workload-only
@@ -564,8 +576,11 @@ sched_switch tracepoint
 -----------------------
 
 The sched_switch tracepoint is used to provide side-band data for Intel PT
-decoding.  sched_switch events are automatically added. e.g. the second event
-shown below
+decoding in kernels where the PERF_RECORD_SWITCH metadata event isn't
+available.
+
+The sched_switch events are automatically added. e.g. the second event shown
+below:
 
 	$ perf record -vv -e intel_pt//u uname
 	------------------------------------------------------------

+ 35 - 0
tools/perf/Documentation/perf-config.txt

@@ -8,6 +8,8 @@ perf-config - Get and set variables in a configuration file.
 SYNOPSIS
 --------
 [verse]
+'perf config' [<file-option>] [section.name[=value] ...]
+or
 'perf config' [<file-option>] -l | --list
 
 DESCRIPTION
@@ -118,6 +120,39 @@ Given a $HOME/.perfconfig like this:
 		children = true
 		group = true
 
+You can hide source code of annotate feature setting the config to false with
+
+	% perf config annotate.hide_src_code=true
+
+If you want to add or modify several config items, you can do like
+
+	% perf config ui.show-headers=false kmem.default=slab
+
+To modify the sort order of report functionality in user config file(i.e. `~/.perfconfig`), do
+
+	% perf config --user report sort-order=srcline
+
+To change colors of selected line to other foreground and background colors
+in system config file (i.e. `$(sysconf)/perfconfig`), do
+
+	% perf config --system colors.selected=yellow,green
+
+To query the record mode of call graph, do
+
+	% perf config call-graph.record-mode
+
+If you want to know multiple config key/value pairs, you can do like
+
+	% perf config report.queue-size call-graph.order report.children
+
+To query the config value of sort order of call graph in user config file (i.e. `~/.perfconfig`), do
+
+	% perf config --user call-graph.sort-order
+
+To query the config value of buildid directory in system config file (i.e. `$(sysconf)/perfconfig`), do
+
+	% perf config --system buildid.dir
+
 Variables
 ~~~~~~~~~
 

+ 26 - 0
tools/perf/Makefile.config

@@ -758,6 +758,31 @@ ifndef NO_AUXTRACE
   endif
 endif
 
+ifndef NO_JVMTI
+  ifneq (,$(wildcard /usr/sbin/update-java-alternatives))
+    JDIR=$(shell /usr/sbin/update-java-alternatives -l | head -1 | awk '{print $$3}')
+  else
+    ifneq (,$(wildcard /usr/sbin/alternatives))
+      JDIR=$(shell alternatives --display java | tail -1 | cut -d' ' -f 5 | sed 's%/jre/bin/java.%%g')
+    endif
+  endif
+  ifndef JDIR
+    $(warning No alternatives command found, you need to set JDIR= to point to the root of your Java directory)
+    NO_JVMTI := 1
+  endif
+endif
+
+ifndef NO_JVMTI
+  FEATURE_CHECK_CFLAGS-jvmti := -I$(JDIR)/include -I$(JDIR)/include/linux
+  $(call feature_check,jvmti)
+  ifeq ($(feature-jvmti), 1)
+    $(call detected_var,JDIR)
+  else
+    $(warning No openjdk development package found, please install JDK package)
+    NO_JVMTI := 1
+  endif
+endif
+
 # Among the variables below, these:
 #   perfexecdir
 #   template_dir
@@ -850,6 +875,7 @@ ifeq ($(VF),1)
   $(call print_var,sysconfdir)
   $(call print_var,LIBUNWIND_DIR)
   $(call print_var,LIBDW_DIR)
+  $(call print_var,JDIR)
 
   ifeq ($(dwarf-post-unwind),1)
     $(call feature_print_text,"DWARF post unwind library", $(dwarf-post-unwind-text))

+ 23 - 1
tools/perf/Makefile.perf

@@ -86,6 +86,8 @@ include ../scripts/utilities.mak
 #
 # Define FEATURES_DUMP to provide features detection dump file
 # and bypass the feature detection
+#
+# Define NO_JVMTI if you do not want jvmti agent built
 
 # As per kernel Makefile, avoid funny character set dependencies
 unexport LC_ALL
@@ -283,6 +285,12 @@ ifndef NO_PERF_READ_VDSOX32
 PROGRAMS += $(OUTPUT)perf-read-vdsox32
 endif
 
+LIBJVMTI = libperf-jvmti.so
+
+ifndef NO_JVMTI
+PROGRAMS += $(OUTPUT)$(LIBJVMTI)
+endif
+
 # what 'all' will build and 'install' will install, in perfexecdir
 ALL_PROGRAMS = $(PROGRAMS) $(SCRIPTS)
 
@@ -551,6 +559,16 @@ $(OUTPUT)perf-read-vdsox32: perf-read-vdso.c util/find-vdso-map.c
 	$(QUIET_CC)$(CC) -mx32 $(filter -static,$(LDFLAGS)) -Wall -Werror -o $@ perf-read-vdso.c
 endif
 
+ifndef NO_JVMTI
+LIBJVMTI_IN := $(OUTPUT)jvmti/jvmti-in.o
+
+$(LIBJVMTI_IN): FORCE
+	$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=jvmti obj=jvmti
+
+$(OUTPUT)$(LIBJVMTI): $(LIBJVMTI_IN)
+	$(QUIET_LINK)$(CC) -shared -Wl,-soname -Wl,$(LIBJVMTI) -o $@ $< -lelf -lrt
+endif
+
 $(patsubst perf-%,%.o,$(PROGRAMS)): $(wildcard */*.h)
 
 LIBPERF_IN := $(OUTPUT)libperf-in.o
@@ -687,6 +705,10 @@ endif
 ifndef NO_PERF_READ_VDSOX32
 	$(call QUIET_INSTALL, perf-read-vdsox32) \
 		$(INSTALL) $(OUTPUT)perf-read-vdsox32 '$(DESTDIR_SQ)$(bindir_SQ)';
+endif
+ifndef NO_JVMTI
+	$(call QUIET_INSTALL, $(LIBJVMTI)) \
+		$(INSTALL) $(OUTPUT)$(LIBJVMTI) '$(DESTDIR_SQ)$(libdir_SQ)';
 endif
 	$(call QUIET_INSTALL, libexec) \
 		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
@@ -754,7 +776,7 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea
 	$(call QUIET_CLEAN, core-objs)  $(RM) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS)
 	$(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
 	$(Q)$(RM) $(OUTPUT).config-detected
-	$(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 $(OUTPUT)pmu-events/jevents
+	$(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 $(OUTPUT)pmu-events/jevents $(OUTPUT)$(LIBJVMTI).so
 	$(call QUIET_CLEAN, core-gen)   $(RM)  *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)FEATURE-DUMP $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* \
 		$(OUTPUT)util/intel-pt-decoder/inat-tables.c $(OUTPUT)fixdep \
 		$(OUTPUT)tests/llvm-src-{base,kbuild,prologue,relocation}.c \

+ 134 - 3
tools/perf/builtin-config.c

@@ -17,7 +17,7 @@
 static bool use_system_config, use_user_config;
 
 static const char * const config_usage[] = {
-	"perf config [<file-option>] [options]",
+	"perf config [<file-option>] [options] [section.name[=value] ...]",
 	NULL
 };
 
@@ -33,6 +33,73 @@ static struct option config_options[] = {
 	OPT_END()
 };
 
+static int set_config(struct perf_config_set *set, const char *file_name,
+		      const char *var, const char *value)
+{
+	struct perf_config_section *section = NULL;
+	struct perf_config_item *item = NULL;
+	const char *first_line = "# this file is auto-generated.";
+	FILE *fp;
+
+	if (set == NULL)
+		return -1;
+
+	fp = fopen(file_name, "w");
+	if (!fp)
+		return -1;
+
+	perf_config_set__collect(set, file_name, var, value);
+	fprintf(fp, "%s\n", first_line);
+
+	/* overwrite configvariables */
+	perf_config_items__for_each_entry(&set->sections, section) {
+		if (!use_system_config && section->from_system_config)
+			continue;
+		fprintf(fp, "[%s]\n", section->name);
+
+		perf_config_items__for_each_entry(&section->items, item) {
+			if (!use_system_config && section->from_system_config)
+				continue;
+			if (item->value)
+				fprintf(fp, "\t%s = %s\n",
+					item->name, item->value);
+		}
+	}
+	fclose(fp);
+
+	return 0;
+}
+
+static int show_spec_config(struct perf_config_set *set, const char *var)
+{
+	struct perf_config_section *section;
+	struct perf_config_item *item;
+
+	if (set == NULL)
+		return -1;
+
+	perf_config_items__for_each_entry(&set->sections, section) {
+		if (prefixcmp(var, section->name) != 0)
+			continue;
+
+		perf_config_items__for_each_entry(&section->items, item) {
+			const char *name = var + strlen(section->name) + 1;
+
+			if (strcmp(name, item->name) == 0) {
+				char *value = item->value;
+
+				if (value) {
+					printf("%s=%s\n", var, value);
+					return 0;
+				}
+			}
+
+		}
+	}
+
+	return 0;
+}
+
 static int show_config(struct perf_config_set *set)
 {
 	struct perf_config_section *section;
@@ -52,9 +119,44 @@ static int show_config(struct perf_config_set *set)
 	return 0;
 }
 
+static int parse_config_arg(char *arg, char **var, char **value)
+{
+	const char *last_dot = strchr(arg, '.');
+
+	/*
+	 * Since "var" actually contains the section name and the real
+	 * config variable name separated by a dot, we have to know where the dot is.
+	 */
+	if (last_dot == NULL || last_dot == arg) {
+		pr_err("The config variable does not contain a section name: %s\n", arg);
+		return -1;
+	}
+	if (!last_dot[1]) {
+		pr_err("The config variable does not contain a variable name: %s\n", arg);
+		return -1;
+	}
+
+	*value = strchr(arg, '=');
+	if (*value == NULL)
+		*var = arg;
+	else if (!strcmp(*value, "=")) {
+		pr_err("The config variable does not contain a value: %s\n", arg);
+		return -1;
+	} else {
+		*value = *value + 1; /* excluding a first character '=' */
+		*var = strsep(&arg, "=");
+		if (*var[0] == '\0') {
+			pr_err("invalid config variable: %s\n", arg);
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
 int cmd_config(int argc, const char **argv, const char *prefix __maybe_unused)
 {
-	int ret = 0;
+	int i, ret = 0;
 	struct perf_config_set *set;
 	char *user_config = mkpath("%s/.perfconfig", getenv("HOME"));
 
@@ -100,7 +202,36 @@ int cmd_config(int argc, const char **argv, const char *prefix __maybe_unused)
 		}
 		break;
 	default:
-		usage_with_options(config_usage, config_options);
+		if (argc) {
+			for (i = 0; argv[i]; i++) {
+				char *var, *value;
+				char *arg = strdup(argv[i]);
+
+				if (!arg) {
+					pr_err("%s: strdup failed\n", __func__);
+					ret = -1;
+					break;
+				}
+
+				if (parse_config_arg(arg, &var, &value) < 0) {
+					free(arg);
+					ret = -1;
+					break;
+				}
+
+				if (value == NULL)
+					ret = show_spec_config(set, var);
+				else {
+					const char *config_filename = config_exclusive_filename;
+
+					if (!config_exclusive_filename)
+						config_filename = user_config;
+					ret = set_config(set, config_filename, var, value);
+				}
+				free(arg);
+			}
+		} else
+			usage_with_options(config_usage, config_options);
 	}
 
 	perf_config_set__delete(set);

+ 3 - 0
tools/perf/builtin-report.c

@@ -911,6 +911,9 @@ repeat:
 	if (itrace_synth_opts.last_branch)
 		has_br_stack = true;
 
+	if (has_br_stack && branch_call_mode)
+		symbol_conf.show_branchflag_count = true;
+
 	/*
 	 * Branch mode is a tristate:
 	 * -1 means default, so decide based on the file having branch data.

+ 8 - 0
tools/perf/jvmti/Build

@@ -0,0 +1,8 @@
+jvmti-y += libjvmti.o
+jvmti-y += jvmti_agent.o
+
+CFLAGS_jvmti         = -fPIC -DPIC -I$(JDIR)/include -I$(JDIR)/include/linux
+CFLAGS_REMOVE_jvmti  = -Wmissing-declarations
+CFLAGS_REMOVE_jvmti += -Wstrict-prototypes
+CFLAGS_REMOVE_jvmti += -Wextra
+CFLAGS_REMOVE_jvmti += -Wwrite-strings

+ 0 - 89
tools/perf/jvmti/Makefile

@@ -1,89 +0,0 @@
-ARCH=$(shell uname -m)
-
-ifeq ($(ARCH), x86_64)
-JARCH=amd64
-endif
-ifeq ($(ARCH), armv7l)
-JARCH=armhf
-endif
-ifeq ($(ARCH), armv6l)
-JARCH=armhf
-endif
-ifeq ($(ARCH), aarch64)
-JARCH=aarch64
-endif
-ifeq ($(ARCH), ppc64)
-JARCH=powerpc
-endif
-ifeq ($(ARCH), ppc64le)
-JARCH=powerpc
-endif
-
-DESTDIR=/usr/local
-
-VERSION=1
-REVISION=0
-AGE=0
-
-LN=ln -sf
-RM=rm
-
-SLIBJVMTI=libjvmti.so.$(VERSION).$(REVISION).$(AGE)
-VLIBJVMTI=libjvmti.so.$(VERSION)
-SLDFLAGS=-shared -Wl,-soname -Wl,$(VLIBJVMTI)
-SOLIBEXT=so
-
-# The following works at least on fedora 23, you may need the next
-# line for other distros.
-ifneq (,$(wildcard /usr/sbin/update-java-alternatives))
-JDIR=$(shell /usr/sbin/update-java-alternatives -l | head -1 | awk '{print $$3}')
-else
-  ifneq (,$(wildcard /usr/sbin/alternatives))
-    JDIR=$(shell alternatives --display java | tail -1 | cut -d' ' -f 5 | sed 's%/jre/bin/java.%%g')
-  endif
-endif
-ifndef JDIR
-$(error Could not find alternatives command, you need to set JDIR= to point to the root of your Java directory)
-else
-  ifeq (,$(wildcard $(JDIR)/include/jvmti.h))
-  $(error the openjdk development package appears to me missing, install and try again)
-  endif
-endif
-$(info Using Java from $(JDIR))
-# -lrt required in 32-bit mode for clock_gettime()
-LIBS=-lelf -lrt
-INCDIR=-I $(JDIR)/include -I $(JDIR)/include/linux
-
-TARGETS=$(SLIBJVMTI)
-
-SRCS=libjvmti.c jvmti_agent.c
-OBJS=$(SRCS:.c=.o)
-SOBJS=$(OBJS:.o=.lo)
-OPT=-O2 -g -Werror -Wall
-
-CFLAGS=$(INCDIR) $(OPT)
-
-all: $(TARGETS)
-
-.c.o:
-	$(CC) $(CFLAGS) -c $*.c
-.c.lo:
-	$(CC) -fPIC -DPIC $(CFLAGS) -c $*.c -o $*.lo
-
-$(OBJS) $(SOBJS): Makefile jvmti_agent.h ../util/jitdump.h
-
-$(SLIBJVMTI):  $(SOBJS)
-	$(CC) $(CFLAGS) $(SLDFLAGS)  -o $@ $(SOBJS) $(LIBS)
-	$(LN) $@ libjvmti.$(SOLIBEXT)
-
-clean:
-	$(RM) -f *.o *.so.* *.so *.lo
-
-install:
-	-mkdir -p $(DESTDIR)/lib
-	install -m 755 $(SLIBJVMTI) $(DESTDIR)/lib/
-	(cd $(DESTDIR)/lib; $(LN) $(SLIBJVMTI) $(VLIBJVMTI))
-	(cd $(DESTDIR)/lib; $(LN) $(SLIBJVMTI) libjvmti.$(SOLIBEXT))
-	ldconfig
-
-.SUFFIXES: .c .S .o .lo

+ 1 - 1
tools/perf/tests/make

@@ -106,7 +106,7 @@ make_minimal        := NO_LIBPERL=1 NO_LIBPYTHON=1 NO_NEWT=1 NO_GTK2=1
 make_minimal        += NO_DEMANGLE=1 NO_LIBELF=1 NO_LIBUNWIND=1 NO_BACKTRACE=1
 make_minimal        += NO_LIBNUMA=1 NO_LIBAUDIT=1 NO_LIBBIONIC=1
 make_minimal        += NO_LIBDW_DWARF_UNWIND=1 NO_AUXTRACE=1 NO_LIBBPF=1
-make_minimal        += NO_LIBCRYPTO=1 NO_SDT=1
+make_minimal        += NO_LIBCRYPTO=1 NO_SDT=1 NO_JVMTI=1
 
 # $(run) contains all available tests
 run := make_pure

+ 18 - 2
tools/perf/ui/browsers/hists.c

@@ -738,6 +738,7 @@ static int hist_browser__show_callchain_list(struct hist_browser *browser,
 					     struct callchain_print_arg *arg)
 {
 	char bf[1024], *alloc_str;
+	char buf[64], *alloc_str2;
 	const char *str;
 
 	if (arg->row_offset != 0) {
@@ -746,12 +747,26 @@ static int hist_browser__show_callchain_list(struct hist_browser *browser,
 	}
 
 	alloc_str = NULL;
+	alloc_str2 = NULL;
+
 	str = callchain_list__sym_name(chain, bf, sizeof(bf),
 				       browser->show_dso);
 
-	if (need_percent) {
-		char buf[64];
+	if (symbol_conf.show_branchflag_count) {
+		if (need_percent)
+			callchain_list_counts__printf_value(node, chain, NULL,
+							    buf, sizeof(buf));
+		else
+			callchain_list_counts__printf_value(NULL, chain, NULL,
+							    buf, sizeof(buf));
+
+		if (asprintf(&alloc_str2, "%s%s", str, buf) < 0)
+			str = "Not enough memory!";
+		else
+			str = alloc_str2;
+	}
 
+	if (need_percent) {
 		callchain_node__scnprintf_value(node, buf, sizeof(buf),
 						total);
 
@@ -764,6 +779,7 @@ static int hist_browser__show_callchain_list(struct hist_browser *browser,
 	print(browser, chain, str, offset, row, arg);
 
 	free(alloc_str);
+	free(alloc_str2);
 	return 1;
 }
 

+ 31 - 4
tools/perf/ui/stdio/hist.c

@@ -41,7 +41,9 @@ static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_node *node,
 {
 	int i;
 	size_t ret = 0;
-	char bf[1024];
+	char bf[1024], *alloc_str = NULL;
+	char buf[64];
+	const char *str;
 
 	ret += callchain__fprintf_left_margin(fp, left_margin);
 	for (i = 0; i < depth; i++) {
@@ -56,8 +58,26 @@ static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_node *node,
 		} else
 			ret += fprintf(fp, "%s", "          ");
 	}
-	fputs(callchain_list__sym_name(chain, bf, sizeof(bf), false), fp);
+
+	str = callchain_list__sym_name(chain, bf, sizeof(bf), false);
+
+	if (symbol_conf.show_branchflag_count) {
+		if (!period)
+			callchain_list_counts__printf_value(node, chain, NULL,
+							    buf, sizeof(buf));
+		else
+			callchain_list_counts__printf_value(NULL, chain, NULL,
+							    buf, sizeof(buf));
+
+		if (asprintf(&alloc_str, "%s%s", str, buf) < 0)
+			str = "Not enough memory!";
+		else
+			str = alloc_str;
+	}
+
+	fputs(str, fp);
 	fputc('\n', fp);
+	free(alloc_str);
 	return ret;
 }
 
@@ -219,8 +239,15 @@ static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root,
 			} else
 				ret += callchain__fprintf_left_margin(fp, left_margin);
 
-			ret += fprintf(fp, "%s\n", callchain_list__sym_name(chain, bf, sizeof(bf),
-							false));
+			ret += fprintf(fp, "%s",
+				       callchain_list__sym_name(chain, bf,
+								sizeof(bf),
+								false));
+
+			if (symbol_conf.show_branchflag_count)
+				ret += callchain_list_counts__printf_value(
+						NULL, chain, fp, NULL, 0);
+			ret += fprintf(fp, "\n");
 
 			if (++entries_printed == callchain_param.print_limit)
 				break;

+ 200 - 5
tools/perf/util/callchain.c

@@ -193,7 +193,6 @@ int perf_callchain_config(const char *var, const char *value)
 
 	if (!strcmp(var, "record-mode"))
 		return parse_callchain_record_opt(value, &callchain_param);
-#ifdef HAVE_DWARF_UNWIND_SUPPORT
 	if (!strcmp(var, "dump-size")) {
 		unsigned long size = 0;
 		int ret;
@@ -203,7 +202,6 @@ int perf_callchain_config(const char *var, const char *value)
 
 		return ret;
 	}
-#endif
 	if (!strcmp(var, "print-type"))
 		return parse_callchain_mode(value);
 	if (!strcmp(var, "order"))
@@ -440,6 +438,21 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor)
 		call->ip = cursor_node->ip;
 		call->ms.sym = cursor_node->sym;
 		call->ms.map = cursor_node->map;
+
+		if (cursor_node->branch) {
+			call->branch_count = 1;
+
+			if (cursor_node->branch_flags.predicted)
+				call->predicted_count = 1;
+
+			if (cursor_node->branch_flags.abort)
+				call->abort_count = 1;
+
+			call->cycles_count = cursor_node->branch_flags.cycles;
+			call->iter_count = cursor_node->nr_loop_iter;
+			call->samples_count = cursor_node->samples;
+		}
+
 		list_add_tail(&call->list, &node->val);
 
 		callchain_cursor_advance(cursor);
@@ -499,8 +512,23 @@ static enum match_result match_chain(struct callchain_cursor_node *node,
 		right = node->ip;
 	}
 
-	if (left == right)
+	if (left == right) {
+		if (node->branch) {
+			cnode->branch_count++;
+
+			if (node->branch_flags.predicted)
+				cnode->predicted_count++;
+
+			if (node->branch_flags.abort)
+				cnode->abort_count++;
+
+			cnode->cycles_count += node->branch_flags.cycles;
+			cnode->iter_count += node->nr_loop_iter;
+			cnode->samples_count += node->samples;
+		}
+
 		return MATCH_EQ;
+	}
 
 	return left > right ? MATCH_GT : MATCH_LT;
 }
@@ -730,7 +758,8 @@ merge_chain_branch(struct callchain_cursor *cursor,
 
 	list_for_each_entry_safe(list, next_list, &src->val, list) {
 		callchain_cursor_append(cursor, list->ip,
-					list->ms.map, list->ms.sym);
+					list->ms.map, list->ms.sym,
+					false, NULL, 0, 0);
 		list_del(&list->list);
 		free(list);
 	}
@@ -767,7 +796,9 @@ int callchain_merge(struct callchain_cursor *cursor,
 }
 
 int callchain_cursor_append(struct callchain_cursor *cursor,
-			    u64 ip, struct map *map, struct symbol *sym)
+			    u64 ip, struct map *map, struct symbol *sym,
+			    bool branch, struct branch_flags *flags,
+			    int nr_loop_iter, int samples)
 {
 	struct callchain_cursor_node *node = *cursor->last;
 
@@ -782,6 +813,13 @@ int callchain_cursor_append(struct callchain_cursor *cursor,
 	node->ip = ip;
 	node->map = map;
 	node->sym = sym;
+	node->branch = branch;
+	node->nr_loop_iter = nr_loop_iter;
+	node->samples = samples;
+
+	if (flags)
+		memcpy(&node->branch_flags, flags,
+			sizeof(struct branch_flags));
 
 	cursor->nr++;
 
@@ -939,6 +977,163 @@ int callchain_node__fprintf_value(struct callchain_node *node,
 	return 0;
 }
 
+static void callchain_counts_value(struct callchain_node *node,
+				   u64 *branch_count, u64 *predicted_count,
+				   u64 *abort_count, u64 *cycles_count)
+{
+	struct callchain_list *clist;
+
+	list_for_each_entry(clist, &node->val, list) {
+		if (branch_count)
+			*branch_count += clist->branch_count;
+
+		if (predicted_count)
+			*predicted_count += clist->predicted_count;
+
+		if (abort_count)
+			*abort_count += clist->abort_count;
+
+		if (cycles_count)
+			*cycles_count += clist->cycles_count;
+	}
+}
+
+static int callchain_node_branch_counts_cumul(struct callchain_node *node,
+					      u64 *branch_count,
+					      u64 *predicted_count,
+					      u64 *abort_count,
+					      u64 *cycles_count)
+{
+	struct callchain_node *child;
+	struct rb_node *n;
+
+	n = rb_first(&node->rb_root_in);
+	while (n) {
+		child = rb_entry(n, struct callchain_node, rb_node_in);
+		n = rb_next(n);
+
+		callchain_node_branch_counts_cumul(child, branch_count,
+						   predicted_count,
+						   abort_count,
+						   cycles_count);
+
+		callchain_counts_value(child, branch_count,
+				       predicted_count, abort_count,
+				       cycles_count);
+	}
+
+	return 0;
+}
+
+int callchain_branch_counts(struct callchain_root *root,
+			    u64 *branch_count, u64 *predicted_count,
+			    u64 *abort_count, u64 *cycles_count)
+{
+	if (branch_count)
+		*branch_count = 0;
+
+	if (predicted_count)
+		*predicted_count = 0;
+
+	if (abort_count)
+		*abort_count = 0;
+
+	if (cycles_count)
+		*cycles_count = 0;
+
+	return callchain_node_branch_counts_cumul(&root->node,
+						  branch_count,
+						  predicted_count,
+						  abort_count,
+						  cycles_count);
+}
+
+static int callchain_counts_printf(FILE *fp, char *bf, int bfsize,
+				   u64 branch_count, u64 predicted_count,
+				   u64 abort_count, u64 cycles_count,
+				   u64 iter_count, u64 samples_count)
+{
+	double predicted_percent = 0.0;
+	const char *null_str = "";
+	char iter_str[32];
+	char *str;
+	u64 cycles = 0;
+
+	if (branch_count == 0) {
+		if (fp)
+			return fprintf(fp, " (calltrace)");
+
+		return scnprintf(bf, bfsize, " (calltrace)");
+	}
+
+	if (iter_count && samples_count) {
+		scnprintf(iter_str, sizeof(iter_str),
+			 ", iterations:%" PRId64 "",
+			 iter_count / samples_count);
+		str = iter_str;
+	} else
+		str = (char *)null_str;
+
+	predicted_percent = predicted_count * 100.0 / branch_count;
+	cycles = cycles_count / branch_count;
+
+	if ((predicted_percent >= 100.0) && (abort_count == 0)) {
+		if (fp)
+			return fprintf(fp, " (cycles:%" PRId64 "%s)",
+				       cycles, str);
+
+		return scnprintf(bf, bfsize, " (cycles:%" PRId64 "%s)",
+				 cycles, str);
+	}
+
+	if ((predicted_percent < 100.0) && (abort_count == 0)) {
+		if (fp)
+			return fprintf(fp,
+				" (predicted:%.1f%%, cycles:%" PRId64 "%s)",
+				predicted_percent, cycles, str);
+
+		return scnprintf(bf, bfsize,
+			" (predicted:%.1f%%, cycles:%" PRId64 "%s)",
+			predicted_percent, cycles, str);
+	}
+
+	if (fp)
+		return fprintf(fp,
+		" (predicted:%.1f%%, abort:%" PRId64 ", cycles:%" PRId64 "%s)",
+			predicted_percent, abort_count, cycles, str);
+
+	return scnprintf(bf, bfsize,
+		" (predicted:%.1f%%, abort:%" PRId64 ", cycles:%" PRId64 "%s)",
+		predicted_percent, abort_count, cycles, str);
+}
+
+int callchain_list_counts__printf_value(struct callchain_node *node,
+					struct callchain_list *clist,
+					FILE *fp, char *bf, int bfsize)
+{
+	u64 branch_count, predicted_count;
+	u64 abort_count, cycles_count;
+	u64 iter_count = 0, samples_count = 0;
+
+	branch_count = clist->branch_count;
+	predicted_count = clist->predicted_count;
+	abort_count = clist->abort_count;
+	cycles_count = clist->cycles_count;
+
+	if (node) {
+		struct callchain_list *call;
+
+		list_for_each_entry(call, &node->val, list) {
+			iter_count += call->iter_count;
+			samples_count += call->samples_count;
+		}
+	}
+
+	return callchain_counts_printf(fp, bf, bfsize, branch_count,
+				       predicted_count, abort_count,
+				       cycles_count, iter_count, samples_count);
+}
+
 static void free_callchain_node(struct callchain_node *node)
 {
 	struct callchain_list *list, *tmp;

+ 21 - 5
tools/perf/util/callchain.h

@@ -11,11 +11,7 @@
 
 #define CALLCHAIN_HELP "setup and enables call-graph (stack chain/backtrace):\n\n"
 
-#ifdef HAVE_DWARF_UNWIND_SUPPORT
 # define RECORD_MODE_HELP  HELP_PAD "record_mode:\tcall graph recording mode (fp|dwarf|lbr)\n"
-#else
-# define RECORD_MODE_HELP  HELP_PAD "record_mode:\tcall graph recording mode (fp|lbr)\n"
-#endif
 
 #define RECORD_SIZE_HELP						\
 	HELP_PAD "record_size:\tif record_mode is 'dwarf', max size of stack recording (<bytes>)\n" \
@@ -115,6 +111,12 @@ struct callchain_list {
 		bool		unfolded;
 		bool		has_children;
 	};
+	u64			branch_count;
+	u64			predicted_count;
+	u64			abort_count;
+	u64			cycles_count;
+	u64			iter_count;
+	u64			samples_count;
 	char		       *srcline;
 	struct list_head	list;
 };
@@ -129,6 +131,10 @@ struct callchain_cursor_node {
 	u64				ip;
 	struct map			*map;
 	struct symbol			*sym;
+	bool				branch;
+	struct branch_flags		branch_flags;
+	int				nr_loop_iter;
+	int				samples;
 	struct callchain_cursor_node	*next;
 };
 
@@ -183,7 +189,9 @@ static inline void callchain_cursor_reset(struct callchain_cursor *cursor)
 }
 
 int callchain_cursor_append(struct callchain_cursor *cursor, u64 ip,
-			    struct map *map, struct symbol *sym);
+			    struct map *map, struct symbol *sym,
+			    bool branch, struct branch_flags *flags,
+			    int nr_loop_iter, int samples);
 
 /* Close a cursor writing session. Initialize for the reader */
 static inline void callchain_cursor_commit(struct callchain_cursor *cursor)
@@ -261,8 +269,16 @@ char *callchain_node__scnprintf_value(struct callchain_node *node,
 int callchain_node__fprintf_value(struct callchain_node *node,
 				  FILE *fp, u64 total);
 
+int callchain_list_counts__printf_value(struct callchain_node *node,
+					struct callchain_list *clist,
+					FILE *fp, char *bf, int bfsize);
+
 void free_callchain(struct callchain_root *root);
 void decay_callchain(struct callchain_root *root);
 int callchain_node__make_parent_list(struct callchain_node *node);
 
+int callchain_branch_counts(struct callchain_root *root,
+			    u64 *branch_count, u64 *predicted_count,
+			    u64 *abort_count, u64 *cycles_count);
+
 #endif	/* __PERF_CALLCHAIN_H */

+ 20 - 0
tools/perf/util/config.c

@@ -594,6 +594,19 @@ static int collect_config(const char *var, const char *value,
 			goto out_free;
 	}
 
+	/* perf_config_set can contain both user and system config items.
+	 * So we should know where each value is from.
+	 * The classification would be needed when a particular config file
+	 * is overwrited by setting feature i.e. set_config().
+	 */
+	if (strcmp(config_file_name, perf_etc_perfconfig()) == 0) {
+		section->from_system_config = true;
+		item->from_system_config = true;
+	} else {
+		section->from_system_config = false;
+		item->from_system_config = false;
+	}
+
 	ret = set_value(item, value);
 	return ret;
 
@@ -602,6 +615,13 @@ out_free:
 	return -1;
 }
 
+int perf_config_set__collect(struct perf_config_set *set, const char *file_name,
+			     const char *var, const char *value)
+{
+	config_file_name = file_name;
+	return collect_config(var, value, set);
+}
+
 static int perf_config_set__init(struct perf_config_set *set)
 {
 	int ret = -1;

+ 4 - 0
tools/perf/util/config.h

@@ -7,12 +7,14 @@
 struct perf_config_item {
 	char *name;
 	char *value;
+	bool from_system_config;
 	struct list_head node;
 };
 
 struct perf_config_section {
 	char *name;
 	struct list_head items;
+	bool from_system_config;
 	struct list_head node;
 };
 
@@ -33,6 +35,8 @@ const char *perf_etc_perfconfig(void);
 
 struct perf_config_set *perf_config_set__new(void);
 void perf_config_set__delete(struct perf_config_set *set);
+int perf_config_set__collect(struct perf_config_set *set, const char *file_name,
+			     const char *var, const char *value);
 void perf_config__init(void);
 void perf_config__exit(void);
 void perf_config__refresh(void);

+ 67 - 15
tools/perf/util/machine.c

@@ -1616,7 +1616,11 @@ static int add_callchain_ip(struct thread *thread,
 			    struct symbol **parent,
 			    struct addr_location *root_al,
 			    u8 *cpumode,
-			    u64 ip)
+			    u64 ip,
+			    bool branch,
+			    struct branch_flags *flags,
+			    int nr_loop_iter,
+			    int samples)
 {
 	struct addr_location al;
 
@@ -1668,7 +1672,8 @@ static int add_callchain_ip(struct thread *thread,
 
 	if (symbol_conf.hide_unresolved && al.sym == NULL)
 		return 0;
-	return callchain_cursor_append(cursor, al.addr, al.map, al.sym);
+	return callchain_cursor_append(cursor, al.addr, al.map, al.sym,
+				       branch, flags, nr_loop_iter, samples);
 }
 
 struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
@@ -1757,7 +1762,9 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
 	/* LBR only affects the user callchain */
 	if (i != chain_nr) {
 		struct branch_stack *lbr_stack = sample->branch_stack;
-		int lbr_nr = lbr_stack->nr, j;
+		int lbr_nr = lbr_stack->nr, j, k;
+		bool branch;
+		struct branch_flags *flags;
 		/*
 		 * LBR callstack can only get user call chain.
 		 * The mix_chain_nr is kernel call chain
@@ -1772,23 +1779,41 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
 
 		for (j = 0; j < mix_chain_nr; j++) {
 			int err;
+			branch = false;
+			flags = NULL;
+
 			if (callchain_param.order == ORDER_CALLEE) {
 				if (j < i + 1)
 					ip = chain->ips[j];
-				else if (j > i + 1)
-					ip = lbr_stack->entries[j - i - 2].from;
-				else
+				else if (j > i + 1) {
+					k = j - i - 2;
+					ip = lbr_stack->entries[k].from;
+					branch = true;
+					flags = &lbr_stack->entries[k].flags;
+				} else {
 					ip = lbr_stack->entries[0].to;
+					branch = true;
+					flags = &lbr_stack->entries[0].flags;
+				}
 			} else {
-				if (j < lbr_nr)
-					ip = lbr_stack->entries[lbr_nr - j - 1].from;
+				if (j < lbr_nr) {
+					k = lbr_nr - j - 1;
+					ip = lbr_stack->entries[k].from;
+					branch = true;
+					flags = &lbr_stack->entries[k].flags;
+				}
 				else if (j > lbr_nr)
 					ip = chain->ips[i + 1 - (j - lbr_nr)];
-				else
+				else {
 					ip = lbr_stack->entries[0].to;
+					branch = true;
+					flags = &lbr_stack->entries[0].flags;
+				}
 			}
 
-			err = add_callchain_ip(thread, cursor, parent, root_al, &cpumode, ip);
+			err = add_callchain_ip(thread, cursor, parent,
+					       root_al, &cpumode, ip,
+					       branch, flags, 0, 0);
 			if (err)
 				return (err < 0) ? err : 0;
 		}
@@ -1813,6 +1838,7 @@ static int thread__resolve_callchain_sample(struct thread *thread,
 	int i, j, err, nr_entries;
 	int skip_idx = -1;
 	int first_call = 0;
+	int nr_loop_iter;
 
 	if (perf_evsel__has_branch_callstack(evsel)) {
 		err = resolve_lbr_callchain_sample(thread, cursor, sample, parent,
@@ -1868,14 +1894,37 @@ static int thread__resolve_callchain_sample(struct thread *thread,
 				be[i] = branch->entries[branch->nr - i - 1];
 		}
 
+		nr_loop_iter = nr;
 		nr = remove_loops(be, nr);
 
+		/*
+		 * Get the number of iterations.
+		 * It's only approximation, but good enough in practice.
+		 */
+		if (nr_loop_iter > nr)
+			nr_loop_iter = nr_loop_iter - nr + 1;
+		else
+			nr_loop_iter = 0;
+
 		for (i = 0; i < nr; i++) {
-			err = add_callchain_ip(thread, cursor, parent, root_al,
-					       NULL, be[i].to);
+			if (i == nr - 1)
+				err = add_callchain_ip(thread, cursor, parent,
+						       root_al,
+						       NULL, be[i].to,
+						       true, &be[i].flags,
+						       nr_loop_iter, 1);
+			else
+				err = add_callchain_ip(thread, cursor, parent,
+						       root_al,
+						       NULL, be[i].to,
+						       true, &be[i].flags,
+						       0, 0);
+
 			if (!err)
 				err = add_callchain_ip(thread, cursor, parent, root_al,
-						       NULL, be[i].from);
+						       NULL, be[i].from,
+						       true, &be[i].flags,
+						       0, 0);
 			if (err == -EINVAL)
 				break;
 			if (err)
@@ -1903,7 +1952,9 @@ check_calls:
 		if (ip < PERF_CONTEXT_MAX)
                        ++nr_entries;
 
-		err = add_callchain_ip(thread, cursor, parent, root_al, &cpumode, ip);
+		err = add_callchain_ip(thread, cursor, parent,
+				       root_al, &cpumode, ip,
+				       false, NULL, 0, 0);
 
 		if (err)
 			return (err < 0) ? err : 0;
@@ -1919,7 +1970,8 @@ static int unwind_entry(struct unwind_entry *entry, void *arg)
 	if (symbol_conf.hide_unresolved && entry->sym == NULL)
 		return 0;
 	return callchain_cursor_append(cursor, entry->ip,
-				       entry->map, entry->sym);
+				       entry->map, entry->sym,
+				       false, NULL, 0, 0);
 }
 
 static int thread__resolve_callchain_unwind(struct thread *thread,

+ 1 - 0
tools/perf/util/symbol.h

@@ -100,6 +100,7 @@ struct symbol_conf {
 			show_total_period,
 			use_callchain,
 			cumulate_callchain,
+			show_branchflag_count,
 			exclude_other,
 			show_cpu_utilization,
 			initialized,