Эх сурвалжийг харах

Merge tag 'perf-core-for-mingo-20161114' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

New features:

 - Allow querying and setting .perfconfig variables (Taeung Song)

 - Show branch information in callchains (predicted, TSX aborts, loop
   iteractions, etc) (Jin Yao)

Infrastructure changes:

 - Support kbuild's CFLAGS_REMOVE_ in tools/build (Jiri Olsa)

 - Plug building jvmti to the main perf Makefile (Jiri Olsa)

Documentation changes:

 - Update Intel PT documentation about context switch events (Arnaldo Carvalho de Melo)

 - Fix 'perf record --call-graph dwarf' help/config in builds not linking
   with a unwind library, mentioning that is a possible record option (Rabin Vincent)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Ingo Molnar 8 жил өмнө
parent
commit
6a6b12e212

+ 3 - 1
tools/build/Build.include

@@ -89,7 +89,9 @@ if_changed = $(if $(strip $(any-prereq) $(arg-check)),             \
 # - per target C flags
 # - per target C flags
 # - per object C flags
 # - per object C flags
 # - BUILD_STR macro to allow '-D"$(variable)"' constructs
 # - BUILD_STR macro to allow '-D"$(variable)"' constructs
-c_flags = -Wp,-MD,$(depfile),-MT,$@ $(CFLAGS) -D"BUILD_STR(s)=\#s" $(CFLAGS_$(basetarget).o) $(CFLAGS_$(obj))
+c_flags_1 = -Wp,-MD,$(depfile),-MT,$@ $(CFLAGS) -D"BUILD_STR(s)=\#s" $(CFLAGS_$(basetarget).o) $(CFLAGS_$(obj))
+c_flags_2 = $(filter-out $(CFLAGS_REMOVE_$(basetarget).o), $(c_flags_1))
+c_flags   = $(filter-out $(CFLAGS_REMOVE_$(obj)), $(c_flags_2))
 cxx_flags = -Wp,-MD,$(depfile),-MT,$@ $(CXXFLAGS) -D"BUILD_STR(s)=\#s" $(CXXFLAGS_$(basetarget).o) $(CXXFLAGS_$(obj))
 cxx_flags = -Wp,-MD,$(depfile),-MT,$@ $(CXXFLAGS) -D"BUILD_STR(s)=\#s" $(CXXFLAGS_$(basetarget).o) $(CXXFLAGS_$(obj))
 
 
 ###
 ###

+ 4 - 2
tools/build/Documentation/Build.txt

@@ -135,8 +135,10 @@ CFLAGS
 
 
 It's possible to alter the standard object C flags in the following way:
 It's possible to alter the standard object C flags in the following way:
 
 
-  CFLAGS_perf.o += '...' - alters CFLAGS for perf.o object
-  CFLAGS_gtk += '...'    - alters CFLAGS for gtk build object
+  CFLAGS_perf.o        += '...'  - adds CFLAGS for perf.o object
+  CFLAGS_gtk           += '...'  - adds CFLAGS for gtk build object
+  CFLAGS_REMOVE_perf.o += '...'  - removes CFLAGS for perf.o object
+  CFLAGS_REMOVE_gtk    += '...'  - removes CFLAGS for gtk build object
 
 
 This C flags changes has the scope of the Build makefile they are defined in.
 This C flags changes has the scope of the Build makefile they are defined in.
 
 

+ 5 - 1
tools/build/feature/Makefile

@@ -47,7 +47,8 @@ FILES=					\
 	test-bpf.bin			\
 	test-bpf.bin			\
 	test-get_cpuid.bin		\
 	test-get_cpuid.bin		\
 	test-sdt.bin			\
 	test-sdt.bin			\
-	test-cxx.bin
+	test-cxx.bin			\
+	test-jvmti.bin
 
 
 FILES := $(addprefix $(OUTPUT),$(FILES))
 FILES := $(addprefix $(OUTPUT),$(FILES))
 
 
@@ -225,6 +226,9 @@ $(OUTPUT)test-sdt.bin:
 $(OUTPUT)test-cxx.bin:
 $(OUTPUT)test-cxx.bin:
 	$(BUILDXX) -std=gnu++11
 	$(BUILDXX) -std=gnu++11
 
 
+$(OUTPUT)test-jvmti.bin:
+	$(BUILD)
+
 -include $(OUTPUT)*.d
 -include $(OUTPUT)*.d
 
 
 ###############################
 ###############################

+ 13 - 0
tools/build/feature/test-jvmti.c

@@ -0,0 +1,13 @@
+#include <jvmti.h>
+#include <jvmticmlr.h>
+
+int main(void)
+{
+	JavaVM			jvm	__attribute__((unused));
+	jvmtiEventCallbacks	cb	__attribute__((unused));
+	jvmtiCapabilities	caps	__attribute__((unused));
+	jvmtiJlocationFormat	format	__attribute__((unused));
+	jvmtiEnv		jvmti	__attribute__((unused));
+
+	return 0;
+}

+ 17 - 2
tools/perf/Documentation/intel-pt.txt

@@ -550,6 +550,18 @@ Unless /proc/sys/kernel/perf_event_paranoid is set to -1, unprivileged users
 have memory limits imposed upon them.  That affects what buffer sizes they can
 have memory limits imposed upon them.  That affects what buffer sizes they can
 have as outlined above.
 have as outlined above.
 
 
+The v4.2 kernel introduced support for a context switch metadata event,
+PERF_RECORD_SWITCH, which allows unprivileged users to see when their processes
+are scheduled out and in, just not by whom, which is left for the
+PERF_RECORD_SWITCH_CPU_WIDE, that is only accessible in system wide context,
+which in turn requires CAP_SYS_ADMIN.
+
+Please see the 45ac1403f564 ("perf: Add PERF_RECORD_SWITCH to indicate context
+switches") commit, that introduces these metadata events for further info.
+
+When working with kernels < v4.2, the following considerations must be taken,
+as the sched:sched_switch tracepoints will be used to receive such information:
+
 Unless /proc/sys/kernel/perf_event_paranoid is set to -1, unprivileged users are
 Unless /proc/sys/kernel/perf_event_paranoid is set to -1, unprivileged users are
 not permitted to use tracepoints which means there is insufficient side-band
 not permitted to use tracepoints which means there is insufficient side-band
 information to decode Intel PT in per-cpu mode, and potentially workload-only
 information to decode Intel PT in per-cpu mode, and potentially workload-only
@@ -564,8 +576,11 @@ sched_switch tracepoint
 -----------------------
 -----------------------
 
 
 The sched_switch tracepoint is used to provide side-band data for Intel PT
 The sched_switch tracepoint is used to provide side-band data for Intel PT
-decoding.  sched_switch events are automatically added. e.g. the second event
-shown below
+decoding in kernels where the PERF_RECORD_SWITCH metadata event isn't
+available.
+
+The sched_switch events are automatically added. e.g. the second event shown
+below:
 
 
 	$ perf record -vv -e intel_pt//u uname
 	$ perf record -vv -e intel_pt//u uname
 	------------------------------------------------------------
 	------------------------------------------------------------

+ 35 - 0
tools/perf/Documentation/perf-config.txt

@@ -8,6 +8,8 @@ perf-config - Get and set variables in a configuration file.
 SYNOPSIS
 SYNOPSIS
 --------
 --------
 [verse]
 [verse]
+'perf config' [<file-option>] [section.name[=value] ...]
+or
 'perf config' [<file-option>] -l | --list
 'perf config' [<file-option>] -l | --list
 
 
 DESCRIPTION
 DESCRIPTION
@@ -118,6 +120,39 @@ Given a $HOME/.perfconfig like this:
 		children = true
 		children = true
 		group = true
 		group = true
 
 
+You can hide source code of annotate feature setting the config to false with
+
+	% perf config annotate.hide_src_code=true
+
+If you want to add or modify several config items, you can do like
+
+	% perf config ui.show-headers=false kmem.default=slab
+
+To modify the sort order of report functionality in user config file(i.e. `~/.perfconfig`), do
+
+	% perf config --user report sort-order=srcline
+
+To change colors of selected line to other foreground and background colors
+in system config file (i.e. `$(sysconf)/perfconfig`), do
+
+	% perf config --system colors.selected=yellow,green
+
+To query the record mode of call graph, do
+
+	% perf config call-graph.record-mode
+
+If you want to know multiple config key/value pairs, you can do like
+
+	% perf config report.queue-size call-graph.order report.children
+
+To query the config value of sort order of call graph in user config file (i.e. `~/.perfconfig`), do
+
+	% perf config --user call-graph.sort-order
+
+To query the config value of buildid directory in system config file (i.e. `$(sysconf)/perfconfig`), do
+
+	% perf config --system buildid.dir
+
 Variables
 Variables
 ~~~~~~~~~
 ~~~~~~~~~
 
 

+ 26 - 0
tools/perf/Makefile.config

@@ -758,6 +758,31 @@ ifndef NO_AUXTRACE
   endif
   endif
 endif
 endif
 
 
+ifndef NO_JVMTI
+  ifneq (,$(wildcard /usr/sbin/update-java-alternatives))
+    JDIR=$(shell /usr/sbin/update-java-alternatives -l | head -1 | awk '{print $$3}')
+  else
+    ifneq (,$(wildcard /usr/sbin/alternatives))
+      JDIR=$(shell alternatives --display java | tail -1 | cut -d' ' -f 5 | sed 's%/jre/bin/java.%%g')
+    endif
+  endif
+  ifndef JDIR
+    $(warning No alternatives command found, you need to set JDIR= to point to the root of your Java directory)
+    NO_JVMTI := 1
+  endif
+endif
+
+ifndef NO_JVMTI
+  FEATURE_CHECK_CFLAGS-jvmti := -I$(JDIR)/include -I$(JDIR)/include/linux
+  $(call feature_check,jvmti)
+  ifeq ($(feature-jvmti), 1)
+    $(call detected_var,JDIR)
+  else
+    $(warning No openjdk development package found, please install JDK package)
+    NO_JVMTI := 1
+  endif
+endif
+
 # Among the variables below, these:
 # Among the variables below, these:
 #   perfexecdir
 #   perfexecdir
 #   template_dir
 #   template_dir
@@ -850,6 +875,7 @@ ifeq ($(VF),1)
   $(call print_var,sysconfdir)
   $(call print_var,sysconfdir)
   $(call print_var,LIBUNWIND_DIR)
   $(call print_var,LIBUNWIND_DIR)
   $(call print_var,LIBDW_DIR)
   $(call print_var,LIBDW_DIR)
+  $(call print_var,JDIR)
 
 
   ifeq ($(dwarf-post-unwind),1)
   ifeq ($(dwarf-post-unwind),1)
     $(call feature_print_text,"DWARF post unwind library", $(dwarf-post-unwind-text))
     $(call feature_print_text,"DWARF post unwind library", $(dwarf-post-unwind-text))

+ 23 - 1
tools/perf/Makefile.perf

@@ -86,6 +86,8 @@ include ../scripts/utilities.mak
 #
 #
 # Define FEATURES_DUMP to provide features detection dump file
 # Define FEATURES_DUMP to provide features detection dump file
 # and bypass the feature detection
 # and bypass the feature detection
+#
+# Define NO_JVMTI if you do not want jvmti agent built
 
 
 # As per kernel Makefile, avoid funny character set dependencies
 # As per kernel Makefile, avoid funny character set dependencies
 unexport LC_ALL
 unexport LC_ALL
@@ -283,6 +285,12 @@ ifndef NO_PERF_READ_VDSOX32
 PROGRAMS += $(OUTPUT)perf-read-vdsox32
 PROGRAMS += $(OUTPUT)perf-read-vdsox32
 endif
 endif
 
 
+LIBJVMTI = libperf-jvmti.so
+
+ifndef NO_JVMTI
+PROGRAMS += $(OUTPUT)$(LIBJVMTI)
+endif
+
 # what 'all' will build and 'install' will install, in perfexecdir
 # what 'all' will build and 'install' will install, in perfexecdir
 ALL_PROGRAMS = $(PROGRAMS) $(SCRIPTS)
 ALL_PROGRAMS = $(PROGRAMS) $(SCRIPTS)
 
 
@@ -551,6 +559,16 @@ $(OUTPUT)perf-read-vdsox32: perf-read-vdso.c util/find-vdso-map.c
 	$(QUIET_CC)$(CC) -mx32 $(filter -static,$(LDFLAGS)) -Wall -Werror -o $@ perf-read-vdso.c
 	$(QUIET_CC)$(CC) -mx32 $(filter -static,$(LDFLAGS)) -Wall -Werror -o $@ perf-read-vdso.c
 endif
 endif
 
 
+ifndef NO_JVMTI
+LIBJVMTI_IN := $(OUTPUT)jvmti/jvmti-in.o
+
+$(LIBJVMTI_IN): FORCE
+	$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=jvmti obj=jvmti
+
+$(OUTPUT)$(LIBJVMTI): $(LIBJVMTI_IN)
+	$(QUIET_LINK)$(CC) -shared -Wl,-soname -Wl,$(LIBJVMTI) -o $@ $< -lelf -lrt
+endif
+
 $(patsubst perf-%,%.o,$(PROGRAMS)): $(wildcard */*.h)
 $(patsubst perf-%,%.o,$(PROGRAMS)): $(wildcard */*.h)
 
 
 LIBPERF_IN := $(OUTPUT)libperf-in.o
 LIBPERF_IN := $(OUTPUT)libperf-in.o
@@ -687,6 +705,10 @@ endif
 ifndef NO_PERF_READ_VDSOX32
 ifndef NO_PERF_READ_VDSOX32
 	$(call QUIET_INSTALL, perf-read-vdsox32) \
 	$(call QUIET_INSTALL, perf-read-vdsox32) \
 		$(INSTALL) $(OUTPUT)perf-read-vdsox32 '$(DESTDIR_SQ)$(bindir_SQ)';
 		$(INSTALL) $(OUTPUT)perf-read-vdsox32 '$(DESTDIR_SQ)$(bindir_SQ)';
+endif
+ifndef NO_JVMTI
+	$(call QUIET_INSTALL, $(LIBJVMTI)) \
+		$(INSTALL) $(OUTPUT)$(LIBJVMTI) '$(DESTDIR_SQ)$(libdir_SQ)';
 endif
 endif
 	$(call QUIET_INSTALL, libexec) \
 	$(call QUIET_INSTALL, libexec) \
 		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
 		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
@@ -754,7 +776,7 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea
 	$(call QUIET_CLEAN, core-objs)  $(RM) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS)
 	$(call QUIET_CLEAN, core-objs)  $(RM) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS)
 	$(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
 	$(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
 	$(Q)$(RM) $(OUTPUT).config-detected
 	$(Q)$(RM) $(OUTPUT).config-detected
-	$(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 $(OUTPUT)pmu-events/jevents
+	$(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 $(OUTPUT)pmu-events/jevents $(OUTPUT)$(LIBJVMTI).so
 	$(call QUIET_CLEAN, core-gen)   $(RM)  *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)FEATURE-DUMP $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* \
 	$(call QUIET_CLEAN, core-gen)   $(RM)  *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)FEATURE-DUMP $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* \
 		$(OUTPUT)util/intel-pt-decoder/inat-tables.c $(OUTPUT)fixdep \
 		$(OUTPUT)util/intel-pt-decoder/inat-tables.c $(OUTPUT)fixdep \
 		$(OUTPUT)tests/llvm-src-{base,kbuild,prologue,relocation}.c \
 		$(OUTPUT)tests/llvm-src-{base,kbuild,prologue,relocation}.c \

+ 134 - 3
tools/perf/builtin-config.c

@@ -17,7 +17,7 @@
 static bool use_system_config, use_user_config;
 static bool use_system_config, use_user_config;
 
 
 static const char * const config_usage[] = {
 static const char * const config_usage[] = {
-	"perf config [<file-option>] [options]",
+	"perf config [<file-option>] [options] [section.name[=value] ...]",
 	NULL
 	NULL
 };
 };
 
 
@@ -33,6 +33,73 @@ static struct option config_options[] = {
 	OPT_END()
 	OPT_END()
 };
 };
 
 
+static int set_config(struct perf_config_set *set, const char *file_name,
+		      const char *var, const char *value)
+{
+	struct perf_config_section *section = NULL;
+	struct perf_config_item *item = NULL;
+	const char *first_line = "# this file is auto-generated.";
+	FILE *fp;
+
+	if (set == NULL)
+		return -1;
+
+	fp = fopen(file_name, "w");
+	if (!fp)
+		return -1;
+
+	perf_config_set__collect(set, file_name, var, value);
+	fprintf(fp, "%s\n", first_line);
+
+	/* overwrite configvariables */
+	perf_config_items__for_each_entry(&set->sections, section) {
+		if (!use_system_config && section->from_system_config)
+			continue;
+		fprintf(fp, "[%s]\n", section->name);
+
+		perf_config_items__for_each_entry(&section->items, item) {
+			if (!use_system_config && section->from_system_config)
+				continue;
+			if (item->value)
+				fprintf(fp, "\t%s = %s\n",
+					item->name, item->value);
+		}
+	}
+	fclose(fp);
+
+	return 0;
+}
+
+static int show_spec_config(struct perf_config_set *set, const char *var)
+{
+	struct perf_config_section *section;
+	struct perf_config_item *item;
+
+	if (set == NULL)
+		return -1;
+
+	perf_config_items__for_each_entry(&set->sections, section) {
+		if (prefixcmp(var, section->name) != 0)
+			continue;
+
+		perf_config_items__for_each_entry(&section->items, item) {
+			const char *name = var + strlen(section->name) + 1;
+
+			if (strcmp(name, item->name) == 0) {
+				char *value = item->value;
+
+				if (value) {
+					printf("%s=%s\n", var, value);
+					return 0;
+				}
+			}
+
+		}
+	}
+
+	return 0;
+}
+
 static int show_config(struct perf_config_set *set)
 static int show_config(struct perf_config_set *set)
 {
 {
 	struct perf_config_section *section;
 	struct perf_config_section *section;
@@ -52,9 +119,44 @@ static int show_config(struct perf_config_set *set)
 	return 0;
 	return 0;
 }
 }
 
 
+static int parse_config_arg(char *arg, char **var, char **value)
+{
+	const char *last_dot = strchr(arg, '.');
+
+	/*
+	 * Since "var" actually contains the section name and the real
+	 * config variable name separated by a dot, we have to know where the dot is.
+	 */
+	if (last_dot == NULL || last_dot == arg) {
+		pr_err("The config variable does not contain a section name: %s\n", arg);
+		return -1;
+	}
+	if (!last_dot[1]) {
+		pr_err("The config variable does not contain a variable name: %s\n", arg);
+		return -1;
+	}
+
+	*value = strchr(arg, '=');
+	if (*value == NULL)
+		*var = arg;
+	else if (!strcmp(*value, "=")) {
+		pr_err("The config variable does not contain a value: %s\n", arg);
+		return -1;
+	} else {
+		*value = *value + 1; /* excluding a first character '=' */
+		*var = strsep(&arg, "=");
+		if (*var[0] == '\0') {
+			pr_err("invalid config variable: %s\n", arg);
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
 int cmd_config(int argc, const char **argv, const char *prefix __maybe_unused)
 int cmd_config(int argc, const char **argv, const char *prefix __maybe_unused)
 {
 {
-	int ret = 0;
+	int i, ret = 0;
 	struct perf_config_set *set;
 	struct perf_config_set *set;
 	char *user_config = mkpath("%s/.perfconfig", getenv("HOME"));
 	char *user_config = mkpath("%s/.perfconfig", getenv("HOME"));
 
 
@@ -100,7 +202,36 @@ int cmd_config(int argc, const char **argv, const char *prefix __maybe_unused)
 		}
 		}
 		break;
 		break;
 	default:
 	default:
-		usage_with_options(config_usage, config_options);
+		if (argc) {
+			for (i = 0; argv[i]; i++) {
+				char *var, *value;
+				char *arg = strdup(argv[i]);
+
+				if (!arg) {
+					pr_err("%s: strdup failed\n", __func__);
+					ret = -1;
+					break;
+				}
+
+				if (parse_config_arg(arg, &var, &value) < 0) {
+					free(arg);
+					ret = -1;
+					break;
+				}
+
+				if (value == NULL)
+					ret = show_spec_config(set, var);
+				else {
+					const char *config_filename = config_exclusive_filename;
+
+					if (!config_exclusive_filename)
+						config_filename = user_config;
+					ret = set_config(set, config_filename, var, value);
+				}
+				free(arg);
+			}
+		} else
+			usage_with_options(config_usage, config_options);
 	}
 	}
 
 
 	perf_config_set__delete(set);
 	perf_config_set__delete(set);

+ 3 - 0
tools/perf/builtin-report.c

@@ -911,6 +911,9 @@ repeat:
 	if (itrace_synth_opts.last_branch)
 	if (itrace_synth_opts.last_branch)
 		has_br_stack = true;
 		has_br_stack = true;
 
 
+	if (has_br_stack && branch_call_mode)
+		symbol_conf.show_branchflag_count = true;
+
 	/*
 	/*
 	 * Branch mode is a tristate:
 	 * Branch mode is a tristate:
 	 * -1 means default, so decide based on the file having branch data.
 	 * -1 means default, so decide based on the file having branch data.

+ 8 - 0
tools/perf/jvmti/Build

@@ -0,0 +1,8 @@
+jvmti-y += libjvmti.o
+jvmti-y += jvmti_agent.o
+
+CFLAGS_jvmti         = -fPIC -DPIC -I$(JDIR)/include -I$(JDIR)/include/linux
+CFLAGS_REMOVE_jvmti  = -Wmissing-declarations
+CFLAGS_REMOVE_jvmti += -Wstrict-prototypes
+CFLAGS_REMOVE_jvmti += -Wextra
+CFLAGS_REMOVE_jvmti += -Wwrite-strings

+ 0 - 89
tools/perf/jvmti/Makefile

@@ -1,89 +0,0 @@
-ARCH=$(shell uname -m)
-
-ifeq ($(ARCH), x86_64)
-JARCH=amd64
-endif
-ifeq ($(ARCH), armv7l)
-JARCH=armhf
-endif
-ifeq ($(ARCH), armv6l)
-JARCH=armhf
-endif
-ifeq ($(ARCH), aarch64)
-JARCH=aarch64
-endif
-ifeq ($(ARCH), ppc64)
-JARCH=powerpc
-endif
-ifeq ($(ARCH), ppc64le)
-JARCH=powerpc
-endif
-
-DESTDIR=/usr/local
-
-VERSION=1
-REVISION=0
-AGE=0
-
-LN=ln -sf
-RM=rm
-
-SLIBJVMTI=libjvmti.so.$(VERSION).$(REVISION).$(AGE)
-VLIBJVMTI=libjvmti.so.$(VERSION)
-SLDFLAGS=-shared -Wl,-soname -Wl,$(VLIBJVMTI)
-SOLIBEXT=so
-
-# The following works at least on fedora 23, you may need the next
-# line for other distros.
-ifneq (,$(wildcard /usr/sbin/update-java-alternatives))
-JDIR=$(shell /usr/sbin/update-java-alternatives -l | head -1 | awk '{print $$3}')
-else
-  ifneq (,$(wildcard /usr/sbin/alternatives))
-    JDIR=$(shell alternatives --display java | tail -1 | cut -d' ' -f 5 | sed 's%/jre/bin/java.%%g')
-  endif
-endif
-ifndef JDIR
-$(error Could not find alternatives command, you need to set JDIR= to point to the root of your Java directory)
-else
-  ifeq (,$(wildcard $(JDIR)/include/jvmti.h))
-  $(error the openjdk development package appears to me missing, install and try again)
-  endif
-endif
-$(info Using Java from $(JDIR))
-# -lrt required in 32-bit mode for clock_gettime()
-LIBS=-lelf -lrt
-INCDIR=-I $(JDIR)/include -I $(JDIR)/include/linux
-
-TARGETS=$(SLIBJVMTI)
-
-SRCS=libjvmti.c jvmti_agent.c
-OBJS=$(SRCS:.c=.o)
-SOBJS=$(OBJS:.o=.lo)
-OPT=-O2 -g -Werror -Wall
-
-CFLAGS=$(INCDIR) $(OPT)
-
-all: $(TARGETS)
-
-.c.o:
-	$(CC) $(CFLAGS) -c $*.c
-.c.lo:
-	$(CC) -fPIC -DPIC $(CFLAGS) -c $*.c -o $*.lo
-
-$(OBJS) $(SOBJS): Makefile jvmti_agent.h ../util/jitdump.h
-
-$(SLIBJVMTI):  $(SOBJS)
-	$(CC) $(CFLAGS) $(SLDFLAGS)  -o $@ $(SOBJS) $(LIBS)
-	$(LN) $@ libjvmti.$(SOLIBEXT)
-
-clean:
-	$(RM) -f *.o *.so.* *.so *.lo
-
-install:
-	-mkdir -p $(DESTDIR)/lib
-	install -m 755 $(SLIBJVMTI) $(DESTDIR)/lib/
-	(cd $(DESTDIR)/lib; $(LN) $(SLIBJVMTI) $(VLIBJVMTI))
-	(cd $(DESTDIR)/lib; $(LN) $(SLIBJVMTI) libjvmti.$(SOLIBEXT))
-	ldconfig
-
-.SUFFIXES: .c .S .o .lo

+ 1 - 1
tools/perf/tests/make

@@ -106,7 +106,7 @@ make_minimal        := NO_LIBPERL=1 NO_LIBPYTHON=1 NO_NEWT=1 NO_GTK2=1
 make_minimal        += NO_DEMANGLE=1 NO_LIBELF=1 NO_LIBUNWIND=1 NO_BACKTRACE=1
 make_minimal        += NO_DEMANGLE=1 NO_LIBELF=1 NO_LIBUNWIND=1 NO_BACKTRACE=1
 make_minimal        += NO_LIBNUMA=1 NO_LIBAUDIT=1 NO_LIBBIONIC=1
 make_minimal        += NO_LIBNUMA=1 NO_LIBAUDIT=1 NO_LIBBIONIC=1
 make_minimal        += NO_LIBDW_DWARF_UNWIND=1 NO_AUXTRACE=1 NO_LIBBPF=1
 make_minimal        += NO_LIBDW_DWARF_UNWIND=1 NO_AUXTRACE=1 NO_LIBBPF=1
-make_minimal        += NO_LIBCRYPTO=1 NO_SDT=1
+make_minimal        += NO_LIBCRYPTO=1 NO_SDT=1 NO_JVMTI=1
 
 
 # $(run) contains all available tests
 # $(run) contains all available tests
 run := make_pure
 run := make_pure

+ 18 - 2
tools/perf/ui/browsers/hists.c

@@ -738,6 +738,7 @@ static int hist_browser__show_callchain_list(struct hist_browser *browser,
 					     struct callchain_print_arg *arg)
 					     struct callchain_print_arg *arg)
 {
 {
 	char bf[1024], *alloc_str;
 	char bf[1024], *alloc_str;
+	char buf[64], *alloc_str2;
 	const char *str;
 	const char *str;
 
 
 	if (arg->row_offset != 0) {
 	if (arg->row_offset != 0) {
@@ -746,12 +747,26 @@ static int hist_browser__show_callchain_list(struct hist_browser *browser,
 	}
 	}
 
 
 	alloc_str = NULL;
 	alloc_str = NULL;
+	alloc_str2 = NULL;
+
 	str = callchain_list__sym_name(chain, bf, sizeof(bf),
 	str = callchain_list__sym_name(chain, bf, sizeof(bf),
 				       browser->show_dso);
 				       browser->show_dso);
 
 
-	if (need_percent) {
-		char buf[64];
+	if (symbol_conf.show_branchflag_count) {
+		if (need_percent)
+			callchain_list_counts__printf_value(node, chain, NULL,
+							    buf, sizeof(buf));
+		else
+			callchain_list_counts__printf_value(NULL, chain, NULL,
+							    buf, sizeof(buf));
+
+		if (asprintf(&alloc_str2, "%s%s", str, buf) < 0)
+			str = "Not enough memory!";
+		else
+			str = alloc_str2;
+	}
 
 
+	if (need_percent) {
 		callchain_node__scnprintf_value(node, buf, sizeof(buf),
 		callchain_node__scnprintf_value(node, buf, sizeof(buf),
 						total);
 						total);
 
 
@@ -764,6 +779,7 @@ static int hist_browser__show_callchain_list(struct hist_browser *browser,
 	print(browser, chain, str, offset, row, arg);
 	print(browser, chain, str, offset, row, arg);
 
 
 	free(alloc_str);
 	free(alloc_str);
+	free(alloc_str2);
 	return 1;
 	return 1;
 }
 }
 
 

+ 31 - 4
tools/perf/ui/stdio/hist.c

@@ -41,7 +41,9 @@ static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_node *node,
 {
 {
 	int i;
 	int i;
 	size_t ret = 0;
 	size_t ret = 0;
-	char bf[1024];
+	char bf[1024], *alloc_str = NULL;
+	char buf[64];
+	const char *str;
 
 
 	ret += callchain__fprintf_left_margin(fp, left_margin);
 	ret += callchain__fprintf_left_margin(fp, left_margin);
 	for (i = 0; i < depth; i++) {
 	for (i = 0; i < depth; i++) {
@@ -56,8 +58,26 @@ static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_node *node,
 		} else
 		} else
 			ret += fprintf(fp, "%s", "          ");
 			ret += fprintf(fp, "%s", "          ");
 	}
 	}
-	fputs(callchain_list__sym_name(chain, bf, sizeof(bf), false), fp);
+
+	str = callchain_list__sym_name(chain, bf, sizeof(bf), false);
+
+	if (symbol_conf.show_branchflag_count) {
+		if (!period)
+			callchain_list_counts__printf_value(node, chain, NULL,
+							    buf, sizeof(buf));
+		else
+			callchain_list_counts__printf_value(NULL, chain, NULL,
+							    buf, sizeof(buf));
+
+		if (asprintf(&alloc_str, "%s%s", str, buf) < 0)
+			str = "Not enough memory!";
+		else
+			str = alloc_str;
+	}
+
+	fputs(str, fp);
 	fputc('\n', fp);
 	fputc('\n', fp);
+	free(alloc_str);
 	return ret;
 	return ret;
 }
 }
 
 
@@ -219,8 +239,15 @@ static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root,
 			} else
 			} else
 				ret += callchain__fprintf_left_margin(fp, left_margin);
 				ret += callchain__fprintf_left_margin(fp, left_margin);
 
 
-			ret += fprintf(fp, "%s\n", callchain_list__sym_name(chain, bf, sizeof(bf),
-							false));
+			ret += fprintf(fp, "%s",
+				       callchain_list__sym_name(chain, bf,
+								sizeof(bf),
+								false));
+
+			if (symbol_conf.show_branchflag_count)
+				ret += callchain_list_counts__printf_value(
+						NULL, chain, fp, NULL, 0);
+			ret += fprintf(fp, "\n");
 
 
 			if (++entries_printed == callchain_param.print_limit)
 			if (++entries_printed == callchain_param.print_limit)
 				break;
 				break;

+ 200 - 5
tools/perf/util/callchain.c

@@ -193,7 +193,6 @@ int perf_callchain_config(const char *var, const char *value)
 
 
 	if (!strcmp(var, "record-mode"))
 	if (!strcmp(var, "record-mode"))
 		return parse_callchain_record_opt(value, &callchain_param);
 		return parse_callchain_record_opt(value, &callchain_param);
-#ifdef HAVE_DWARF_UNWIND_SUPPORT
 	if (!strcmp(var, "dump-size")) {
 	if (!strcmp(var, "dump-size")) {
 		unsigned long size = 0;
 		unsigned long size = 0;
 		int ret;
 		int ret;
@@ -203,7 +202,6 @@ int perf_callchain_config(const char *var, const char *value)
 
 
 		return ret;
 		return ret;
 	}
 	}
-#endif
 	if (!strcmp(var, "print-type"))
 	if (!strcmp(var, "print-type"))
 		return parse_callchain_mode(value);
 		return parse_callchain_mode(value);
 	if (!strcmp(var, "order"))
 	if (!strcmp(var, "order"))
@@ -440,6 +438,21 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor)
 		call->ip = cursor_node->ip;
 		call->ip = cursor_node->ip;
 		call->ms.sym = cursor_node->sym;
 		call->ms.sym = cursor_node->sym;
 		call->ms.map = cursor_node->map;
 		call->ms.map = cursor_node->map;
+
+		if (cursor_node->branch) {
+			call->branch_count = 1;
+
+			if (cursor_node->branch_flags.predicted)
+				call->predicted_count = 1;
+
+			if (cursor_node->branch_flags.abort)
+				call->abort_count = 1;
+
+			call->cycles_count = cursor_node->branch_flags.cycles;
+			call->iter_count = cursor_node->nr_loop_iter;
+			call->samples_count = cursor_node->samples;
+		}
+
 		list_add_tail(&call->list, &node->val);
 		list_add_tail(&call->list, &node->val);
 
 
 		callchain_cursor_advance(cursor);
 		callchain_cursor_advance(cursor);
@@ -499,8 +512,23 @@ static enum match_result match_chain(struct callchain_cursor_node *node,
 		right = node->ip;
 		right = node->ip;
 	}
 	}
 
 
-	if (left == right)
+	if (left == right) {
+		if (node->branch) {
+			cnode->branch_count++;
+
+			if (node->branch_flags.predicted)
+				cnode->predicted_count++;
+
+			if (node->branch_flags.abort)
+				cnode->abort_count++;
+
+			cnode->cycles_count += node->branch_flags.cycles;
+			cnode->iter_count += node->nr_loop_iter;
+			cnode->samples_count += node->samples;
+		}
+
 		return MATCH_EQ;
 		return MATCH_EQ;
+	}
 
 
 	return left > right ? MATCH_GT : MATCH_LT;
 	return left > right ? MATCH_GT : MATCH_LT;
 }
 }
@@ -730,7 +758,8 @@ merge_chain_branch(struct callchain_cursor *cursor,
 
 
 	list_for_each_entry_safe(list, next_list, &src->val, list) {
 	list_for_each_entry_safe(list, next_list, &src->val, list) {
 		callchain_cursor_append(cursor, list->ip,
 		callchain_cursor_append(cursor, list->ip,
-					list->ms.map, list->ms.sym);
+					list->ms.map, list->ms.sym,
+					false, NULL, 0, 0);
 		list_del(&list->list);
 		list_del(&list->list);
 		free(list);
 		free(list);
 	}
 	}
@@ -767,7 +796,9 @@ int callchain_merge(struct callchain_cursor *cursor,
 }
 }
 
 
 int callchain_cursor_append(struct callchain_cursor *cursor,
 int callchain_cursor_append(struct callchain_cursor *cursor,
-			    u64 ip, struct map *map, struct symbol *sym)
+			    u64 ip, struct map *map, struct symbol *sym,
+			    bool branch, struct branch_flags *flags,
+			    int nr_loop_iter, int samples)
 {
 {
 	struct callchain_cursor_node *node = *cursor->last;
 	struct callchain_cursor_node *node = *cursor->last;
 
 
@@ -782,6 +813,13 @@ int callchain_cursor_append(struct callchain_cursor *cursor,
 	node->ip = ip;
 	node->ip = ip;
 	node->map = map;
 	node->map = map;
 	node->sym = sym;
 	node->sym = sym;
+	node->branch = branch;
+	node->nr_loop_iter = nr_loop_iter;
+	node->samples = samples;
+
+	if (flags)
+		memcpy(&node->branch_flags, flags,
+			sizeof(struct branch_flags));
 
 
 	cursor->nr++;
 	cursor->nr++;
 
 
@@ -939,6 +977,163 @@ int callchain_node__fprintf_value(struct callchain_node *node,
 	return 0;
 	return 0;
 }
 }
 
 
+static void callchain_counts_value(struct callchain_node *node,
+				   u64 *branch_count, u64 *predicted_count,
+				   u64 *abort_count, u64 *cycles_count)
+{
+	struct callchain_list *clist;
+
+	list_for_each_entry(clist, &node->val, list) {
+		if (branch_count)
+			*branch_count += clist->branch_count;
+
+		if (predicted_count)
+			*predicted_count += clist->predicted_count;
+
+		if (abort_count)
+			*abort_count += clist->abort_count;
+
+		if (cycles_count)
+			*cycles_count += clist->cycles_count;
+	}
+}
+
+static int callchain_node_branch_counts_cumul(struct callchain_node *node,
+					      u64 *branch_count,
+					      u64 *predicted_count,
+					      u64 *abort_count,
+					      u64 *cycles_count)
+{
+	struct callchain_node *child;
+	struct rb_node *n;
+
+	n = rb_first(&node->rb_root_in);
+	while (n) {
+		child = rb_entry(n, struct callchain_node, rb_node_in);
+		n = rb_next(n);
+
+		callchain_node_branch_counts_cumul(child, branch_count,
+						   predicted_count,
+						   abort_count,
+						   cycles_count);
+
+		callchain_counts_value(child, branch_count,
+				       predicted_count, abort_count,
+				       cycles_count);
+	}
+
+	return 0;
+}
+
+int callchain_branch_counts(struct callchain_root *root,
+			    u64 *branch_count, u64 *predicted_count,
+			    u64 *abort_count, u64 *cycles_count)
+{
+	if (branch_count)
+		*branch_count = 0;
+
+	if (predicted_count)
+		*predicted_count = 0;
+
+	if (abort_count)
+		*abort_count = 0;
+
+	if (cycles_count)
+		*cycles_count = 0;
+
+	return callchain_node_branch_counts_cumul(&root->node,
+						  branch_count,
+						  predicted_count,
+						  abort_count,
+						  cycles_count);
+}
+
+static int callchain_counts_printf(FILE *fp, char *bf, int bfsize,
+				   u64 branch_count, u64 predicted_count,
+				   u64 abort_count, u64 cycles_count,
+				   u64 iter_count, u64 samples_count)
+{
+	double predicted_percent = 0.0;
+	const char *null_str = "";
+	char iter_str[32];
+	char *str;
+	u64 cycles = 0;
+
+	if (branch_count == 0) {
+		if (fp)
+			return fprintf(fp, " (calltrace)");
+
+		return scnprintf(bf, bfsize, " (calltrace)");
+	}
+
+	if (iter_count && samples_count) {
+		scnprintf(iter_str, sizeof(iter_str),
+			 ", iterations:%" PRId64 "",
+			 iter_count / samples_count);
+		str = iter_str;
+	} else
+		str = (char *)null_str;
+
+	predicted_percent = predicted_count * 100.0 / branch_count;
+	cycles = cycles_count / branch_count;
+
+	if ((predicted_percent >= 100.0) && (abort_count == 0)) {
+		if (fp)
+			return fprintf(fp, " (cycles:%" PRId64 "%s)",
+				       cycles, str);
+
+		return scnprintf(bf, bfsize, " (cycles:%" PRId64 "%s)",
+				 cycles, str);
+	}
+
+	if ((predicted_percent < 100.0) && (abort_count == 0)) {
+		if (fp)
+			return fprintf(fp,
+				" (predicted:%.1f%%, cycles:%" PRId64 "%s)",
+				predicted_percent, cycles, str);
+
+		return scnprintf(bf, bfsize,
+			" (predicted:%.1f%%, cycles:%" PRId64 "%s)",
+			predicted_percent, cycles, str);
+	}
+
+	if (fp)
+		return fprintf(fp,
+		" (predicted:%.1f%%, abort:%" PRId64 ", cycles:%" PRId64 "%s)",
+			predicted_percent, abort_count, cycles, str);
+
+	return scnprintf(bf, bfsize,
+		" (predicted:%.1f%%, abort:%" PRId64 ", cycles:%" PRId64 "%s)",
+		predicted_percent, abort_count, cycles, str);
+}
+
+int callchain_list_counts__printf_value(struct callchain_node *node,
+					struct callchain_list *clist,
+					FILE *fp, char *bf, int bfsize)
+{
+	u64 branch_count, predicted_count;
+	u64 abort_count, cycles_count;
+	u64 iter_count = 0, samples_count = 0;
+
+	branch_count = clist->branch_count;
+	predicted_count = clist->predicted_count;
+	abort_count = clist->abort_count;
+	cycles_count = clist->cycles_count;
+
+	if (node) {
+		struct callchain_list *call;
+
+		list_for_each_entry(call, &node->val, list) {
+			iter_count += call->iter_count;
+			samples_count += call->samples_count;
+		}
+	}
+
+	return callchain_counts_printf(fp, bf, bfsize, branch_count,
+				       predicted_count, abort_count,
+				       cycles_count, iter_count, samples_count);
+}
+
 static void free_callchain_node(struct callchain_node *node)
 static void free_callchain_node(struct callchain_node *node)
 {
 {
 	struct callchain_list *list, *tmp;
 	struct callchain_list *list, *tmp;

+ 21 - 5
tools/perf/util/callchain.h

@@ -11,11 +11,7 @@
 
 
 #define CALLCHAIN_HELP "setup and enables call-graph (stack chain/backtrace):\n\n"
 #define CALLCHAIN_HELP "setup and enables call-graph (stack chain/backtrace):\n\n"
 
 
-#ifdef HAVE_DWARF_UNWIND_SUPPORT
 # define RECORD_MODE_HELP  HELP_PAD "record_mode:\tcall graph recording mode (fp|dwarf|lbr)\n"
 # define RECORD_MODE_HELP  HELP_PAD "record_mode:\tcall graph recording mode (fp|dwarf|lbr)\n"
-#else
-# define RECORD_MODE_HELP  HELP_PAD "record_mode:\tcall graph recording mode (fp|lbr)\n"
-#endif
 
 
 #define RECORD_SIZE_HELP						\
 #define RECORD_SIZE_HELP						\
 	HELP_PAD "record_size:\tif record_mode is 'dwarf', max size of stack recording (<bytes>)\n" \
 	HELP_PAD "record_size:\tif record_mode is 'dwarf', max size of stack recording (<bytes>)\n" \
@@ -115,6 +111,12 @@ struct callchain_list {
 		bool		unfolded;
 		bool		unfolded;
 		bool		has_children;
 		bool		has_children;
 	};
 	};
+	u64			branch_count;
+	u64			predicted_count;
+	u64			abort_count;
+	u64			cycles_count;
+	u64			iter_count;
+	u64			samples_count;
 	char		       *srcline;
 	char		       *srcline;
 	struct list_head	list;
 	struct list_head	list;
 };
 };
@@ -129,6 +131,10 @@ struct callchain_cursor_node {
 	u64				ip;
 	u64				ip;
 	struct map			*map;
 	struct map			*map;
 	struct symbol			*sym;
 	struct symbol			*sym;
+	bool				branch;
+	struct branch_flags		branch_flags;
+	int				nr_loop_iter;
+	int				samples;
 	struct callchain_cursor_node	*next;
 	struct callchain_cursor_node	*next;
 };
 };
 
 
@@ -183,7 +189,9 @@ static inline void callchain_cursor_reset(struct callchain_cursor *cursor)
 }
 }
 
 
 int callchain_cursor_append(struct callchain_cursor *cursor, u64 ip,
 int callchain_cursor_append(struct callchain_cursor *cursor, u64 ip,
-			    struct map *map, struct symbol *sym);
+			    struct map *map, struct symbol *sym,
+			    bool branch, struct branch_flags *flags,
+			    int nr_loop_iter, int samples);
 
 
 /* Close a cursor writing session. Initialize for the reader */
 /* Close a cursor writing session. Initialize for the reader */
 static inline void callchain_cursor_commit(struct callchain_cursor *cursor)
 static inline void callchain_cursor_commit(struct callchain_cursor *cursor)
@@ -261,8 +269,16 @@ char *callchain_node__scnprintf_value(struct callchain_node *node,
 int callchain_node__fprintf_value(struct callchain_node *node,
 int callchain_node__fprintf_value(struct callchain_node *node,
 				  FILE *fp, u64 total);
 				  FILE *fp, u64 total);
 
 
+int callchain_list_counts__printf_value(struct callchain_node *node,
+					struct callchain_list *clist,
+					FILE *fp, char *bf, int bfsize);
+
 void free_callchain(struct callchain_root *root);
 void free_callchain(struct callchain_root *root);
 void decay_callchain(struct callchain_root *root);
 void decay_callchain(struct callchain_root *root);
 int callchain_node__make_parent_list(struct callchain_node *node);
 int callchain_node__make_parent_list(struct callchain_node *node);
 
 
+int callchain_branch_counts(struct callchain_root *root,
+			    u64 *branch_count, u64 *predicted_count,
+			    u64 *abort_count, u64 *cycles_count);
+
 #endif	/* __PERF_CALLCHAIN_H */
 #endif	/* __PERF_CALLCHAIN_H */

+ 20 - 0
tools/perf/util/config.c

@@ -594,6 +594,19 @@ static int collect_config(const char *var, const char *value,
 			goto out_free;
 			goto out_free;
 	}
 	}
 
 
+	/* perf_config_set can contain both user and system config items.
+	 * So we should know where each value is from.
+	 * The classification would be needed when a particular config file
+	 * is overwrited by setting feature i.e. set_config().
+	 */
+	if (strcmp(config_file_name, perf_etc_perfconfig()) == 0) {
+		section->from_system_config = true;
+		item->from_system_config = true;
+	} else {
+		section->from_system_config = false;
+		item->from_system_config = false;
+	}
+
 	ret = set_value(item, value);
 	ret = set_value(item, value);
 	return ret;
 	return ret;
 
 
@@ -602,6 +615,13 @@ out_free:
 	return -1;
 	return -1;
 }
 }
 
 
+int perf_config_set__collect(struct perf_config_set *set, const char *file_name,
+			     const char *var, const char *value)
+{
+	config_file_name = file_name;
+	return collect_config(var, value, set);
+}
+
 static int perf_config_set__init(struct perf_config_set *set)
 static int perf_config_set__init(struct perf_config_set *set)
 {
 {
 	int ret = -1;
 	int ret = -1;

+ 4 - 0
tools/perf/util/config.h

@@ -7,12 +7,14 @@
 struct perf_config_item {
 struct perf_config_item {
 	char *name;
 	char *name;
 	char *value;
 	char *value;
+	bool from_system_config;
 	struct list_head node;
 	struct list_head node;
 };
 };
 
 
 struct perf_config_section {
 struct perf_config_section {
 	char *name;
 	char *name;
 	struct list_head items;
 	struct list_head items;
+	bool from_system_config;
 	struct list_head node;
 	struct list_head node;
 };
 };
 
 
@@ -33,6 +35,8 @@ const char *perf_etc_perfconfig(void);
 
 
 struct perf_config_set *perf_config_set__new(void);
 struct perf_config_set *perf_config_set__new(void);
 void perf_config_set__delete(struct perf_config_set *set);
 void perf_config_set__delete(struct perf_config_set *set);
+int perf_config_set__collect(struct perf_config_set *set, const char *file_name,
+			     const char *var, const char *value);
 void perf_config__init(void);
 void perf_config__init(void);
 void perf_config__exit(void);
 void perf_config__exit(void);
 void perf_config__refresh(void);
 void perf_config__refresh(void);

+ 67 - 15
tools/perf/util/machine.c

@@ -1616,7 +1616,11 @@ static int add_callchain_ip(struct thread *thread,
 			    struct symbol **parent,
 			    struct symbol **parent,
 			    struct addr_location *root_al,
 			    struct addr_location *root_al,
 			    u8 *cpumode,
 			    u8 *cpumode,
-			    u64 ip)
+			    u64 ip,
+			    bool branch,
+			    struct branch_flags *flags,
+			    int nr_loop_iter,
+			    int samples)
 {
 {
 	struct addr_location al;
 	struct addr_location al;
 
 
@@ -1668,7 +1672,8 @@ static int add_callchain_ip(struct thread *thread,
 
 
 	if (symbol_conf.hide_unresolved && al.sym == NULL)
 	if (symbol_conf.hide_unresolved && al.sym == NULL)
 		return 0;
 		return 0;
-	return callchain_cursor_append(cursor, al.addr, al.map, al.sym);
+	return callchain_cursor_append(cursor, al.addr, al.map, al.sym,
+				       branch, flags, nr_loop_iter, samples);
 }
 }
 
 
 struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
 struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
@@ -1757,7 +1762,9 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
 	/* LBR only affects the user callchain */
 	/* LBR only affects the user callchain */
 	if (i != chain_nr) {
 	if (i != chain_nr) {
 		struct branch_stack *lbr_stack = sample->branch_stack;
 		struct branch_stack *lbr_stack = sample->branch_stack;
-		int lbr_nr = lbr_stack->nr, j;
+		int lbr_nr = lbr_stack->nr, j, k;
+		bool branch;
+		struct branch_flags *flags;
 		/*
 		/*
 		 * LBR callstack can only get user call chain.
 		 * LBR callstack can only get user call chain.
 		 * The mix_chain_nr is kernel call chain
 		 * The mix_chain_nr is kernel call chain
@@ -1772,23 +1779,41 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
 
 
 		for (j = 0; j < mix_chain_nr; j++) {
 		for (j = 0; j < mix_chain_nr; j++) {
 			int err;
 			int err;
+			branch = false;
+			flags = NULL;
+
 			if (callchain_param.order == ORDER_CALLEE) {
 			if (callchain_param.order == ORDER_CALLEE) {
 				if (j < i + 1)
 				if (j < i + 1)
 					ip = chain->ips[j];
 					ip = chain->ips[j];
-				else if (j > i + 1)
-					ip = lbr_stack->entries[j - i - 2].from;
-				else
+				else if (j > i + 1) {
+					k = j - i - 2;
+					ip = lbr_stack->entries[k].from;
+					branch = true;
+					flags = &lbr_stack->entries[k].flags;
+				} else {
 					ip = lbr_stack->entries[0].to;
 					ip = lbr_stack->entries[0].to;
+					branch = true;
+					flags = &lbr_stack->entries[0].flags;
+				}
 			} else {
 			} else {
-				if (j < lbr_nr)
-					ip = lbr_stack->entries[lbr_nr - j - 1].from;
+				if (j < lbr_nr) {
+					k = lbr_nr - j - 1;
+					ip = lbr_stack->entries[k].from;
+					branch = true;
+					flags = &lbr_stack->entries[k].flags;
+				}
 				else if (j > lbr_nr)
 				else if (j > lbr_nr)
 					ip = chain->ips[i + 1 - (j - lbr_nr)];
 					ip = chain->ips[i + 1 - (j - lbr_nr)];
-				else
+				else {
 					ip = lbr_stack->entries[0].to;
 					ip = lbr_stack->entries[0].to;
+					branch = true;
+					flags = &lbr_stack->entries[0].flags;
+				}
 			}
 			}
 
 
-			err = add_callchain_ip(thread, cursor, parent, root_al, &cpumode, ip);
+			err = add_callchain_ip(thread, cursor, parent,
+					       root_al, &cpumode, ip,
+					       branch, flags, 0, 0);
 			if (err)
 			if (err)
 				return (err < 0) ? err : 0;
 				return (err < 0) ? err : 0;
 		}
 		}
@@ -1813,6 +1838,7 @@ static int thread__resolve_callchain_sample(struct thread *thread,
 	int i, j, err, nr_entries;
 	int i, j, err, nr_entries;
 	int skip_idx = -1;
 	int skip_idx = -1;
 	int first_call = 0;
 	int first_call = 0;
+	int nr_loop_iter;
 
 
 	if (perf_evsel__has_branch_callstack(evsel)) {
 	if (perf_evsel__has_branch_callstack(evsel)) {
 		err = resolve_lbr_callchain_sample(thread, cursor, sample, parent,
 		err = resolve_lbr_callchain_sample(thread, cursor, sample, parent,
@@ -1868,14 +1894,37 @@ static int thread__resolve_callchain_sample(struct thread *thread,
 				be[i] = branch->entries[branch->nr - i - 1];
 				be[i] = branch->entries[branch->nr - i - 1];
 		}
 		}
 
 
+		nr_loop_iter = nr;
 		nr = remove_loops(be, nr);
 		nr = remove_loops(be, nr);
 
 
+		/*
+		 * Get the number of iterations.
+		 * It's only approximation, but good enough in practice.
+		 */
+		if (nr_loop_iter > nr)
+			nr_loop_iter = nr_loop_iter - nr + 1;
+		else
+			nr_loop_iter = 0;
+
 		for (i = 0; i < nr; i++) {
 		for (i = 0; i < nr; i++) {
-			err = add_callchain_ip(thread, cursor, parent, root_al,
-					       NULL, be[i].to);
+			if (i == nr - 1)
+				err = add_callchain_ip(thread, cursor, parent,
+						       root_al,
+						       NULL, be[i].to,
+						       true, &be[i].flags,
+						       nr_loop_iter, 1);
+			else
+				err = add_callchain_ip(thread, cursor, parent,
+						       root_al,
+						       NULL, be[i].to,
+						       true, &be[i].flags,
+						       0, 0);
+
 			if (!err)
 			if (!err)
 				err = add_callchain_ip(thread, cursor, parent, root_al,
 				err = add_callchain_ip(thread, cursor, parent, root_al,
-						       NULL, be[i].from);
+						       NULL, be[i].from,
+						       true, &be[i].flags,
+						       0, 0);
 			if (err == -EINVAL)
 			if (err == -EINVAL)
 				break;
 				break;
 			if (err)
 			if (err)
@@ -1903,7 +1952,9 @@ check_calls:
 		if (ip < PERF_CONTEXT_MAX)
 		if (ip < PERF_CONTEXT_MAX)
                        ++nr_entries;
                        ++nr_entries;
 
 
-		err = add_callchain_ip(thread, cursor, parent, root_al, &cpumode, ip);
+		err = add_callchain_ip(thread, cursor, parent,
+				       root_al, &cpumode, ip,
+				       false, NULL, 0, 0);
 
 
 		if (err)
 		if (err)
 			return (err < 0) ? err : 0;
 			return (err < 0) ? err : 0;
@@ -1919,7 +1970,8 @@ static int unwind_entry(struct unwind_entry *entry, void *arg)
 	if (symbol_conf.hide_unresolved && entry->sym == NULL)
 	if (symbol_conf.hide_unresolved && entry->sym == NULL)
 		return 0;
 		return 0;
 	return callchain_cursor_append(cursor, entry->ip,
 	return callchain_cursor_append(cursor, entry->ip,
-				       entry->map, entry->sym);
+				       entry->map, entry->sym,
+				       false, NULL, 0, 0);
 }
 }
 
 
 static int thread__resolve_callchain_unwind(struct thread *thread,
 static int thread__resolve_callchain_unwind(struct thread *thread,

+ 1 - 0
tools/perf/util/symbol.h

@@ -100,6 +100,7 @@ struct symbol_conf {
 			show_total_period,
 			show_total_period,
 			use_callchain,
 			use_callchain,
 			cumulate_callchain,
 			cumulate_callchain,
+			show_branchflag_count,
 			exclude_other,
 			exclude_other,
 			show_cpu_utilization,
 			show_cpu_utilization,
 			initialized,
 			initialized,