浏览代码

Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

  - Accept a zero --itrace period, meaning "as often as possible".  In the case
    of Intel PT that is the same as a period of 1 and a unit of 'instructions'
    (i.e.  --itrace=i1i). (Adrian Hunter)

  - Harmonize itrace's synthesized callchains with the existing --max-stack
    tool option. (Adrian Hunter)

  - Allow time to be displayed in nanoseconds in 'perf script'. (Adrian Hunter)

  - Fix potential infinite loop when handling Intel PT timestamps. (Adrian Hunter)

  - Slighly improve Intel PT debug logging. (Adrian Hunter)

  - Warn when AUX data has been lost, just like when processing PERF_RECORD_LOST.
    (Adrian Hunter)

  - Further document export-to-postgresql.py script. (Adrian Hunter)

  - Add option to synthesize branch stack from auxtrace data. (Adrian Hunter)

  - Use equivalent logic to avoid using dso->kernel. (Arnaldo Carvalho de Melo)

  - Show proper error messages when parsing bad terms for hw/sw events. (He Kuang)

  - Tracepoint event parsing improvements. (He Kuang)

  - Store tracing mountpoint for better error message. (Jiri Olsa)

  - Add fixdep to tools/build, bringing it closer to the kernel counterpart, from
    where it is being lifted. (Jiri Olsa)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Ingo Molnar 10 年之前
父节点
当前提交
9c17dbc6eb
共有 47 个文件被更改,包括 1240 次插入167 次删除
  1. 1 0
      tools/build/Build
  2. 14 3
      tools/build/Build.include
  3. 39 13
      tools/build/Documentation/Build.txt
  4. 43 0
      tools/build/Makefile
  5. 7 0
      tools/build/Makefile.build
  6. 6 0
      tools/build/Makefile.include
  7. 168 0
      tools/build/fixdep.c
  8. 1 0
      tools/build/tests/ex/Build
  9. 8 5
      tools/build/tests/ex/Makefile
  10. 2 0
      tools/build/tests/ex/ex.c
  11. 8 0
      tools/build/tests/ex/inc.c
  12. 27 0
      tools/build/tests/run.sh
  13. 4 2
      tools/lib/api/Makefile
  14. 3 10
      tools/lib/api/fs/tracing_path.c
  15. 4 2
      tools/lib/bpf/Makefile
  16. 4 2
      tools/lib/lockdep/Makefile
  17. 44 0
      tools/perf/Documentation/intel-pt.txt
  18. 4 0
      tools/perf/Documentation/itrace.txt
  19. 3 0
      tools/perf/Documentation/perf-inject.txt
  20. 3 0
      tools/perf/Documentation/perf-script.txt
  21. 16 16
      tools/perf/Makefile.perf
  22. 125 2
      tools/perf/builtin-inject.c
  23. 27 4
      tools/perf/builtin-report.c
  24. 15 3
      tools/perf/builtin-script.c
  25. 1 1
      tools/perf/builtin-top.c
  26. 221 0
      tools/perf/scripts/python/export-to-postgresql.py
  27. 14 10
      tools/perf/ui/browsers/hists.c
  28. 23 1
      tools/perf/util/auxtrace.c
  29. 4 0
      tools/perf/util/auxtrace.h
  30. 1 1
      tools/perf/util/event.c
  31. 1 0
      tools/perf/util/event.h
  32. 22 0
      tools/perf/util/evlist.c
  33. 3 0
      tools/perf/util/evlist.h
  34. 4 2
      tools/perf/util/hist.c
  35. 1 0
      tools/perf/util/hist.h
  36. 2 2
      tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
  37. 11 10
      tools/perf/util/intel-pt-decoder/intel-pt-log.c
  38. 32 6
      tools/perf/util/intel-pt-decoder/intel-pt-log.h
  39. 132 3
      tools/perf/util/intel-pt.c
  40. 121 30
      tools/perf/util/parse-events.c
  41. 3 1
      tools/perf/util/parse-events.h
  42. 1 1
      tools/perf/util/parse-events.l
  43. 39 12
      tools/perf/util/parse-events.y
  44. 14 23
      tools/perf/util/pmu.c
  45. 1 1
      tools/perf/util/scripting-engines/trace-event-python.c
  46. 11 1
      tools/perf/util/session.c
  47. 2 0
      tools/perf/util/trace-event.h

+ 1 - 0
tools/build/Build

@@ -0,0 +1 @@
+fixdep-y := fixdep.o

+ 14 - 3
tools/build/Build.include

@@ -54,15 +54,26 @@ make-cmd = $(call escsq,$(subst \#,\\\#,$(subst $$,$$$$,$(cmd_$(1)))))
 # PHONY targets skipped in both cases.
 any-prereq = $(filter-out $(PHONY),$?) $(filter-out $(PHONY) $(wildcard $^),$^)
 
+###
+# Copy dependency data into .cmd file
+#  - gcc -M dependency info
+#  - command line to create object 'cmd_object :='
+dep-cmd = $(if $(wildcard $(fixdep)),                                           \
+           $(fixdep) $(depfile) $@ '$(make-cmd)' > $(dot-target).tmp;           \
+           rm -f $(depfile);                                                    \
+           mv -f $(dot-target).tmp $(dot-target).cmd,                           \
+           printf '\# cannot find fixdep (%s)\n' $(fixdep) > $(dot-target).cmd; \
+           printf '\# using basic dep data\n\n' >> $(dot-target).cmd;           \
+           cat $(depfile) >> $(dot-target).cmd;                                 \
+           printf '%s\n' 'cmd_$@ := $(make-cmd)' >> $(dot-target).cmd)
+
 ###
 # if_changed_dep  - execute command if any prerequisite is newer than
 #                   target, or command line has changed and update
 #                   dependencies in the cmd file
 if_changed_dep = $(if $(strip $(any-prereq) $(arg-check)),         \
 	@set -e;                                                   \
-	$(echo-cmd) $(cmd_$(1));                                   \
-	cat $(depfile) > $(dot-target).cmd;                        \
-	printf '%s\n' 'cmd_$@ := $(make-cmd)' >> $(dot-target).cmd)
+	$(echo-cmd) $(cmd_$(1)) && $(dep-cmd))
 
 # if_changed      - execute command if any prerequisite is newer than
 #                   target, or command line has changed

+ 39 - 13
tools/build/Documentation/Build.txt

@@ -11,8 +11,9 @@ Unlike the kernel we don't have a single build object 'obj-y' list that where
 we setup source objects, but we support more. This allows one 'Build' file to
 carry a sources list for multiple build objects.
 
-a) Build framework makefiles
-----------------------------
+
+Build framework makefiles
+-------------------------
 
 The build framework consists of 2 Makefiles:
 
@@ -23,7 +24,7 @@ While the 'Build.include' file contains just some generic definitions, the
 'Makefile.build' file is the makefile used from the outside. It's
 interface/usage is following:
 
-  $ make -f tools/build/Makefile srctree=$(KSRC) dir=$(DIR) obj=$(OBJECT)
+  $ make -f tools/build/Makefile.build srctree=$(KSRC) dir=$(DIR) obj=$(OBJECT)
 
 where:
 
@@ -38,8 +39,9 @@ called $(OBJECT)-in.o:
 
 which includes all compiled sources described in 'Build' makefiles.
 
-a) Build makefiles
-------------------
+
+Build makefiles
+---------------
 
 The user supplies 'Build' makefiles that contains a objects list, and connects
 the build to nested directories.
@@ -95,8 +97,31 @@ It's only a matter of 2 single commands to create the final binaries:
 
 You can check the 'ex' example in 'tools/build/tests/ex' for more details.
 
-b) Rules
---------
+
+Makefile.include
+----------------
+
+The tools/build/Makefile.include makefile could be included
+via user makefiles to get usefull definitions.
+
+It defines following interface:
+
+  - build macro definition:
+      build := -f $(srctree)/tools/build/Makefile.build dir=. obj
+
+    to make it easier to invoke build like:
+      make $(build)=ex
+
+
+Fixdep
+------
+It is necessary to build the fixdep helper before invoking the build.
+The Makefile.include file adds the fixdep target, that could be
+invoked by the user.
+
+
+Rules
+-----
 
 The build framework provides standard compilation rules to handle .S and .c
 compilation.
@@ -104,8 +129,9 @@ compilation.
 It's possible to include special rule if needed (like we do for flex or bison
 code generation).
 
-c) CFLAGS
----------
+
+CFLAGS
+------
 
 It's possible to alter the standard object C flags in the following way:
 
@@ -115,8 +141,8 @@ It's possible to alter the standard object C flags in the following way:
 This C flags changes has the scope of the Build makefile they are defined in.
 
 
-d) Dependencies
----------------
+Dependencies
+------------
 
 For each built object file 'a.o' the '.a.cmd' is created and holds:
 
@@ -130,8 +156,8 @@ All existing '.cmd' files are included in the Build process to follow properly
 the dependencies and trigger a rebuild when necessary.
 
 
-e) Single rules
----------------
+Single rules
+------------
 
 It's possible to build single object file by choice, like:
 

+ 43 - 0
tools/build/Makefile

@@ -0,0 +1,43 @@
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(shell pwd)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+endif
+
+include $(srctree)/tools//scripts/Makefile.include
+
+define allow-override
+  $(if $(or $(findstring environment,$(origin $(1))),\
+            $(findstring command line,$(origin $(1)))),,\
+    $(eval $(1) = $(2)))
+endef
+
+$(call allow-override,CC,$(CROSS_COMPILE)gcc)
+$(call allow-override,LD,$(CROSS_COMPILE)ld)
+
+ifeq ($(V),1)
+  Q =
+else
+  Q = @
+endif
+
+export Q srctree CC LD
+
+MAKEFLAGS := --no-print-directory
+build     := -f $(srctree)/tools/build/Makefile.build dir=. obj
+
+all: fixdep
+
+clean:
+	$(call QUIET_CLEAN, fixdep)
+	$(Q)find . -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
+	$(Q)rm -f fixdep
+
+$(OUTPUT)fixdep-in.o: FORCE
+	$(Q)$(MAKE) $(build)=fixdep
+
+$(OUTPUT)fixdep: $(OUTPUT)fixdep-in.o
+	$(QUIET_LINK)$(CC) $(LDFLAGS) -o $@ $<
+
+FORCE:
+
+.PHONY: FORCE

+ 7 - 0
tools/build/Makefile.build

@@ -21,6 +21,13 @@ endif
 
 build-dir := $(srctree)/tools/build
 
+# Define $(fixdep) for dep-cmd function
+ifeq ($(OUTPUT),)
+  fixdep := $(build-dir)/fixdep
+else
+  fixdep := $(OUTPUT)/fixdep
+endif
+
 # Generic definitions
 include $(build-dir)/Build.include
 

+ 6 - 0
tools/build/Makefile.include

@@ -0,0 +1,6 @@
+build := -f $(srctree)/tools/build/Makefile.build dir=. obj
+
+fixdep:
+	$(Q)$(MAKE) -C $(srctree)/tools/build fixdep
+
+.PHONY: fixdep

+ 168 - 0
tools/build/fixdep.c

@@ -0,0 +1,168 @@
+/*
+ * "Optimize" a list of dependencies as spit out by gcc -MD
+ * for the build framework.
+ *
+ * Original author:
+ *   Copyright    2002 by Kai Germaschewski  <kai.germaschewski@gmx.de>
+ *
+ * This code has been borrowed from kbuild's fixdep (scripts/basic/fixdep.c),
+ * Please check it for detailed explanation. This fixdep borow only the
+ * base transformation of dependecies without the CONFIG mangle.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <limits.h>
+
+char *target;
+char *depfile;
+char *cmdline;
+
+static void usage(void)
+{
+	fprintf(stderr, "Usage: fixdep <depfile> <target> <cmdline>\n");
+	exit(1);
+}
+
+/*
+ * Print out the commandline prefixed with cmd_<target filename> :=
+ */
+static void print_cmdline(void)
+{
+	printf("cmd_%s := %s\n\n", target, cmdline);
+}
+
+/*
+ * Important: The below generated source_foo.o and deps_foo.o variable
+ * assignments are parsed not only by make, but also by the rather simple
+ * parser in scripts/mod/sumversion.c.
+ */
+static void parse_dep_file(void *map, size_t len)
+{
+	char *m = map;
+	char *end = m + len;
+	char *p;
+	char s[PATH_MAX];
+	int is_target;
+	int saw_any_target = 0;
+	int is_first_dep = 0;
+
+	while (m < end) {
+		/* Skip any "white space" */
+		while (m < end && (*m == ' ' || *m == '\\' || *m == '\n'))
+			m++;
+		/* Find next "white space" */
+		p = m;
+		while (p < end && *p != ' ' && *p != '\\' && *p != '\n')
+			p++;
+		/* Is the token we found a target name? */
+		is_target = (*(p-1) == ':');
+		/* Don't write any target names into the dependency file */
+		if (is_target) {
+			/* The /next/ file is the first dependency */
+			is_first_dep = 1;
+		} else {
+			/* Save this token/filename */
+			memcpy(s, m, p-m);
+			s[p - m] = 0;
+
+			/*
+			 * Do not list the source file as dependency,
+			 * so that kbuild is not confused if a .c file
+			 * is rewritten into .S or vice versa. Storing
+			 * it in source_* is needed for modpost to
+			 * compute srcversions.
+			 */
+			if (is_first_dep) {
+				/*
+				 * If processing the concatenation of
+				 * multiple dependency files, only
+				 * process the first target name, which
+				 * will be the original source name,
+				 * and ignore any other target names,
+				 * which will be intermediate temporary
+				 * files.
+				 */
+				if (!saw_any_target) {
+					saw_any_target = 1;
+					printf("source_%s := %s\n\n",
+						target, s);
+					printf("deps_%s := \\\n",
+						target);
+				}
+				is_first_dep = 0;
+			} else
+				printf("  %s \\\n", s);
+		}
+		/*
+		 * Start searching for next token immediately after the first
+		 * "whitespace" character that follows this token.
+		 */
+		m = p + 1;
+	}
+
+	if (!saw_any_target) {
+		fprintf(stderr, "fixdep: parse error; no targets found\n");
+		exit(1);
+	}
+
+	printf("\n%s: $(deps_%s)\n\n", target, target);
+	printf("$(deps_%s):\n", target);
+}
+
+static void print_deps(void)
+{
+	struct stat st;
+	int fd;
+	void *map;
+
+	fd = open(depfile, O_RDONLY);
+	if (fd < 0) {
+		fprintf(stderr, "fixdep: error opening depfile: ");
+		perror(depfile);
+		exit(2);
+	}
+	if (fstat(fd, &st) < 0) {
+		fprintf(stderr, "fixdep: error fstat'ing depfile: ");
+		perror(depfile);
+		exit(2);
+	}
+	if (st.st_size == 0) {
+		fprintf(stderr, "fixdep: %s is empty\n", depfile);
+		close(fd);
+		return;
+	}
+	map = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+	if ((long) map == -1) {
+		perror("fixdep: mmap");
+		close(fd);
+		return;
+	}
+
+	parse_dep_file(map, st.st_size);
+
+	munmap(map, st.st_size);
+
+	close(fd);
+}
+
+int main(int argc, char **argv)
+{
+	if (argc != 4)
+		usage();
+
+	depfile = argv[1];
+	target  = argv[2];
+	cmdline = argv[3];
+
+	print_cmdline();
+	print_deps();
+
+	return 0;
+}

+ 1 - 0
tools/build/tests/ex/Build

@@ -4,6 +4,7 @@ ex-y += b.o
 ex-y += b.o
 ex-y += empty/
 ex-y += empty2/
+ex-y += inc.o
 
 libex-y += c.o
 libex-y += d.o

+ 8 - 5
tools/build/tests/ex/Makefile

@@ -1,19 +1,22 @@
-export srctree := ../../../..
+export srctree := $(abspath ../../../..)
 export CC      := gcc
 export LD      := ld
 export AR      := ar
 
-build := -f $(srctree)/tools/build/Makefile.build dir=. obj
+ex:
+
+include $(srctree)/tools/build/Makefile.include
+
 ex: ex-in.o libex-in.o
 	gcc -o $@ $^
 
-ex.%: FORCE
+ex.%: fixdep FORCE
 	make -f $(srctree)/tools/build/Makefile.build dir=. $@
 
-ex-in.o: FORCE
+ex-in.o: fixdep FORCE
 	make $(build)=ex
 
-libex-in.o: FORCE
+libex-in.o: fixdep FORCE
 	make $(build)=libex
 
 clean:

+ 2 - 0
tools/build/tests/ex/ex.c

@@ -5,6 +5,7 @@ int c(void);
 int d(void);
 int e(void);
 int f(void);
+int inc(void);
 
 int main(void)
 {
@@ -14,6 +15,7 @@ int main(void)
 	d();
 	e();
 	f();
+	inc();
 
 	return 0;
 }

+ 8 - 0
tools/build/tests/ex/inc.c

@@ -0,0 +1,8 @@
+#ifdef INCLUDE
+#include "krava.h"
+#endif
+
+int inc(void)
+{
+	return 0;
+}

+ 27 - 0
tools/build/tests/run.sh

@@ -34,9 +34,36 @@ function test_ex_suffix {
 	make -C ex V=1 clean > /dev/null 2>&1
 	rm -f ex.out
 }
+
+function test_ex_include {
+	make -C ex V=1 clean > ex.out 2>&1
+
+	# build with krava.h include
+	touch ex/krava.h
+	make -C ex V=1 CFLAGS=-DINCLUDE >> ex.out 2>&1
+
+	if [ ! -x ./ex/ex ]; then
+	  echo FAILED
+	  exit -1
+	fi
+
+	# build without the include
+	rm -f ex/krava.h ex/ex
+	make -C ex V=1 >> ex.out 2>&1
+
+	if [ ! -x ./ex/ex ]; then
+	  echo FAILED
+	  exit -1
+	fi
+
+	make -C ex V=1 clean > /dev/null 2>&1
+	rm -f ex.out
+}
+
 echo -n Testing..
 
 test_ex
 test_ex_suffix
+test_ex_include
 
 echo OK

+ 4 - 2
tools/lib/api/Makefile

@@ -21,12 +21,14 @@ CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64
 
 RM = rm -f
 
-build  := -f $(srctree)/tools/build/Makefile.build dir=. obj
 API_IN := $(OUTPUT)libapi-in.o
 
+all:
+
 export srctree OUTPUT CC LD CFLAGS V
+include $(srctree)/tools/build/Makefile.include
 
-all: $(LIBFILE)
+all: fixdep $(LIBFILE)
 
 $(API_IN): FORCE
 	@$(MAKE) $(build)=libapi

+ 3 - 10
tools/lib/api/fs/tracing_path.c

@@ -12,12 +12,14 @@
 #include "tracing_path.h"
 
 
+char tracing_mnt[PATH_MAX + 1]         = "/sys/kernel/debug";
 char tracing_path[PATH_MAX + 1]        = "/sys/kernel/debug/tracing";
 char tracing_events_path[PATH_MAX + 1] = "/sys/kernel/debug/tracing/events";
 
 
 static void __tracing_path_set(const char *tracing, const char *mountpoint)
 {
+	snprintf(tracing_mnt, sizeof(tracing_mnt), "%s", mountpoint);
 	snprintf(tracing_path, sizeof(tracing_path), "%s/%s",
 		 mountpoint, tracing);
 	snprintf(tracing_events_path, sizeof(tracing_events_path), "%s/%s%s",
@@ -109,19 +111,10 @@ static int strerror_open(int err, char *buf, size_t size, const char *filename)
 			 "Hint:\tTry 'sudo mount -t debugfs nodev /sys/kernel/debug'");
 		break;
 	case EACCES: {
-		const char *mountpoint = debugfs__mountpoint();
-
-		if (!access(mountpoint, R_OK) && strncmp(filename, "tracing/", 8) == 0) {
-			const char *tracefs_mntpoint = tracefs__mountpoint();
-
-			if (tracefs_mntpoint)
-				mountpoint = tracefs__mountpoint();
-		}
-
 		snprintf(buf, size,
 			 "Error:\tNo permissions to read %s/%s\n"
 			 "Hint:\tTry 'sudo mount -o remount,mode=755 %s'\n",
-			 tracing_events_path, filename, mountpoint);
+			 tracing_events_path, filename, tracing_mnt);
 	}
 		break;
 	default:

+ 4 - 2
tools/lib/bpf/Makefile

@@ -123,8 +123,10 @@ endif
 # the same command line setup.
 MAKEOVERRIDES=
 
+all:
+
 export srctree OUTPUT CC LD CFLAGS V
-build := -f $(srctree)/tools/build/Makefile.build dir=. obj
+include $(srctree)/tools/build/Makefile.include
 
 BPF_IN    := $(OUTPUT)libbpf-in.o
 LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE))
@@ -133,7 +135,7 @@ CMD_TARGETS = $(LIB_FILE)
 
 TARGETS = $(CMD_TARGETS)
 
-all: $(VERSION_FILES) all_cmd
+all: fixdep $(VERSION_FILES) all_cmd
 
 all_cmd: $(CMD_TARGETS)
 

+ 4 - 2
tools/lib/lockdep/Makefile

@@ -93,8 +93,10 @@ else
   print_install =		echo '  INSTALL  '$1'	to	$(DESTDIR_SQ)$2';
 endif
 
+all:
+
 export srctree OUTPUT CC LD CFLAGS V
-build := -f $(srctree)/tools/build/Makefile.build dir=. obj
+include $(srctree)/tools/build/Makefile.include
 
 do_compile_shared_library =			\
 	($(print_shared_lib_compile)		\
@@ -109,7 +111,7 @@ CMD_TARGETS = $(LIB_FILE)
 TARGETS = $(CMD_TARGETS)
 
 
-all: all_cmd
+all: fixdep all_cmd
 
 all_cmd: $(CMD_TARGETS)
 

+ 44 - 0
tools/perf/Documentation/intel-pt.txt

@@ -671,6 +671,7 @@ The letters are:
 	e	synthesize tracing error events
 	d	create a debug log
 	g	synthesize a call chain (use with i or x)
+	l	synthesize last branch entries (use with i or x)
 
 "Instructions" events look like they were recorded by "perf record -e
 instructions".
@@ -707,12 +708,26 @@ on the sample is *not* adjusted and reflects the last known value of TSC.
 
 For Intel PT, the default period is 100us.
 
+Setting it to a zero period means "as often as possible".
+
+In the case of Intel PT that is the same as a period of 1 and a unit of
+'instructions' (i.e. --itrace=i1i).
+
 Also the call chain size (default 16, max. 1024) for instructions or
 transactions events can be specified. e.g.
 
 	--itrace=ig32
 	--itrace=xg32
 
+Also the number of last branch entries (default 64, max. 1024) for instructions or
+transactions events can be specified. e.g.
+
+       --itrace=il10
+       --itrace=xl10
+
+Note that last branch entries are cleared for each sample, so there is no overlap
+from one sample to the next.
+
 To disable trace decoding entirely, use the option --no-itrace.
 
 
@@ -749,3 +764,32 @@ perf inject also accepts the --itrace option in which case tracing data is
 removed and replaced with the synthesized events. e.g.
 
 	perf inject --itrace -i perf.data -o perf.data.new
+
+Below is an example of using Intel PT with autofdo.  It requires autofdo
+(https://github.com/google/autofdo) and gcc version 5.  The bubble
+sort example is from the AutoFDO tutorial (https://gcc.gnu.org/wiki/AutoFDO/Tutorial)
+amended to take the number of elements as a parameter.
+
+	$ gcc-5 -O3 sort.c -o sort_optimized
+	$ ./sort_optimized 30000
+	Bubble sorting array of 30000 elements
+	2254 ms
+
+	$ cat ~/.perfconfig
+	[intel-pt]
+		mispred-all
+
+	$ perf record -e intel_pt//u ./sort 3000
+	Bubble sorting array of 3000 elements
+	58 ms
+	[ perf record: Woken up 2 times to write data ]
+	[ perf record: Captured and wrote 3.939 MB perf.data ]
+	$ perf inject -i perf.data -o inj --itrace=i100usle --strip
+	$ ./create_gcov --binary=./sort --profile=inj --gcov=sort.gcov -gcov_version=1
+	$ gcc-5 -O3 -fauto-profile=sort.gcov sort.c -o sort_autofdo
+	$ ./sort_autofdo 30000
+	Bubble sorting array of 30000 elements
+	2155 ms
+
+Note there is currently no advantage to using Intel PT instead of LBR, but
+that may change in the future if greater use is made of the data.

+ 4 - 0
tools/perf/Documentation/itrace.txt

@@ -6,6 +6,7 @@
 		e	synthesize error events
 		d	create a debug log
 		g	synthesize a call chain (use with i or x)
+		l	synthesize last branch entries (use with i or x)
 
 	The default is all events i.e. the same as --itrace=ibxe
 
@@ -20,3 +21,6 @@
 
 	Also the call chain size (default 16, max. 1024) for instructions or
 	transactions events can be specified.
+
+	Also the number of last branch entries (default 64, max. 1024) for
+	instructions or transactions events can be specified.

+ 3 - 0
tools/perf/Documentation/perf-inject.txt

@@ -50,6 +50,9 @@ OPTIONS
 
 include::itrace.txt[]
 
+--strip::
+	Use with --itrace to strip out non-synthesized events.
+
 SEE ALSO
 --------
 linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-archive[1]

+ 3 - 0
tools/perf/Documentation/perf-script.txt

@@ -249,6 +249,9 @@ include::itrace.txt[]
 --full-source-path::
 	Show the full path for source files for srcline output.
 
+--ns::
+	Use 9 decimal places when displaying time (i.e. show the nanoseconds)
+
 SEE ALSO
 --------
 linkperf:perf-record[1], linkperf:perf-script-perl[1],

+ 16 - 16
tools/perf/Makefile.perf

@@ -297,16 +297,16 @@ strip: $(PROGRAMS) $(OUTPUT)perf
 PERF_IN := $(OUTPUT)perf-in.o
 
 export srctree OUTPUT RM CC LD AR CFLAGS V BISON FLEX AWK
-build := -f $(srctree)/tools/build/Makefile.build dir=. obj
+include $(srctree)/tools/build/Makefile.include
 
-$(PERF_IN): $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h FORCE
+$(PERF_IN): prepare FORCE
 	$(Q)$(MAKE) $(build)=perf
 
 $(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(LIBTRACEEVENT_DYNAMIC_LIST)
 	$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS) \
 		$(PERF_IN) $(LIBS) -o $@
 
-$(GTK_IN): FORCE
+$(GTK_IN): fixdep FORCE
 	$(Q)$(MAKE) $(build)=gtk
 
 $(OUTPUT)libperf-gtk.so: $(GTK_IN) $(PERFLIBS)
@@ -349,27 +349,27 @@ endif
 __build-dir = $(subst $(OUTPUT),,$(dir $@))
 build-dir   = $(if $(__build-dir),$(__build-dir),.)
 
-single_dep: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h
+prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h fixdep
 
-$(OUTPUT)%.o: %.c single_dep FORCE
+$(OUTPUT)%.o: %.c prepare FORCE
 	$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@
 
-$(OUTPUT)%.i: %.c single_dep FORCE
+$(OUTPUT)%.i: %.c prepare FORCE
 	$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@
 
-$(OUTPUT)%.s: %.c single_dep FORCE
+$(OUTPUT)%.s: %.c prepare FORCE
 	$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@
 
-$(OUTPUT)%-bison.o: %.c single_dep FORCE
+$(OUTPUT)%-bison.o: %.c prepare FORCE
 	$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@
 
-$(OUTPUT)%-flex.o: %.c single_dep FORCE
+$(OUTPUT)%-flex.o: %.c prepare FORCE
 	$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@
 
-$(OUTPUT)%.o: %.S single_dep FORCE
+$(OUTPUT)%.o: %.S prepare FORCE
 	$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@
 
-$(OUTPUT)%.i: %.S single_dep FORCE
+$(OUTPUT)%.i: %.S prepare FORCE
 	$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@
 
 $(OUTPUT)perf-%: %.o $(PERFLIBS)
@@ -389,7 +389,7 @@ $(patsubst perf-%,%.o,$(PROGRAMS)): $(wildcard */*.h)
 
 LIBPERF_IN := $(OUTPUT)libperf-in.o
 
-$(LIBPERF_IN): FORCE
+$(LIBPERF_IN): fixdep FORCE
 	$(Q)$(MAKE) $(build)=libperf
 
 $(LIB_FILE): $(LIBPERF_IN)
@@ -397,10 +397,10 @@ $(LIB_FILE): $(LIBPERF_IN)
 
 LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ)
 
-$(LIBTRACEEVENT): FORCE
+$(LIBTRACEEVENT): fixdep FORCE
 	$(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent.a
 
-libtraceevent_plugins: FORCE
+libtraceevent_plugins: fixdep FORCE
 	$(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) plugins
 
 $(LIBTRACEEVENT_DYNAMIC_LIST): libtraceevent_plugins
@@ -413,7 +413,7 @@ $(LIBTRACEEVENT)-clean:
 install-traceevent-plugins: $(LIBTRACEEVENT)
 	$(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) install_plugins
 
-$(LIBAPI): FORCE
+$(LIBAPI): fixdep FORCE
 	$(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) $(OUTPUT)libapi.a
 
 $(LIBAPI)-clean:
@@ -591,6 +591,6 @@ FORCE:
 
 .PHONY: all install clean config-clean strip install-gtk
 .PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell
-.PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope FORCE single_dep
+.PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope FORCE prepare
 .PHONY: libtraceevent_plugins
 

+ 125 - 2
tools/perf/builtin-inject.c

@@ -28,9 +28,11 @@ struct perf_inject {
 	bool			build_ids;
 	bool			sched_stat;
 	bool			have_auxtrace;
+	bool			strip;
 	const char		*input_name;
 	struct perf_data_file	output;
 	u64			bytes_written;
+	u64			aux_id;
 	struct list_head	samples;
 	struct itrace_synth_opts itrace_synth_opts;
 };
@@ -176,6 +178,27 @@ static int perf_event__repipe(struct perf_tool *tool,
 	return perf_event__repipe_synth(tool, event);
 }
 
+static int perf_event__drop(struct perf_tool *tool __maybe_unused,
+			    union perf_event *event __maybe_unused,
+			    struct perf_sample *sample __maybe_unused,
+			    struct machine *machine __maybe_unused)
+{
+	return 0;
+}
+
+static int perf_event__drop_aux(struct perf_tool *tool,
+				union perf_event *event __maybe_unused,
+				struct perf_sample *sample,
+				struct machine *machine __maybe_unused)
+{
+	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
+
+	if (!inject->aux_id)
+		inject->aux_id = sample->id;
+
+	return 0;
+}
+
 typedef int (*inject_handler)(struct perf_tool *tool,
 			      union perf_event *event,
 			      struct perf_sample *sample,
@@ -466,6 +489,78 @@ static int perf_evsel__check_stype(struct perf_evsel *evsel,
 	return 0;
 }
 
+static int drop_sample(struct perf_tool *tool __maybe_unused,
+		       union perf_event *event __maybe_unused,
+		       struct perf_sample *sample __maybe_unused,
+		       struct perf_evsel *evsel __maybe_unused,
+		       struct machine *machine __maybe_unused)
+{
+	return 0;
+}
+
+static void strip_init(struct perf_inject *inject)
+{
+	struct perf_evlist *evlist = inject->session->evlist;
+	struct perf_evsel *evsel;
+
+	inject->tool.context_switch = perf_event__drop;
+
+	evlist__for_each(evlist, evsel)
+		evsel->handler = drop_sample;
+}
+
+static bool has_tracking(struct perf_evsel *evsel)
+{
+	return evsel->attr.mmap || evsel->attr.mmap2 || evsel->attr.comm ||
+	       evsel->attr.task;
+}
+
+#define COMPAT_MASK (PERF_SAMPLE_ID | PERF_SAMPLE_TID | PERF_SAMPLE_TIME | \
+		     PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_IDENTIFIER)
+
+/*
+ * In order that the perf.data file is parsable, tracking events like MMAP need
+ * their selected event to exist, except if there is only 1 selected event left
+ * and it has a compatible sample type.
+ */
+static bool ok_to_remove(struct perf_evlist *evlist,
+			 struct perf_evsel *evsel_to_remove)
+{
+	struct perf_evsel *evsel;
+	int cnt = 0;
+	bool ok = false;
+
+	if (!has_tracking(evsel_to_remove))
+		return true;
+
+	evlist__for_each(evlist, evsel) {
+		if (evsel->handler != drop_sample) {
+			cnt += 1;
+			if ((evsel->attr.sample_type & COMPAT_MASK) ==
+			    (evsel_to_remove->attr.sample_type & COMPAT_MASK))
+				ok = true;
+		}
+	}
+
+	return ok && cnt == 1;
+}
+
+static void strip_fini(struct perf_inject *inject)
+{
+	struct perf_evlist *evlist = inject->session->evlist;
+	struct perf_evsel *evsel, *tmp;
+
+	/* Remove non-synthesized evsels if possible */
+	evlist__for_each_safe(evlist, tmp, evsel) {
+		if (evsel->handler == drop_sample &&
+		    ok_to_remove(evlist, evsel)) {
+			pr_debug("Deleting %s\n", perf_evsel__name(evsel));
+			perf_evlist__remove(evlist, evsel);
+			perf_evsel__delete(evsel);
+		}
+	}
+}
+
 static int __cmd_inject(struct perf_inject *inject)
 {
 	int ret = -EINVAL;
@@ -512,10 +607,14 @@ static int __cmd_inject(struct perf_inject *inject)
 		inject->tool.id_index	    = perf_event__repipe_id_index;
 		inject->tool.auxtrace_info  = perf_event__process_auxtrace_info;
 		inject->tool.auxtrace	    = perf_event__process_auxtrace;
+		inject->tool.aux	    = perf_event__drop_aux;
+		inject->tool.itrace_start   = perf_event__drop_aux,
 		inject->tool.ordered_events = true;
 		inject->tool.ordering_requires_timestamps = true;
 		/* Allow space in the header for new attributes */
 		output_data_offset = 4096;
+		if (inject->strip)
+			strip_init(inject);
 	}
 
 	if (!inject->itrace_synth_opts.set)
@@ -535,11 +634,28 @@ static int __cmd_inject(struct perf_inject *inject)
 		}
 		/*
 		 * The AUX areas have been removed and replaced with
-		 * synthesized hardware events, so clear the feature flag.
+		 * synthesized hardware events, so clear the feature flag and
+		 * remove the evsel.
 		 */
-		if (inject->itrace_synth_opts.set)
+		if (inject->itrace_synth_opts.set) {
+			struct perf_evsel *evsel;
+
 			perf_header__clear_feat(&session->header,
 						HEADER_AUXTRACE);
+			if (inject->itrace_synth_opts.last_branch)
+				perf_header__set_feat(&session->header,
+						      HEADER_BRANCH_STACK);
+			evsel = perf_evlist__id2evsel_strict(session->evlist,
+							     inject->aux_id);
+			if (evsel) {
+				pr_debug("Deleting %s\n",
+					 perf_evsel__name(evsel));
+				perf_evlist__remove(session->evlist, evsel);
+				perf_evsel__delete(evsel);
+			}
+			if (inject->strip)
+				strip_fini(inject);
+		}
 		session->header.data_offset = output_data_offset;
 		session->header.data_size = inject->bytes_written;
 		perf_session__write_header(session, session->evlist, fd, true);
@@ -604,6 +720,8 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused)
 		OPT_CALLBACK_OPTARG(0, "itrace", &inject.itrace_synth_opts,
 				    NULL, "opts", "Instruction Tracing options",
 				    itrace_parse_synth_opts),
+		OPT_BOOLEAN(0, "strip", &inject.strip,
+			    "strip non-synthesized events (use with --itrace)"),
 		OPT_END()
 	};
 	const char * const inject_usage[] = {
@@ -619,6 +737,11 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused)
 	if (argc)
 		usage_with_options(inject_usage, options);
 
+	if (inject.strip && !inject.itrace_synth_opts.set) {
+		pr_err("--strip option requires --itrace option\n");
+		return -1;
+	}
+
 	if (perf_data_file__open(&inject.output)) {
 		perror("failed to create output file");
 		return -1;

+ 27 - 4
tools/perf/builtin-report.c

@@ -163,14 +163,21 @@ static int process_sample_event(struct perf_tool *tool,
 	if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap))
 		goto out_put;
 
-	if (sort__mode == SORT_MODE__BRANCH)
+	if (sort__mode == SORT_MODE__BRANCH) {
+		/*
+		 * A non-synthesized event might not have a branch stack if
+		 * branch stacks have been synthesized (using itrace options).
+		 */
+		if (!sample->branch_stack)
+			goto out_put;
 		iter.ops = &hist_iter_branch;
-	else if (rep->mem_mode)
+	} else if (rep->mem_mode) {
 		iter.ops = &hist_iter_mem;
-	else if (symbol_conf.cumulate_callchain)
+	} else if (symbol_conf.cumulate_callchain) {
 		iter.ops = &hist_iter_cumulative;
-	else
+	} else {
 		iter.ops = &hist_iter_normal;
+	}
 
 	if (al.map != NULL)
 		al.map->dso->hit = 1;
@@ -214,6 +221,15 @@ static int report__setup_sample_type(struct report *rep)
 	u64 sample_type = perf_evlist__combined_sample_type(session->evlist);
 	bool is_pipe = perf_data_file__is_pipe(session->file);
 
+	if (session->itrace_synth_opts->callchain ||
+	    (!is_pipe &&
+	     perf_header__has_feat(&session->header, HEADER_AUXTRACE) &&
+	     !session->itrace_synth_opts->set))
+		sample_type |= PERF_SAMPLE_CALLCHAIN;
+
+	if (session->itrace_synth_opts->last_branch)
+		sample_type |= PERF_SAMPLE_BRANCH_STACK;
+
 	if (!is_pipe && !(sample_type & PERF_SAMPLE_CALLCHAIN)) {
 		if (sort__has_parent) {
 			ui__error("Selected --sort parent, but no "
@@ -793,6 +809,10 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
 	if (report.inverted_callchain)
 		callchain_param.order = ORDER_CALLER;
 
+	if (itrace_synth_opts.callchain &&
+	    (int)itrace_synth_opts.callchain_sz > report.max_stack)
+		report.max_stack = itrace_synth_opts.callchain_sz;
+
 	if (!input_name || !strlen(input_name)) {
 		if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode))
 			input_name = "-";
@@ -820,6 +840,9 @@ repeat:
 	has_br_stack = perf_header__has_feat(&session->header,
 					     HEADER_BRANCH_STACK);
 
+	if (itrace_synth_opts.last_branch)
+		has_br_stack = true;
+
 	/*
 	 * Branch mode is a tristate:
 	 * -1 means default, so decide based on the file having branch data.

+ 15 - 3
tools/perf/builtin-script.c

@@ -29,9 +29,12 @@ static bool			no_callchain;
 static bool			latency_format;
 static bool			system_wide;
 static bool			print_flags;
+static bool			nanosecs;
 static const char		*cpu_list;
 static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
 
+unsigned int scripting_max_stack = PERF_MAX_STACK_DEPTH;
+
 enum perf_output_field {
 	PERF_OUTPUT_COMM            = 1U << 0,
 	PERF_OUTPUT_TID             = 1U << 1,
@@ -415,7 +418,10 @@ static void print_sample_start(struct perf_sample *sample,
 		secs = nsecs / NSECS_PER_SEC;
 		nsecs -= secs * NSECS_PER_SEC;
 		usecs = nsecs / NSECS_PER_USEC;
-		printf("%5lu.%06lu: ", secs, usecs);
+		if (nanosecs)
+			printf("%5lu.%09llu: ", secs, nsecs);
+		else
+			printf("%5lu.%06lu: ", secs, usecs);
 	}
 }
 
@@ -471,7 +477,7 @@ static void print_sample_bts(union perf_event *event,
 			}
 		}
 		perf_evsel__print_ip(evsel, sample, al, print_opts,
-				     PERF_MAX_STACK_DEPTH);
+				     scripting_max_stack);
 	}
 
 	/* print branch_to information */
@@ -548,7 +554,7 @@ static void process_event(union perf_event *event, struct perf_sample *sample,
 
 		perf_evsel__print_ip(evsel, sample, al,
 				     output[attr->type].print_ip_opts,
-				     PERF_MAX_STACK_DEPTH);
+				     scripting_max_stack);
 	}
 
 	if (PRINT_FIELD(IREGS))
@@ -1695,6 +1701,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
 	OPT_BOOLEAN('\0', "show-switch-events", &script.show_switch_events,
 		    "Show context switch events (if recorded)"),
 	OPT_BOOLEAN('f', "force", &file.force, "don't complain, do it"),
+	OPT_BOOLEAN(0, "ns", &nanosecs,
+		    "Use 9 decimal places when displaying time"),
 	OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts",
 			    "Instruction Tracing options",
 			    itrace_parse_synth_opts),
@@ -1740,6 +1748,10 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
 		}
 	}
 
+	if (itrace_synth_opts.callchain &&
+	    itrace_synth_opts.callchain_sz > scripting_max_stack)
+		scripting_max_stack = itrace_synth_opts.callchain_sz;
+
 	/* make sure PERF_EXEC_PATH is set for scripts */
 	perf_set_argv_exec_path(perf_exec_path());
 

+ 1 - 1
tools/perf/builtin-top.c

@@ -655,7 +655,7 @@ static int symbol_filter(struct map *map, struct symbol *sym)
 {
 	const char *name = sym->name;
 
-	if (!map->dso->kernel)
+	if (!__map__is_kernel(map))
 		return 0;
 	/*
 	 * ppc64 uses function descriptors and appends a '.' to the

+ 221 - 0
tools/perf/scripts/python/export-to-postgresql.py

@@ -61,6 +61,142 @@ import datetime
 #
 # An example of using the database is provided by the script
 # call-graph-from-postgresql.py.  Refer to that script for details.
+#
+# Tables:
+#
+#	The tables largely correspond to perf tools' data structures.  They are largely self-explanatory.
+#
+#	samples
+#
+#		'samples' is the main table. It represents what instruction was executing at a point in time
+#		when something (a selected event) happened.  The memory address is the instruction pointer or 'ip'.
+#
+#	calls
+#
+#		'calls' represents function calls and is related to 'samples' by 'call_id' and 'return_id'.
+#		'calls' is only created when the 'calls' option to this script is specified.
+#
+#	call_paths
+#
+#		'call_paths' represents all the call stacks.  Each 'call' has an associated record in 'call_paths'.
+#		'calls_paths' is only created when the 'calls' option to this script is specified.
+#
+#	branch_types
+#
+#		'branch_types' provides descriptions for each type of branch.
+#
+#	comm_threads
+#
+#		'comm_threads' shows how 'comms' relates to 'threads'.
+#
+#	comms
+#
+#		'comms' contains a record for each 'comm' - the name given to the executable that is running.
+#
+#	dsos
+#
+#		'dsos' contains a record for each executable file or library.
+#
+#	machines
+#
+#		'machines' can be used to distinguish virtual machines if virtualization is supported.
+#
+#	selected_events
+#
+#		'selected_events' contains a record for each kind of event that has been sampled.
+#
+#	symbols
+#
+#		'symbols' contains a record for each symbol.  Only symbols that have samples are present.
+#
+#	threads
+#
+#		'threads' contains a record for each thread.
+#
+# Views:
+#
+#	Most of the tables have views for more friendly display.  The views are:
+#
+#		calls_view
+#		call_paths_view
+#		comm_threads_view
+#		dsos_view
+#		machines_view
+#		samples_view
+#		symbols_view
+#		threads_view
+#
+# More examples of browsing the database with psql:
+#   Note that some of the examples are not the most optimal SQL query.
+#   Note that call information is only available if the script's 'calls' option has been used.
+#
+#	Top 10 function calls (not aggregated by symbol):
+#
+#		SELECT * FROM calls_view ORDER BY elapsed_time DESC LIMIT 10;
+#
+#	Top 10 function calls (aggregated by symbol):
+#
+#		SELECT symbol_id,(SELECT name FROM symbols WHERE id = symbol_id) AS symbol,
+#			SUM(elapsed_time) AS tot_elapsed_time,SUM(branch_count) AS tot_branch_count
+#			FROM calls_view GROUP BY symbol_id ORDER BY tot_elapsed_time DESC LIMIT 10;
+#
+#		Note that the branch count gives a rough estimation of cpu usage, so functions
+#		that took a long time but have a relatively low branch count must have spent time
+#		waiting.
+#
+#	Find symbols by pattern matching on part of the name (e.g. names containing 'alloc'):
+#
+#		SELECT * FROM symbols_view WHERE name LIKE '%alloc%';
+#
+#	Top 10 function calls for a specific symbol (e.g. whose symbol_id is 187):
+#
+#		SELECT * FROM calls_view WHERE symbol_id = 187 ORDER BY elapsed_time DESC LIMIT 10;
+#
+#	Show function calls made by function in the same context (i.e. same call path) (e.g. one with call_path_id 254):
+#
+#		SELECT * FROM calls_view WHERE parent_call_path_id = 254;
+#
+#	Show branches made during a function call (e.g. where call_id is 29357 and return_id is 29370 and tid is 29670)
+#
+#		SELECT * FROM samples_view WHERE id >= 29357 AND id <= 29370 AND tid = 29670 AND event LIKE 'branches%';
+#
+#	Show transactions:
+#
+#		SELECT * FROM samples_view WHERE event = 'transactions';
+#
+#		Note transaction start has 'in_tx' true whereas, transaction end has 'in_tx' false.
+#		Transaction aborts have branch_type_name 'transaction abort'
+#
+#	Show transaction aborts:
+#
+#		SELECT * FROM samples_view WHERE event = 'transactions' AND branch_type_name = 'transaction abort';
+#
+# To print a call stack requires walking the call_paths table.  For example this python script:
+#   #!/usr/bin/python2
+#
+#   import sys
+#   from PySide.QtSql import *
+#
+#   if __name__ == '__main__':
+#           if (len(sys.argv) < 3):
+#                   print >> sys.stderr, "Usage is: printcallstack.py <database name> <call_path_id>"
+#                   raise Exception("Too few arguments")
+#           dbname = sys.argv[1]
+#           call_path_id = sys.argv[2]
+#           db = QSqlDatabase.addDatabase('QPSQL')
+#           db.setDatabaseName(dbname)
+#           if not db.open():
+#                   raise Exception("Failed to open database " + dbname + " error: " + db.lastError().text())
+#           query = QSqlQuery(db)
+#           print "    id          ip  symbol_id  symbol                          dso_id  dso_short_name"
+#           while call_path_id != 0 and call_path_id != 1:
+#                   ret = query.exec_('SELECT * FROM call_paths_view WHERE id = ' + str(call_path_id))
+#                   if not ret:
+#                           raise Exception("Query failed: " + query.lastError().text())
+#                   if not query.next():
+#                           raise Exception("Query failed")
+#                   print "{0:>6}  {1:>10}  {2:>9}  {3:<30}  {4:>6}  {5:<30}".format(query.value(0), query.value(1), query.value(2), query.value(3), query.value(4), query.value(5))
+#                   call_path_id = query.value(6)
 
 from PySide.QtSql import *
 
@@ -244,6 +380,91 @@ if perf_db_export_calls:
 		'parent_call_path_id	bigint,'
 		'flags		integer)')
 
+do_query(query, 'CREATE VIEW machines_view AS '
+	'SELECT '
+		'id,'
+		'pid,'
+		'root_dir,'
+		'CASE WHEN id=0 THEN \'unknown\' WHEN pid=-1 THEN \'host\' ELSE \'guest\' END AS host_or_guest'
+	' FROM machines')
+
+do_query(query, 'CREATE VIEW dsos_view AS '
+	'SELECT '
+		'id,'
+		'machine_id,'
+		'(SELECT host_or_guest FROM machines_view WHERE id = machine_id) AS host_or_guest,'
+		'short_name,'
+		'long_name,'
+		'build_id'
+	' FROM dsos')
+
+do_query(query, 'CREATE VIEW symbols_view AS '
+	'SELECT '
+		'id,'
+		'name,'
+		'(SELECT short_name FROM dsos WHERE id=dso_id) AS dso,'
+		'dso_id,'
+		'sym_start,'
+		'sym_end,'
+		'CASE WHEN binding=0 THEN \'local\' WHEN binding=1 THEN \'global\' ELSE \'weak\' END AS binding'
+	' FROM symbols')
+
+do_query(query, 'CREATE VIEW threads_view AS '
+	'SELECT '
+		'id,'
+		'machine_id,'
+		'(SELECT host_or_guest FROM machines_view WHERE id = machine_id) AS host_or_guest,'
+		'process_id,'
+		'pid,'
+		'tid'
+	' FROM threads')
+
+do_query(query, 'CREATE VIEW comm_threads_view AS '
+	'SELECT '
+		'comm_id,'
+		'(SELECT comm FROM comms WHERE id = comm_id) AS command,'
+		'thread_id,'
+		'(SELECT pid FROM threads WHERE id = thread_id) AS pid,'
+		'(SELECT tid FROM threads WHERE id = thread_id) AS tid'
+	' FROM comm_threads')
+
+if perf_db_export_calls:
+	do_query(query, 'CREATE VIEW call_paths_view AS '
+		'SELECT '
+			'c.id,'
+			'to_hex(c.ip) AS ip,'
+			'c.symbol_id,'
+			'(SELECT name FROM symbols WHERE id = c.symbol_id) AS symbol,'
+			'(SELECT dso_id FROM symbols WHERE id = c.symbol_id) AS dso_id,'
+			'(SELECT dso FROM symbols_view  WHERE id = c.symbol_id) AS dso_short_name,'
+			'c.parent_id,'
+			'to_hex(p.ip) AS parent_ip,'
+			'p.symbol_id AS parent_symbol_id,'
+			'(SELECT name FROM symbols WHERE id = p.symbol_id) AS parent_symbol,'
+			'(SELECT dso_id FROM symbols WHERE id = p.symbol_id) AS parent_dso_id,'
+			'(SELECT dso FROM symbols_view  WHERE id = p.symbol_id) AS parent_dso_short_name'
+		' FROM call_paths c INNER JOIN call_paths p ON p.id = c.parent_id')
+	do_query(query, 'CREATE VIEW calls_view AS '
+		'SELECT '
+			'calls.id,'
+			'thread_id,'
+			'(SELECT pid FROM threads WHERE id = thread_id) AS pid,'
+			'(SELECT tid FROM threads WHERE id = thread_id) AS tid,'
+			'(SELECT comm FROM comms WHERE id = comm_id) AS command,'
+			'call_path_id,'
+			'to_hex(ip) AS ip,'
+			'symbol_id,'
+			'(SELECT name FROM symbols WHERE id = symbol_id) AS symbol,'
+			'call_time,'
+			'return_time,'
+			'return_time - call_time AS elapsed_time,'
+			'branch_count,'
+			'call_id,'
+			'return_id,'
+			'CASE WHEN flags=1 THEN \'no call\' WHEN flags=2 THEN \'no return\' WHEN flags=3 THEN \'no call/return\' ELSE \'\' END AS flags,'
+			'parent_call_path_id'
+		' FROM calls INNER JOIN call_paths ON call_paths.id = call_path_id')
+
 do_query(query, 'CREATE VIEW samples_view AS '
 	'SELECT '
 		'id,'

+ 14 - 10
tools/perf/ui/browsers/hists.c

@@ -1527,7 +1527,7 @@ add_thread_opt(struct hist_browser *browser, struct popup_action *act,
 static int
 do_zoom_dso(struct hist_browser *browser, struct popup_action *act)
 {
-	struct dso *dso = act->dso;
+	struct map *map = act->ms.map;
 
 	if (browser->hists->dso_filter) {
 		pstack__remove(browser->pstack, &browser->hists->dso_filter);
@@ -1535,11 +1535,11 @@ do_zoom_dso(struct hist_browser *browser, struct popup_action *act)
 		browser->hists->dso_filter = NULL;
 		ui_helpline__pop();
 	} else {
-		if (dso == NULL)
+		if (map == NULL)
 			return 0;
 		ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s DSO\"",
-				   dso->kernel ? "the Kernel" : dso->short_name);
-		browser->hists->dso_filter = dso;
+				   __map__is_kernel(map) ? "the Kernel" : map->dso->short_name);
+		browser->hists->dso_filter = map->dso;
 		perf_hpp__set_elide(HISTC_DSO, true);
 		pstack__push(browser->pstack, &browser->hists->dso_filter);
 	}
@@ -1551,17 +1551,18 @@ do_zoom_dso(struct hist_browser *browser, struct popup_action *act)
 
 static int
 add_dso_opt(struct hist_browser *browser, struct popup_action *act,
-	    char **optstr, struct dso *dso)
+	    char **optstr, struct map *map)
 {
-	if (dso == NULL)
+	if (map == NULL)
 		return 0;
 
 	if (asprintf(optstr, "Zoom %s %s DSO",
 		     browser->hists->dso_filter ? "out of" : "into",
-		     dso->kernel ? "the Kernel" : dso->short_name) < 0)
+		     __map__is_kernel(map) ? "the Kernel" : map->dso->short_name) < 0)
 		return 0;
 
-	act->dso = dso;
+	act->ms.map = map;
+	act->dso = map->dso;
 	act->fn = do_zoom_dso;
 	return 1;
 }
@@ -1814,6 +1815,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 	while (1) {
 		struct thread *thread = NULL;
 		struct dso *dso = NULL;
+		struct map *map = NULL;
 		int choice = 0;
 		int socked_id = -1;
 
@@ -1823,7 +1825,9 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 
 		if (browser->he_selection != NULL) {
 			thread = hist_browser__selected_thread(browser);
-			dso = browser->selection->map ? browser->selection->map->dso : NULL;
+			map = browser->selection->map;
+			if (map)
+				dso = map->dso;
 			socked_id = browser->he_selection->socket;
 		}
 		switch (key) {
@@ -2014,7 +2018,7 @@ skip_annotation:
 		nr_options += add_thread_opt(browser, &actions[nr_options],
 					     &options[nr_options], thread);
 		nr_options += add_dso_opt(browser, &actions[nr_options],
-					  &options[nr_options], dso);
+					  &options[nr_options], map);
 		nr_options += add_map_opt(browser, &actions[nr_options],
 					  &options[nr_options],
 					  browser->selection ?

+ 23 - 1
tools/perf/util/auxtrace.c

@@ -926,6 +926,8 @@ s64 perf_event__process_auxtrace(struct perf_tool *tool,
 #define PERF_ITRACE_DEFAULT_PERIOD		100000
 #define PERF_ITRACE_DEFAULT_CALLCHAIN_SZ	16
 #define PERF_ITRACE_MAX_CALLCHAIN_SZ		1024
+#define PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ	64
+#define PERF_ITRACE_MAX_LAST_BRANCH_SZ		1024
 
 void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts)
 {
@@ -936,6 +938,7 @@ void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts)
 	synth_opts->period_type = PERF_ITRACE_DEFAULT_PERIOD_TYPE;
 	synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD;
 	synth_opts->callchain_sz = PERF_ITRACE_DEFAULT_CALLCHAIN_SZ;
+	synth_opts->last_branch_sz = PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ;
 }
 
 /*
@@ -950,6 +953,7 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
 	const char *p;
 	char *endptr;
 	bool period_type_set = false;
+	bool period_set = false;
 
 	synth_opts->set = true;
 
@@ -971,6 +975,7 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
 				p += 1;
 			if (isdigit(*p)) {
 				synth_opts->period = strtoull(p, &endptr, 10);
+				period_set = true;
 				p = endptr;
 				while (*p == ' ' || *p == ',')
 					p += 1;
@@ -1041,6 +1046,23 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
 				synth_opts->callchain_sz = val;
 			}
 			break;
+		case 'l':
+			synth_opts->last_branch = true;
+			synth_opts->last_branch_sz =
+					PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ;
+			while (*p == ' ' || *p == ',')
+				p += 1;
+			if (isdigit(*p)) {
+				unsigned int val;
+
+				val = strtoul(p, &endptr, 10);
+				p = endptr;
+				if (!val ||
+				    val > PERF_ITRACE_MAX_LAST_BRANCH_SZ)
+					goto out_err;
+				synth_opts->last_branch_sz = val;
+			}
+			break;
 		case ' ':
 		case ',':
 			break;
@@ -1053,7 +1075,7 @@ out:
 		if (!period_type_set)
 			synth_opts->period_type =
 					PERF_ITRACE_DEFAULT_PERIOD_TYPE;
-		if (!synth_opts->period)
+		if (!period_set)
 			synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD;
 	}
 

+ 4 - 0
tools/perf/util/auxtrace.h

@@ -63,7 +63,9 @@ enum itrace_period_type {
  * @calls: limit branch samples to calls (can be combined with @returns)
  * @returns: limit branch samples to returns (can be combined with @calls)
  * @callchain: add callchain to 'instructions' events
+ * @last_branch: add branch context to 'instruction' events
  * @callchain_sz: maximum callchain size
+ * @last_branch_sz: branch context size
  * @period: 'instructions' events period
  * @period_type: 'instructions' events period type
  */
@@ -79,7 +81,9 @@ struct itrace_synth_opts {
 	bool			calls;
 	bool			returns;
 	bool			callchain;
+	bool			last_branch;
 	unsigned int		callchain_sz;
+	unsigned int		last_branch_sz;
 	unsigned long long	period;
 	enum itrace_period_type	period_type;
 };

+ 1 - 1
tools/perf/util/event.c

@@ -378,7 +378,7 @@ int perf_event__synthesize_modules(struct perf_tool *tool,
 	for (pos = maps__first(maps); pos; pos = map__next(pos)) {
 		size_t size;
 
-		if (pos->dso->kernel)
+		if (__map__is_kernel(pos))
 			continue;
 
 		size = PERF_ALIGN(pos->dso->long_name_len + 1, sizeof(u64));

+ 1 - 0
tools/perf/util/event.h

@@ -257,6 +257,7 @@ struct events_stats {
 	u64 total_non_filtered_period;
 	u64 total_lost;
 	u64 total_lost_samples;
+	u64 total_aux_lost;
 	u64 total_invalid_chains;
 	u32 nr_events[PERF_RECORD_HEADER_MAX];
 	u32 nr_non_filtered_samples;

+ 22 - 0
tools/perf/util/evlist.c

@@ -165,6 +165,13 @@ void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry)
 	__perf_evlist__propagate_maps(evlist, entry);
 }
 
+void perf_evlist__remove(struct perf_evlist *evlist, struct perf_evsel *evsel)
+{
+	evsel->evlist = NULL;
+	list_del_init(&evsel->node);
+	evlist->nr_entries -= 1;
+}
+
 void perf_evlist__splice_list_tail(struct perf_evlist *evlist,
 				   struct list_head *list)
 {
@@ -617,6 +624,21 @@ struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id)
 	return NULL;
 }
 
+struct perf_evsel *perf_evlist__id2evsel_strict(struct perf_evlist *evlist,
+						u64 id)
+{
+	struct perf_sample_id *sid;
+
+	if (!id)
+		return NULL;
+
+	sid = perf_evlist__id2sid(evlist, id);
+	if (sid)
+		return sid->evsel;
+
+	return NULL;
+}
+
 static int perf_evlist__event2id(struct perf_evlist *evlist,
 				 union perf_event *event, u64 *id)
 {

+ 3 - 0
tools/perf/util/evlist.h

@@ -73,6 +73,7 @@ void perf_evlist__exit(struct perf_evlist *evlist);
 void perf_evlist__delete(struct perf_evlist *evlist);
 
 void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry);
+void perf_evlist__remove(struct perf_evlist *evlist, struct perf_evsel *evsel);
 int perf_evlist__add_default(struct perf_evlist *evlist);
 int __perf_evlist__add_default_attrs(struct perf_evlist *evlist,
 				     struct perf_event_attr *attrs, size_t nr_attrs);
@@ -104,6 +105,8 @@ int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mas
 int perf_evlist__poll(struct perf_evlist *evlist, int timeout);
 
 struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id);
+struct perf_evsel *perf_evlist__id2evsel_strict(struct perf_evlist *evlist,
+						u64 id);
 
 struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id);
 

+ 4 - 2
tools/perf/util/hist.c

@@ -695,7 +695,7 @@ iter_finish_normal_entry(struct hist_entry_iter *iter,
 }
 
 static int
-iter_prepare_cumulative_entry(struct hist_entry_iter *iter __maybe_unused,
+iter_prepare_cumulative_entry(struct hist_entry_iter *iter,
 			      struct addr_location *al __maybe_unused)
 {
 	struct hist_entry **he_cache;
@@ -707,7 +707,7 @@ iter_prepare_cumulative_entry(struct hist_entry_iter *iter __maybe_unused,
 	 * cumulated only one time to prevent entries more than 100%
 	 * overhead.
 	 */
-	he_cache = malloc(sizeof(*he_cache) * (PERF_MAX_STACK_DEPTH + 1));
+	he_cache = malloc(sizeof(*he_cache) * (iter->max_stack + 1));
 	if (he_cache == NULL)
 		return -ENOMEM;
 
@@ -868,6 +868,8 @@ int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
 	if (err)
 		return err;
 
+	iter->max_stack = max_stack_depth;
+
 	err = iter->ops->prepare_entry(iter, al);
 	if (err)
 		goto out;

+ 1 - 0
tools/perf/util/hist.h

@@ -90,6 +90,7 @@ struct hist_entry_iter {
 	int curr;
 
 	bool hide_unresolved;
+	int max_stack;
 
 	struct perf_evsel *evsel;
 	struct perf_sample *sample;

+ 2 - 2
tools/perf/util/intel-pt-decoder/intel-pt-decoder.c

@@ -650,7 +650,7 @@ static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info)
 		if (data->from_mtc && timestamp < data->timestamp &&
 		    data->timestamp - timestamp < decoder->tsc_slip)
 			return 1;
-		while (timestamp < data->timestamp)
+		if (timestamp < data->timestamp)
 			timestamp += (1ULL << 56);
 		if (pkt_info->last_packet_type != INTEL_PT_CYC) {
 			if (data->from_mtc)
@@ -1191,7 +1191,7 @@ static void intel_pt_calc_tsc_timestamp(struct intel_pt_decoder *decoder)
 					timestamp);
 			timestamp = decoder->timestamp;
 		}
-		while (timestamp < decoder->timestamp) {
+		if (timestamp < decoder->timestamp) {
 			intel_pt_log_to("Wraparound timestamp", timestamp);
 			timestamp += (1ULL << 56);
 			decoder->tsc_timestamp = timestamp;

+ 11 - 10
tools/perf/util/intel-pt-decoder/intel-pt-log.c

@@ -29,18 +29,18 @@
 
 static FILE *f;
 static char log_name[MAX_LOG_NAME];
-static bool enable_logging;
+bool intel_pt_enable_logging;
 
 void intel_pt_log_enable(void)
 {
-	enable_logging = true;
+	intel_pt_enable_logging = true;
 }
 
 void intel_pt_log_disable(void)
 {
 	if (f)
 		fflush(f);
-	enable_logging = false;
+	intel_pt_enable_logging = false;
 }
 
 void intel_pt_log_set_name(const char *name)
@@ -80,7 +80,7 @@ static void intel_pt_print_no_data(uint64_t pos, int indent)
 
 static int intel_pt_log_open(void)
 {
-	if (!enable_logging)
+	if (!intel_pt_enable_logging)
 		return -1;
 
 	if (f)
@@ -91,15 +91,15 @@ static int intel_pt_log_open(void)
 
 	f = fopen(log_name, "w+");
 	if (!f) {
-		enable_logging = false;
+		intel_pt_enable_logging = false;
 		return -1;
 	}
 
 	return 0;
 }
 
-void intel_pt_log_packet(const struct intel_pt_pkt *packet, int pkt_len,
-			 uint64_t pos, const unsigned char *buf)
+void __intel_pt_log_packet(const struct intel_pt_pkt *packet, int pkt_len,
+			   uint64_t pos, const unsigned char *buf)
 {
 	char desc[INTEL_PT_PKT_DESC_MAX];
 
@@ -111,7 +111,7 @@ void intel_pt_log_packet(const struct intel_pt_pkt *packet, int pkt_len,
 	fprintf(f, "%s\n", desc);
 }
 
-void intel_pt_log_insn(struct intel_pt_insn *intel_pt_insn, uint64_t ip)
+void __intel_pt_log_insn(struct intel_pt_insn *intel_pt_insn, uint64_t ip)
 {
 	char desc[INTEL_PT_INSN_DESC_MAX];
 	size_t len = intel_pt_insn->length;
@@ -128,7 +128,8 @@ void intel_pt_log_insn(struct intel_pt_insn *intel_pt_insn, uint64_t ip)
 		fprintf(f, "Bad instruction!\n");
 }
 
-void intel_pt_log_insn_no_data(struct intel_pt_insn *intel_pt_insn, uint64_t ip)
+void __intel_pt_log_insn_no_data(struct intel_pt_insn *intel_pt_insn,
+				 uint64_t ip)
 {
 	char desc[INTEL_PT_INSN_DESC_MAX];
 
@@ -142,7 +143,7 @@ void intel_pt_log_insn_no_data(struct intel_pt_insn *intel_pt_insn, uint64_t ip)
 		fprintf(f, "Bad instruction!\n");
 }
 
-void intel_pt_log(const char *fmt, ...)
+void __intel_pt_log(const char *fmt, ...)
 {
 	va_list args;
 

+ 32 - 6
tools/perf/util/intel-pt-decoder/intel-pt-log.h

@@ -25,20 +25,46 @@ void intel_pt_log_enable(void);
 void intel_pt_log_disable(void);
 void intel_pt_log_set_name(const char *name);
 
-void intel_pt_log_packet(const struct intel_pt_pkt *packet, int pkt_len,
-			 uint64_t pos, const unsigned char *buf);
+void __intel_pt_log_packet(const struct intel_pt_pkt *packet, int pkt_len,
+			   uint64_t pos, const unsigned char *buf);
 
 struct intel_pt_insn;
 
-void intel_pt_log_insn(struct intel_pt_insn *intel_pt_insn, uint64_t ip);
-void intel_pt_log_insn_no_data(struct intel_pt_insn *intel_pt_insn,
-			       uint64_t ip);
+void __intel_pt_log_insn(struct intel_pt_insn *intel_pt_insn, uint64_t ip);
+void __intel_pt_log_insn_no_data(struct intel_pt_insn *intel_pt_insn,
+				 uint64_t ip);
 
 __attribute__((format(printf, 1, 2)))
-void intel_pt_log(const char *fmt, ...);
+void __intel_pt_log(const char *fmt, ...);
+
+#define intel_pt_log(fmt, ...) \
+	do { \
+		if (intel_pt_enable_logging) \
+			__intel_pt_log(fmt, ##__VA_ARGS__); \
+	} while (0)
+
+#define intel_pt_log_packet(arg, ...) \
+	do { \
+		if (intel_pt_enable_logging) \
+			__intel_pt_log_packet(arg, ##__VA_ARGS__); \
+	} while (0)
+
+#define intel_pt_log_insn(arg, ...) \
+	do { \
+		if (intel_pt_enable_logging) \
+			__intel_pt_log_insn(arg, ##__VA_ARGS__); \
+	} while (0)
+
+#define intel_pt_log_insn_no_data(arg, ...) \
+	do { \
+		if (intel_pt_enable_logging) \
+			__intel_pt_log_insn_no_data(arg, ##__VA_ARGS__); \
+	} while (0)
 
 #define x64_fmt "0x%" PRIx64
 
+extern bool intel_pt_enable_logging;
+
 static inline void intel_pt_log_at(const char *msg, uint64_t u)
 {
 	intel_pt_log("%s at " x64_fmt "\n", msg, u);

+ 132 - 3
tools/perf/util/intel-pt.c

@@ -22,6 +22,7 @@
 #include "../perf.h"
 #include "session.h"
 #include "machine.h"
+#include "sort.h"
 #include "tool.h"
 #include "event.h"
 #include "evlist.h"
@@ -63,6 +64,7 @@ struct intel_pt {
 	bool data_queued;
 	bool est_tsc;
 	bool sync_switch;
+	bool mispred_all;
 	int have_sched_switch;
 	u32 pmu_type;
 	u64 kernel_start;
@@ -115,6 +117,9 @@ struct intel_pt_queue {
 	void *decoder;
 	const struct intel_pt_state *state;
 	struct ip_callchain *chain;
+	struct branch_stack *last_branch;
+	struct branch_stack *last_branch_rb;
+	size_t last_branch_pos;
 	union perf_event *event_buf;
 	bool on_heap;
 	bool stop;
@@ -675,6 +680,19 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
 			goto out_free;
 	}
 
+	if (pt->synth_opts.last_branch) {
+		size_t sz = sizeof(struct branch_stack);
+
+		sz += pt->synth_opts.last_branch_sz *
+		      sizeof(struct branch_entry);
+		ptq->last_branch = zalloc(sz);
+		if (!ptq->last_branch)
+			goto out_free;
+		ptq->last_branch_rb = zalloc(sz);
+		if (!ptq->last_branch_rb)
+			goto out_free;
+	}
+
 	ptq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
 	if (!ptq->event_buf)
 		goto out_free;
@@ -720,7 +738,7 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
 
 		if (!params.period) {
 			params.period_type = INTEL_PT_PERIOD_INSTRUCTIONS;
-			params.period = 1000;
+			params.period = 1;
 		}
 	}
 
@@ -732,6 +750,8 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
 
 out_free:
 	zfree(&ptq->event_buf);
+	zfree(&ptq->last_branch);
+	zfree(&ptq->last_branch_rb);
 	zfree(&ptq->chain);
 	free(ptq);
 	return NULL;
@@ -746,6 +766,8 @@ static void intel_pt_free_queue(void *priv)
 	thread__zput(ptq->thread);
 	intel_pt_decoder_free(ptq->decoder);
 	zfree(&ptq->event_buf);
+	zfree(&ptq->last_branch);
+	zfree(&ptq->last_branch_rb);
 	zfree(&ptq->chain);
 	free(ptq);
 }
@@ -876,6 +898,58 @@ static int intel_pt_setup_queues(struct intel_pt *pt)
 	return 0;
 }
 
+static inline void intel_pt_copy_last_branch_rb(struct intel_pt_queue *ptq)
+{
+	struct branch_stack *bs_src = ptq->last_branch_rb;
+	struct branch_stack *bs_dst = ptq->last_branch;
+	size_t nr = 0;
+
+	bs_dst->nr = bs_src->nr;
+
+	if (!bs_src->nr)
+		return;
+
+	nr = ptq->pt->synth_opts.last_branch_sz - ptq->last_branch_pos;
+	memcpy(&bs_dst->entries[0],
+	       &bs_src->entries[ptq->last_branch_pos],
+	       sizeof(struct branch_entry) * nr);
+
+	if (bs_src->nr >= ptq->pt->synth_opts.last_branch_sz) {
+		memcpy(&bs_dst->entries[nr],
+		       &bs_src->entries[0],
+		       sizeof(struct branch_entry) * ptq->last_branch_pos);
+	}
+}
+
+static inline void intel_pt_reset_last_branch_rb(struct intel_pt_queue *ptq)
+{
+	ptq->last_branch_pos = 0;
+	ptq->last_branch_rb->nr = 0;
+}
+
+static void intel_pt_update_last_branch_rb(struct intel_pt_queue *ptq)
+{
+	const struct intel_pt_state *state = ptq->state;
+	struct branch_stack *bs = ptq->last_branch_rb;
+	struct branch_entry *be;
+
+	if (!ptq->last_branch_pos)
+		ptq->last_branch_pos = ptq->pt->synth_opts.last_branch_sz;
+
+	ptq->last_branch_pos -= 1;
+
+	be              = &bs->entries[ptq->last_branch_pos];
+	be->from        = state->from_ip;
+	be->to          = state->to_ip;
+	be->flags.abort = !!(state->flags & INTEL_PT_ABORT_TX);
+	be->flags.in_tx = !!(state->flags & INTEL_PT_IN_TX);
+	/* No support for mispredict */
+	be->flags.mispred = ptq->pt->mispred_all;
+
+	if (bs->nr < ptq->pt->synth_opts.last_branch_sz)
+		bs->nr += 1;
+}
+
 static int intel_pt_inject_event(union perf_event *event,
 				 struct perf_sample *sample, u64 type,
 				 bool swapped)
@@ -890,6 +964,13 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
 	struct intel_pt *pt = ptq->pt;
 	union perf_event *event = ptq->event_buf;
 	struct perf_sample sample = { .ip = 0, };
+	struct dummy_branch_stack {
+		u64			nr;
+		struct branch_entry	entries;
+	} dummy_bs;
+
+	if (pt->branches_filter && !(pt->branches_filter & ptq->flags))
+		return 0;
 
 	event->sample.header.type = PERF_RECORD_SAMPLE;
 	event->sample.header.misc = PERF_RECORD_MISC_USER;
@@ -909,8 +990,20 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
 	sample.flags = ptq->flags;
 	sample.insn_len = ptq->insn_len;
 
-	if (pt->branches_filter && !(pt->branches_filter & ptq->flags))
-		return 0;
+	/*
+	 * perf report cannot handle events without a branch stack when using
+	 * SORT_MODE__BRANCH so make a dummy one.
+	 */
+	if (pt->synth_opts.last_branch && sort__mode == SORT_MODE__BRANCH) {
+		dummy_bs = (struct dummy_branch_stack){
+			.nr = 1,
+			.entries = {
+				.from = sample.ip,
+				.to = sample.addr,
+			},
+		};
+		sample.branch_stack = (struct branch_stack *)&dummy_bs;
+	}
 
 	if (pt->synth_opts.inject) {
 		ret = intel_pt_inject_event(event, &sample,
@@ -961,6 +1054,11 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
 		sample.callchain = ptq->chain;
 	}
 
+	if (pt->synth_opts.last_branch) {
+		intel_pt_copy_last_branch_rb(ptq);
+		sample.branch_stack = ptq->last_branch;
+	}
+
 	if (pt->synth_opts.inject) {
 		ret = intel_pt_inject_event(event, &sample,
 					    pt->instructions_sample_type,
@@ -974,6 +1072,9 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
 		pr_err("Intel Processor Trace: failed to deliver instruction event, error %d\n",
 		       ret);
 
+	if (pt->synth_opts.last_branch)
+		intel_pt_reset_last_branch_rb(ptq);
+
 	return ret;
 }
 
@@ -1008,6 +1109,11 @@ static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
 		sample.callchain = ptq->chain;
 	}
 
+	if (pt->synth_opts.last_branch) {
+		intel_pt_copy_last_branch_rb(ptq);
+		sample.branch_stack = ptq->last_branch;
+	}
+
 	if (pt->synth_opts.inject) {
 		ret = intel_pt_inject_event(event, &sample,
 					    pt->transactions_sample_type,
@@ -1021,6 +1127,9 @@ static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
 		pr_err("Intel Processor Trace: failed to deliver transaction event, error %d\n",
 		       ret);
 
+	if (pt->synth_opts.callchain)
+		intel_pt_reset_last_branch_rb(ptq);
+
 	return ret;
 }
 
@@ -1116,6 +1225,9 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
 			return err;
 	}
 
+	if (pt->synth_opts.last_branch)
+		intel_pt_update_last_branch_rb(ptq);
+
 	if (!pt->sync_switch)
 		return 0;
 
@@ -1763,6 +1875,8 @@ static int intel_pt_synth_events(struct intel_pt *pt,
 		pt->instructions_sample_period = attr.sample_period;
 		if (pt->synth_opts.callchain)
 			attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
+		if (pt->synth_opts.last_branch)
+			attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
 		pr_debug("Synthesizing 'instructions' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
 			 id, (u64)attr.sample_type);
 		err = intel_pt_synth_event(session, &attr, id);
@@ -1782,6 +1896,8 @@ static int intel_pt_synth_events(struct intel_pt *pt,
 		attr.sample_period = 1;
 		if (pt->synth_opts.callchain)
 			attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
+		if (pt->synth_opts.last_branch)
+			attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
 		pr_debug("Synthesizing 'transactions' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
 			 id, (u64)attr.sample_type);
 		err = intel_pt_synth_event(session, &attr, id);
@@ -1808,6 +1924,7 @@ static int intel_pt_synth_events(struct intel_pt *pt,
 		attr.sample_period = 1;
 		attr.sample_type |= PERF_SAMPLE_ADDR;
 		attr.sample_type &= ~(u64)PERF_SAMPLE_CALLCHAIN;
+		attr.sample_type &= ~(u64)PERF_SAMPLE_BRANCH_STACK;
 		pr_debug("Synthesizing 'branches' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
 			 id, (u64)attr.sample_type);
 		err = intel_pt_synth_event(session, &attr, id);
@@ -1852,6 +1969,16 @@ static bool intel_pt_find_switch(struct perf_evlist *evlist)
 	return false;
 }
 
+static int intel_pt_perf_config(const char *var, const char *value, void *data)
+{
+	struct intel_pt *pt = data;
+
+	if (!strcmp(var, "intel-pt.mispred-all"))
+		pt->mispred_all = perf_config_bool(var, value);
+
+	return 0;
+}
+
 static const char * const intel_pt_info_fmts[] = {
 	[INTEL_PT_PMU_TYPE]		= "  PMU Type            %"PRId64"\n",
 	[INTEL_PT_TIME_SHIFT]		= "  Time Shift          %"PRIu64"\n",
@@ -1896,6 +2023,8 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
 	if (!pt)
 		return -ENOMEM;
 
+	perf_config(intel_pt_perf_config, pt);
+
 	err = auxtrace_queues__init(&pt->queues);
 	if (err)
 		goto err_free;

+ 121 - 30
tools/perf/util/parse-events.c

@@ -27,6 +27,8 @@
 extern int parse_events_debug;
 #endif
 int parse_events_parse(void *data, void *scanner);
+static int get_config_terms(struct list_head *head_config,
+			    struct list_head *head_terms __maybe_unused);
 
 static struct perf_pmu_event_symbol *perf_pmu_events_list;
 /*
@@ -416,7 +418,8 @@ static void tracepoint_error(struct parse_events_error *error, int err,
 
 static int add_tracepoint(struct list_head *list, int *idx,
 			  char *sys_name, char *evt_name,
-			  struct parse_events_error *error __maybe_unused)
+			  struct parse_events_error *error __maybe_unused,
+			  struct list_head *head_config)
 {
 	struct perf_evsel *evsel;
 
@@ -426,13 +429,22 @@ static int add_tracepoint(struct list_head *list, int *idx,
 		return PTR_ERR(evsel);
 	}
 
+	if (head_config) {
+		LIST_HEAD(config_terms);
+
+		if (get_config_terms(head_config, &config_terms))
+			return -ENOMEM;
+		list_splice(&config_terms, &evsel->config_terms);
+	}
+
 	list_add_tail(&evsel->node, list);
 	return 0;
 }
 
 static int add_tracepoint_multi_event(struct list_head *list, int *idx,
 				      char *sys_name, char *evt_name,
-				      struct parse_events_error *error)
+				      struct parse_events_error *error,
+				      struct list_head *head_config)
 {
 	char evt_path[MAXPATHLEN];
 	struct dirent *evt_ent;
@@ -456,7 +468,8 @@ static int add_tracepoint_multi_event(struct list_head *list, int *idx,
 		if (!strglobmatch(evt_ent->d_name, evt_name))
 			continue;
 
-		ret = add_tracepoint(list, idx, sys_name, evt_ent->d_name, error);
+		ret = add_tracepoint(list, idx, sys_name, evt_ent->d_name,
+				     error, head_config);
 	}
 
 	closedir(evt_dir);
@@ -465,16 +478,20 @@ static int add_tracepoint_multi_event(struct list_head *list, int *idx,
 
 static int add_tracepoint_event(struct list_head *list, int *idx,
 				char *sys_name, char *evt_name,
-				struct parse_events_error *error)
+				struct parse_events_error *error,
+				struct list_head *head_config)
 {
 	return strpbrk(evt_name, "*?") ?
-	       add_tracepoint_multi_event(list, idx, sys_name, evt_name, error) :
-	       add_tracepoint(list, idx, sys_name, evt_name, error);
+	       add_tracepoint_multi_event(list, idx, sys_name, evt_name,
+					  error, head_config) :
+	       add_tracepoint(list, idx, sys_name, evt_name,
+			      error, head_config);
 }
 
 static int add_tracepoint_multi_sys(struct list_head *list, int *idx,
 				    char *sys_name, char *evt_name,
-				    struct parse_events_error *error)
+				    struct parse_events_error *error,
+				    struct list_head *head_config)
 {
 	struct dirent *events_ent;
 	DIR *events_dir;
@@ -498,23 +515,13 @@ static int add_tracepoint_multi_sys(struct list_head *list, int *idx,
 			continue;
 
 		ret = add_tracepoint_event(list, idx, events_ent->d_name,
-					   evt_name, error);
+					   evt_name, error, head_config);
 	}
 
 	closedir(events_dir);
 	return ret;
 }
 
-int parse_events_add_tracepoint(struct list_head *list, int *idx,
-				char *sys, char *event,
-				struct parse_events_error *error)
-{
-	if (strpbrk(sys, "*?"))
-		return add_tracepoint_multi_sys(list, idx, sys, event, error);
-	else
-		return add_tracepoint_event(list, idx, sys, event, error);
-}
-
 static int
 parse_breakpoint_type(const char *type, struct perf_event_attr *attr)
 {
@@ -599,9 +606,13 @@ static int check_type_val(struct parse_events_term *term,
 	return -EINVAL;
 }
 
-static int config_term(struct perf_event_attr *attr,
-		       struct parse_events_term *term,
-		       struct parse_events_error *err)
+typedef int config_term_func_t(struct perf_event_attr *attr,
+			       struct parse_events_term *term,
+			       struct parse_events_error *err);
+
+static int config_term_common(struct perf_event_attr *attr,
+			      struct parse_events_term *term,
+			      struct parse_events_error *err)
 {
 #define CHECK_TYPE_VAL(type)						   \
 do {									   \
@@ -610,12 +621,6 @@ do {									   \
 } while (0)
 
 	switch (term->type_term) {
-	case PARSE_EVENTS__TERM_TYPE_USER:
-		/*
-		 * Always succeed for sysfs terms, as we dont know
-		 * at this point what type they need to have.
-		 */
-		return 0;
 	case PARSE_EVENTS__TERM_TYPE_CONFIG:
 		CHECK_TYPE_VAL(NUM);
 		attr->config = term->val.num;
@@ -658,6 +663,9 @@ do {									   \
 		CHECK_TYPE_VAL(STR);
 		break;
 	default:
+		err->str = strdup("unknown term");
+		err->idx = term->err_term;
+		err->help = parse_events_formats_error_string(NULL);
 		return -EINVAL;
 	}
 
@@ -665,9 +673,44 @@ do {									   \
 #undef CHECK_TYPE_VAL
 }
 
+static int config_term_pmu(struct perf_event_attr *attr,
+			   struct parse_events_term *term,
+			   struct parse_events_error *err)
+{
+	if (term->type_term == PARSE_EVENTS__TERM_TYPE_USER)
+		/*
+		 * Always succeed for sysfs terms, as we dont know
+		 * at this point what type they need to have.
+		 */
+		return 0;
+	else
+		return config_term_common(attr, term, err);
+}
+
+static int config_term_tracepoint(struct perf_event_attr *attr,
+				  struct parse_events_term *term,
+				  struct parse_events_error *err)
+{
+	switch (term->type_term) {
+	case PARSE_EVENTS__TERM_TYPE_CALLGRAPH:
+	case PARSE_EVENTS__TERM_TYPE_STACKSIZE:
+		return config_term_common(attr, term, err);
+	default:
+		if (err) {
+			err->idx = term->err_term;
+			err->str = strdup("unknown term");
+			err->help = strdup("valid terms: call-graph,stack-size\n");
+		}
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int config_attr(struct perf_event_attr *attr,
 		       struct list_head *head,
-		       struct parse_events_error *err)
+		       struct parse_events_error *err,
+		       config_term_func_t config_term)
 {
 	struct parse_events_term *term;
 
@@ -722,6 +765,27 @@ do {								\
 	return 0;
 }
 
+int parse_events_add_tracepoint(struct list_head *list, int *idx,
+				char *sys, char *event,
+				struct parse_events_error *error,
+				struct list_head *head_config)
+{
+	if (head_config) {
+		struct perf_event_attr attr;
+
+		if (config_attr(&attr, head_config, error,
+				config_term_tracepoint))
+			return -EINVAL;
+	}
+
+	if (strpbrk(sys, "*?"))
+		return add_tracepoint_multi_sys(list, idx, sys, event,
+						error, head_config);
+	else
+		return add_tracepoint_event(list, idx, sys, event,
+					    error, head_config);
+}
+
 int parse_events_add_numeric(struct parse_events_evlist *data,
 			     struct list_head *list,
 			     u32 type, u64 config,
@@ -735,7 +799,8 @@ int parse_events_add_numeric(struct parse_events_evlist *data,
 	attr.config = config;
 
 	if (head_config) {
-		if (config_attr(&attr, head_config, data->error))
+		if (config_attr(&attr, head_config, data->error,
+				config_term_common))
 			return -EINVAL;
 
 		if (get_config_terms(head_config, &config_terms))
@@ -795,7 +860,7 @@ int parse_events_add_pmu(struct parse_events_evlist *data,
 	 * Configure hardcoded terms first, no need to check
 	 * return value when called with fail == 0 ;)
 	 */
-	if (config_attr(&attr, head_config, data->error))
+	if (config_attr(&attr, head_config, data->error, config_term_pmu))
 		return -EINVAL;
 
 	if (get_config_terms(head_config, &config_terms))
@@ -1861,3 +1926,29 @@ void parse_events_evlist_error(struct parse_events_evlist *data,
 	err->str = strdup(str);
 	WARN_ONCE(!err->str, "WARNING: failed to allocate error string");
 }
+
+/*
+ * Return string contains valid config terms of an event.
+ * @additional_terms: For terms such as PMU sysfs terms.
+ */
+char *parse_events_formats_error_string(char *additional_terms)
+{
+	char *str;
+	static const char *static_terms = "config,config1,config2,name,"
+					  "period,freq,branch_type,time,"
+					  "call-graph,stack-size\n";
+
+	/* valid terms */
+	if (additional_terms) {
+		if (!asprintf(&str, "valid terms: %s,%s",
+			      additional_terms, static_terms))
+			goto fail;
+	} else {
+		if (!asprintf(&str, "valid terms: %s", static_terms))
+			goto fail;
+	}
+	return str;
+
+fail:
+	return NULL;
+}

+ 3 - 1
tools/perf/util/parse-events.h

@@ -119,7 +119,8 @@ int parse_events__modifier_group(struct list_head *list, char *event_mod);
 int parse_events_name(struct list_head *list, char *name);
 int parse_events_add_tracepoint(struct list_head *list, int *idx,
 				char *sys, char *event,
-				struct parse_events_error *error);
+				struct parse_events_error *error,
+				struct list_head *head_config);
 int parse_events_add_numeric(struct parse_events_evlist *data,
 			     struct list_head *list,
 			     u32 type, u64 config,
@@ -156,5 +157,6 @@ int print_hwcache_events(const char *event_glob, bool name_only);
 extern int is_valid_tracepoint(const char *event_string);
 
 int valid_event_mount(const char *eventfs);
+char *parse_events_formats_error_string(char *additional_terms);
 
 #endif /* __PERF_PARSE_EVENTS_H */

+ 1 - 1
tools/perf/util/parse-events.l

@@ -174,7 +174,7 @@ modifier_bp	[rwx]{1,3}
 
 <config>{
 	/*
-	 * Please update formats_error_string any time
+	 * Please update parse_events_formats_error_string any time
 	 * new static term is added.
 	 */
 config			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG); }

+ 39 - 12
tools/perf/util/parse-events.y

@@ -67,6 +67,7 @@ static inc_group_count(struct list_head *list,
 %type <head> event_legacy_cache
 %type <head> event_legacy_mem
 %type <head> event_legacy_tracepoint
+%type <tracepoint_name> tracepoint_name
 %type <head> event_legacy_numeric
 %type <head> event_legacy_raw
 %type <head> event_def
@@ -84,6 +85,10 @@ static inc_group_count(struct list_head *list,
 	u64 num;
 	struct list_head *head;
 	struct parse_events_term *term;
+	struct tracepoint_name {
+		char *sys;
+		char *event;
+	} tracepoint_name;
 }
 %%
 
@@ -368,38 +373,60 @@ PE_PREFIX_MEM PE_VALUE sep_dc
 }
 
 event_legacy_tracepoint:
-PE_NAME '-' PE_NAME ':' PE_NAME
+tracepoint_name
 {
 	struct parse_events_evlist *data = _data;
 	struct parse_events_error *error = data->error;
 	struct list_head *list;
-	char sys_name[128];
-	snprintf(&sys_name, 128, "%s-%s", $1, $3);
 
 	ALLOC_LIST(list);
-	if (parse_events_add_tracepoint(list, &data->idx, &sys_name, $5, error)) {
-		if (error)
-			error->idx = @1.first_column;
+	if (error)
+		error->idx = @1.first_column;
+
+	if (parse_events_add_tracepoint(list, &data->idx, $1.sys, $1.event,
+					error, NULL))
 		return -1;
-	}
+
 	$$ = list;
 }
 |
-PE_NAME ':' PE_NAME
+tracepoint_name '/' event_config '/'
 {
 	struct parse_events_evlist *data = _data;
 	struct parse_events_error *error = data->error;
 	struct list_head *list;
 
 	ALLOC_LIST(list);
-	if (parse_events_add_tracepoint(list, &data->idx, $1, $3, error)) {
-		if (error)
-			error->idx = @1.first_column;
+	if (error)
+		error->idx = @1.first_column;
+
+	if (parse_events_add_tracepoint(list, &data->idx, $1.sys, $1.event,
+					error, $3))
 		return -1;
-	}
+
 	$$ = list;
 }
 
+tracepoint_name:
+PE_NAME '-' PE_NAME ':' PE_NAME
+{
+	char sys_name[128];
+	struct tracepoint_name tracepoint;
+
+	snprintf(&sys_name, 128, "%s-%s", $1, $3);
+	tracepoint.sys = &sys_name;
+	tracepoint.event = $5;
+
+	$$ = tracepoint;
+}
+|
+PE_NAME ':' PE_NAME
+{
+	struct tracepoint_name tracepoint = {$1, $3};
+
+	$$ = tracepoint;
+}
+
 event_legacy_numeric:
 PE_VALUE ':' PE_VALUE
 {

+ 14 - 23
tools/perf/util/pmu.c

@@ -626,38 +626,26 @@ static int pmu_resolve_param_term(struct parse_events_term *term,
 	return -1;
 }
 
-static char *formats_error_string(struct list_head *formats)
+static char *pmu_formats_string(struct list_head *formats)
 {
 	struct perf_pmu_format *format;
-	char *err, *str;
-	static const char *static_terms = "config,config1,config2,name,"
-					  "period,freq,branch_type,time,"
-					  "call-graph,stack-size\n";
+	char *str;
+	struct strbuf buf;
 	unsigned i = 0;
 
-	if (!asprintf(&str, "valid terms:"))
+	if (!formats)
 		return NULL;
 
+	strbuf_init(&buf, 0);
 	/* sysfs exported terms */
-	list_for_each_entry(format, formats, list) {
-		char c = i++ ? ',' : ' ';
-
-		err = str;
-		if (!asprintf(&str, "%s%c%s", err, c, format->name))
-			goto fail;
-		free(err);
-	}
+	list_for_each_entry(format, formats, list)
+		strbuf_addf(&buf, i++ ? ",%s" : "%s",
+			    format->name);
 
-	/* static terms */
-	err = str;
-	if (!asprintf(&str, "%s,%s", err, static_terms))
-		goto fail;
+	str = strbuf_detach(&buf, NULL);
+	strbuf_release(&buf);
 
-	free(err);
 	return str;
-fail:
-	free(err);
-	return NULL;
 }
 
 /*
@@ -693,9 +681,12 @@ static int pmu_config_term(struct list_head *formats,
 		if (verbose)
 			printf("Invalid event/parameter '%s'\n", term->config);
 		if (err) {
+			char *pmu_term = pmu_formats_string(formats);
+
 			err->idx  = term->err_term;
 			err->str  = strdup("unknown term");
-			err->help = formats_error_string(formats);
+			err->help = parse_events_formats_error_string(pmu_term);
+			free(pmu_term);
 		}
 		return -EINVAL;
 	}

+ 1 - 1
tools/perf/util/scripting-engines/trace-event-python.c

@@ -319,7 +319,7 @@ static PyObject *python_process_callchain(struct perf_sample *sample,
 
 	if (thread__resolve_callchain(al->thread, evsel,
 				      sample, NULL, NULL,
-				      PERF_MAX_STACK_DEPTH) != 0) {
+				      scripting_max_stack) != 0) {
 		pr_err("Failed to resolve callchain. Skipping\n");
 		goto exit;
 	}

+ 11 - 1
tools/perf/util/session.c

@@ -1101,6 +1101,9 @@ static int machines__deliver_event(struct machines *machines,
 	case PERF_RECORD_UNTHROTTLE:
 		return tool->unthrottle(tool, event, sample, machine);
 	case PERF_RECORD_AUX:
+		if (tool->aux == perf_event__process_aux &&
+		    (event->aux.flags & PERF_AUX_FLAG_TRUNCATED))
+			evlist->stats.total_aux_lost += 1;
 		return tool->aux(tool, event, sample, machine);
 	case PERF_RECORD_ITRACE_START:
 		return tool->itrace_start(tool, event, sample, machine);
@@ -1346,6 +1349,13 @@ static void perf_session__warn_about_errors(const struct perf_session *session)
 		}
 	}
 
+	if (session->tool->aux == perf_event__process_aux &&
+	    stats->total_aux_lost != 0) {
+		ui__warning("AUX data lost %" PRIu64 " times out of %u!\n\n",
+			    stats->total_aux_lost,
+			    stats->nr_events[PERF_RECORD_AUX]);
+	}
+
 	if (stats->nr_unknown_events != 0) {
 		ui__warning("Found %u unknown events!\n\n"
 			    "Is this an older tool processing a perf.data "
@@ -1790,7 +1800,7 @@ void perf_evsel__print_ip(struct perf_evsel *evsel, struct perf_sample *sample,
 
 		if (thread__resolve_callchain(al->thread, evsel,
 					      sample, NULL, NULL,
-					      PERF_MAX_STACK_DEPTH) != 0) {
+					      stack_depth) != 0) {
 			if (verbose)
 				error("Failed to resolve callchain. Skipping\n");
 			return;

+ 2 - 0
tools/perf/util/trace-event.h

@@ -78,6 +78,8 @@ struct scripting_ops {
 	int (*generate_script) (struct pevent *pevent, const char *outfile);
 };
 
+extern unsigned int scripting_max_stack;
+
 int script_spec_register(const char *spec, struct scripting_ops *ops);
 
 void setup_perl_scripting(void);