9 лет назад · 8c2accc8ca
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -96,7 +96,7 @@ cgroup_install firewire_install hv_install lguest_install perf_install usb_insta
 
				 	$(call descend,$(@:_install=),install)
			
 
				 
			
 
				 selftests_install:
			
 
				-	$(call descend,testing/$(@:_clean=),install)
			
 
				+	$(call descend,testing/$(@:_install=),install)
			
 
				 
			
 
				 turbostat_install x86_energy_perf_policy_install:
			
 
				 	$(call descend,power/x86/$(@:_install=),install)
			
--- a/tools/include/linux/string.h
+++ b/tools/include/linux/string.h
@@ -0,0 +1,11 @@
 
				+#ifndef _TOOLS_LINUX_STRING_H_
			
 
				+#define _TOOLS_LINUX_STRING_H_
			
 
				+
			
 
				+
			
 
				+#include <linux/types.h>	/* for size_t */
			
 
				+
			
 
				+void *memdup(const void *src, size_t len);
			
 
				+
			
 
				+int strtobool(const char *s, bool *res);
			
 
				+
			
 
				+#endif /* _TOOLS_LINUX_STRING_H_ */
			
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -152,7 +152,11 @@ struct bpf_program {
 
				 	} *reloc_desc;
			
 
				 	int nr_reloc;
			
 
				 
			
 
				-	int fd;
			
 
				+	struct {
			
 
				+		int nr;
			
 
				+		int *fds;
			
 
				+	} instances;
			
 
				+	bpf_program_prep_t preprocessor;
			
 
				 
			
 
				 	struct bpf_object *obj;
			
 
				 	void *priv;
			
@@ -206,10 +210,25 @@ struct bpf_object {
 
				 
			
 
				 static void bpf_program__unload(struct bpf_program *prog)
			
 
				 {
			
 
				+	int i;
			
 
				+
			
 
				 	if (!prog)
			
 
				 		return;
			
 
				 
			
 
				-	zclose(prog->fd);
			
 
				+	/*
			
 
				+	 * If the object is opened but the program was never loaded,
			
 
				+	 * it is possible that prog->instances.nr == -1.
			
 
				+	 */
			
 
				+	if (prog->instances.nr > 0) {
			
 
				+		for (i = 0; i < prog->instances.nr; i++)
			
 
				+			zclose(prog->instances.fds[i]);
			
 
				+	} else if (prog->instances.nr != -1) {
			
 
				+		pr_warning("Internal error: instances.nr is %d\n",
			
 
				+			   prog->instances.nr);
			
 
				+	}
			
 
				+
			
 
				+	prog->instances.nr = -1;
			
 
				+	zfree(&prog->instances.fds);
			
 
				 }
			
 
				 
			
 
				 static void bpf_program__exit(struct bpf_program *prog)
			
@@ -260,7 +279,8 @@ bpf_program__init(void *data, size_t size, char *name, int idx,
 
				 	memcpy(prog->insns, data,
			
 
				 	       prog->insns_cnt * sizeof(struct bpf_insn));
			
 
				 	prog->idx = idx;
			
 
				-	prog->fd = -1;
			
 
				+	prog->instances.fds = NULL;
			
 
				+	prog->instances.nr = -1;
			
 
				 
			
 
				 	return 0;
			
 
				 errout:
			
@@ -860,13 +880,73 @@ static int
 
				 bpf_program__load(struct bpf_program *prog,
			
 
				 		  char *license, u32 kern_version)
			
 
				 {
			
 
				-	int err, fd;
			
 
				+	int err = 0, fd, i;
			
 
				 
			
 
				-	err = load_program(prog->insns, prog->insns_cnt,
			
 
				-			   license, kern_version, &fd);
			
 
				-	if (!err)
			
 
				-		prog->fd = fd;
			
 
				+	if (prog->instances.nr < 0 || !prog->instances.fds) {
			
 
				+		if (prog->preprocessor) {
			
 
				+			pr_warning("Internal error: can't load program '%s'\n",
			
 
				+				   prog->section_name);
			
 
				+			return -LIBBPF_ERRNO__INTERNAL;
			
 
				+		}
			
 
				 
			
 
				+		prog->instances.fds = malloc(sizeof(int));
			
 
				+		if (!prog->instances.fds) {
			
 
				+			pr_warning("Not enough memory for BPF fds\n");
			
 
				+			return -ENOMEM;
			
 
				+		}
			
 
				+		prog->instances.nr = 1;
			
 
				+		prog->instances.fds[0] = -1;
			
 
				+	}
			
 
				+
			
 
				+	if (!prog->preprocessor) {
			
 
				+		if (prog->instances.nr != 1) {
			
 
				+			pr_warning("Program '%s' is inconsistent: nr(%d) != 1\n",
			
 
				+				   prog->section_name, prog->instances.nr);
			
 
				+		}
			
 
				+		err = load_program(prog->insns, prog->insns_cnt,
			
 
				+				   license, kern_version, &fd);
			
 
				+		if (!err)
			
 
				+			prog->instances.fds[0] = fd;
			
 
				+		goto out;
			
 
				+	}
			
 
				+
			
 
				+	for (i = 0; i < prog->instances.nr; i++) {
			
 
				+		struct bpf_prog_prep_result result;
			
 
				+		bpf_program_prep_t preprocessor = prog->preprocessor;
			
 
				+
			
 
				+		bzero(&result, sizeof(result));
			
 
				+		err = preprocessor(prog, i, prog->insns,
			
 
				+				   prog->insns_cnt, &result);
			
 
				+		if (err) {
			
 
				+			pr_warning("Preprocessing the %dth instance of program '%s' failed\n",
			
 
				+				   i, prog->section_name);
			
 
				+			goto out;
			
 
				+		}
			
 
				+
			
 
				+		if (!result.new_insn_ptr || !result.new_insn_cnt) {
			
 
				+			pr_debug("Skip loading the %dth instance of program '%s'\n",
			
 
				+				 i, prog->section_name);
			
 
				+			prog->instances.fds[i] = -1;
			
 
				+			if (result.pfd)
			
 
				+				*result.pfd = -1;
			
 
				+			continue;
			
 
				+		}
			
 
				+
			
 
				+		err = load_program(result.new_insn_ptr,
			
 
				+				   result.new_insn_cnt,
			
 
				+				   license, kern_version, &fd);
			
 
				+
			
 
				+		if (err) {
			
 
				+			pr_warning("Loading the %dth instance of program '%s' failed\n",
			
 
				+					i, prog->section_name);
			
 
				+			goto out;
			
 
				+		}
			
 
				+
			
 
				+		if (result.pfd)
			
 
				+			*result.pfd = fd;
			
 
				+		prog->instances.fds[i] = fd;
			
 
				+	}
			
 
				+out:
			
 
				 	if (err)
			
 
				 		pr_warning("failed to load program '%s'\n",
			
 
				 			   prog->section_name);
			
@@ -1121,5 +1201,53 @@ const char *bpf_program__title(struct bpf_program *prog, bool needs_copy)
 
				 
			
 
				 int bpf_program__fd(struct bpf_program *prog)
			
 
				 {
			
 
				-	return prog->fd;
			
 
				+	return bpf_program__nth_fd(prog, 0);
			
 
				+}
			
 
				+
			
 
				+int bpf_program__set_prep(struct bpf_program *prog, int nr_instances,
			
 
				+			  bpf_program_prep_t prep)
			
 
				+{
			
 
				+	int *instances_fds;
			
 
				+
			
 
				+	if (nr_instances <= 0 || !prep)
			
 
				+		return -EINVAL;
			
 
				+
			
 
				+	if (prog->instances.nr > 0 || prog->instances.fds) {
			
 
				+		pr_warning("Can't set pre-processor after loading\n");
			
 
				+		return -EINVAL;
			
 
				+	}
			
 
				+
			
 
				+	instances_fds = malloc(sizeof(int) * nr_instances);
			
 
				+	if (!instances_fds) {
			
 
				+		pr_warning("alloc memory failed for fds\n");
			
 
				+		return -ENOMEM;
			
 
				+	}
			
 
				+
			
 
				+	/* fill all fd with -1 */
			
 
				+	memset(instances_fds, -1, sizeof(int) * nr_instances);
			
 
				+
			
 
				+	prog->instances.nr = nr_instances;
			
 
				+	prog->instances.fds = instances_fds;
			
 
				+	prog->preprocessor = prep;
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+int bpf_program__nth_fd(struct bpf_program *prog, int n)
			
 
				+{
			
 
				+	int fd;
			
 
				+
			
 
				+	if (n >= prog->instances.nr || n < 0) {
			
 
				+		pr_warning("Can't get the %dth fd from program %s: only %d instances\n",
			
 
				+			   n, prog->section_name, prog->instances.nr);
			
 
				+		return -EINVAL;
			
 
				+	}
			
 
				+
			
 
				+	fd = prog->instances.fds[n];
			
 
				+	if (fd < 0) {
			
 
				+		pr_warning("%dth instance of program '%s' is invalid\n",
			
 
				+			   n, prog->section_name);
			
 
				+		return -ENOENT;
			
 
				+	}
			
 
				+
			
 
				+	return fd;
			
 
				 }
			
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -88,6 +88,70 @@ const char *bpf_program__title(struct bpf_program *prog, bool needs_copy);
 
				 
			
 
				 int bpf_program__fd(struct bpf_program *prog);
			
 
				 
			
 
				+struct bpf_insn;
			
 
				+
			
 
				+/*
			
 
				+ * Libbpf allows callers to adjust BPF programs before being loaded
			
 
				+ * into kernel. One program in an object file can be transformed into
			
 
				+ * multiple variants to be attached to different code.
			
 
				+ *
			
 
				+ * bpf_program_prep_t, bpf_program__set_prep and bpf_program__nth_fd
			
 
				+ * are APIs for this purpose.
			
 
				+ *
			
 
				+ * - bpf_program_prep_t:
			
 
				+ *   It defines 'preprocessor', which is a caller defined function
			
 
				+ *   passed to libbpf through bpf_program__set_prep(), and will be
			
 
				+ *   called before program is loaded. The processor should adjust
			
 
				+ *   the program one time for each instance according to the number
			
 
				+ *   passed to it.
			
 
				+ *
			
 
				+ * - bpf_program__set_prep:
			
 
				+ *   Attaches a preprocessor to a BPF program. The number of instances
			
 
				+ *   to be created is also passed through this function.
			
 
				+ *
			
 
				+ * - bpf_program__nth_fd:
			
 
				+ *   After the program is loaded, get resulting fds from bpf program for
			
 
				+ *   each instance.
			
 
				+ *
			
 
				+ * If bpf_program__set_prep() is not used, the program would be loaded
			
 
				+ * without adjustment during bpf_object__load(). The program has only
			
 
				+ * one instance. In this case bpf_program__fd(prog) is equal to
			
 
				+ * bpf_program__nth_fd(prog, 0).
			
 
				+ */
			
 
				+
			
 
				+struct bpf_prog_prep_result {
			
 
				+	/*
			
 
				+	 * If not NULL, load new instruction array.
			
 
				+	 * If set to NULL, don't load this instance.
			
 
				+	 */
			
 
				+	struct bpf_insn *new_insn_ptr;
			
 
				+	int new_insn_cnt;
			
 
				+
			
 
				+	/* If not NULL, result fd is set to it */
			
 
				+	int *pfd;
			
 
				+};
			
 
				+
			
 
				+/*
			
 
				+ * Parameters of bpf_program_prep_t:
			
 
				+ *  - prog:	The bpf_program being loaded.
			
 
				+ *  - n:	Index of instance being generated.
			
 
				+ *  - insns:	BPF instructions array.
			
 
				+ *  - insns_cnt:Number of instructions in insns.
			
 
				+ *  - res:	Output parameter, result of transformation.
			
 
				+ *
			
 
				+ * Return value:
			
 
				+ *  - Zero: pre-processing success.
			
 
				+ *  - Non-zero: pre-processing error, stop loading.
			
 
				+ */
			
 
				+typedef int (*bpf_program_prep_t)(struct bpf_program *prog, int n,
			
 
				+				  struct bpf_insn *insns, int insns_cnt,
			
 
				+				  struct bpf_prog_prep_result *res);
			
 
				+
			
 
				+int bpf_program__set_prep(struct bpf_program *prog, int nr_instance,
			
 
				+			  bpf_program_prep_t prep);
			
 
				+
			
 
				+int bpf_program__nth_fd(struct bpf_program *prog, int n);
			
 
				+
			
 
				 /*
			
 
				  * We don't need __attribute__((packed)) now since it is
			
 
				  * unnecessary for 'bpf_map_def' because they are all aligned.
			
--- a/tools/lib/string.c
+++ b/tools/lib/string.c
@@ -0,0 +1,62 @@
 
				+/*
			
 
				+ *  linux/tools/lib/string.c
			
 
				+ *
			
 
				+ *  Copied from linux/lib/string.c, where it is:
			
 
				+ *
			
 
				+ *  Copyright (C) 1991, 1992  Linus Torvalds
			
 
				+ *
			
 
				+ *  More specifically, the first copied function was strtobool, which
			
 
				+ *  was introduced by:
			
 
				+ *
			
 
				+ *  d0f1fed29e6e ("Add a strtobool function matching semantics of existing in kernel equivalents")
			
 
				+ *  Author: Jonathan Cameron <jic23@cam.ac.uk>
			
 
				+ */
			
 
				+
			
 
				+#include <stdlib.h>
			
 
				+#include <string.h>
			
 
				+#include <errno.h>
			
 
				+#include <linux/string.h>
			
 
				+
			
 
				+/**
			
 
				+ * memdup - duplicate region of memory
			
 
				+ *
			
 
				+ * @src: memory region to duplicate
			
 
				+ * @len: memory region length
			
 
				+ */
			
 
				+void *memdup(const void *src, size_t len)
			
 
				+{
			
 
				+	void *p = malloc(len);
			
 
				+
			
 
				+	if (p)
			
 
				+		memcpy(p, src, len);
			
 
				+
			
 
				+	return p;
			
 
				+}
			
 
				+
			
 
				+/**
			
 
				+ * strtobool - convert common user inputs into boolean values
			
 
				+ * @s: input string
			
 
				+ * @res: result
			
 
				+ *
			
 
				+ * This routine returns 0 iff the first character is one of 'Yy1Nn0'.
			
 
				+ * Otherwise it will return -EINVAL.  Value pointed to by res is
			
 
				+ * updated upon finding a match.
			
 
				+ */
			
 
				+int strtobool(const char *s, bool *res)
			
 
				+{
			
 
				+	switch (s[0]) {
			
 
				+	case 'y':
			
 
				+	case 'Y':
			
 
				+	case '1':
			
 
				+		*res = true;
			
 
				+		break;
			
 
				+	case 'n':
			
 
				+	case 'N':
			
 
				+	case '0':
			
 
				+		*res = false;
			
 
				+		break;
			
 
				+	default:
			
 
				+		return -EINVAL;
			
 
				+	}
			
 
				+	return 0;
			
 
				+}
			
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -170,17 +170,18 @@ OPTIONS
 
				         Dump raw trace in ASCII.
			
 
				 
			
 
				 -g::
			
 
				---call-graph=<print_type,threshold[,print_limit],order,sort_key,branch>::
			
 
				+--call-graph=<print_type,threshold[,print_limit],order,sort_key[,branch],value>::
			
 
				         Display call chains using type, min percent threshold, print limit,
			
 
				-	call order, sort key and branch.  Note that ordering of parameters is not
			
 
				-	fixed so any parement can be given in an arbitraty order.  One exception
			
 
				-	is the print_limit which should be preceded by threshold.
			
 
				+	call order, sort key, optional branch and value.  Note that ordering of
			
 
				+	parameters is not fixed so any parameter can be given in an arbitrary order.
			
 
				+	One exception is the print_limit which should be preceded by threshold.
			
 
				 
			
 
				 	print_type can be either:
			
 
				 	- flat: single column, linear exposure of call chains.
			
 
				 	- graph: use a graph tree, displaying absolute overhead rates. (default)
			
 
				 	- fractal: like graph, but displays relative rates. Each branch of
			
 
				 		 the tree is considered as a new profiled object.
			
 
				+	- folded: call chains are displayed in a line, separated by semicolons
			
 
				 	- none: disable call chain display.
			
 
				 
			
 
				 	threshold is a percentage value which specifies a minimum percent to be
			
@@ -204,6 +205,11 @@ OPTIONS
 
				 	- branch: include last branch information in callgraph when available.
			
 
				 	          Usually more convenient to use --branch-history for this.
			
 
				 
			
 
				+	value can be:
			
 
				+	- percent: display overhead percent (default)
			
 
				+	- period: display event period
			
 
				+	- count: display event count
			
 
				+
			
 
				 --children::
			
 
				 	Accumulate callchain of children to parent entry so that then can
			
 
				 	show up in the output.  The output will have a new "Children" column
			
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -22,6 +22,7 @@ tools/lib/api
 
				 tools/lib/bpf
			
 
				 tools/lib/hweight.c
			
 
				 tools/lib/rbtree.c
			
 
				+tools/lib/string.c
			
 
				 tools/lib/symbol/kallsyms.c
			
 
				 tools/lib/symbol/kallsyms.h
			
 
				 tools/lib/util/find_next_bit.c
			
@@ -50,6 +51,7 @@ tools/include/linux/log2.h
 
				 tools/include/linux/poison.h
			
 
				 tools/include/linux/rbtree.h
			
 
				 tools/include/linux/rbtree_augmented.h
			
 
				+tools/include/linux/string.h
			
 
				 tools/include/linux/types.h
			
 
				 tools/include/linux/err.h
			
 
				 include/asm-generic/bitops/arch_hweight.h
			
--- a/tools/perf/arch/x86/include/arch-tests.h
+++ b/tools/perf/arch/x86/include/arch-tests.h
@@ -2,10 +2,10 @@
 
				 #define ARCH_TESTS_H
			
 
				 
			
 
				 /* Tests */
			
 
				-int test__rdpmc(void);
			
 
				-int test__perf_time_to_tsc(void);
			
 
				-int test__insn_x86(void);
			
 
				-int test__intel_cqm_count_nmi_context(void);
			
 
				+int test__rdpmc(int subtest);
			
 
				+int test__perf_time_to_tsc(int subtest);
			
 
				+int test__insn_x86(int subtest);
			
 
				+int test__intel_cqm_count_nmi_context(int subtest);
			
 
				 
			
 
				 #ifdef HAVE_DWARF_UNWIND_SUPPORT
			
 
				 struct thread;
			
--- a/tools/perf/arch/x86/tests/insn-x86.c
+++ b/tools/perf/arch/x86/tests/insn-x86.c
@@ -171,7 +171,7 @@ static int test_data_set(struct test_data *dat_set, int x86_64)
 
				  * verbose (-v) option to see all the instructions and whether or not they
			
 
				  * decoded successfuly.
			
 
				  */
			
 
				-int test__insn_x86(void)
			
 
				+int test__insn_x86(int subtest __maybe_unused)
			
 
				 {
			
 
				 	int ret = 0;
			
 
				 
			
--- a/tools/perf/arch/x86/tests/intel-cqm.c
+++ b/tools/perf/arch/x86/tests/intel-cqm.c
@@ -33,7 +33,7 @@ static pid_t spawn(void)
 
				  * the last read counter value to avoid triggering a WARN_ON_ONCE() in
			
 
				  * smp_call_function_many() caused by sending IPIs from NMI context.
			
 
				  */
			
 
				-int test__intel_cqm_count_nmi_context(void)
			
 
				+int test__intel_cqm_count_nmi_context(int subtest __maybe_unused)
			
 
				 {
			
 
				 	struct perf_evlist *evlist = NULL;
			
 
				 	struct perf_evsel *evsel = NULL;
			
--- a/tools/perf/arch/x86/tests/perf-time-to-tsc.c
+++ b/tools/perf/arch/x86/tests/perf-time-to-tsc.c
@@ -35,7 +35,7 @@
 
				  * %0 is returned, otherwise %-1 is returned.  If TSC conversion is not
			
 
				  * supported then then the test passes but " (not supported)" is printed.
			
 
				  */
			
 
				-int test__perf_time_to_tsc(void)
			
 
				+int test__perf_time_to_tsc(int subtest __maybe_unused)
			
 
				 {
			
 
				 	struct record_opts opts = {
			
 
				 		.mmap_pages	     = UINT_MAX,
			
--- a/tools/perf/arch/x86/tests/rdpmc.c
+++ b/tools/perf/arch/x86/tests/rdpmc.c
@@ -149,7 +149,7 @@ out_close:
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-int test__rdpmc(void)
			
 
				+int test__rdpmc(int subtest __maybe_unused)
			
 
				 {
			
 
				 	int status = 0;
			
 
				 	int wret = 0;
			
--- a/tools/perf/arch/x86/util/Build
+++ b/tools/perf/arch/x86/util/Build
@@ -5,6 +5,7 @@ libperf-y += kvm-stat.o
 
				 libperf-y += perf_regs.o
			
 
				 
			
 
				 libperf-$(CONFIG_DWARF) += dwarf-regs.o
			
 
				+libperf-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o
			
 
				 
			
 
				 libperf-$(CONFIG_LIBUNWIND)          += unwind-libunwind.o
			
 
				 libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
			
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -625,7 +625,7 @@ parse_percent_limit(const struct option *opt, const char *str,
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-#define CALLCHAIN_DEFAULT_OPT  "graph,0.5,caller,function"
			
 
				+#define CALLCHAIN_DEFAULT_OPT  "graph,0.5,caller,function,percent"
			
 
				 
			
 
				 const char report_callchain_help[] = "Display call graph (stack chain/backtrace):\n\n"
			
 
				 				     CALLCHAIN_REPORT_HELP
			
@@ -708,7 +708,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 	OPT_BOOLEAN('x', "exclude-other", &symbol_conf.exclude_other,
			
 
				 		    "Only display entries with parent-match"),
			
 
				 	OPT_CALLBACK_DEFAULT('g', "call-graph", &report,
			
 
				-			     "print_type,threshold[,print_limit],order,sort_key[,branch]",
			
 
				+			     "print_type,threshold[,print_limit],order,sort_key[,branch],value",
			
 
				 			     report_callchain_help, &report_parse_callchain_opt,
			
 
				 			     callchain_default_opt),
			
 
				 	OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain,
			
--- a/tools/perf/config/Makefile
+++ b/tools/perf/config/Makefile
@@ -318,6 +318,18 @@ ifndef NO_LIBELF
 
				       CFLAGS += -DHAVE_LIBBPF_SUPPORT
			
 
				       $(call detected,CONFIG_LIBBPF)
			
 
				     endif
			
 
				+
			
 
				+    ifndef NO_DWARF
			
 
				+      ifdef PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET
			
 
				+        CFLAGS += -DHAVE_BPF_PROLOGUE
			
 
				+        $(call detected,CONFIG_BPF_PROLOGUE)
			
 
				+      else
			
 
				+        msg := $(warning BPF prologue is not supported by architecture $(ARCH), missing regs_query_register_offset());
			
 
				+      endif
			
 
				+    else
			
 
				+      msg := $(warning DWARF support is off, BPF prologue is disabled);
			
 
				+    endif
			
 
				+
			
 
				   endif # NO_LIBBPF
			
 
				 endif # NO_LIBELF
			
 
				 
			
--- a/tools/perf/tests/.gitignore
+++ b/tools/perf/tests/.gitignore
@@ -1,2 +1,3 @@
 
				 llvm-src-base.c
			
 
				 llvm-src-kbuild.c
			
 
				+llvm-src-prologue.c
			
--- a/tools/perf/tests/Build
+++ b/tools/perf/tests/Build
@@ -31,7 +31,7 @@ perf-y += sample-parsing.o
 
				 perf-y += parse-no-sample-id-all.o
			
 
				 perf-y += kmod-path.o
			
 
				 perf-y += thread-map.o
			
 
				-perf-y += llvm.o llvm-src-base.o llvm-src-kbuild.o
			
 
				+perf-y += llvm.o llvm-src-base.o llvm-src-kbuild.o llvm-src-prologue.o
			
 
				 perf-y += bpf.o
			
 
				 perf-y += topology.o
			
 
				 
			
@@ -49,6 +49,13 @@ $(OUTPUT)tests/llvm-src-kbuild.c: tests/bpf-script-test-kbuild.c
 
				 	$(Q)sed -e 's/"/\\"/g' -e 's/\(.*\)/"\1\\n"/g' $< >> $@
			
 
				 	$(Q)echo ';' >> $@
			
 
				 
			
 
				+$(OUTPUT)tests/llvm-src-prologue.c: tests/bpf-script-test-prologue.c
			
 
				+	$(call rule_mkdir)
			
 
				+	$(Q)echo '#include <tests/llvm.h>' > $@
			
 
				+	$(Q)echo 'const char test_llvm__bpf_test_prologue_prog[] =' >> $@
			
 
				+	$(Q)sed -e 's/"/\\"/g' -e 's/\(.*\)/"\1\\n"/g' $< >> $@
			
 
				+	$(Q)echo ';' >> $@
			
 
				+
			
 
				 ifeq ($(ARCH),$(filter $(ARCH),x86 arm arm64))
			
 
				 perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
			
 
				 endif
			
--- a/tools/perf/tests/attr.c
+++ b/tools/perf/tests/attr.c
@@ -153,7 +153,7 @@ static int run_dir(const char *d, const char *perf)
 
				 	return system(cmd);
			
 
				 }
			
 
				 
			
 
				-int test__attr(void)
			
 
				+int test__attr(int subtest __maybe_unused)
			
 
				 {
			
 
				 	struct stat st;
			
 
				 	char path_perf[PATH_MAX];
			
--- a/tools/perf/tests/bp_signal.c
+++ b/tools/perf/tests/bp_signal.c
@@ -111,7 +111,7 @@ static long long bp_count(int fd)
 
				 	return count;
			
 
				 }
			
 
				 
			
 
				-int test__bp_signal(void)
			
 
				+int test__bp_signal(int subtest __maybe_unused)
			
 
				 {
			
 
				 	struct sigaction sa;
			
 
				 	long long count1, count2;
			
--- a/tools/perf/tests/bp_signal_overflow.c
+++ b/tools/perf/tests/bp_signal_overflow.c
@@ -58,7 +58,7 @@ static long long bp_count(int fd)
 
				 #define EXECUTIONS 10000
			
 
				 #define THRESHOLD  100
			
 
				 
			
 
				-int test__bp_signal_overflow(void)
			
 
				+int test__bp_signal_overflow(int subtest __maybe_unused)
			
 
				 {
			
 
				 	struct perf_event_attr pe;
			
 
				 	struct sigaction sa;
			
--- a/tools/perf/tests/bpf-script-test-prologue.c
+++ b/tools/perf/tests/bpf-script-test-prologue.c
@@ -0,0 +1,35 @@
 
				+/*
			
 
				+ * bpf-script-test-prologue.c
			
 
				+ * Test BPF prologue
			
 
				+ */
			
 
				+#ifndef LINUX_VERSION_CODE
			
 
				+# error Need LINUX_VERSION_CODE
			
 
				+# error Example: for 4.2 kernel, put 'clang-opt="-DLINUX_VERSION_CODE=0x40200" into llvm section of ~/.perfconfig'
			
 
				+#endif
			
 
				+#define SEC(NAME) __attribute__((section(NAME), used))
			
 
				+
			
 
				+#include <uapi/linux/fs.h>
			
 
				+
			
 
				+#define FMODE_READ		0x1
			
 
				+#define FMODE_WRITE		0x2
			
 
				+
			
 
				+static void (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) =
			
 
				+	(void *) 6;
			
 
				+
			
 
				+SEC("func=null_lseek file->f_mode offset orig")
			
 
				+int bpf_func__null_lseek(void *ctx, int err, unsigned long f_mode,
			
 
				+			 unsigned long offset, unsigned long orig)
			
 
				+{
			
 
				+	if (err)
			
 
				+		return 0;
			
 
				+	if (f_mode & FMODE_WRITE)
			
 
				+		return 0;
			
 
				+	if (offset & 1)
			
 
				+		return 0;
			
 
				+	if (orig == SEEK_CUR)
			
 
				+		return 0;
			
 
				+	return 1;
			
 
				+}
			
 
				+
			
 
				+char _license[] SEC("license") = "GPL";
			
 
				+int _version SEC("version") = LINUX_VERSION_CODE;
			
--- a/tools/perf/tests/bpf.c
+++ b/tools/perf/tests/bpf.c
@@ -19,6 +19,29 @@ static int epoll_pwait_loop(void)
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				+#ifdef HAVE_BPF_PROLOGUE
			
 
				+
			
 
				+static int llseek_loop(void)
			
 
				+{
			
 
				+	int fds[2], i;
			
 
				+
			
 
				+	fds[0] = open("/dev/null", O_RDONLY);
			
 
				+	fds[1] = open("/dev/null", O_RDWR);
			
 
				+
			
 
				+	if (fds[0] < 0 || fds[1] < 0)
			
 
				+		return -1;
			
 
				+
			
 
				+	for (i = 0; i < NR_ITERS; i++) {
			
 
				+		lseek(fds[i % 2], i, (i / 2) % 2 ? SEEK_CUR : SEEK_SET);
			
 
				+		lseek(fds[(i + 1) % 2], i, (i / 2) % 2 ? SEEK_CUR : SEEK_SET);
			
 
				+	}
			
 
				+	close(fds[0]);
			
 
				+	close(fds[1]);
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+#endif
			
 
				+
			
 
				 static struct {
			
 
				 	enum test_llvm__testcase prog_id;
			
 
				 	const char *desc;
			
@@ -37,6 +60,17 @@ static struct {
 
				 		&epoll_pwait_loop,
			
 
				 		(NR_ITERS + 1) / 2,
			
 
				 	},
			
 
				+#ifdef HAVE_BPF_PROLOGUE
			
 
				+	{
			
 
				+		LLVM_TESTCASE_BPF_PROLOGUE,
			
 
				+		"Test BPF prologue generation",
			
 
				+		"[bpf_prologue_test]",
			
 
				+		"fix kbuild first",
			
 
				+		"check your vmlinux setting?",
			
 
				+		&llseek_loop,
			
 
				+		(NR_ITERS + 1) / 4,
			
 
				+	},
			
 
				+#endif
			
 
				 };
			
 
				 
			
 
				 static int do_test(struct bpf_object *obj, int (*func)(void),
			
@@ -68,8 +102,7 @@ static int do_test(struct bpf_object *obj, int (*func)(void),
 
				 	err = parse_events_load_bpf_obj(&parse_evlist, &parse_evlist.list, obj);
			
 
				 	if (err || list_empty(&parse_evlist.list)) {
			
 
				 		pr_debug("Failed to add events selected by BPF\n");
			
 
				-		if (!err)
			
 
				-			return TEST_FAIL;
			
 
				+		return TEST_FAIL;
			
 
				 	}
			
 
				 
			
 
				 	snprintf(pid, sizeof(pid), "%d", getpid());
			
@@ -123,8 +156,10 @@ static int do_test(struct bpf_object *obj, int (*func)(void),
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-	if (count != expect)
			
 
				+	if (count != expect) {
			
 
				 		pr_debug("BPF filter result incorrect\n");
			
 
				+		goto out_delete_evlist;
			
 
				+	}
			
 
				 
			
 
				 	ret = TEST_OK;
			
 
				 
			
@@ -146,7 +181,7 @@ prepare_bpf(void *obj_buf, size_t obj_buf_sz, const char *name)
 
				 	return obj;
			
 
				 }
			
 
				 
			
 
				-static int __test__bpf(int index)
			
 
				+static int __test__bpf(int idx)
			
 
				 {
			
 
				 	int ret;
			
 
				 	void *obj_buf;
			
@@ -154,54 +189,72 @@ static int __test__bpf(int index)
 
				 	struct bpf_object *obj;
			
 
				 
			
 
				 	ret = test_llvm__fetch_bpf_obj(&obj_buf, &obj_buf_sz,
			
 
				-				       bpf_testcase_table[index].prog_id,
			
 
				+				       bpf_testcase_table[idx].prog_id,
			
 
				 				       true);
			
 
				 	if (ret != TEST_OK || !obj_buf || !obj_buf_sz) {
			
 
				 		pr_debug("Unable to get BPF object, %s\n",
			
 
				-			 bpf_testcase_table[index].msg_compile_fail);
			
 
				-		if (index == 0)
			
 
				+			 bpf_testcase_table[idx].msg_compile_fail);
			
 
				+		if (idx == 0)
			
 
				 			return TEST_SKIP;
			
 
				 		else
			
 
				 			return TEST_FAIL;
			
 
				 	}
			
 
				 
			
 
				 	obj = prepare_bpf(obj_buf, obj_buf_sz,
			
 
				-			  bpf_testcase_table[index].name);
			
 
				+			  bpf_testcase_table[idx].name);
			
 
				 	if (!obj) {
			
 
				 		ret = TEST_FAIL;
			
 
				 		goto out;
			
 
				 	}
			
 
				 
			
 
				 	ret = do_test(obj,
			
 
				-		      bpf_testcase_table[index].target_func,
			
 
				-		      bpf_testcase_table[index].expect_result);
			
 
				+		      bpf_testcase_table[idx].target_func,
			
 
				+		      bpf_testcase_table[idx].expect_result);
			
 
				 out:
			
 
				 	bpf__clear();
			
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				-int test__bpf(void)
			
 
				+int test__bpf_subtest_get_nr(void)
			
 
				+{
			
 
				+	return (int)ARRAY_SIZE(bpf_testcase_table);
			
 
				+}
			
 
				+
			
 
				+const char *test__bpf_subtest_get_desc(int i)
			
 
				+{
			
 
				+	if (i < 0 || i >= (int)ARRAY_SIZE(bpf_testcase_table))
			
 
				+		return NULL;
			
 
				+	return bpf_testcase_table[i].desc;
			
 
				+}
			
 
				+
			
 
				+int test__bpf(int i)
			
 
				 {
			
 
				-	unsigned int i;
			
 
				 	int err;
			
 
				 
			
 
				+	if (i < 0 || i >= (int)ARRAY_SIZE(bpf_testcase_table))
			
 
				+		return TEST_FAIL;
			
 
				+
			
 
				 	if (geteuid() != 0) {
			
 
				 		pr_debug("Only root can run BPF test\n");
			
 
				 		return TEST_SKIP;
			
 
				 	}
			
 
				 
			
 
				-	for (i = 0; i < ARRAY_SIZE(bpf_testcase_table); i++) {
			
 
				-		err = __test__bpf(i);
			
 
				+	err = __test__bpf(i);
			
 
				+	return err;
			
 
				+}
			
 
				 
			
 
				-		if (err != TEST_OK)
			
 
				-			return err;
			
 
				-	}
			
 
				+#else
			
 
				+int test__bpf_subtest_get_nr(void)
			
 
				+{
			
 
				+	return 0;
			
 
				+}
			
 
				 
			
 
				-	return TEST_OK;
			
 
				+const char *test__bpf_subtest_get_desc(int i __maybe_unused)
			
 
				+{
			
 
				+	return NULL;
			
 
				 }
			
 
				 
			
 
				-#else
			
 
				-int test__bpf(void)
			
 
				+int test__bpf(int i __maybe_unused)
			
 
				 {
			
 
				 	pr_debug("Skip BPF test because BPF support is not compiled\n");
			
 
				 	return TEST_SKIP;
			
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -160,6 +160,11 @@ static struct test generic_tests[] = {
 
				 	{
			
 
				 		.desc = "Test LLVM searching and compiling",
			
 
				 		.func = test__llvm,
			
 
				+		.subtest = {
			
 
				+			.skip_if_fail	= true,
			
 
				+			.get_nr		= test__llvm_subtest_get_nr,
			
 
				+			.get_desc	= test__llvm_subtest_get_desc,
			
 
				+		},
			
 
				 	},
			
 
				 	{
			
 
				 		.desc = "Test topology in session",
			
@@ -168,6 +173,11 @@ static struct test generic_tests[] = {
 
				 	{
			
 
				 		.desc = "Test BPF filter",
			
 
				 		.func = test__bpf,
			
 
				+		.subtest = {
			
 
				+			.skip_if_fail	= true,
			
 
				+			.get_nr		= test__bpf_subtest_get_nr,
			
 
				+			.get_desc	= test__bpf_subtest_get_desc,
			
 
				+		},
			
 
				 	},
			
 
				 	{
			
 
				 		.func = NULL,
			
@@ -203,7 +213,7 @@ static bool perf_test__matches(struct test *test, int curr, int argc, const char
 
				 	return false;
			
 
				 }
			
 
				 
			
 
				-static int run_test(struct test *test)
			
 
				+static int run_test(struct test *test, int subtest)
			
 
				 {
			
 
				 	int status, err = -1, child = fork();
			
 
				 	char sbuf[STRERR_BUFSIZE];
			
@@ -216,7 +226,19 @@ static int run_test(struct test *test)
 
				 
			
 
				 	if (!child) {
			
 
				 		pr_debug("test child forked, pid %d\n", getpid());
			
 
				-		err = test->func();
			
 
				+		if (!verbose) {
			
 
				+			int nullfd = open("/dev/null", O_WRONLY);
			
 
				+			if (nullfd >= 0) {
			
 
				+				close(STDERR_FILENO);
			
 
				+				close(STDOUT_FILENO);
			
 
				+
			
 
				+				dup2(nullfd, STDOUT_FILENO);
			
 
				+				dup2(STDOUT_FILENO, STDERR_FILENO);
			
 
				+				close(nullfd);
			
 
				+			}
			
 
				+		}
			
 
				+
			
 
				+		err = test->func(subtest);
			
 
				 		exit(err);
			
 
				 	}
			
 
				 
			
@@ -237,6 +259,40 @@ static int run_test(struct test *test)
 
				 	for (j = 0; j < ARRAY_SIZE(tests); j++)	\
			
 
				 		for (t = &tests[j][0]; t->func; t++)
			
 
				 
			
 
				+static int test_and_print(struct test *t, bool force_skip, int subtest)
			
 
				+{
			
 
				+	int err;
			
 
				+
			
 
				+	if (!force_skip) {
			
 
				+		pr_debug("\n--- start ---\n");
			
 
				+		err = run_test(t, subtest);
			
 
				+		pr_debug("---- end ----\n");
			
 
				+	} else {
			
 
				+		pr_debug("\n--- force skipped ---\n");
			
 
				+		err = TEST_SKIP;
			
 
				+	}
			
 
				+
			
 
				+	if (!t->subtest.get_nr)
			
 
				+		pr_debug("%s:", t->desc);
			
 
				+	else
			
 
				+		pr_debug("%s subtest %d:", t->desc, subtest);
			
 
				+
			
 
				+	switch (err) {
			
 
				+	case TEST_OK:
			
 
				+		pr_info(" Ok\n");
			
 
				+		break;
			
 
				+	case TEST_SKIP:
			
 
				+		color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip\n");
			
 
				+		break;
			
 
				+	case TEST_FAIL:
			
 
				+	default:
			
 
				+		color_fprintf(stderr, PERF_COLOR_RED, " FAILED!\n");
			
 
				+		break;
			
 
				+	}
			
 
				+
			
 
				+	return err;
			
 
				+}
			
 
				+
			
 
				 static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist)
			
 
				 {
			
 
				 	struct test *t;
			
@@ -264,21 +320,43 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist)
 
				 			continue;
			
 
				 		}
			
 
				 
			
 
				-		pr_debug("\n--- start ---\n");
			
 
				-		err = run_test(t);
			
 
				-		pr_debug("---- end ----\n%s:", t->desc);
			
 
				-
			
 
				-		switch (err) {
			
 
				-		case TEST_OK:
			
 
				-			pr_info(" Ok\n");
			
 
				-			break;
			
 
				-		case TEST_SKIP:
			
 
				-			color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip\n");
			
 
				-			break;
			
 
				-		case TEST_FAIL:
			
 
				-		default:
			
 
				-			color_fprintf(stderr, PERF_COLOR_RED, " FAILED!\n");
			
 
				-			break;
			
 
				+		if (!t->subtest.get_nr) {
			
 
				+			test_and_print(t, false, -1);
			
 
				+		} else {
			
 
				+			int subn = t->subtest.get_nr();
			
 
				+			/*
			
 
				+			 * minus 2 to align with normal testcases.
			
 
				+			 * For subtest we print additional '.x' in number.
			
 
				+			 * for example:
			
 
				+			 *
			
 
				+			 * 35: Test LLVM searching and compiling                        :
			
 
				+			 * 35.1: Basic BPF llvm compiling test                          : Ok
			
 
				+			 */
			
 
				+			int subw = width > 2 ? width - 2 : width;
			
 
				+			bool skip = false;
			
 
				+			int subi;
			
 
				+
			
 
				+			if (subn <= 0) {
			
 
				+				color_fprintf(stderr, PERF_COLOR_YELLOW,
			
 
				+					      " Skip (not compiled in)\n");
			
 
				+				continue;
			
 
				+			}
			
 
				+			pr_info("\n");
			
 
				+
			
 
				+			for (subi = 0; subi < subn; subi++) {
			
 
				+				int len = strlen(t->subtest.get_desc(subi));
			
 
				+
			
 
				+				if (subw < len)
			
 
				+					subw = len;
			
 
				+			}
			
 
				+
			
 
				+			for (subi = 0; subi < subn; subi++) {
			
 
				+				pr_info("%2d.%1d: %-*s:", i, subi + 1, subw,
			
 
				+					t->subtest.get_desc(subi));
			
 
				+				err = test_and_print(t, skip, subi);
			
 
				+				if (err != TEST_OK && t->subtest.skip_if_fail)
			
 
				+					skip = true;
			
 
				+			}
			
 
				 		}
			
 
				 	}
			
 
				 
			
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -601,7 +601,7 @@ out_err:
 
				 	return err;
			
 
				 }
			
 
				 
			
 
				-int test__code_reading(void)
			
 
				+int test__code_reading(int subtest __maybe_unused)
			
 
				 {
			
 
				 	int ret;
			
 
				 
			
--- a/tools/perf/tests/dso-data.c
+++ b/tools/perf/tests/dso-data.c
@@ -110,7 +110,7 @@ static int dso__data_fd(struct dso *dso, struct machine *machine)
 
				 	return fd;
			
 
				 }
			
 
				 
			
 
				-int test__dso_data(void)
			
 
				+int test__dso_data(int subtest __maybe_unused)
			
 
				 {
			
 
				 	struct machine machine;
			
 
				 	struct dso *dso;
			
@@ -245,7 +245,7 @@ static int set_fd_limit(int n)
 
				 	return setrlimit(RLIMIT_NOFILE, &rlim);
			
 
				 }
			
 
				 
			
 
				-int test__dso_data_cache(void)
			
 
				+int test__dso_data_cache(int subtest __maybe_unused)
			
 
				 {
			
 
				 	struct machine machine;
			
 
				 	long nr_end, nr = open_files_cnt();
			
@@ -302,7 +302,7 @@ int test__dso_data_cache(void)
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-int test__dso_data_reopen(void)
			
 
				+int test__dso_data_reopen(int subtest __maybe_unused)
			
 
				 {
			
 
				 	struct machine machine;
			
 
				 	long nr_end, nr = open_files_cnt();
			
--- a/tools/perf/tests/dwarf-unwind.c
+++ b/tools/perf/tests/dwarf-unwind.c
@@ -142,7 +142,7 @@ static int krava_1(struct thread *thread)
 
				 	return krava_2(thread);
			
 
				 }
			
 
				 
			
 
				-int test__dwarf_unwind(void)
			
 
				+int test__dwarf_unwind(int subtest __maybe_unused)
			
 
				 {
			
 
				 	struct machines machines;
			
 
				 	struct machine *machine;
			
--- a/tools/perf/tests/evsel-roundtrip-name.c
+++ b/tools/perf/tests/evsel-roundtrip-name.c
@@ -95,7 +95,7 @@ out_delete_evlist:
 
				 #define perf_evsel__name_array_test(names) \
			
 
				 	__perf_evsel__name_array_test(names, ARRAY_SIZE(names))
			
 
				 
			
 
				-int test__perf_evsel__roundtrip_name_test(void)
			
 
				+int test__perf_evsel__roundtrip_name_test(int subtest __maybe_unused)
			
 
				 {
			
 
				 	int err = 0, ret = 0;
			
 
				 
			
--- a/tools/perf/tests/evsel-tp-sched.c
+++ b/tools/perf/tests/evsel-tp-sched.c
@@ -32,7 +32,7 @@ static int perf_evsel__test_field(struct perf_evsel *evsel, const char *name,
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				-int test__perf_evsel__tp_sched_test(void)
			
 
				+int test__perf_evsel__tp_sched_test(int subtest __maybe_unused)
			
 
				 {
			
 
				 	struct perf_evsel *evsel = perf_evsel__newtp("sched", "sched_switch");
			
 
				 	int ret = 0;
			
--- a/tools/perf/tests/fdarray.c
+++ b/tools/perf/tests/fdarray.c
@@ -25,7 +25,7 @@ static int fdarray__fprintf_prefix(struct fdarray *fda, const char *prefix, FILE
 
				 	return printed + fdarray__fprintf(fda, fp);
			
 
				 }
			
 
				 
			
 
				-int test__fdarray__filter(void)
			
 
				+int test__fdarray__filter(int subtest __maybe_unused)
			
 
				 {
			
 
				 	int nr_fds, expected_fd[2], fd, err = TEST_FAIL;
			
 
				 	struct fdarray *fda = fdarray__new(5, 5);
			
@@ -103,7 +103,7 @@ out:
 
				 	return err;
			
 
				 }
			
 
				 
			
 
				-int test__fdarray__add(void)
			
 
				+int test__fdarray__add(int subtest __maybe_unused)
			
 
				 {
			
 
				 	int err = TEST_FAIL;
			
 
				 	struct fdarray *fda = fdarray__new(2, 2);
			
--- a/tools/perf/tests/hists_cumulate.c
+++ b/tools/perf/tests/hists_cumulate.c
@@ -686,7 +686,7 @@ out:
 
				 	return err;
			
 
				 }
			
 
				 
			
 
				-int test__hists_cumulate(void)
			
 
				+int test__hists_cumulate(int subtest __maybe_unused)
			
 
				 {
			
 
				 	int err = TEST_FAIL;
			
 
				 	struct machines machines;
			
--- a/tools/perf/tests/hists_filter.c
+++ b/tools/perf/tests/hists_filter.c
@@ -104,7 +104,7 @@ out:
 
				 	return TEST_FAIL;
			
 
				 }
			
 
				 
			
 
				-int test__hists_filter(void)
			
 
				+int test__hists_filter(int subtest __maybe_unused)
			
 
				 {
			
 
				 	int err = TEST_FAIL;
			
 
				 	struct machines machines;
			
--- a/tools/perf/tests/hists_link.c
+++ b/tools/perf/tests/hists_link.c
@@ -274,7 +274,7 @@ static int validate_link(struct hists *leader, struct hists *other)
 
				 	return __validate_link(leader, 0) || __validate_link(other, 1);
			
 
				 }
			
 
				 
			
 
				-int test__hists_link(void)
			
 
				+int test__hists_link(int subtest __maybe_unused)
			
 
				 {
			
 
				 	int err = -1;
			
 
				 	struct hists *hists, *first_hists;
			
--- a/tools/perf/tests/hists_output.c
+++ b/tools/perf/tests/hists_output.c
@@ -576,7 +576,7 @@ out:
 
				 	return err;
			
 
				 }
			
 
				 
			
 
				-int test__hists_output(void)
			
 
				+int test__hists_output(int subtest __maybe_unused)
			
 
				 {
			
 
				 	int err = TEST_FAIL;
			
 
				 	struct machines machines;
			
--- a/tools/perf/tests/keep-tracking.c
+++ b/tools/perf/tests/keep-tracking.c
@@ -49,7 +49,7 @@ static int find_comm(struct perf_evlist *evlist, const char *comm)
 
				  * when an event is disabled but a dummy software event is not disabled.  If the
			
 
				  * test passes %0 is returned, otherwise %-1 is returned.
			
 
				  */
			
 
				-int test__keep_tracking(void)
			
 
				+int test__keep_tracking(int subtest __maybe_unused)
			
 
				 {
			
 
				 	struct record_opts opts = {
			
 
				 		.mmap_pages	     = UINT_MAX,
			
--- a/tools/perf/tests/kmod-path.c
+++ b/tools/perf/tests/kmod-path.c
@@ -49,7 +49,7 @@ static int test_is_kernel_module(const char *path, int cpumode, bool expect)
 
				 #define M(path, c, e) \
			
 
				 	TEST_ASSERT_VAL("failed", !test_is_kernel_module(path, c, e))
			
 
				 
			
 
				-int test__kmod_path__parse(void)
			
 
				+int test__kmod_path__parse(int subtest __maybe_unused)
			
 
				 {
			
 
				 	/* path                alloc_name  alloc_ext   kmod  comp   name     ext */
			
 
				 	T("/xxxx/xxxx/x-x.ko", true      , true      , true, false, "[x_x]", NULL);
			
--- a/tools/perf/tests/llvm.c
+++ b/tools/perf/tests/llvm.c
@@ -44,13 +44,17 @@ static struct {
 
				 		.source = test_llvm__bpf_test_kbuild_prog,
			
 
				 		.desc = "Test kbuild searching",
			
 
				 	},
			
 
				+	[LLVM_TESTCASE_BPF_PROLOGUE] = {
			
 
				+		.source = test_llvm__bpf_test_prologue_prog,
			
 
				+		.desc = "Compile source for BPF prologue generation test",
			
 
				+	},
			
 
				 };
			
 
				 
			
 
				 
			
 
				 int
			
 
				 test_llvm__fetch_bpf_obj(void **p_obj_buf,
			
 
				 			 size_t *p_obj_buf_sz,
			
 
				-			 enum test_llvm__testcase index,
			
 
				+			 enum test_llvm__testcase idx,
			
 
				 			 bool force)
			
 
				 {
			
 
				 	const char *source;
			
@@ -59,11 +63,11 @@ test_llvm__fetch_bpf_obj(void **p_obj_buf,
 
				 	char *tmpl_new = NULL, *clang_opt_new = NULL;
			
 
				 	int err, old_verbose, ret = TEST_FAIL;
			
 
				 
			
 
				-	if (index >= __LLVM_TESTCASE_MAX)
			
 
				+	if (idx >= __LLVM_TESTCASE_MAX)
			
 
				 		return TEST_FAIL;
			
 
				 
			
 
				-	source = bpf_source_table[index].source;
			
 
				-	desc = bpf_source_table[index].desc;
			
 
				+	source = bpf_source_table[idx].source;
			
 
				+	desc = bpf_source_table[idx].desc;
			
 
				 
			
 
				 	perf_config(perf_config_cb, NULL);
			
 
				 
			
@@ -127,44 +131,39 @@ out:
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				-int test__llvm(void)
			
 
				+int test__llvm(int subtest)
			
 
				 {
			
 
				-	enum test_llvm__testcase i;
			
 
				+	int ret;
			
 
				+	void *obj_buf = NULL;
			
 
				+	size_t obj_buf_sz = 0;
			
 
				 
			
 
				-	for (i = 0; i < __LLVM_TESTCASE_MAX; i++) {
			
 
				-		int ret;
			
 
				-		void *obj_buf = NULL;
			
 
				-		size_t obj_buf_sz = 0;
			
 
				+	if ((subtest < 0) || (subtest >= __LLVM_TESTCASE_MAX))
			
 
				+		return TEST_FAIL;
			
 
				 
			
 
				-		ret = test_llvm__fetch_bpf_obj(&obj_buf, &obj_buf_sz,
			
 
				-					       i, false);
			
 
				+	ret = test_llvm__fetch_bpf_obj(&obj_buf, &obj_buf_sz,
			
 
				+				       subtest, false);
			
 
				 
			
 
				-		if (ret == TEST_OK) {
			
 
				-			ret = test__bpf_parsing(obj_buf, obj_buf_sz);
			
 
				-			if (ret != TEST_OK)
			
 
				-				pr_debug("Failed to parse test case '%s'\n",
			
 
				-					 bpf_source_table[i].desc);
			
 
				-		}
			
 
				-		free(obj_buf);
			
 
				-
			
 
				-		switch (ret) {
			
 
				-		case TEST_SKIP:
			
 
				-			return TEST_SKIP;
			
 
				-		case TEST_OK:
			
 
				-			break;
			
 
				-		default:
			
 
				-			/*
			
 
				-			 * Test 0 is the basic LLVM test. If test 0
			
 
				-			 * fail, the basic LLVM support not functional
			
 
				-			 * so the whole test should fail. If other test
			
 
				-			 * case fail, it can be fixed by adjusting
			
 
				-			 * config so don't report error.
			
 
				-			 */
			
 
				-			if (i == 0)
			
 
				-				return TEST_FAIL;
			
 
				-			else
			
 
				-				return TEST_SKIP;
			
 
				+	if (ret == TEST_OK) {
			
 
				+		ret = test__bpf_parsing(obj_buf, obj_buf_sz);
			
 
				+		if (ret != TEST_OK) {
			
 
				+			pr_debug("Failed to parse test case '%s'\n",
			
 
				+				 bpf_source_table[subtest].desc);
			
 
				 		}
			
 
				 	}
			
 
				-	return TEST_OK;
			
 
				+	free(obj_buf);
			
 
				+
			
 
				+	return ret;
			
 
				+}
			
 
				+
			
 
				+int test__llvm_subtest_get_nr(void)
			
 
				+{
			
 
				+	return __LLVM_TESTCASE_MAX;
			
 
				+}
			
 
				+
			
 
				+const char *test__llvm_subtest_get_desc(int subtest)
			
 
				+{
			
 
				+	if ((subtest < 0) || (subtest >= __LLVM_TESTCASE_MAX))
			
 
				+		return NULL;
			
 
				+
			
 
				+	return bpf_source_table[subtest].desc;
			
 
				 }
			
--- a/tools/perf/tests/llvm.h
+++ b/tools/perf/tests/llvm.h
@@ -6,10 +6,12 @@
 
				 
			
 
				 extern const char test_llvm__bpf_base_prog[];
			
 
				 extern const char test_llvm__bpf_test_kbuild_prog[];
			
 
				+extern const char test_llvm__bpf_test_prologue_prog[];
			
 
				 
			
 
				 enum test_llvm__testcase {
			
 
				 	LLVM_TESTCASE_BASE,
			
 
				 	LLVM_TESTCASE_KBUILD,
			
 
				+	LLVM_TESTCASE_BPF_PROLOGUE,
			
 
				 	__LLVM_TESTCASE_MAX,
			
 
				 };
			
 
				 
			
--- a/tools/perf/tests/mmap-basic.c
+++ b/tools/perf/tests/mmap-basic.c
@@ -16,7 +16,7 @@
 
				  * Then it checks if the number of syscalls reported as perf events by
			
 
				  * the kernel corresponds to the number of syscalls made.
			
 
				  */
			
 
				-int test__basic_mmap(void)
			
 
				+int test__basic_mmap(int subtest __maybe_unused)
			
 
				 {
			
 
				 	int err = -1;
			
 
				 	union perf_event *event;
			
--- a/tools/perf/tests/mmap-thread-lookup.c
+++ b/tools/perf/tests/mmap-thread-lookup.c
@@ -221,7 +221,7 @@ static int mmap_events(synth_cb synth)
 
				  *
			
 
				  * by using all thread objects.
			
 
				  */
			
 
				-int test__mmap_thread_lookup(void)
			
 
				+int test__mmap_thread_lookup(int subtest __maybe_unused)
			
 
				 {
			
 
				 	/* perf_event__synthesize_threads synthesize */
			
 
				 	TEST_ASSERT_VAL("failed with sythesizing all",
			
--- a/tools/perf/tests/openat-syscall-all-cpus.c
+++ b/tools/perf/tests/openat-syscall-all-cpus.c
@@ -7,7 +7,7 @@
 
				 #include "debug.h"
			
 
				 #include "stat.h"
			
 
				 
			
 
				-int test__openat_syscall_event_on_all_cpus(void)
			
 
				+int test__openat_syscall_event_on_all_cpus(int subtest __maybe_unused)
			
 
				 {
			
 
				 	int err = -1, fd, cpu;
			
 
				 	struct cpu_map *cpus;
			
--- a/tools/perf/tests/openat-syscall-tp-fields.c
+++ b/tools/perf/tests/openat-syscall-tp-fields.c
@@ -6,7 +6,7 @@
 
				 #include "tests.h"
			
 
				 #include "debug.h"
			
 
				 
			
 
				-int test__syscall_openat_tp_fields(void)
			
 
				+int test__syscall_openat_tp_fields(int subtest __maybe_unused)
			
 
				 {
			
 
				 	struct record_opts opts = {
			
 
				 		.target = {
			
--- a/tools/perf/tests/openat-syscall.c
+++ b/tools/perf/tests/openat-syscall.c
@@ -5,7 +5,7 @@
 
				 #include "debug.h"
			
 
				 #include "tests.h"
			
 
				 
			
 
				-int test__openat_syscall_event(void)
			
 
				+int test__openat_syscall_event(int subtest __maybe_unused)
			
 
				 {
			
 
				 	int err = -1, fd;
			
 
				 	struct perf_evsel *evsel;
			
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -1765,7 +1765,7 @@ static void debug_warn(const char *warn, va_list params)
 
				 	fprintf(stderr, " Warning: %s\n", msg);
			
 
				 }
			
 
				 
			
 
				-int test__parse_events(void)
			
 
				+int test__parse_events(int subtest __maybe_unused)
			
 
				 {
			
 
				 	int ret1, ret2 = 0;
			
 
				 
			
--- a/tools/perf/tests/parse-no-sample-id-all.c
+++ b/tools/perf/tests/parse-no-sample-id-all.c
@@ -67,7 +67,7 @@ struct test_attr_event {
 
				  *
			
 
				  * Return: %0 on success, %-1 if the test fails.
			
 
				  */
			
 
				-int test__parse_no_sample_id_all(void)
			
 
				+int test__parse_no_sample_id_all(int subtest __maybe_unused)
			
 
				 {
			
 
				 	int err;
			
 
				 
			
--- a/tools/perf/tests/perf-record.c
+++ b/tools/perf/tests/perf-record.c
@@ -32,7 +32,7 @@ realloc:
 
				 	return cpu;
			
 
				 }
			
 
				 
			
 
				-int test__PERF_RECORD(void)
			
 
				+int test__PERF_RECORD(int subtest __maybe_unused)
			
 
				 {
			
 
				 	struct record_opts opts = {
			
 
				 		.target = {
			
--- a/tools/perf/tests/pmu.c
+++ b/tools/perf/tests/pmu.c
@@ -133,7 +133,7 @@ static struct list_head *test_terms_list(void)
 
				 	return &terms;
			
 
				 }
			
 
				 
			
 
				-int test__pmu(void)
			
 
				+int test__pmu(int subtest __maybe_unused)
			
 
				 {
			
 
				 	char *format = test_format_dir_get();
			
 
				 	LIST_HEAD(formats);
			
--- a/tools/perf/tests/python-use.c
+++ b/tools/perf/tests/python-use.c
@@ -4,11 +4,12 @@
 
				 
			
 
				 #include <stdio.h>
			
 
				 #include <stdlib.h>
			
 
				+#include <linux/compiler.h>
			
 
				 #include "tests.h"
			
 
				 
			
 
				 extern int verbose;
			
 
				 
			
 
				-int test__python_use(void)
			
 
				+int test__python_use(int subtest __maybe_unused)
			
 
				 {
			
 
				 	char *cmd;
			
 
				 	int ret;
			
--- a/tools/perf/tests/sample-parsing.c
+++ b/tools/perf/tests/sample-parsing.c
@@ -290,7 +290,7 @@ out_free:
 
				  * checks sample format bits separately and together.  If the test passes %0 is
			
 
				  * returned, otherwise %-1 is returned.
			
 
				  */
			
 
				-int test__sample_parsing(void)
			
 
				+int test__sample_parsing(int subtest __maybe_unused)
			
 
				 {
			
 
				 	const u64 rf[] = {4, 5, 6, 7, 12, 13, 14, 15};
			
 
				 	u64 sample_type;
			
--- a/tools/perf/tests/sw-clock.c
+++ b/tools/perf/tests/sw-clock.c
@@ -122,7 +122,7 @@ out_delete_evlist:
 
				 	return err;
			
 
				 }
			
 
				 
			
 
				-int test__sw_clock_freq(void)
			
 
				+int test__sw_clock_freq(int subtest __maybe_unused)
			
 
				 {
			
 
				 	int ret;
			
 
				 
			
--- a/tools/perf/tests/switch-tracking.c
+++ b/tools/perf/tests/switch-tracking.c
@@ -305,7 +305,7 @@ out_free_nodes:
 
				  * evsel->system_wide and evsel->tracking flags (respectively) with other events
			
 
				  * sometimes enabled or disabled.
			
 
				  */
			
 
				-int test__switch_tracking(void)
			
 
				+int test__switch_tracking(int subtest __maybe_unused)
			
 
				 {
			
 
				 	const char *sched_switch = "sched:sched_switch";
			
 
				 	struct switch_tracking switch_tracking = { .tids = NULL, };
			
--- a/tools/perf/tests/task-exit.c
+++ b/tools/perf/tests/task-exit.c
@@ -31,7 +31,7 @@ static void workload_exec_failed_signal(int signo __maybe_unused,
 
				  * if the number of exit event reported by the kernel is 1 or not
			
 
				  * in order to check the kernel returns correct number of event.
			
 
				  */
			
 
				-int test__task_exit(void)
			
 
				+int test__task_exit(int subtest __maybe_unused)
			
 
				 {
			
 
				 	int err = -1;
			
 
				 	union perf_event *event;
			
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -1,6 +1,8 @@
 
				 #ifndef TESTS_H
			
 
				 #define TESTS_H
			
 
				 
			
 
				+#include <stdbool.h>
			
 
				+
			
 
				 #define TEST_ASSERT_VAL(text, cond)					 \
			
 
				 do {									 \
			
 
				 	if (!(cond)) {							 \
			
@@ -26,48 +28,57 @@ enum {
 
				 
			
 
				 struct test {
			
 
				 	const char *desc;
			
 
				-	int (*func)(void);
			
 
				+	int (*func)(int subtest);
			
 
				+	struct {
			
 
				+		bool skip_if_fail;
			
 
				+		int (*get_nr)(void);
			
 
				+		const char *(*get_desc)(int subtest);
			
 
				+	} subtest;
			
 
				 };
			
 
				 
			
 
				 /* Tests */
			
 
				-int test__vmlinux_matches_kallsyms(void);
			
 
				-int test__openat_syscall_event(void);
			
 
				-int test__openat_syscall_event_on_all_cpus(void);
			
 
				-int test__basic_mmap(void);
			
 
				-int test__PERF_RECORD(void);
			
 
				-int test__perf_evsel__roundtrip_name_test(void);
			
 
				-int test__perf_evsel__tp_sched_test(void);
			
 
				-int test__syscall_openat_tp_fields(void);
			
 
				-int test__pmu(void);
			
 
				-int test__attr(void);
			
 
				-int test__dso_data(void);
			
 
				-int test__dso_data_cache(void);
			
 
				-int test__dso_data_reopen(void);
			
 
				-int test__parse_events(void);
			
 
				-int test__hists_link(void);
			
 
				-int test__python_use(void);
			
 
				-int test__bp_signal(void);
			
 
				-int test__bp_signal_overflow(void);
			
 
				-int test__task_exit(void);
			
 
				-int test__sw_clock_freq(void);
			
 
				-int test__code_reading(void);
			
 
				-int test__sample_parsing(void);
			
 
				-int test__keep_tracking(void);
			
 
				-int test__parse_no_sample_id_all(void);
			
 
				-int test__dwarf_unwind(void);
			
 
				-int test__hists_filter(void);
			
 
				-int test__mmap_thread_lookup(void);
			
 
				-int test__thread_mg_share(void);
			
 
				-int test__hists_output(void);
			
 
				-int test__hists_cumulate(void);
			
 
				-int test__switch_tracking(void);
			
 
				-int test__fdarray__filter(void);
			
 
				-int test__fdarray__add(void);
			
 
				-int test__kmod_path__parse(void);
			
 
				-int test__thread_map(void);
			
 
				-int test__llvm(void);
			
 
				-int test__bpf(void);
			
 
				-int test_session_topology(void);
			
 
				+int test__vmlinux_matches_kallsyms(int subtest);
			
 
				+int test__openat_syscall_event(int subtest);
			
 
				+int test__openat_syscall_event_on_all_cpus(int subtest);
			
 
				+int test__basic_mmap(int subtest);
			
 
				+int test__PERF_RECORD(int subtest);
			
 
				+int test__perf_evsel__roundtrip_name_test(int subtest);
			
 
				+int test__perf_evsel__tp_sched_test(int subtest);
			
 
				+int test__syscall_openat_tp_fields(int subtest);
			
 
				+int test__pmu(int subtest);
			
 
				+int test__attr(int subtest);
			
 
				+int test__dso_data(int subtest);
			
 
				+int test__dso_data_cache(int subtest);
			
 
				+int test__dso_data_reopen(int subtest);
			
 
				+int test__parse_events(int subtest);
			
 
				+int test__hists_link(int subtest);
			
 
				+int test__python_use(int subtest);
			
 
				+int test__bp_signal(int subtest);
			
 
				+int test__bp_signal_overflow(int subtest);
			
 
				+int test__task_exit(int subtest);
			
 
				+int test__sw_clock_freq(int subtest);
			
 
				+int test__code_reading(int subtest);
			
 
				+int test__sample_parsing(int subtest);
			
 
				+int test__keep_tracking(int subtest);
			
 
				+int test__parse_no_sample_id_all(int subtest);
			
 
				+int test__dwarf_unwind(int subtest);
			
 
				+int test__hists_filter(int subtest);
			
 
				+int test__mmap_thread_lookup(int subtest);
			
 
				+int test__thread_mg_share(int subtest);
			
 
				+int test__hists_output(int subtest);
			
 
				+int test__hists_cumulate(int subtest);
			
 
				+int test__switch_tracking(int subtest);
			
 
				+int test__fdarray__filter(int subtest);
			
 
				+int test__fdarray__add(int subtest);
			
 
				+int test__kmod_path__parse(int subtest);
			
 
				+int test__thread_map(int subtest);
			
 
				+int test__llvm(int subtest);
			
 
				+const char *test__llvm_subtest_get_desc(int subtest);
			
 
				+int test__llvm_subtest_get_nr(void);
			
 
				+int test__bpf(int subtest);
			
 
				+const char *test__bpf_subtest_get_desc(int subtest);
			
 
				+int test__bpf_subtest_get_nr(void);
			
 
				+int test_session_topology(int subtest);
			
 
				 
			
 
				 #if defined(__arm__) || defined(__aarch64__)
			
 
				 #ifdef HAVE_DWARF_UNWIND_SUPPORT
			
--- a/tools/perf/tests/thread-map.c
+++ b/tools/perf/tests/thread-map.c
@@ -4,7 +4,7 @@
 
				 #include "thread_map.h"
			
 
				 #include "debug.h"
			
 
				 
			
 
				-int test__thread_map(void)
			
 
				+int test__thread_map(int subtest __maybe_unused)
			
 
				 {
			
 
				 	struct thread_map *map;
			
 
				 
			
--- a/tools/perf/tests/thread-mg-share.c
+++ b/tools/perf/tests/thread-mg-share.c
@@ -4,7 +4,7 @@
 
				 #include "map.h"
			
 
				 #include "debug.h"
			
 
				 
			
 
				-int test__thread_mg_share(void)
			
 
				+int test__thread_mg_share(int subtest __maybe_unused)
			
 
				 {
			
 
				 	struct machines machines;
			
 
				 	struct machine *machine;
			
--- a/tools/perf/tests/topology.c
+++ b/tools/perf/tests/topology.c
@@ -84,7 +84,7 @@ static int check_cpu_topology(char *path, struct cpu_map *map)
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-int test_session_topology(void)
			
 
				+int test_session_topology(int subtest __maybe_unused)
			
 
				 {
			
 
				 	char path[PATH_MAX];
			
 
				 	struct cpu_map *map;
			
--- a/tools/perf/tests/vmlinux-kallsyms.c
+++ b/tools/perf/tests/vmlinux-kallsyms.c
@@ -18,7 +18,7 @@ static int vmlinux_matches_kallsyms_filter(struct map *map __maybe_unused,
 
				 
			
 
				 #define UM(x) kallsyms_map->unmap_ip(kallsyms_map, (x))
			
 
				 
			
 
				-int test__vmlinux_matches_kallsyms(void)
			
 
				+int test__vmlinux_matches_kallsyms(int subtest __maybe_unused)
			
 
				 {
			
 
				 	int err = -1;
			
 
				 	struct rb_node *nd;
			
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -178,12 +178,51 @@ static int callchain_node__count_rows_rb_tree(struct callchain_node *node)
 
				 	return n;
			
 
				 }
			
 
				 
			
 
				+static int callchain_node__count_flat_rows(struct callchain_node *node)
			
 
				+{
			
 
				+	struct callchain_list *chain;
			
 
				+	char folded_sign = 0;
			
 
				+	int n = 0;
			
 
				+
			
 
				+	list_for_each_entry(chain, &node->parent_val, list) {
			
 
				+		if (!folded_sign) {
			
 
				+			/* only check first chain list entry */
			
 
				+			folded_sign = callchain_list__folded(chain);
			
 
				+			if (folded_sign == '+')
			
 
				+				return 1;
			
 
				+		}
			
 
				+		n++;
			
 
				+	}
			
 
				+
			
 
				+	list_for_each_entry(chain, &node->val, list) {
			
 
				+		if (!folded_sign) {
			
 
				+			/* node->parent_val list might be empty */
			
 
				+			folded_sign = callchain_list__folded(chain);
			
 
				+			if (folded_sign == '+')
			
 
				+				return 1;
			
 
				+		}
			
 
				+		n++;
			
 
				+	}
			
 
				+
			
 
				+	return n;
			
 
				+}
			
 
				+
			
 
				+static int callchain_node__count_folded_rows(struct callchain_node *node __maybe_unused)
			
 
				+{
			
 
				+	return 1;
			
 
				+}
			
 
				+
			
 
				 static int callchain_node__count_rows(struct callchain_node *node)
			
 
				 {
			
 
				 	struct callchain_list *chain;
			
 
				 	bool unfolded = false;
			
 
				 	int n = 0;
			
 
				 
			
 
				+	if (callchain_param.mode == CHAIN_FLAT)
			
 
				+		return callchain_node__count_flat_rows(node);
			
 
				+	else if (callchain_param.mode == CHAIN_FOLDED)
			
 
				+		return callchain_node__count_folded_rows(node);
			
 
				+
			
 
				 	list_for_each_entry(chain, &node->val, list) {
			
 
				 		++n;
			
 
				 		unfolded = chain->unfolded;
			
@@ -263,7 +302,7 @@ static void callchain_node__init_have_children(struct callchain_node *node,
 
				 	chain = list_entry(node->val.next, struct callchain_list, list);
			
 
				 	chain->has_children = has_sibling;
			
 
				 
			
 
				-	if (!list_empty(&node->val)) {
			
 
				+	if (node->val.next != node->val.prev) {
			
 
				 		chain = list_entry(node->val.prev, struct callchain_list, list);
			
 
				 		chain->has_children = !RB_EMPTY_ROOT(&node->rb_root);
			
 
				 	}
			
@@ -279,6 +318,9 @@ static void callchain__init_have_children(struct rb_root *root)
 
				 	for (nd = rb_first(root); nd; nd = rb_next(nd)) {
			
 
				 		struct callchain_node *node = rb_entry(nd, struct callchain_node, rb_node);
			
 
				 		callchain_node__init_have_children(node, has_sibling);
			
 
				+		if (callchain_param.mode == CHAIN_FLAT ||
			
 
				+		    callchain_param.mode == CHAIN_FOLDED)
			
 
				+			callchain_node__make_parent_list(node);
			
 
				 	}
			
 
				 }
			
 
				 
			
@@ -574,6 +616,231 @@ static bool hist_browser__check_dump_full(struct hist_browser *browser __maybe_u
 
				 
			
 
				 #define LEVEL_OFFSET_STEP 3
			
 
				 
			
 
				+static int hist_browser__show_callchain_list(struct hist_browser *browser,
			
 
				+					     struct callchain_node *node,
			
 
				+					     struct callchain_list *chain,
			
 
				+					     unsigned short row, u64 total,
			
 
				+					     bool need_percent, int offset,
			
 
				+					     print_callchain_entry_fn print,
			
 
				+					     struct callchain_print_arg *arg)
			
 
				+{
			
 
				+	char bf[1024], *alloc_str;
			
 
				+	const char *str;
			
 
				+
			
 
				+	if (arg->row_offset != 0) {
			
 
				+		arg->row_offset--;
			
 
				+		return 0;
			
 
				+	}
			
 
				+
			
 
				+	alloc_str = NULL;
			
 
				+	str = callchain_list__sym_name(chain, bf, sizeof(bf),
			
 
				+				       browser->show_dso);
			
 
				+
			
 
				+	if (need_percent) {
			
 
				+		char buf[64];
			
 
				+
			
 
				+		callchain_node__scnprintf_value(node, buf, sizeof(buf),
			
 
				+						total);
			
 
				+
			
 
				+		if (asprintf(&alloc_str, "%s %s", buf, str) < 0)
			
 
				+			str = "Not enough memory!";
			
 
				+		else
			
 
				+			str = alloc_str;
			
 
				+	}
			
 
				+
			
 
				+	print(browser, chain, str, offset, row, arg);
			
 
				+
			
 
				+	free(alloc_str);
			
 
				+	return 1;
			
 
				+}
			
 
				+
			
 
				+static int hist_browser__show_callchain_flat(struct hist_browser *browser,
			
 
				+					     struct rb_root *root,
			
 
				+					     unsigned short row, u64 total,
			
 
				+					     print_callchain_entry_fn print,
			
 
				+					     struct callchain_print_arg *arg,
			
 
				+					     check_output_full_fn is_output_full)
			
 
				+{
			
 
				+	struct rb_node *node;
			
 
				+	int first_row = row, offset = LEVEL_OFFSET_STEP;
			
 
				+	bool need_percent;
			
 
				+
			
 
				+	node = rb_first(root);
			
 
				+	need_percent = node && rb_next(node);
			
 
				+
			
 
				+	while (node) {
			
 
				+		struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node);
			
 
				+		struct rb_node *next = rb_next(node);
			
 
				+		struct callchain_list *chain;
			
 
				+		char folded_sign = ' ';
			
 
				+		int first = true;
			
 
				+		int extra_offset = 0;
			
 
				+
			
 
				+		list_for_each_entry(chain, &child->parent_val, list) {
			
 
				+			bool was_first = first;
			
 
				+
			
 
				+			if (first)
			
 
				+				first = false;
			
 
				+			else if (need_percent)
			
 
				+				extra_offset = LEVEL_OFFSET_STEP;
			
 
				+
			
 
				+			folded_sign = callchain_list__folded(chain);
			
 
				+
			
 
				+			row += hist_browser__show_callchain_list(browser, child,
			
 
				+							chain, row, total,
			
 
				+							was_first && need_percent,
			
 
				+							offset + extra_offset,
			
 
				+							print, arg);
			
 
				+
			
 
				+			if (is_output_full(browser, row))
			
 
				+				goto out;
			
 
				+
			
 
				+			if (folded_sign == '+')
			
 
				+				goto next;
			
 
				+		}
			
 
				+
			
 
				+		list_for_each_entry(chain, &child->val, list) {
			
 
				+			bool was_first = first;
			
 
				+
			
 
				+			if (first)
			
 
				+				first = false;
			
 
				+			else if (need_percent)
			
 
				+				extra_offset = LEVEL_OFFSET_STEP;
			
 
				+
			
 
				+			folded_sign = callchain_list__folded(chain);
			
 
				+
			
 
				+			row += hist_browser__show_callchain_list(browser, child,
			
 
				+							chain, row, total,
			
 
				+							was_first && need_percent,
			
 
				+							offset + extra_offset,
			
 
				+							print, arg);
			
 
				+
			
 
				+			if (is_output_full(browser, row))
			
 
				+				goto out;
			
 
				+
			
 
				+			if (folded_sign == '+')
			
 
				+				break;
			
 
				+		}
			
 
				+
			
 
				+next:
			
 
				+		if (is_output_full(browser, row))
			
 
				+			break;
			
 
				+		node = next;
			
 
				+	}
			
 
				+out:
			
 
				+	return row - first_row;
			
 
				+}
			
 
				+
			
 
				+static char *hist_browser__folded_callchain_str(struct hist_browser *browser,
			
 
				+						struct callchain_list *chain,
			
 
				+						char *value_str, char *old_str)
			
 
				+{
			
 
				+	char bf[1024];
			
 
				+	const char *str;
			
 
				+	char *new;
			
 
				+
			
 
				+	str = callchain_list__sym_name(chain, bf, sizeof(bf),
			
 
				+				       browser->show_dso);
			
 
				+	if (old_str) {
			
 
				+		if (asprintf(&new, "%s%s%s", old_str,
			
 
				+			     symbol_conf.field_sep ?: ";", str) < 0)
			
 
				+			new = NULL;
			
 
				+	} else {
			
 
				+		if (value_str) {
			
 
				+			if (asprintf(&new, "%s %s", value_str, str) < 0)
			
 
				+				new = NULL;
			
 
				+		} else {
			
 
				+			if (asprintf(&new, "%s", str) < 0)
			
 
				+				new = NULL;
			
 
				+		}
			
 
				+	}
			
 
				+	return new;
			
 
				+}
			
 
				+
			
 
				+static int hist_browser__show_callchain_folded(struct hist_browser *browser,
			
 
				+					       struct rb_root *root,
			
 
				+					       unsigned short row, u64 total,
			
 
				+					       print_callchain_entry_fn print,
			
 
				+					       struct callchain_print_arg *arg,
			
 
				+					       check_output_full_fn is_output_full)
			
 
				+{
			
 
				+	struct rb_node *node;
			
 
				+	int first_row = row, offset = LEVEL_OFFSET_STEP;
			
 
				+	bool need_percent;
			
 
				+
			
 
				+	node = rb_first(root);
			
 
				+	need_percent = node && rb_next(node);
			
 
				+
			
 
				+	while (node) {
			
 
				+		struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node);
			
 
				+		struct rb_node *next = rb_next(node);
			
 
				+		struct callchain_list *chain, *first_chain = NULL;
			
 
				+		int first = true;
			
 
				+		char *value_str = NULL, *value_str_alloc = NULL;
			
 
				+		char *chain_str = NULL, *chain_str_alloc = NULL;
			
 
				+
			
 
				+		if (arg->row_offset != 0) {
			
 
				+			arg->row_offset--;
			
 
				+			goto next;
			
 
				+		}
			
 
				+
			
 
				+		if (need_percent) {
			
 
				+			char buf[64];
			
 
				+
			
 
				+			callchain_node__scnprintf_value(child, buf, sizeof(buf), total);
			
 
				+			if (asprintf(&value_str, "%s", buf) < 0) {
			
 
				+				value_str = (char *)"<...>";
			
 
				+				goto do_print;
			
 
				+			}
			
 
				+			value_str_alloc = value_str;
			
 
				+		}
			
 
				+
			
 
				+		list_for_each_entry(chain, &child->parent_val, list) {
			
 
				+			chain_str = hist_browser__folded_callchain_str(browser,
			
 
				+						chain, value_str, chain_str);
			
 
				+			if (first) {
			
 
				+				first = false;
			
 
				+				first_chain = chain;
			
 
				+			}
			
 
				+
			
 
				+			if (chain_str == NULL) {
			
 
				+				chain_str = (char *)"Not enough memory!";
			
 
				+				goto do_print;
			
 
				+			}
			
 
				+
			
 
				+			chain_str_alloc = chain_str;
			
 
				+		}
			
 
				+
			
 
				+		list_for_each_entry(chain, &child->val, list) {
			
 
				+			chain_str = hist_browser__folded_callchain_str(browser,
			
 
				+						chain, value_str, chain_str);
			
 
				+			if (first) {
			
 
				+				first = false;
			
 
				+				first_chain = chain;
			
 
				+			}
			
 
				+
			
 
				+			if (chain_str == NULL) {
			
 
				+				chain_str = (char *)"Not enough memory!";
			
 
				+				goto do_print;
			
 
				+			}
			
 
				+
			
 
				+			chain_str_alloc = chain_str;
			
 
				+		}
			
 
				+
			
 
				+do_print:
			
 
				+		print(browser, first_chain, chain_str, offset, row++, arg);
			
 
				+		free(value_str_alloc);
			
 
				+		free(chain_str_alloc);
			
 
				+
			
 
				+next:
			
 
				+		if (is_output_full(browser, row))
			
 
				+			break;
			
 
				+		node = next;
			
 
				+	}
			
 
				+
			
 
				+	return row - first_row;
			
 
				+}
			
 
				+
			
 
				 static int hist_browser__show_callchain(struct hist_browser *browser,
			
 
				 					struct rb_root *root, int level,
			
 
				 					unsigned short row, u64 total,
			
@@ -592,15 +859,12 @@ static int hist_browser__show_callchain(struct hist_browser *browser,
 
				 	while (node) {
			
 
				 		struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node);
			
 
				 		struct rb_node *next = rb_next(node);
			
 
				-		u64 cumul = callchain_cumul_hits(child);
			
 
				 		struct callchain_list *chain;
			
 
				 		char folded_sign = ' ';
			
 
				 		int first = true;
			
 
				 		int extra_offset = 0;
			
 
				 
			
 
				 		list_for_each_entry(chain, &child->val, list) {
			
 
				-			char bf[1024], *alloc_str;
			
 
				-			const char *str;
			
 
				 			bool was_first = first;
			
 
				 
			
 
				 			if (first)
			
@@ -609,31 +873,16 @@ static int hist_browser__show_callchain(struct hist_browser *browser,
 
				 				extra_offset = LEVEL_OFFSET_STEP;
			
 
				 
			
 
				 			folded_sign = callchain_list__folded(chain);
			
 
				-			if (arg->row_offset != 0) {
			
 
				-				arg->row_offset--;
			
 
				-				goto do_next;
			
 
				-			}
			
 
				-
			
 
				-			alloc_str = NULL;
			
 
				-			str = callchain_list__sym_name(chain, bf, sizeof(bf),
			
 
				-						       browser->show_dso);
			
 
				-
			
 
				-			if (was_first && need_percent) {
			
 
				-				double percent = cumul * 100.0 / total;
			
 
				 
			
 
				-				if (asprintf(&alloc_str, "%2.2f%% %s", percent, str) < 0)
			
 
				-					str = "Not enough memory!";
			
 
				-				else
			
 
				-					str = alloc_str;
			
 
				-			}
			
 
				+			row += hist_browser__show_callchain_list(browser, child,
			
 
				+							chain, row, total,
			
 
				+							was_first && need_percent,
			
 
				+							offset + extra_offset,
			
 
				+							print, arg);
			
 
				 
			
 
				-			print(browser, chain, str, offset + extra_offset, row, arg);
			
 
				-
			
 
				-			free(alloc_str);
			
 
				-
			
 
				-			if (is_output_full(browser, ++row))
			
 
				+			if (is_output_full(browser, row))
			
 
				 				goto out;
			
 
				-do_next:
			
 
				+
			
 
				 			if (folded_sign == '+')
			
 
				 				break;
			
 
				 		}
			
@@ -844,10 +1093,22 @@ static int hist_browser__show_entry(struct hist_browser *browser,
 
				 				total = entry->stat.period;
			
 
				 		}
			
 
				 
			
 
				-		printed += hist_browser__show_callchain(browser,
			
 
				+		if (callchain_param.mode == CHAIN_FLAT) {
			
 
				+			printed += hist_browser__show_callchain_flat(browser,
			
 
				+					&entry->sorted_chain, row, total,
			
 
				+					hist_browser__show_callchain_entry, &arg,
			
 
				+					hist_browser__check_output_full);
			
 
				+		} else if (callchain_param.mode == CHAIN_FOLDED) {
			
 
				+			printed += hist_browser__show_callchain_folded(browser,
			
 
				+					&entry->sorted_chain, row, total,
			
 
				+					hist_browser__show_callchain_entry, &arg,
			
 
				+					hist_browser__check_output_full);
			
 
				+		} else {
			
 
				+			printed += hist_browser__show_callchain(browser,
			
 
				 					&entry->sorted_chain, 1, row, total,
			
 
				 					hist_browser__show_callchain_entry, &arg,
			
 
				 					hist_browser__check_output_full);
			
 
				+		}
			
 
				 
			
 
				 		if (arg.is_current_entry)
			
 
				 			browser->he_selection = entry;
			
--- a/tools/perf/ui/gtk/hists.c
+++ b/tools/perf/ui/gtk/hists.c
@@ -89,8 +89,8 @@ void perf_gtk__init_hpp(void)
 
				 				perf_gtk__hpp_color_overhead_acc;
			
 
				 }
			
 
				 
			
 
				-static void perf_gtk__add_callchain(struct rb_root *root, GtkTreeStore *store,
			
 
				-				    GtkTreeIter *parent, int col, u64 total)
			
 
				+static void perf_gtk__add_callchain_flat(struct rb_root *root, GtkTreeStore *store,
			
 
				+					 GtkTreeIter *parent, int col, u64 total)
			
 
				 {
			
 
				 	struct rb_node *nd;
			
 
				 	bool has_single_node = (rb_first(root) == rb_last(root));
			
@@ -100,13 +100,132 @@ static void perf_gtk__add_callchain(struct rb_root *root, GtkTreeStore *store,
 
				 		struct callchain_list *chain;
			
 
				 		GtkTreeIter iter, new_parent;
			
 
				 		bool need_new_parent;
			
 
				-		double percent;
			
 
				-		u64 hits, child_total;
			
 
				 
			
 
				 		node = rb_entry(nd, struct callchain_node, rb_node);
			
 
				 
			
 
				-		hits = callchain_cumul_hits(node);
			
 
				-		percent = 100.0 * hits / total;
			
 
				+		new_parent = *parent;
			
 
				+		need_new_parent = !has_single_node;
			
 
				+
			
 
				+		callchain_node__make_parent_list(node);
			
 
				+
			
 
				+		list_for_each_entry(chain, &node->parent_val, list) {
			
 
				+			char buf[128];
			
 
				+
			
 
				+			gtk_tree_store_append(store, &iter, &new_parent);
			
 
				+
			
 
				+			callchain_node__scnprintf_value(node, buf, sizeof(buf), total);
			
 
				+			gtk_tree_store_set(store, &iter, 0, buf, -1);
			
 
				+
			
 
				+			callchain_list__sym_name(chain, buf, sizeof(buf), false);
			
 
				+			gtk_tree_store_set(store, &iter, col, buf, -1);
			
 
				+
			
 
				+			if (need_new_parent) {
			
 
				+				/*
			
 
				+				 * Only show the top-most symbol in a callchain
			
 
				+				 * if it's not the only callchain.
			
 
				+				 */
			
 
				+				new_parent = iter;
			
 
				+				need_new_parent = false;
			
 
				+			}
			
 
				+		}
			
 
				+
			
 
				+		list_for_each_entry(chain, &node->val, list) {
			
 
				+			char buf[128];
			
 
				+
			
 
				+			gtk_tree_store_append(store, &iter, &new_parent);
			
 
				+
			
 
				+			callchain_node__scnprintf_value(node, buf, sizeof(buf), total);
			
 
				+			gtk_tree_store_set(store, &iter, 0, buf, -1);
			
 
				+
			
 
				+			callchain_list__sym_name(chain, buf, sizeof(buf), false);
			
 
				+			gtk_tree_store_set(store, &iter, col, buf, -1);
			
 
				+
			
 
				+			if (need_new_parent) {
			
 
				+				/*
			
 
				+				 * Only show the top-most symbol in a callchain
			
 
				+				 * if it's not the only callchain.
			
 
				+				 */
			
 
				+				new_parent = iter;
			
 
				+				need_new_parent = false;
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+/*
				+ * Add one row per callchain node in 'root' below 'parent', with the whole
				+ * chain folded into a single string.
				+ *
				+ * For every node the symbol names of the entries on node->parent_val and
				+ * then node->val are concatenated, separated by symbol_conf.field_sep (or
				+ * "; " when that is unset).  Column 0 of the new row receives the text
				+ * written by callchain_node__scnprintf_value() and column 'col' receives
				+ * the folded string.
				+ *
				+ * If building the string fails the function returns right away, so nodes
				+ * that were not reached yet get no row.
				+ */
				+static void perf_gtk__add_callchain_folded(struct rb_root *root, GtkTreeStore *store,
				+					   GtkTreeIter *parent, int col, u64 total)
				+{
				+	struct rb_node *nd;
				+
				+	for (nd = rb_first(root); nd; nd = rb_next(nd)) {
				+		struct callchain_node *node;
				+		struct callchain_list *chain;
				+		GtkTreeIter iter;
				+		char buf[64];
				+		/* str_alloc always owns the string folded so far (NULL if none) */
				+		char *str, *str_alloc = NULL;
				+		bool first = true;
				+
				+		node = rb_entry(nd, struct callchain_node, rb_node);
				+
				+		/* presumably fills node->parent_val, which is walked next */
				+		callchain_node__make_parent_list(node);
				+
				+		list_for_each_entry(chain, &node->parent_val, list) {
				+			char name[1024];
				+
				+			callchain_list__sym_name(chain, name, sizeof(name), false);
				+
				+			if (asprintf(&str, "%s%s%s",
				+				     first ? "" : str_alloc,
				+				     first ? "" : symbol_conf.field_sep ?: "; ",
				+				     name) < 0) {
				+				/* do not leak what has been folded so far */
				+				free(str_alloc);
				+				return;
				+			}
				+
				+			first = false;
				+			free(str_alloc);
				+			str_alloc = str;
				+		}
				+
				+		list_for_each_entry(chain, &node->val, list) {
				+			char name[1024];
				+
				+			callchain_list__sym_name(chain, name, sizeof(name), false);
				+
				+			if (asprintf(&str, "%s%s%s",
				+				     first ? "" : str_alloc,
				+				     first ? "" : symbol_conf.field_sep ?: "; ",
				+				     name) < 0) {
				+				/* do not leak what has been folded so far */
				+				free(str_alloc);
				+				return;
				+			}
				+
				+			first = false;
				+			free(str_alloc);
				+			str_alloc = str;
				+		}
				+
				+		gtk_tree_store_append(store, &iter, parent);
				+
				+		callchain_node__scnprintf_value(node, buf, sizeof(buf), total);
				+		gtk_tree_store_set(store, &iter, 0, buf, -1);
				+
				+		/*
				+		 * When both lists were empty nothing was folded and 'str' was
				+		 * never assigned, so hand GTK an empty string instead of an
				+		 * uninitialized pointer.
				+		 */
				+		gtk_tree_store_set(store, &iter, col, str_alloc ?: "", -1);
				+
				+		free(str_alloc);
				+	}
				+}
			
 
				+
			
 
				+static void perf_gtk__add_callchain_graph(struct rb_root *root, GtkTreeStore *store,
			
 
				+					  GtkTreeIter *parent, int col, u64 total)
			
 
				+{
			
 
				+	struct rb_node *nd;
			
 
				+	bool has_single_node = (rb_first(root) == rb_last(root));
			
 
				+
			
 
				+	for (nd = rb_first(root); nd; nd = rb_next(nd)) {
			
 
				+		struct callchain_node *node;
			
 
				+		struct callchain_list *chain;
			
 
				+		GtkTreeIter iter, new_parent;
			
 
				+		bool need_new_parent;
			
 
				+		u64 child_total;
			
 
				+
			
 
				+		node = rb_entry(nd, struct callchain_node, rb_node);
			
 
				 
			
 
				 		new_parent = *parent;
			
 
				 		need_new_parent = !has_single_node && (node->val_nr > 1);
			
@@ -116,7 +235,7 @@ static void perf_gtk__add_callchain(struct rb_root *root, GtkTreeStore *store,
 
				 
			
 
				 			gtk_tree_store_append(store, &iter, &new_parent);
			
 
				 
			
 
				-			scnprintf(buf, sizeof(buf), "%5.2f%%", percent);
			
 
				+			callchain_node__scnprintf_value(node, buf, sizeof(buf), total);
			
 
				 			gtk_tree_store_set(store, &iter, 0, buf, -1);
			
 
				 
			
 
				 			callchain_list__sym_name(chain, buf, sizeof(buf), false);
			
@@ -138,11 +257,22 @@ static void perf_gtk__add_callchain(struct rb_root *root, GtkTreeStore *store,
 
				 			child_total = total;
			
 
				 
			
 
				 		/* Now 'iter' contains info of the last callchain_list */
			
 
				-		perf_gtk__add_callchain(&node->rb_root, store, &iter, col,
			
 
				-					child_total);
			
 
				+		perf_gtk__add_callchain_graph(&node->rb_root, store, &iter, col,
			
 
				+					      child_total);
			
 
				 	}
			
 
				 }
			
 
				 
			
 
				+static void perf_gtk__add_callchain(struct rb_root *root, GtkTreeStore *store,
			
 
				+				    GtkTreeIter *parent, int col, u64 total)
			
 
				+{
			
 
				+	if (callchain_param.mode == CHAIN_FLAT)
			
 
				+		perf_gtk__add_callchain_flat(root, store, parent, col, total);
			
 
				+	else if (callchain_param.mode == CHAIN_FOLDED)
			
 
				+		perf_gtk__add_callchain_folded(root, store, parent, col, total);
			
 
				+	else
			
 
				+		perf_gtk__add_callchain_graph(root, store, parent, col, total);
			
 
				+}
			
 
				+
			
 
				 static void on_row_activated(GtkTreeView *view, GtkTreePath *path,
			
 
				 			     GtkTreeViewColumn *col __maybe_unused,
			
 
				 			     gpointer user_data __maybe_unused)
			
--- a/tools/perf/ui/stdio/hist.c
+++ b/tools/perf/ui/stdio/hist.c
@@ -34,10 +34,10 @@ static size_t ipchain__fprintf_graph_line(FILE *fp, int depth, int depth_mask,
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				-static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_list *chain,
			
 
				+static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_node *node,
			
 
				+				     struct callchain_list *chain,
			
 
				 				     int depth, int depth_mask, int period,
			
 
				-				     u64 total_samples, u64 hits,
			
 
				-				     int left_margin)
			
 
				+				     u64 total_samples, int left_margin)
			
 
				 {
			
 
				 	int i;
			
 
				 	size_t ret = 0;
			
@@ -50,10 +50,9 @@ static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_list *chain,
 
				 		else
			
 
				 			ret += fprintf(fp, " ");
			
 
				 		if (!period && i == depth - 1) {
			
 
				-			double percent;
			
 
				-
			
 
				-			percent = hits * 100.0 / total_samples;
			
 
				-			ret += percent_color_fprintf(fp, "--%2.2f%%-- ", percent);
			
 
				+			ret += fprintf(fp, "--");
			
 
				+			ret += callchain_node__fprintf_value(node, fp, total_samples);
			
 
				+			ret += fprintf(fp, "--");
			
 
				 		} else
			
 
				 			ret += fprintf(fp, "%s", "          ");
			
 
				 	}
			
@@ -82,13 +81,14 @@ static size_t __callchain__fprintf_graph(FILE *fp, struct rb_root *root,
 
				 					 int depth_mask, int left_margin)
			
 
				 {
			
 
				 	struct rb_node *node, *next;
			
 
				-	struct callchain_node *child;
			
 
				+	struct callchain_node *child = NULL;
			
 
				 	struct callchain_list *chain;
			
 
				 	int new_depth_mask = depth_mask;
			
 
				 	u64 remaining;
			
 
				 	size_t ret = 0;
			
 
				 	int i;
			
 
				 	uint entries_printed = 0;
			
 
				+	int cumul_count = 0;
			
 
				 
			
 
				 	remaining = total_samples;
			
 
				 
			
@@ -100,6 +100,7 @@ static size_t __callchain__fprintf_graph(FILE *fp, struct rb_root *root,
 
				 		child = rb_entry(node, struct callchain_node, rb_node);
			
 
				 		cumul = callchain_cumul_hits(child);
			
 
				 		remaining -= cumul;
			
 
				+		cumul_count += callchain_cumul_counts(child);
			
 
				 
			
 
				 		/*
			
 
				 		 * The depth mask manages the output of pipes that show
			
@@ -120,10 +121,9 @@ static size_t __callchain__fprintf_graph(FILE *fp, struct rb_root *root,
 
				 						   left_margin);
			
 
				 		i = 0;
			
 
				 		list_for_each_entry(chain, &child->val, list) {
			
 
				-			ret += ipchain__fprintf_graph(fp, chain, depth,
			
 
				+			ret += ipchain__fprintf_graph(fp, child, chain, depth,
			
 
				 						      new_depth_mask, i++,
			
 
				 						      total_samples,
			
 
				-						      cumul,
			
 
				 						      left_margin);
			
 
				 		}
			
 
				 
			
@@ -143,14 +143,23 @@ static size_t __callchain__fprintf_graph(FILE *fp, struct rb_root *root,
 
				 
			
 
				 	if (callchain_param.mode == CHAIN_GRAPH_REL &&
			
 
				 		remaining && remaining != total_samples) {
			
 
				+		struct callchain_node rem_node = {
			
 
				+			.hit = remaining,
			
 
				+		};
			
 
				 
			
 
				 		if (!rem_sq_bracket)
			
 
				 			return ret;
			
 
				 
			
 
				+		if (callchain_param.value == CCVAL_COUNT && child && child->parent) {
			
 
				+			rem_node.count = child->parent->children_count - cumul_count;
			
 
				+			if (rem_node.count <= 0)
			
 
				+				return ret;
			
 
				+		}
			
 
				+
			
 
				 		new_depth_mask &= ~(1 << (depth - 1));
			
 
				-		ret += ipchain__fprintf_graph(fp, &rem_hits, depth,
			
 
				+		ret += ipchain__fprintf_graph(fp, &rem_node, &rem_hits, depth,
			
 
				 					      new_depth_mask, 0, total_samples,
			
 
				-					      remaining, left_margin);
			
 
				+					      left_margin);
			
 
				 	}
			
 
				 
			
 
				 	return ret;
			
@@ -243,12 +252,11 @@ static size_t callchain__fprintf_flat(FILE *fp, struct rb_root *tree,
 
				 	struct rb_node *rb_node = rb_first(tree);
			
 
				 
			
 
				 	while (rb_node) {
			
 
				-		double percent;
			
 
				-
			
 
				 		chain = rb_entry(rb_node, struct callchain_node, rb_node);
			
 
				-		percent = chain->hit * 100.0 / total_samples;
			
 
				 
			
 
				-		ret = percent_color_fprintf(fp, "           %6.2f%%\n", percent);
			
 
				+		ret += fprintf(fp, "           ");
			
 
				+		ret += callchain_node__fprintf_value(chain, fp, total_samples);
			
 
				+		ret += fprintf(fp, "\n");
			
 
				 		ret += __callchain__fprintf_flat(fp, chain, total_samples);
			
 
				 		ret += fprintf(fp, "\n");
			
 
				 		if (++entries_printed == callchain_param.print_limit)
			
@@ -260,6 +268,57 @@ static size_t callchain__fprintf_flat(FILE *fp, struct rb_root *tree,
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				+static size_t __callchain__fprintf_folded(FILE *fp, struct callchain_node *node)
			
 
				+{
			
 
				+	const char *sep = symbol_conf.field_sep ?: ";";
			
 
				+	struct callchain_list *chain;
			
 
				+	size_t ret = 0;
			
 
				+	char bf[1024];
			
 
				+	bool first;
			
 
				+
			
 
				+	if (!node)
			
 
				+		return 0;
			
 
				+
			
 
				+	ret += __callchain__fprintf_folded(fp, node->parent);
			
 
				+
			
 
				+	first = (ret == 0);
			
 
				+	list_for_each_entry(chain, &node->val, list) {
			
 
				+		if (chain->ip >= PERF_CONTEXT_MAX)
			
 
				+			continue;
			
 
				+		ret += fprintf(fp, "%s%s", first ? "" : sep,
			
 
				+			       callchain_list__sym_name(chain,
			
 
				+						bf, sizeof(bf), false));
			
 
				+		first = false;
			
 
				+	}
			
 
				+
			
 
				+	return ret;
			
 
				+}
			
 
				+
			
 
				+static size_t callchain__fprintf_folded(FILE *fp, struct rb_root *tree,
			
 
				+					u64 total_samples)
			
 
				+{
			
 
				+	size_t ret = 0;
			
 
				+	u32 entries_printed = 0;
			
 
				+	struct callchain_node *chain;
			
 
				+	struct rb_node *rb_node = rb_first(tree);
			
 
				+
			
 
				+	while (rb_node) {
			
 
				+
			
 
				+		chain = rb_entry(rb_node, struct callchain_node, rb_node);
			
 
				+
			
 
				+		ret += callchain_node__fprintf_value(chain, fp, total_samples);
			
 
				+		ret += fprintf(fp, " ");
			
 
				+		ret += __callchain__fprintf_folded(fp, chain);
			
 
				+		ret += fprintf(fp, "\n");
			
 
				+		if (++entries_printed == callchain_param.print_limit)
			
 
				+			break;
			
 
				+
			
 
				+		rb_node = rb_next(rb_node);
			
 
				+	}
			
 
				+
			
 
				+	return ret;
			
 
				+}
			
 
				+
			
 
				 static size_t hist_entry_callchain__fprintf(struct hist_entry *he,
			
 
				 					    u64 total_samples, int left_margin,
			
 
				 					    FILE *fp)
			
@@ -278,6 +337,9 @@ static size_t hist_entry_callchain__fprintf(struct hist_entry *he,
 
				 	case CHAIN_FLAT:
			
 
				 		return callchain__fprintf_flat(fp, &he->sorted_chain, total_samples);
			
 
				 		break;
			
 
				+	case CHAIN_FOLDED:
			
 
				+		return callchain__fprintf_folded(fp, &he->sorted_chain, total_samples);
			
 
				+		break;
			
 
				 	case CHAIN_NONE:
			
 
				 		break;
			
 
				 	default:
			
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -21,6 +21,7 @@ libperf-y += parse-events.o
 
				 libperf-y += perf_regs.o
			
 
				 libperf-y += path.o
			
 
				 libperf-y += rbtree.o
			
 
				+libperf-y += libstring.o
			
 
				 libperf-y += bitmap.o
			
 
				 libperf-y += hweight.o
			
 
				 libperf-y += run-command.o
			
@@ -88,6 +89,7 @@ libperf-y += parse-branch-options.o
 
				 libperf-y += parse-regs-options.o
			
 
				 
			
 
				 libperf-$(CONFIG_LIBBPF) += bpf-loader.o
			
 
				+libperf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o
			
 
				 libperf-$(CONFIG_LIBELF) += symbol-elf.o
			
 
				 libperf-$(CONFIG_LIBELF) += probe-file.o
			
 
				 libperf-$(CONFIG_LIBELF) += probe-event.o
			
@@ -138,6 +140,7 @@ $(OUTPUT)util/pmu.o: $(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-bison.c
 
				 
			
 
				 CFLAGS_find_next_bit.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
			
 
				 CFLAGS_rbtree.o        += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
			
 
				+CFLAGS_libstring.o     += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
			
 
				 CFLAGS_hweight.o       += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
			
 
				 CFLAGS_parse-events.o  += -Wno-redundant-decls
			
 
				 
			
@@ -153,6 +156,10 @@ $(OUTPUT)util/rbtree.o: ../lib/rbtree.c FORCE
 
				 	$(call rule_mkdir)
			
 
				 	$(call if_changed_dep,cc_o_c)
			
 
				 
			
 
				+$(OUTPUT)util/libstring.o: ../lib/string.c FORCE
			
 
				+	$(call rule_mkdir)
			
 
				+	$(call if_changed_dep,cc_o_c)
			
 
				+
			
 
				 $(OUTPUT)util/hweight.o: ../lib/hweight.c FORCE
			
 
				 	$(call rule_mkdir)
			
 
				 	$(call if_changed_dep,cc_o_c)
			
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -5,11 +5,15 @@
 
				  * Copyright (C) 2015 Huawei Inc.
			
 
				  */
			
 
				 
			
 
				+#include <linux/bpf.h>
			
 
				 #include <bpf/libbpf.h>
			
 
				 #include <linux/err.h>
			
 
				+#include <linux/string.h>
			
 
				 #include "perf.h"
			
 
				 #include "debug.h"
			
 
				 #include "bpf-loader.h"
			
 
				+#include "bpf-prologue.h"
			
 
				+#include "llvm-utils.h"
			
 
				 #include "probe-event.h"
			
 
				 #include "probe-finder.h" // for MAX_PROBES
			
 
				 #include "llvm-utils.h"
			
@@ -32,6 +36,10 @@ DEFINE_PRINT_FN(debug, 1)
 
				 
			
 
				 struct bpf_prog_priv {
			
 
				 	struct perf_probe_event pev;
			
 
				+	bool need_prologue;
			
 
				+	struct bpf_insn *insns_buf;
			
 
				+	int nr_types;
			
 
				+	int *type_mapping;
			
 
				 };
			
 
				 
			
 
				 static bool libbpf_initialized;
			
@@ -106,9 +114,178 @@ bpf_prog_priv__clear(struct bpf_program *prog __maybe_unused,
 
				 	struct bpf_prog_priv *priv = _priv;
			
 
				 
			
 
				 	cleanup_perf_probe_events(&priv->pev, 1);
			
 
				+	zfree(&priv->insns_buf);
			
 
				+	zfree(&priv->type_mapping);
			
 
				 	free(priv);
			
 
				 }
			
 
				 
			
 
				+static int
			
 
				+config__exec(const char *value, struct perf_probe_event *pev)
			
 
				+{
			
 
				+	pev->uprobes = true;
			
 
				+	pev->target = strdup(value);
			
 
				+	if (!pev->target)
			
 
				+		return -ENOMEM;
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+static int
			
 
				+config__module(const char *value, struct perf_probe_event *pev)
			
 
				+{
			
 
				+	pev->uprobes = false;
			
 
				+	pev->target = strdup(value);
			
 
				+	if (!pev->target)
			
 
				+		return -ENOMEM;
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+static int
			
 
				+config__bool(const char *value,
			
 
				+	     bool *pbool, bool invert)
			
 
				+{
			
 
				+	int err;
			
 
				+	bool bool_value;
			
 
				+
			
 
				+	if (!pbool)
			
 
				+		return -EINVAL;
			
 
				+
			
 
				+	err = strtobool(value, &bool_value);
			
 
				+	if (err)
			
 
				+		return err;
			
 
				+
			
 
				+	*pbool = invert ? !bool_value : bool_value;
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+static int
			
 
				+config__inlines(const char *value,
			
 
				+		struct perf_probe_event *pev __maybe_unused)
			
 
				+{
			
 
				+	return config__bool(value, &probe_conf.no_inlines, true);
			
 
				+}
			
 
				+
			
 
				+static int
			
 
				+config__force(const char *value,
			
 
				+	      struct perf_probe_event *pev __maybe_unused)
			
 
				+{
			
 
				+	return config__bool(value, &probe_conf.force_add, false);
			
 
				+}
			
 
				+
			
 
				+static struct {
			
 
				+	const char *key;
			
 
				+	const char *usage;
			
 
				+	const char *desc;
			
 
				+	int (*func)(const char *, struct perf_probe_event *);
			
 
				+} bpf_config_terms[] = {
			
 
				+	{
			
 
				+		.key	= "exec",
			
 
				+		.usage	= "exec=<full path of file>",
			
 
				+		.desc	= "Set uprobe target",
			
 
				+		.func	= config__exec,
			
 
				+	},
			
 
				+	{
			
 
				+		.key	= "module",
			
 
				+		.usage	= "module=<module name>    ",
			
 
				+		.desc	= "Set kprobe module",
			
 
				+		.func	= config__module,
			
 
				+	},
			
 
				+	{
			
 
				+		.key	= "inlines",
			
 
				+		.usage	= "inlines=[yes|no]        ",
			
 
				+		.desc	= "Probe at inline symbol",
			
 
				+		.func	= config__inlines,
			
 
				+	},
			
 
				+	{
			
 
				+		.key	= "force",
			
 
				+		.usage	= "force=[yes|no]          ",
			
 
				+		.desc	= "Forcibly add events with existing name",
			
 
				+		.func	= config__force,
			
 
				+	},
			
 
				+};
			
 
				+
			
 
				+static int
			
 
				+do_config(const char *key, const char *value,
			
 
				+	  struct perf_probe_event *pev)
			
 
				+{
			
 
				+	unsigned int i;
			
 
				+
			
 
				+	pr_debug("config bpf program: %s=%s\n", key, value);
			
 
				+	for (i = 0; i < ARRAY_SIZE(bpf_config_terms); i++)
			
 
				+		if (strcmp(key, bpf_config_terms[i].key) == 0)
			
 
				+			return bpf_config_terms[i].func(value, pev);
			
 
				+
			
 
				+	pr_debug("BPF: ERROR: invalid config option in object: %s=%s\n",
			
 
				+		 key, value);
			
 
				+
			
 
				+	pr_debug("\nHint: Currently valid options are:\n");
			
 
				+	for (i = 0; i < ARRAY_SIZE(bpf_config_terms); i++)
			
 
				+		pr_debug("\t%s:\t%s\n", bpf_config_terms[i].usage,
			
 
				+			 bpf_config_terms[i].desc);
			
 
				+	pr_debug("\n");
			
 
				+
			
 
				+	return -BPF_LOADER_ERRNO__CONFIG_TERM;
			
 
				+}
			
 
				+
			
 
				+static const char *
			
 
				+parse_config_kvpair(const char *config_str, struct perf_probe_event *pev)
			
 
				+{
			
 
				+	char *text = strdup(config_str);
			
 
				+	char *sep, *line;
			
 
				+	const char *main_str = NULL;
			
 
				+	int err = 0;
			
 
				+
			
 
				+	if (!text) {
			
 
				+		pr_debug("No enough memory: dup config_str failed\n");
			
 
				+		return ERR_PTR(-ENOMEM);
			
 
				+	}
			
 
				+
			
 
				+	line = text;
			
 
				+	while ((sep = strchr(line, ';'))) {
			
 
				+		char *equ;
			
 
				+
			
 
				+		*sep = '\0';
			
 
				+		equ = strchr(line, '=');
			
 
				+		if (!equ) {
			
 
				+			pr_warning("WARNING: invalid config in BPF object: %s\n",
			
 
				+				   line);
			
 
				+			pr_warning("\tShould be 'key=value'.\n");
			
 
				+			goto nextline;
			
 
				+		}
			
 
				+		*equ = '\0';
			
 
				+
			
 
				+		err = do_config(line, equ + 1, pev);
			
 
				+		if (err)
			
 
				+			break;
			
 
				+nextline:
			
 
				+		line = sep + 1;
			
 
				+	}
			
 
				+
			
 
				+	if (!err)
			
 
				+		main_str = config_str + (line - text);
			
 
				+	free(text);
			
 
				+
			
 
				+	return err ? ERR_PTR(err) : main_str;
			
 
				+}
			
 
				+
			
 
				+static int
			
 
				+parse_config(const char *config_str, struct perf_probe_event *pev)
			
 
				+{
			
 
				+	int err;
			
 
				+	const char *main_str = parse_config_kvpair(config_str, pev);
			
 
				+
			
 
				+	if (IS_ERR(main_str))
			
 
				+		return PTR_ERR(main_str);
			
 
				+
			
 
				+	err = parse_perf_probe_command(main_str, pev);
			
 
				+	if (err < 0) {
			
 
				+		pr_debug("bpf: '%s' is not a valid config string\n",
			
 
				+			 config_str);
			
 
				+		/* Parse failed, so there is no need to clear pev. */
			
 
				+		return -BPF_LOADER_ERRNO__CONFIG;
			
 
				+	}
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				 static int
			
 
				 config_bpf_program(struct bpf_program *prog)
			
 
				 {
			
@@ -117,6 +294,10 @@ config_bpf_program(struct bpf_program *prog)
 
				 	const char *config_str;
			
 
				 	int err;
			
 
				 
			
 
				+	/* Initialize per-program probing setting */
			
 
				+	probe_conf.no_inlines = false;
			
 
				+	probe_conf.force_add = false;
			
 
				+
			
 
				 	config_str = bpf_program__title(prog, false);
			
 
				 	if (IS_ERR(config_str)) {
			
 
				 		pr_debug("bpf: unable to get title for program\n");
			
@@ -131,13 +312,9 @@ config_bpf_program(struct bpf_program *prog)
 
				 	pev = &priv->pev;
			
 
				 
			
 
				 	pr_debug("bpf: config program '%s'\n", config_str);
			
 
				-	err = parse_perf_probe_command(config_str, pev);
			
 
				-	if (err < 0) {
			
 
				-		pr_debug("bpf: '%s' is not a valid config string\n",
			
 
				-			 config_str);
			
 
				-		err = -BPF_LOADER_ERRNO__CONFIG;
			
 
				+	err = parse_config(config_str, pev);
			
 
				+	if (err)
			
 
				 		goto errout;
			
 
				-	}
			
 
				 
			
 
				 	if (pev->group && strcmp(pev->group, PERF_BPF_PROBE_GROUP)) {
			
 
				 		pr_debug("bpf: '%s': group for event is set and not '%s'.\n",
			
@@ -197,6 +374,220 @@ static int bpf__prepare_probe(void)
 
				 	return err;
			
 
				 }
			
 
				 
			
 
				+/*
				+ * Pre-processor callback registered through bpf_program__set_prep(): build
				+ * instance 'n' of 'prog' by putting a generated prologue in front of the
				+ * original instructions.
				+ *
				+ * @prog:           program being loaded; its private data holds the probe
				+ *                  event, the type mapping and the scratch instruction buffer
				+ * @n:              prologue type, must be in [0, priv->nr_types)
				+ * @orig_insns:     original instructions of the program
				+ * @orig_insns_cnt: number of entries in @orig_insns
				+ * @res:            receives the new instruction buffer and count on success
				+ *
				+ * Returns 0 on success, -BPF_LOADER_ERRNO__PROLOGUE on internal errors, or
				+ * the error reported by bpf__gen_prologue().
				+ */
				+static int
				+preproc_gen_prologue(struct bpf_program *prog, int n,
				+		     struct bpf_insn *orig_insns, int orig_insns_cnt,
				+		     struct bpf_prog_prep_result *res)
				+{
				+	struct probe_trace_event *tev;
				+	struct perf_probe_event *pev;
				+	struct bpf_prog_priv *priv;
				+	struct bpf_insn *buf;
				+	size_t prologue_cnt = 0;
				+	int i, err;
				+
				+	err = bpf_program__get_private(prog, (void **)&priv);
				+	if (err || !priv)
				+		goto errout;
				+
				+	pev = &priv->pev;
				+
				+	if (n < 0 || n >= priv->nr_types)
				+		goto errout;
				+
				+	/* Find a tev that belongs to that type */
				+	for (i = 0; i < pev->ntevs; i++) {
				+		if (priv->type_mapping[i] == n)
				+			break;
				+	}
				+
				+	if (i >= pev->ntevs) {
				+		pr_debug("Internal error: prologue type %d not found\n", n);
				+		return -BPF_LOADER_ERRNO__PROLOGUE;
				+	}
				+
				+	tev = &pev->tevs[i];
				+
				+	/*
				+	 * NOTE(review): assumes orig_insns_cnt <= BPF_MAXINSNS, so that the
				+	 * budget passed below is not negative and the memcpy() further down
				+	 * stays inside insns_buf - confirm against the caller.
				+	 */
				+	buf = priv->insns_buf;
				+	err = bpf__gen_prologue(tev->args, tev->nargs,
				+				buf, &prologue_cnt,
				+				BPF_MAXINSNS - orig_insns_cnt);
				+	if (err) {
				+		const char *title;
				+
				+		/*
				+		 * bpf_program__title() reports failure through an error
				+		 * pointer (config_bpf_program() tests it with IS_ERR()),
				+		 * so a plain NULL check is not enough before using it
				+		 * with %s.
				+		 */
				+		title = bpf_program__title(prog, false);
				+		if (!title || IS_ERR(title))
				+			title = "[unknown]";
				+
				+		pr_debug("Failed to generate prologue for program %s\n",
				+			 title);
				+		return err;
				+	}
				+
				+	/* The original program follows directly after the prologue */
				+	memcpy(&buf[prologue_cnt], orig_insns,
				+	       sizeof(struct bpf_insn) * orig_insns_cnt);
				+
				+	res->new_insn_ptr = buf;
				+	res->new_insn_cnt = prologue_cnt + orig_insns_cnt;
				+	res->pfd = NULL;
				+	return 0;
				+
				+errout:
				+	pr_debug("Internal error in preproc_gen_prologue\n");
				+	return -BPF_LOADER_ERRNO__PROLOGUE;
				+}
			
 
				+
			
 
				+/*
			
 
				+ * compare_tev_args is reflexive, transitive and antisymmetric.
			
 
				+ * I can prove it, but this margin is too narrow to contain the proof.
			
 
				+ */
			
 
				+static int compare_tev_args(const void *ptev1, const void *ptev2)
			
 
				+{
			
 
				+	int i, ret;
			
 
				+	const struct probe_trace_event *tev1 =
			
 
				+		*(const struct probe_trace_event **)ptev1;
			
 
				+	const struct probe_trace_event *tev2 =
			
 
				+		*(const struct probe_trace_event **)ptev2;
			
 
				+
			
 
				+	ret = tev2->nargs - tev1->nargs;
			
 
				+	if (ret)
			
 
				+		return ret;
			
 
				+
			
 
				+	for (i = 0; i < tev1->nargs; i++) {
			
 
				+		struct probe_trace_arg *arg1, *arg2;
			
 
				+		struct probe_trace_arg_ref *ref1, *ref2;
			
 
				+
			
 
				+		arg1 = &tev1->args[i];
			
 
				+		arg2 = &tev2->args[i];
			
 
				+
			
 
				+		ret = strcmp(arg1->value, arg2->value);
			
 
				+		if (ret)
			
 
				+			return ret;
			
 
				+
			
 
				+		ref1 = arg1->ref;
			
 
				+		ref2 = arg2->ref;
			
 
				+
			
 
				+		while (ref1 && ref2) {
			
 
				+			ret = ref2->offset - ref1->offset;
			
 
				+			if (ret)
			
 
				+				return ret;
			
 
				+
			
 
				+			ref1 = ref1->next;
			
 
				+			ref2 = ref2->next;
			
 
				+		}
			
 
				+
			
 
				+		if (ref1 || ref2)
			
 
				+			return ref2 ? 1 : -1;
			
 
				+	}
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+/*
				+ * Assign a type number to each tev in a pev.
				+ *
				+ * Tevs whose arguments compare equal under compare_tev_args() get the same
				+ * type number; types are numbered from 0 in sorted order.
				+ *
				+ * @pev:      probe event whose tevs are classified
				+ * @mapping:  array with one slot per tev in @pev; mapping[i] receives the
				+ *            type of pev->tevs[i]
				+ * @nr_types: set to the number of distinct types
				+ *
				+ * Returns 0 on success or -ENOMEM when the temporary array cannot be
				+ * allocated.
				+ */
				+static int map_prologue(struct perf_probe_event *pev, int *mapping,
				+			int *nr_types)
				+{
				+	int i, type = 0;
				+	struct probe_trace_event **ptevs;
				+
				+	size_t array_sz = sizeof(*ptevs) * pev->ntevs;
				+
				+	ptevs = malloc(array_sz);
				+	if (!ptevs) {
				+		pr_debug("No enough memory: alloc ptevs failed\n");
				+		return -ENOMEM;
				+	}
				+
				+	pr_debug("In map_prologue, ntevs=%d\n", pev->ntevs);
				+	for (i = 0; i < pev->ntevs; i++)
				+		ptevs[i] = &pev->tevs[i];
				+
				+	/* Sort pointers so that tevs with equal arguments become adjacent */
				+	qsort(ptevs, pev->ntevs, sizeof(*ptevs),
				+	      compare_tev_args);
				+
				+	for (i = 0; i < pev->ntevs; i++) {
				+		int n;
				+
				+		/* Index of this tev in the original, unsorted array */
				+		n = ptevs[i] - pev->tevs;
				+		if (i == 0) {
				+			mapping[n] = type;
				+			pr_debug("mapping[%d]=%d\n", n, type);
				+			continue;
				+		}
				+
				+		/* A new type starts whenever the arguments differ from the previous tev */
				+		if (compare_tev_args(ptevs + i, ptevs + i - 1) == 0)
				+			mapping[n] = type;
				+		else
				+			mapping[n] = ++type;
				+
				+		pr_debug("mapping[%d]=%d\n", n, mapping[n]);
				+	}
				+	free(ptevs);
				+	*nr_types = type + 1;
				+
				+	return 0;
				+}
			
 
				+
			
 
				+static int hook_load_preprocessor(struct bpf_program *prog)
			
 
				+{
			
 
				+	struct perf_probe_event *pev;
			
 
				+	struct bpf_prog_priv *priv;
			
 
				+	bool need_prologue = false;
			
 
				+	int err, i;
			
 
				+
			
 
				+	err = bpf_program__get_private(prog, (void **)&priv);
			
 
				+	if (err || !priv) {
			
 
				+		pr_debug("Internal error when hook preprocessor\n");
			
 
				+		return -BPF_LOADER_ERRNO__INTERNAL;
			
 
				+	}
			
 
				+
			
 
				+	pev = &priv->pev;
			
 
				+	for (i = 0; i < pev->ntevs; i++) {
			
 
				+		struct probe_trace_event *tev = &pev->tevs[i];
			
 
				+
			
 
				+		if (tev->nargs > 0) {
			
 
				+			need_prologue = true;
			
 
				+			break;
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * None of the tevs has arguments, so we don't need to generate
			
 
				+	 * a prologue.
			
 
				+	 */
			
 
				+	if (!need_prologue) {
			
 
				+		priv->need_prologue = false;
			
 
				+		return 0;
			
 
				+	}
			
 
				+
			
 
				+	priv->need_prologue = true;
			
 
				+	priv->insns_buf = malloc(sizeof(struct bpf_insn) * BPF_MAXINSNS);
			
 
				+	if (!priv->insns_buf) {
			
 
				+		pr_debug("No enough memory: alloc insns_buf failed\n");
			
 
				+		return -ENOMEM;
			
 
				+	}
			
 
				+
			
 
				+	priv->type_mapping = malloc(sizeof(int) * pev->ntevs);
			
 
				+	if (!priv->type_mapping) {
			
 
				+		pr_debug("No enough memory: alloc type_mapping failed\n");
			
 
				+		return -ENOMEM;
			
 
				+	}
			
 
				+	memset(priv->type_mapping, -1,
			
 
				+	       sizeof(int) * pev->ntevs);
			
 
				+
			
 
				+	err = map_prologue(pev, priv->type_mapping, &priv->nr_types);
			
 
				+	if (err)
			
 
				+		return err;
			
 
				+
			
 
				+	err = bpf_program__set_prep(prog, priv->nr_types,
			
 
				+				    preproc_gen_prologue);
			
 
				+	return err;
			
 
				+}
			
 
				+
			
 
				 int bpf__probe(struct bpf_object *obj)
			
 
				 {
			
 
				 	int err = 0;
			
@@ -231,6 +622,18 @@ int bpf__probe(struct bpf_object *obj)
 
				 			pr_debug("bpf_probe: failed to apply perf probe events");
			
 
				 			goto out;
			
 
				 		}
			
 
				+
			
 
				+		/*
			
 
				+		 * After probing, let's consider prologue, which
			
 
				+		 * adds program fetcher to BPF programs.
			
 
				+		 *
			
 
				+		 * hook_load_preprocessor() hooks pre-processor
			
 
				+		 * to bpf_program, let it generate prologue
			
 
				+		 * dynamically during loading.
			
 
				+		 */
			
 
				+		err = hook_load_preprocessor(prog);
			
 
				+		if (err)
			
 
				+			goto out;
			
 
				 	}
			
 
				 out:
			
 
				 	return err < 0 ? err : 0;
			
@@ -314,7 +717,14 @@ int bpf__foreach_tev(struct bpf_object *obj,
 
				 		for (i = 0; i < pev->ntevs; i++) {
			
 
				 			tev = &pev->tevs[i];
			
 
				 
			
 
				-			fd = bpf_program__fd(prog);
			
 
				+			if (priv->need_prologue) {
			
 
				+				int type = priv->type_mapping[i];
			
 
				+
			
 
				+				fd = bpf_program__nth_fd(prog, type);
			
 
				+			} else {
			
 
				+				fd = bpf_program__fd(prog);
			
 
				+			}
			
 
				+
			
 
				 			if (fd < 0) {
			
 
				 				pr_debug("bpf: failed to get file descriptor\n");
			
 
				 				return fd;
			
@@ -340,6 +750,10 @@ static const char *bpf_loader_strerror_table[NR_ERRNO] = {
 
				 	[ERRCODE_OFFSET(EVENTNAME)]	= "No event name found in config string",
			
 
				 	[ERRCODE_OFFSET(INTERNAL)]	= "BPF loader internal error",
			
 
				 	[ERRCODE_OFFSET(COMPILE)]	= "Error when compiling BPF scriptlet",
			
 
				+	[ERRCODE_OFFSET(CONFIG_TERM)]	= "Invalid config term in config string",
			
 
				+	[ERRCODE_OFFSET(PROLOGUE)]	= "Failed to generate prologue",
			
 
				+	[ERRCODE_OFFSET(PROLOGUE2BIG)]	= "Prologue too big for program",
			
 
				+	[ERRCODE_OFFSET(PROLOGUEOOB)]	= "Offset out of bound for prologue",
			
 
				 };
			
 
				 
			
 
				 static int
			
@@ -420,7 +834,11 @@ int bpf__strerror_probe(struct bpf_object *obj __maybe_unused,
 
				 			int err, char *buf, size_t size)
			
 
				 {
			
 
				 	bpf__strerror_head(err, buf, size);
			
 
				-	bpf__strerror_entry(EEXIST, "Probe point exist. Try use 'perf probe -d \"*\"'");
			
 
				+	case BPF_LOADER_ERRNO__CONFIG_TERM: {
			
 
				+		scnprintf(buf, size, "%s (add -v to see detail)", emsg);
			
 
				+		break;
			
 
				+	}
			
 
				+	bpf__strerror_entry(EEXIST, "Probe point exist. Try 'perf probe -d \"*\"' and set 'force=yes'");
			
 
				 	bpf__strerror_entry(EACCES, "You need to be root");
			
 
				 	bpf__strerror_entry(EPERM, "You need to be root, and /proc/sys/kernel/kptr_restrict should be 0");
			
 
				 	bpf__strerror_entry(ENOENT, "You need to check probing points in BPF file");
			
--- a/tools/perf/util/bpf-loader.h
+++ b/tools/perf/util/bpf-loader.h
@@ -20,6 +20,10 @@ enum bpf_loader_errno {
 
				 	BPF_LOADER_ERRNO__EVENTNAME,	/* Event name is missing */
			
 
				 	BPF_LOADER_ERRNO__INTERNAL,	/* BPF loader internal error */
			
 
				 	BPF_LOADER_ERRNO__COMPILE,	/* Error when compiling BPF scriptlet */
			
 
				+	BPF_LOADER_ERRNO__CONFIG_TERM,	/* Invalid config term in config string */
			
 
				+	BPF_LOADER_ERRNO__PROLOGUE,	/* Failed to generate prologue */
			
 
				+	BPF_LOADER_ERRNO__PROLOGUE2BIG,	/* Prologue too big for program */
			
 
				+	BPF_LOADER_ERRNO__PROLOGUEOOB,	/* Offset out of bound for prologue */
			
 
				 	__BPF_LOADER_ERRNO__END,
			
 
				 };
			
 
				 
			
--- a/tools/perf/util/bpf-prologue.c
+++ b/tools/perf/util/bpf-prologue.c
@@ -0,0 +1,455 @@
 
				+/*
			
 
				+ * bpf-prologue.c
			
 
				+ *
			
 
				+ * Copyright (C) 2015 He Kuang <hekuang@huawei.com>
			
 
				+ * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
			
 
				+ * Copyright (C) 2015 Huawei Inc.
			
 
				+ */
			
 
				+
			
 
				+#include <bpf/libbpf.h>
			
 
				+#include "perf.h"
			
 
				+#include "debug.h"
			
 
				+#include "bpf-loader.h"
			
 
				+#include "bpf-prologue.h"
			
 
				+#include "probe-finder.h"
			
 
				+#include <dwarf-regs.h>
			
 
				+#include <linux/filter.h>
			
 
				+
			
 
				+#define BPF_REG_SIZE		8
			
 
				+
			
 
				+#define JMP_TO_ERROR_CODE	-1
			
 
				+#define JMP_TO_SUCCESS_CODE	-2
			
 
				+#define JMP_TO_USER_CODE	-3
			
 
				+
			
 
				+/* Write cursor over the instruction buffer the prologue is generated into. */
				+struct bpf_insn_pos {
				+	struct bpf_insn *begin;	/* first slot of the buffer */
				+	struct bpf_insn *end;	/* one past the last slot (begin + capacity) */
				+	struct bpf_insn *pos;	/* next slot to write; NULL once append_insn() overflowed */
				+};
			
 
				+
			
 
				+/* Number of instructions written into the buffer so far. */
				+static inline int
				+pos_get_cnt(struct bpf_insn_pos *pos)
				+{
				+	const struct bpf_insn *first = pos->begin;
				+	const struct bpf_insn *next = pos->pos;
				+
				+	return next - first;
				+}
			
 
				+
			
 
				+/*
				+ * Store 'new_insn' at the cursor and advance it.
				+ *
				+ * When the buffer can't take the instruction the cursor is poisoned
				+ * (pos->pos = NULL) so that every later append fails as well.
				+ * Returns 0 on success, -BPF_LOADER_ERRNO__PROLOGUE2BIG otherwise.
				+ */
				+static int
				+append_insn(struct bpf_insn new_insn, struct bpf_insn_pos *pos)
				+{
				+	struct bpf_insn *slot = pos->pos;
				+
				+	/* An earlier overflow already poisoned the cursor. */
				+	if (slot == NULL)
				+		return -BPF_LOADER_ERRNO__PROLOGUE2BIG;
				+
				+	if (slot + 1 < pos->end) {
				+		*slot = new_insn;
				+		pos->pos = slot + 1;
				+		return 0;
				+	}
				+
				+	pr_err("bpf prologue: prologue too long\n");
				+	pos->pos = NULL;
				+	return -BPF_LOADER_ERRNO__PROLOGUE2BIG;
				+}
			
 
				+
			
 
				+/*
				+ * Validate the cursor: it must be non-NULL and still inside the
				+ * buffer. Returns 0 if so, -BPF_LOADER_ERRNO__PROLOGUE2BIG if not.
				+ */
				+static int
				+check_pos(struct bpf_insn_pos *pos)
				+{
				+	bool usable = pos->pos && pos->pos < pos->end;
				+
				+	return usable ? 0 : -BPF_LOADER_ERRNO__PROLOGUE2BIG;
				+}
			
 
				+
			
 
				+/*
				+ * Shorter name for append_insn(). The generators in this file ignore
				+ * its return value and validate the cursor afterwards with check_pos().
				+ */
				+#define ins(i, p) append_insn((i), (p))
			
 
				+
			
 
				+/*
				+ * Emit the load of a saved machine register into an eBPF register:
				+ *
				+ *   'ldd target_reg, offset(ctx_reg)'
				+ *
				+ * 'reg' is the register name; its offset is looked up with
				+ * regs_query_register_offset(). ctx_reg must already hold the
				+ * pointer to 'struct pt_regs'.
				+ *
				+ * Returns 0 on success, the negative lookup result if 'reg' is not
				+ * known, or the check_pos() error if the buffer is exhausted.
				+ */
				+static int
				+gen_ldx_reg_from_ctx(struct bpf_insn_pos *pos, int ctx_reg,
				+		     const char *reg, int target_reg)
				+{
				+	int regs_off;
				+
				+	regs_off = regs_query_register_offset(reg);
				+	if (regs_off >= 0) {
				+		ins(BPF_LDX_MEM(BPF_DW, target_reg, ctx_reg, regs_off), pos);
				+		return check_pos(pos);
				+	}
				+
				+	pr_err("bpf: prologue: failed to get register %s\n",
				+	       reg);
				+	return regs_off;
				+}
			
 
				+
			
 
				+/*
				+ * Emit a BPF_FUNC_probe_read call which performs:
				+ *
				+ *  *[dst_addr_reg] = *([src_base_addr_reg] + offset)
				+ *
				+ * src_base_addr_reg holds the base address to read from and
				+ * dst_addr_reg holds the destination address (on stack).
				+ *
				+ * Helper arguments:
				+ *     ARG1: ptr to stack (dest)
				+ *     ARG2: size (BPF_REG_SIZE)
				+ *     ARG3: unsafe ptr (src)
				+ *
				+ * Returns the result of check_pos() after the last instruction.
				+ */
				+static int
				+gen_read_mem(struct bpf_insn_pos *pos,
				+	     int src_base_addr_reg,
				+	     int dst_addr_reg,
				+	     long offset)
				+{
				+	const bool src_in_place = (src_base_addr_reg == BPF_REG_ARG3);
				+	const bool dst_in_place = (dst_addr_reg == BPF_REG_ARG1);
				+
				+	/* ARG3 <- base address, then add the offset if there is one */
				+	if (!src_in_place)
				+		ins(BPF_MOV64_REG(BPF_REG_ARG3, src_base_addr_reg), pos);
				+	if (offset != 0)
				+		ins(BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG3, offset), pos);
				+
				+	/* ARG2 <- number of bytes to read */
				+	ins(BPF_ALU64_IMM(BPF_MOV, BPF_REG_ARG2, BPF_REG_SIZE), pos);
				+
				+	/* ARG1 <- destination address */
				+	if (!dst_in_place)
				+		ins(BPF_MOV64_REG(BPF_REG_ARG1, dst_addr_reg), pos);
				+
				+	ins(BPF_EMIT_CALL(BPF_FUNC_probe_read), pos);
				+
				+	/*
				+	 * A non-zero r0 means the read failed: jump to the error
				+	 * code. The jump offset is a placeholder which is relocated
				+	 * later to the start of the error processing code.
				+	 */
				+	ins(BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, JMP_TO_ERROR_CODE),
				+	    pos);
				+
				+	return check_pos(pos);
				+}
			
 
				+
			
 
				+/*
				+ * Fast path: every argument is a bare register. Load each of them
				+ * straight from the context into the argument registers, starting
				+ * at BPF_PROLOGUE_START_ARG_REG.
				+ *
				+ * BPF_REG_1 should have been initialized with pointer to
				+ * 'struct pt_regs'.
				+ *
				+ * Returns 0 on success or the first error hit while generating.
				+ */
				+static int
				+gen_prologue_fastpath(struct bpf_insn_pos *pos,
				+		      struct probe_trace_arg *args, int nargs)
				+{
				+	int idx;
				+
				+	for (idx = 0; idx < nargs; idx++) {
				+		int ret = gen_ldx_reg_from_ctx(pos, BPF_REG_1,
				+					       args[idx].value,
				+					       BPF_PROLOGUE_START_ARG_REG + idx);
				+
				+		if (ret)
				+			return ret;
				+	}
				+
				+	return check_pos(pos);
				+}
			
 
				+
			
 
				+/*
				+ * Slow path:
				+ *   At least one argument has the form of 'offset($rx)'.
				+ *
				+ * Following code first stores them into stack, then loads all of them
				+ * to r3 - r5.
				+ * Before final loading, the final result should be:
				+ *
				+ * low address
				+ * BPF_REG_FP - 24  ARG3
				+ * BPF_REG_FP - 16  ARG2
				+ * BPF_REG_FP - 8   ARG1
				+ * BPF_REG_FP
				+ * high address
				+ *
				+ * For each argument (described as: offn(...off2(off1(reg)))),
				+ * generates following code:
				+ *
				+ *  r7 <- fp
				+ *  r7 <- r7 + stack_offset  // stack_offset is -8 * (argument index + 1).
				+ *                           // Ideal code should initialize r7 using
				+ *                           // fp before generating args. However,
				+ *                           // eBPF won't regard r7 as stack pointer
				+ *                           // if it is generated by minus 8 from
				+ *                           // another stack pointer except fp.
				+ *                           // This is why we have to set r7
				+ *                           // to fp for each variable.
				+ *  r3 <- value of 'reg'-> generated using gen_ldx_reg_from_ctx()
				+ *  (r7) <- r3       // skip following instructions for bare reg
				+ *  r3 <- r3 + off1  . // skip if off1 == 0
				+ *  r2 <- 8           \
				+ *  r1 <- r7           |-> generated by gen_read_mem()
				+ *  call probe_read    /
				+ *  jnei r0, 0, err  ./
				+ *  r3 <- (r7)
				+ *  r3 <- r3 + off2  . // skip if off2 == 0
				+ *  r2 <- 8           \  // r2 may be broken by probe_read, so set again
				+ *  r1 <- r7           |-> generated by gen_read_mem()
				+ *  call probe_read    /
				+ *  jnei r0, 0, err  ./
				+ *  ...
				+ *
				+ * After all arguments are on the stack they are loaded into the
				+ * registers starting at BPF_PROLOGUE_START_ARG_REG and a jump to the
				+ * success code (placeholder offset JMP_TO_SUCCESS_CODE) is emitted.
				+ *
				+ * Returns 0 on success, or the error returned by
				+ * gen_ldx_reg_from_ctx(), gen_read_mem() or check_pos().
				+ */
				+static int
				+gen_prologue_slowpath(struct bpf_insn_pos *pos,
				+		      struct probe_trace_arg *args, int nargs)
				+{
				+	int err, i;
				+
				+	for (i = 0; i < nargs; i++) {
				+		struct probe_trace_arg *arg = &args[i];
				+		const char *reg = arg->value;
				+		struct probe_trace_arg_ref *ref = NULL;
				+		/* Stack slot of this argument, relative to fp. */
				+		int stack_offset = (i + 1) * -8;
				+
				+		pr_debug("prologue: fetch arg %d, base reg is %s\n",
				+			 i, reg);
				+
				+		/* value of base register is stored into ARG3 */
				+		err = gen_ldx_reg_from_ctx(pos, BPF_REG_CTX, reg,
				+					   BPF_REG_ARG3);
				+		if (err) {
				+			pr_err("prologue: failed to get offset of register %s\n",
				+			       reg);
				+			goto errout;
				+		}
				+
				+		/* Make r7 the stack pointer. */
				+		ins(BPF_MOV64_REG(BPF_REG_7, BPF_REG_FP), pos);
				+		/* r7 += stack_offset (a negative multiple of 8) */
				+		ins(BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, stack_offset), pos);
				+		/*
				+		 * Store r3 (base register) onto stack
				+		 * Ensure fp[offset] is set.
				+		 * fp is the only valid base register when storing
				+		 * into stack. We are not allowed to use r7 as base
				+		 * register here.
				+		 */
				+		ins(BPF_STX_MEM(BPF_DW, BPF_REG_FP, BPF_REG_ARG3,
				+				stack_offset), pos);
				+
				+		/* One probe_read per dereference level of the argument. */
				+		ref = arg->ref;
				+		while (ref) {
				+			pr_debug("prologue: arg %d: offset %ld\n",
				+				 i, ref->offset);
				+			err = gen_read_mem(pos, BPF_REG_3, BPF_REG_7,
				+					   ref->offset);
				+			if (err) {
				+				pr_err("prologue: failed to generate probe_read function call\n");
				+				goto errout;
				+			}
				+
				+			ref = ref->next;
				+			/*
				+			 * Load previous result into ARG3. Use
				+			 * BPF_REG_FP instead of r7 because verifier
				+			 * allows FP based addressing only.
				+			 */
				+			if (ref)
				+				ins(BPF_LDX_MEM(BPF_DW, BPF_REG_ARG3,
				+						BPF_REG_FP, stack_offset), pos);
				+		}
				+	}
				+
				+	/* Final pass: read to registers */
				+	for (i = 0; i < nargs; i++)
				+		ins(BPF_LDX_MEM(BPF_DW, BPF_PROLOGUE_START_ARG_REG + i,
				+				BPF_REG_FP, -BPF_REG_SIZE * (i + 1)), pos);
				+
				+	/* Placeholder jump, resolved later by prologue_relocate(). */
				+	ins(BPF_JMP_IMM(BPF_JA, BPF_REG_0, 0, JMP_TO_SUCCESS_CODE), pos);
				+
				+	return check_pos(pos);
				+errout:
				+	return err;
				+}
			
 
				+
			
 
				+/*
				+ * Resolve the placeholder jump offsets (JMP_TO_ERROR_CODE,
				+ * JMP_TO_SUCCESS_CODE, JMP_TO_USER_CODE) of all generated jump
				+ * instructions into real offsets to error_code, success_code and
				+ * user_code.
				+ *
				+ * Returns 0 on success, -BPF_LOADER_ERRNO__PROLOGUE2BIG if the cursor
				+ * is invalid, or -BPF_LOADER_ERRNO__PROLOGUE if a jump carries an
				+ * unknown placeholder.
				+ */
				+static int
				+prologue_relocate(struct bpf_insn_pos *pos, struct bpf_insn *error_code,
				+		  struct bpf_insn *success_code, struct bpf_insn *user_code)
				+{
				+	struct bpf_insn *cur;
				+
				+	if (check_pos(pos) != 0)
				+		return -BPF_LOADER_ERRNO__PROLOGUE2BIG;
				+
				+	for (cur = pos->begin; cur < pos->pos; cur++) {
				+		struct bpf_insn *dest;
				+
				+		/* Only jumps carry placeholders; calls are left alone. */
				+		if (BPF_CLASS(cur->code) != BPF_JMP ||
				+		    BPF_OP(cur->code) == BPF_CALL)
				+			continue;
				+
				+		if (cur->off == JMP_TO_ERROR_CODE) {
				+			dest = error_code;
				+		} else if (cur->off == JMP_TO_SUCCESS_CODE) {
				+			dest = success_code;
				+		} else if (cur->off == JMP_TO_USER_CODE) {
				+			dest = user_code;
				+		} else {
				+			pr_err("bpf prologue: internal error: relocation failed\n");
				+			return -BPF_LOADER_ERRNO__PROLOGUE;
				+		}
				+
				+		/* Offsets are relative to the instruction after the jump. */
				+		cur->off = dest - (cur + 1);
				+	}
				+
				+	return 0;
				+}
			
 
				+
			
 
				+/*
				+ * bpf__gen_prologue - generate the argument fetching prologue
				+ * @args:      probe arguments to fetch
				+ * @nargs:     number of entries in @args; arguments beyond
				+ *             BPF_PROLOGUE_MAX_ARGS are dropped with a warning
				+ * @new_prog:  buffer which receives the generated instructions
				+ * @new_cnt:   set to the number of generated instructions on success
				+ * @cnt_space: capacity of @new_prog in instructions (capped at
				+ *             BPF_MAXINSNS)
				+ *
				+ * The generated code leaves the fetch result in
				+ * BPF_PROLOGUE_FETCH_RESULT_REG (0 on success, 1 when a probe_read
				+ * failed) and the fetched values in the registers starting at
				+ * BPF_PROLOGUE_START_ARG_REG.
				+ *
				+ * Returns 0 on success, -EINVAL if @new_prog or @new_cnt is NULL,
				+ * -ENOTSUP for global ('@') variables,
				+ * -BPF_LOADER_ERRNO__PROLOGUEOOB for offsets which don't fit in s32,
				+ * -BPF_LOADER_ERRNO__PROLOGUE2BIG if the buffer is too small, or the
				+ * error reported by the fast/slow path generators and relocation.
				+ */
				+int bpf__gen_prologue(struct probe_trace_arg *args, int nargs,
				+		      struct bpf_insn *new_prog, size_t *new_cnt,
				+		      size_t cnt_space)
				+{
				+	struct bpf_insn *success_code = NULL;
				+	struct bpf_insn *error_code = NULL;
				+	struct bpf_insn *user_code = NULL;
				+	struct bpf_insn_pos pos;
				+	bool fastpath = true;
				+	int err = 0, i;
				+
				+	if (!new_prog || !new_cnt)
				+		return -EINVAL;
				+
				+	if (cnt_space > BPF_MAXINSNS)
				+		cnt_space = BPF_MAXINSNS;
				+
				+	pos.begin = new_prog;
				+	pos.end = new_prog + cnt_space;
				+	pos.pos = new_prog;
				+
				+	if (!nargs) {
				+		ins(BPF_ALU64_IMM(BPF_MOV, BPF_PROLOGUE_FETCH_RESULT_REG, 0),
				+		    &pos);
				+
				+		/*
				+		 * Propagate the real error: jumping to errout with
				+		 * err still 0 would report success without setting
				+		 * *new_cnt.
				+		 */
				+		err = check_pos(&pos);
				+		if (err)
				+			goto errout;
				+
				+		*new_cnt = pos_get_cnt(&pos);
				+		return 0;
				+	}
				+
				+	if (nargs > BPF_PROLOGUE_MAX_ARGS) {
				+		pr_warning("bpf: prologue: %d arguments are dropped\n",
				+			   nargs - BPF_PROLOGUE_MAX_ARGS);
				+		nargs = BPF_PROLOGUE_MAX_ARGS;
				+	}
				+
				+	/* First pass: validation */
				+	for (i = 0; i < nargs; i++) {
				+		struct probe_trace_arg_ref *ref = args[i].ref;
				+
				+		if (args[i].value[0] == '@') {
				+			/* TODO: fetch global variable */
				+			pr_err("bpf: prologue: global %s%+ld not support\n",
				+				args[i].value, ref ? ref->offset : 0);
				+			return -ENOTSUP;
				+		}
				+
				+		while (ref) {
				+			/* fastpath is true if all args has ref == NULL */
				+			fastpath = false;
				+
				+			/*
				+			 * Instruction encodes immediate value using
				+			 * s32, ref->offset is long. On systems which
				+			 * can't fill long in s32, refuse to process if
				+			 * ref->offset too large (or small).
				+			 */
				+#ifdef __LP64__
				+#define OFFSET_MAX	((1LL << 31) - 1)
				+#define OFFSET_MIN	((1LL << 31) * -1)
				+			if (ref->offset > OFFSET_MAX ||
				+					ref->offset < OFFSET_MIN) {
				+				pr_err("bpf: prologue: offset out of bound: %ld\n",
				+				       ref->offset);
				+				return -BPF_LOADER_ERRNO__PROLOGUEOOB;
				+			}
				+#endif
				+			ref = ref->next;
				+		}
				+	}
				+	pr_debug("prologue: pass validation\n");
				+
				+	if (fastpath) {
				+		/* If all variables are registers... */
				+		pr_debug("prologue: fast path\n");
				+		err = gen_prologue_fastpath(&pos, args, nargs);
				+		if (err)
				+			goto errout;
				+	} else {
				+		pr_debug("prologue: slow path\n");
				+
				+		/* Initialization: move ctx to a callee saved register. */
				+		ins(BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1), &pos);
				+
				+		err = gen_prologue_slowpath(&pos, args, nargs);
				+		if (err)
				+			goto errout;
				+		/*
				+		 * start of ERROR_CODE (only slow path needs error code)
				+		 *   mov r2 <- 1  // r2 is error number
				+		 *   mov r3 <- 0  // r3, r4... should be touched or
				+		 *                // verifier would complain
				+		 *   mov r4 <- 0
				+		 *   ...
				+		 *   goto usercode
				+		 */
				+		error_code = pos.pos;
				+		ins(BPF_ALU64_IMM(BPF_MOV, BPF_PROLOGUE_FETCH_RESULT_REG, 1),
				+		    &pos);
				+
				+		for (i = 0; i < nargs; i++)
				+			ins(BPF_ALU64_IMM(BPF_MOV,
				+					  BPF_PROLOGUE_START_ARG_REG + i,
				+					  0),
				+			    &pos);
				+		ins(BPF_JMP_IMM(BPF_JA, BPF_REG_0, 0, JMP_TO_USER_CODE),
				+				&pos);
				+	}
				+
				+	/*
				+	 * start of SUCCESS_CODE:
				+	 *   mov r2 <- 0
				+	 *   goto usercode  // skip
				+	 */
				+	success_code = pos.pos;
				+	ins(BPF_ALU64_IMM(BPF_MOV, BPF_PROLOGUE_FETCH_RESULT_REG, 0), &pos);
				+
				+	/*
				+	 * start of USER_CODE:
				+	 *   Restore ctx to r1
				+	 */
				+	user_code = pos.pos;
				+	if (!fastpath) {
				+		/*
				+		 * Only slow path needs restoring of ctx. In fast path,
				+		 * register are loaded directly from r1.
				+		 */
				+		ins(BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX), &pos);
				+		/* Turn the placeholder jump offsets into real ones. */
				+		err = prologue_relocate(&pos, error_code, success_code,
				+					user_code);
				+		if (err)
				+			goto errout;
				+	}
				+
				+	/* The ins() results above are not checked one by one. */
				+	err = check_pos(&pos);
				+	if (err)
				+		goto errout;
				+
				+	*new_cnt = pos_get_cnt(&pos);
				+	return 0;
				+errout:
				+	return err;
				+}
			
--- a/tools/perf/util/bpf-prologue.h
+++ b/tools/perf/util/bpf-prologue.h
@@ -0,0 +1,34 @@
 
				+/*
			
 
				+ * Copyright (C) 2015, He Kuang <hekuang@huawei.com>
			
 
				+ * Copyright (C) 2015, Huawei Inc.
			
 
				+ */
			
 
				+#ifndef __BPF_PROLOGUE_H
			
 
				+#define __BPF_PROLOGUE_H
			
 
				+
			
 
				+#include <linux/compiler.h>
			
 
				+#include <linux/filter.h>
			
 
				+#include "probe-event.h"
			
 
				+
			
 
				+#define BPF_PROLOGUE_MAX_ARGS 3
			
 
				+#define BPF_PROLOGUE_START_ARG_REG BPF_REG_3
			
 
				+#define BPF_PROLOGUE_FETCH_RESULT_REG BPF_REG_2
			
 
				+
			
 
				+#ifdef HAVE_BPF_PROLOGUE
			
 
				+int bpf__gen_prologue(struct probe_trace_arg *args, int nargs,
			
 
				+		      struct bpf_insn *new_prog, size_t *new_cnt,
			
 
				+		      size_t cnt_space);
			
 
				+#else
			
 
				+/*
				+ * Stub used when prologue generation is not built in
				+ * (HAVE_BPF_PROLOGUE undefined): reports zero generated instructions
				+ * and fails with -ENOTSUP, or -EINVAL if new_cnt is NULL.
				+ */
				+static inline int
				+bpf__gen_prologue(struct probe_trace_arg *args __maybe_unused,
				+		  int nargs __maybe_unused,
				+		  struct bpf_insn *new_prog __maybe_unused,
				+		  size_t *new_cnt,
				+		  size_t cnt_space __maybe_unused)
				+{
				+	if (new_cnt) {
				+		*new_cnt = 0;
				+		return -ENOTSUP;
				+	}
				+
				+	return -EINVAL;
				+}
			
 
				+#endif
			
 
				+#endif /* __BPF_PROLOGUE_H */
			
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -44,6 +44,10 @@ static int parse_callchain_mode(const char *value)
 
				 		callchain_param.mode = CHAIN_GRAPH_REL;
			
 
				 		return 0;
			
 
				 	}
			
 
				+	if (!strncmp(value, "folded", strlen(value))) {
			
 
				+		callchain_param.mode = CHAIN_FOLDED;
			
 
				+		return 0;
			
 
				+	}
			
 
				 	return -1;
			
 
				 }
			
 
				 
			
@@ -79,6 +83,23 @@ static int parse_callchain_sort_key(const char *value)
 
				 	return -1;
			
 
				 }
			
 
				 
			
 
				+/*
				+ * Parse the callchain 'value' option (percent|period|count) into
				+ * callchain_param.value. Only the first strlen(value) characters are
				+ * compared, and the candidates are tried in the order percent,
				+ * period, count.
				+ *
				+ * Returns 0 if 'value' was recognized, -1 otherwise.
				+ */
				+static int parse_callchain_value(const char *value)
				+{
				+	const size_t len = strlen(value);
				+
				+	if (strncmp(value, "percent", len) == 0)
				+		callchain_param.value = CCVAL_PERCENT;
				+	else if (strncmp(value, "period", len) == 0)
				+		callchain_param.value = CCVAL_PERIOD;
				+	else if (strncmp(value, "count", len) == 0)
				+		callchain_param.value = CCVAL_COUNT;
				+	else
				+		return -1;
				+
				+	return 0;
				+}
			
 
				+
			
 
				 static int
			
 
				 __parse_callchain_report_opt(const char *arg, bool allow_record_opt)
			
 
				 {
			
@@ -102,7 +123,8 @@ __parse_callchain_report_opt(const char *arg, bool allow_record_opt)
 
				 
			
 
				 		if (!parse_callchain_mode(tok) ||
			
 
				 		    !parse_callchain_order(tok) ||
			
 
				-		    !parse_callchain_sort_key(tok)) {
			
 
				+		    !parse_callchain_sort_key(tok) ||
			
 
				+		    !parse_callchain_value(tok)) {
			
 
				 			/* parsing ok - move on to the next */
			
 
				 			try_stack_size = false;
			
 
				 			goto next;
			
@@ -218,6 +240,7 @@ rb_insert_callchain(struct rb_root *root, struct callchain_node *chain,
 
				 
			
 
				 		switch (mode) {
			
 
				 		case CHAIN_FLAT:
			
 
				+		case CHAIN_FOLDED:
			
 
				 			if (rnode->hit < chain->hit)
			
 
				 				p = &(*p)->rb_left;
			
 
				 			else
			
@@ -338,6 +361,7 @@ int callchain_register_param(struct callchain_param *param)
 
				 		param->sort = sort_chain_graph_rel;
			
 
				 		break;
			
 
				 	case CHAIN_FLAT:
			
 
				+	case CHAIN_FOLDED:
			
 
				 		param->sort = sort_chain_flat;
			
 
				 		break;
			
 
				 	case CHAIN_NONE:
			
@@ -363,6 +387,7 @@ create_child(struct callchain_node *parent, bool inherit_children)
 
				 	}
			
 
				 	new->parent = parent;
			
 
				 	INIT_LIST_HEAD(&new->val);
			
 
				+	INIT_LIST_HEAD(&new->parent_val);
			
 
				 
			
 
				 	if (inherit_children) {
			
 
				 		struct rb_node *n;
			
@@ -431,6 +456,8 @@ add_child(struct callchain_node *parent,
 
				 
			
 
				 	new->children_hit = 0;
			
 
				 	new->hit = period;
			
 
				+	new->children_count = 0;
			
 
				+	new->count = 1;
			
 
				 	return new;
			
 
				 }
			
 
				 
			
@@ -478,6 +505,9 @@ split_add_child(struct callchain_node *parent,
 
				 	parent->children_hit = callchain_cumul_hits(new);
			
 
				 	new->val_nr = parent->val_nr - idx_local;
			
 
				 	parent->val_nr = idx_local;
			
 
				+	new->count = parent->count;
			
 
				+	new->children_count = parent->children_count;
			
 
				+	parent->children_count = callchain_cumul_counts(new);
			
 
				 
			
 
				 	/* create a new child for the new branch if any */
			
 
				 	if (idx_total < cursor->nr) {
			
@@ -488,6 +518,8 @@ split_add_child(struct callchain_node *parent,
 
				 
			
 
				 		parent->hit = 0;
			
 
				 		parent->children_hit += period;
			
 
				+		parent->count = 0;
			
 
				+		parent->children_count += 1;
			
 
				 
			
 
				 		node = callchain_cursor_current(cursor);
			
 
				 		new = add_child(parent, cursor, period);
			
@@ -510,6 +542,7 @@ split_add_child(struct callchain_node *parent,
 
				 		rb_insert_color(&new->rb_node_in, &parent->rb_root_in);
			
 
				 	} else {
			
 
				 		parent->hit = period;
			
 
				+		parent->count = 1;
			
 
				 	}
			
 
				 }
			
 
				 
			
@@ -556,6 +589,7 @@ append_chain_children(struct callchain_node *root,
 
				 
			
 
				 inc_children_hit:
			
 
				 	root->children_hit += period;
			
 
				+	root->children_count++;
			
 
				 }
			
 
				 
			
 
				 static int
			
@@ -608,6 +642,7 @@ append_chain(struct callchain_node *root,
 
				 	/* we match 100% of the path, increment the hit */
			
 
				 	if (matches == root->val_nr && cursor->pos == cursor->nr) {
			
 
				 		root->hit += period;
			
 
				+		root->count++;
			
 
				 		return 0;
			
 
				 	}
			
 
				 
			
@@ -799,12 +834,72 @@ char *callchain_list__sym_name(struct callchain_list *cl,
 
				 	return bf;
			
 
				 }
			
 
				 
			
 
				+/*
				+ * Format the value of 'node' selected by callchain_param.value
				+ * (period, count or percentage of 'total') into 'bf'.
				+ *
				+ * In CHAIN_FOLDED mode the node's own hit/count are used, otherwise
				+ * the cumulative ones. Returns 'bf'.
				+ */
				+char *callchain_node__scnprintf_value(struct callchain_node *node,
				+				      char *bf, size_t bfsize, u64 total)
				+{
				+	const bool folded = callchain_param.mode == CHAIN_FOLDED;
				+	u64 period = folded ? node->hit : callchain_cumul_hits(node);
				+	unsigned count = folded ? node->count : callchain_cumul_counts(node);
				+	double percent = 0.0;
				+
				+	if (callchain_param.value == CCVAL_PERIOD) {
				+		scnprintf(bf, bfsize, "%"PRIu64, period);
				+	} else if (callchain_param.value == CCVAL_COUNT) {
				+		scnprintf(bf, bfsize, "%u", count);
				+	} else {
				+		/* CCVAL_PERCENT and anything unknown */
				+		if (total)
				+			percent = period * 100.0 / total;
				+		scnprintf(bf, bfsize, "%.2f%%", percent);
				+	}
				+
				+	return bf;
				+}
			
 
				+
			
 
				+/*
				+ * Print the value of 'node' selected by callchain_param.value
				+ * (period, count or percentage of 'total') to 'fp'.
				+ *
				+ * In CHAIN_FOLDED mode the node's own hit/count are used, otherwise
				+ * the cumulative ones. Returns what the underlying print function
				+ * returned.
				+ */
				+int callchain_node__fprintf_value(struct callchain_node *node,
				+				 FILE *fp, u64 total)
				+{
				+	const bool folded = callchain_param.mode == CHAIN_FOLDED;
				+	u64 period = folded ? node->hit : callchain_cumul_hits(node);
				+	unsigned count = folded ? node->count : callchain_cumul_counts(node);
				+	double percent = 0.0;
				+
				+	if (callchain_param.value == CCVAL_PERIOD)
				+		return fprintf(fp, "%"PRIu64, period);
				+
				+	if (callchain_param.value == CCVAL_COUNT)
				+		return fprintf(fp, "%u", count);
				+
				+	/* CCVAL_PERCENT and anything unknown */
				+	if (total)
				+		percent = period * 100.0 / total;
				+
				+	return percent_color_fprintf(fp, "%.2f%%", percent);
				+}
			
 
				+
			
 
				 static void free_callchain_node(struct callchain_node *node)
			
 
				 {
			
 
				 	struct callchain_list *list, *tmp;
			
 
				 	struct callchain_node *child;
			
 
				 	struct rb_node *n;
			
 
				 
			
 
				+	list_for_each_entry_safe(list, tmp, &node->parent_val, list) {
			
 
				+		list_del(&list->list);
			
 
				+		free(list);
			
 
				+	}
			
 
				+
			
 
				 	list_for_each_entry_safe(list, tmp, &node->val, list) {
			
 
				 		list_del(&list->list);
			
 
				 		free(list);
			
@@ -828,3 +923,41 @@ void free_callchain(struct callchain_root *root)
 
				 
			
 
				 	free_callchain_node(&root->node);
			
 
				 }
			
 
				+
			
 
				+/*
				+ * Fill node->parent_val with copies of the callchain entries of all
				+ * ancestors of 'node'.
				+ *
				+ * The copies are first collected on a temporary list while walking
				+ * from the nearest parent upwards (each parent's entries in reverse),
				+ * then moved to node->parent_val in reverse, which restores the
				+ * top-most-ancestor-first order.
				+ *
				+ * Returns 0 on success. On allocation failure the copies made so far
				+ * are freed, node->parent_val is left untouched and -ENOMEM is
				+ * returned.
				+ */
				+int callchain_node__make_parent_list(struct callchain_node *node)
				+{
				+	struct callchain_node *parent = node->parent;
				+	struct callchain_list *chain, *new;
				+	LIST_HEAD(head);
				+
				+	while (parent) {
				+		list_for_each_entry_reverse(chain, &parent->val, list) {
				+			new = malloc(sizeof(*new));
				+			if (new == NULL)
				+				goto out;
				+			*new = *chain;
				+			new->has_children = false;
				+			list_add_tail(&new->list, &head);
				+		}
				+		parent = parent->parent;
				+	}
				+
				+	/* 'new' only serves as the iteration cursor from here on. */
				+	list_for_each_entry_safe_reverse(chain, new, &head, list)
				+		list_move_tail(&chain->list, &node->parent_val);
				+
				+	if (!list_empty(&node->parent_val)) {
				+		/* The first parent entry is flagged if 'node' has siblings. */
				+		chain = list_first_entry(&node->parent_val, struct callchain_list, list);
				+		chain->has_children = rb_prev(&node->rb_node) || rb_next(&node->rb_node);
				+
				+		/* The node's own first entry is no longer flagged. */
				+		chain = list_first_entry(&node->val, struct callchain_list, list);
				+		chain->has_children = false;
				+	}
				+	return 0;
				+
				+out:
				+	/* Drop the partial copies collected before the failure. */
				+	list_for_each_entry_safe(chain, new, &head, list) {
				+		list_del(&chain->list);
				+		free(chain);
				+	}
				+	return -ENOMEM;
				+}
			
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -24,12 +24,13 @@
 
				 #define CALLCHAIN_RECORD_HELP  CALLCHAIN_HELP RECORD_MODE_HELP RECORD_SIZE_HELP
			
 
				 
			
 
				 #define CALLCHAIN_REPORT_HELP						\
			
 
				-	HELP_PAD "print_type:\tcall graph printing style (graph|flat|fractal|none)\n" \
			
 
				+	HELP_PAD "print_type:\tcall graph printing style (graph|flat|fractal|folded|none)\n" \
			
 
				 	HELP_PAD "threshold:\tminimum call graph inclusion threshold (<percent>)\n" \
			
 
				 	HELP_PAD "print_limit:\tmaximum number of call graph entry (<number>)\n" \
			
 
				 	HELP_PAD "order:\t\tcall graph order (caller|callee)\n" \
			
 
				 	HELP_PAD "sort_key:\tcall graph sort key (function|address)\n"	\
			
 
				-	HELP_PAD "branch:\t\tinclude last branch info to call graph (branch)\n"
			
 
				+	HELP_PAD "branch:\t\tinclude last branch info to call graph (branch)\n" \
			
 
				+	HELP_PAD "value:\t\tcall graph value (percent|period|count)\n"
			
 
				 
			
 
				 enum perf_call_graph_mode {
			
 
				 	CALLCHAIN_NONE,
			
@@ -43,7 +44,8 @@ enum chain_mode {
 
				 	CHAIN_NONE,
			
 
				 	CHAIN_FLAT,
			
 
				 	CHAIN_GRAPH_ABS,
			
 
				-	CHAIN_GRAPH_REL
			
 
				+	CHAIN_GRAPH_REL,
			
 
				+	CHAIN_FOLDED,
			
 
				 };
			
 
				 
			
 
				 enum chain_order {
			
@@ -54,11 +56,14 @@ enum chain_order {
 
				 struct callchain_node {
			
 
				 	struct callchain_node	*parent;
			
 
				 	struct list_head	val;
			
 
				+	struct list_head	parent_val;
			
 
				 	struct rb_node		rb_node_in; /* to insert nodes in an rbtree */
			
 
				 	struct rb_node		rb_node;    /* to sort nodes in an output tree */
			
 
				 	struct rb_root		rb_root_in; /* input tree of children */
			
 
				 	struct rb_root		rb_root;    /* sorted output tree of children */
			
 
				 	unsigned int		val_nr;
			
 
				+	unsigned int		count;
			
 
				+	unsigned int		children_count;
			
 
				 	u64			hit;
			
 
				 	u64			children_hit;
			
 
				 };
			
@@ -78,6 +83,12 @@ enum chain_key {
 
				 	CCKEY_ADDRESS
			
 
				 };
			
 
				 
			
 
				+/* Which value of a callchain node is printed (the 'value' report option). */
				+enum chain_value {
				+	CCVAL_PERCENT,	/* period as a percentage of the total */
				+	CCVAL_PERIOD,	/* period, printed as a raw number */
				+	CCVAL_COUNT,	/* count, printed as a raw number */
				+};
			
 
				+
			
 
				 struct callchain_param {
			
 
				 	bool			enabled;
			
 
				 	enum perf_call_graph_mode record_mode;
			
@@ -90,6 +101,7 @@ struct callchain_param {
 
				 	bool			order_set;
			
 
				 	enum chain_key		key;
			
 
				 	bool			branch_callstack;
			
 
				+	enum chain_value	value;
			
 
				 };
			
 
				 
			
 
				 extern struct callchain_param callchain_param;
			
@@ -144,6 +156,11 @@ static inline u64 callchain_cumul_hits(struct callchain_node *node)
 
				 	return node->hit + node->children_hit;
			
 
				 }
			
 
				 
			
 
				+static inline unsigned callchain_cumul_counts(struct callchain_node *node)
			
 
				+{
			
 
				+	return node->count + node->children_count;
			
 
				+}
			
 
				+
			
 
				 int callchain_register_param(struct callchain_param *param);
			
 
				 int callchain_append(struct callchain_root *root,
			
 
				 		     struct callchain_cursor *cursor,
			
@@ -229,7 +246,12 @@ static inline int arch_skip_callchain_idx(struct thread *thread __maybe_unused,
 
				 
			
 
				 char *callchain_list__sym_name(struct callchain_list *cl,
			
 
				 			       char *bf, size_t bfsize, bool show_dso);
			
 
				+char *callchain_node__scnprintf_value(struct callchain_node *node,
			
 
				+				      char *bf, size_t bfsize, u64 total);
			
 
				+int callchain_node__fprintf_value(struct callchain_node *node,
			
 
				+				  FILE *fp, u64 total);
			
 
				 
			
 
				 void free_callchain(struct callchain_root *root);
			
 
				+int callchain_node__make_parent_list(struct callchain_node *node);
			
 
				 
			
 
				 #endif	/* __PERF_CALLCHAIN_H */
			
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -1243,6 +1243,8 @@ struct dso *__dsos__addnew(struct dsos *dsos, const char *name)
 
				 	if (dso != NULL) {
			
 
				 		__dsos__add(dsos, dso);
			
 
				 		dso__set_basename(dso);
			
 
				+		/* Put dso here because __dsos__add already got it */
			
 
				+		dso__put(dso);
			
 
				 	}
			
 
				 	return dso;
			
 
				 }
			
--- a/tools/perf/util/exec_cmd.c
+++ b/tools/perf/util/exec_cmd.c
@@ -9,17 +9,17 @@
 
				 static const char *argv_exec_path;
			
 
				 static const char *argv0_path;
			
 
				 
			
 
				-const char *system_path(const char *path)
			
 
				+char *system_path(const char *path)
			
 
				 {
			
 
				 	static const char *prefix = PREFIX;
			
 
				 	struct strbuf d = STRBUF_INIT;
			
 
				 
			
 
				 	if (is_absolute_path(path))
			
 
				-		return path;
			
 
				+		return strdup(path);
			
 
				 
			
 
				 	strbuf_addf(&d, "%s/%s", prefix, path);
			
 
				 	path = strbuf_detach(&d, NULL);
			
 
				-	return path;
			
 
				+	return (char *)path;
			
 
				 }
			
 
				 
			
 
				 const char *perf_extract_argv0_path(const char *argv0)
			
@@ -52,17 +52,16 @@ void perf_set_argv_exec_path(const char *exec_path)
 
				 
			
 
				 
			
 
				 /* Returns the highest-priority, location to look for perf programs. */
			
 
				-const char *perf_exec_path(void)
			
 
				+char *perf_exec_path(void)
			
 
				 {
			
 
				-	const char *env;
			
 
				+	char *env;
			
 
				 
			
 
				 	if (argv_exec_path)
			
 
				-		return argv_exec_path;
			
 
				+		return strdup(argv_exec_path);
			
 
				 
			
 
				 	env = getenv(EXEC_PATH_ENVIRONMENT);
			
 
				-	if (env && *env) {
			
 
				-		return env;
			
 
				-	}
			
 
				+	if (env && *env)
			
 
				+		return strdup(env);
			
 
				 
			
 
				 	return system_path(PERF_EXEC_PATH);
			
 
				 }
			
@@ -83,9 +82,11 @@ void setup_path(void)
 
				 {
			
 
				 	const char *old_path = getenv("PATH");
			
 
				 	struct strbuf new_path = STRBUF_INIT;
			
 
				+	char *tmp = perf_exec_path();
			
 
				 
			
 
				-	add_path(&new_path, perf_exec_path());
			
 
				+	add_path(&new_path, tmp);
			
 
				 	add_path(&new_path, argv0_path);
			
 
				+	free(tmp);
			
 
				 
			
 
				 	if (old_path)
			
 
				 		strbuf_addstr(&new_path, old_path);
			
--- a/tools/perf/util/exec_cmd.h
+++ b/tools/perf/util/exec_cmd.h
@@ -3,10 +3,11 @@
 
				 
			
 
				 extern void perf_set_argv_exec_path(const char *exec_path);
			
 
				 extern const char *perf_extract_argv0_path(const char *path);
			
 
				-extern const char *perf_exec_path(void);
			
 
				 extern void setup_path(void);
			
 
				 extern int execv_perf_cmd(const char **argv); /* NULL terminated */
			
 
				 extern int execl_perf_cmd(const char *cmd, ...);
			
 
				-extern const char *system_path(const char *path);
			
 
				+/* perf_exec_path and system_path return malloc'd string, caller must free it */
			
 
				+extern char *perf_exec_path(void);
			
 
				+extern char *system_path(const char *path);
			
 
				 
			
 
				 #endif /* __PERF_EXEC_CMD_H */
			
--- a/tools/perf/util/help.c
+++ b/tools/perf/util/help.c
@@ -159,7 +159,7 @@ void load_command_list(const char *prefix,
 
				 		struct cmdnames *other_cmds)
			
 
				 {
			
 
				 	const char *env_path = getenv("PATH");
			
 
				-	const char *exec_path = perf_exec_path();
			
 
				+	char *exec_path = perf_exec_path();
			
 
				 
			
 
				 	if (exec_path) {
			
 
				 		list_commands_in_dir(main_cmds, exec_path, prefix);
			
@@ -187,6 +187,7 @@ void load_command_list(const char *prefix,
 
				 		      sizeof(*other_cmds->names), cmdname_compare);
			
 
				 		uniq(other_cmds);
			
 
				 	}
			
 
				+	free(exec_path);
			
 
				 	exclude_cmds(other_cmds, main_cmds);
			
 
				 }
			
 
				 
			
@@ -203,13 +204,14 @@ void list_commands(const char *title, struct cmdnames *main_cmds,
 
				 			longest = other_cmds->names[i]->len;
			
 
				 
			
 
				 	if (main_cmds->cnt) {
			
 
				-		const char *exec_path = perf_exec_path();
			
 
				+		char *exec_path = perf_exec_path();
			
 
				 		printf("available %s in '%s'\n", title, exec_path);
			
 
				 		printf("----------------");
			
 
				 		mput_char('-', strlen(title) + strlen(exec_path));
			
 
				 		putchar('\n');
			
 
				 		pretty_print_string_list(main_cmds, longest);
			
 
				 		putchar('\n');
			
 
				+		free(exec_path);
			
 
				 	}
			
 
				 
			
 
				 	if (other_cmds->cnt) {
			
--- a/tools/perf/util/include/linux/string.h
+++ b/tools/perf/util/include/linux/string.h
@@ -1,3 +0,0 @@
 
				-#include <string.h>
			
 
				-
			
 
				-void *memdup(const void *src, size_t len);
			
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -122,6 +122,7 @@ void machine__delete_threads(struct machine *machine)
 
				 
			
 
				 void machine__exit(struct machine *machine)
			
 
				 {
			
 
				+	machine__destroy_kernel_maps(machine);
			
 
				 	map_groups__exit(&machine->kmaps);
			
 
				 	dsos__exit(&machine->dsos);
			
 
				 	machine__exit_vdso(machine);
			
@@ -564,7 +565,7 @@ struct map *machine__findnew_module_map(struct machine *machine, u64 start,
 
				 					const char *filename)
			
 
				 {
			
 
				 	struct map *map = NULL;
			
 
				-	struct dso *dso;
			
 
				+	struct dso *dso = NULL;
			
 
				 	struct kmod_path m;
			
 
				 
			
 
				 	if (kmod_path__parse_name(&m, filename))
			
@@ -585,7 +586,11 @@ struct map *machine__findnew_module_map(struct machine *machine, u64 start,
 
				 
			
 
				 	map_groups__insert(&machine->kmaps, map);
			
 
				 
			
 
				+	/* Put the map here because map_groups__insert already got it */
			
 
				+	map__put(map);
			
 
				 out:
			
 
				+	/* Put the dso here, corresponding to machine__findnew_module_dso */
			
 
				+	dso__put(dso);
			
 
				 	free(m.name);
			
 
				 	return map;
			
 
				 }
			
@@ -788,6 +793,7 @@ void machine__destroy_kernel_maps(struct machine *machine)
 
				 				kmap->ref_reloc_sym = NULL;
			
 
				 		}
			
 
				 
			
 
				+		map__put(machine->vmlinux_maps[type]);
			
 
				 		machine->vmlinux_maps[type] = NULL;
			
 
				 	}
			
 
				 }
			
@@ -1084,11 +1090,14 @@ int machine__create_kernel_maps(struct machine *machine)
 
				 	struct dso *kernel = machine__get_kernel(machine);
			
 
				 	const char *name;
			
 
				 	u64 addr = machine__get_running_kernel_start(machine, &name);
			
 
				-	if (!addr)
			
 
				+	int ret;
			
 
				+
			
 
				+	if (!addr || kernel == NULL)
			
 
				 		return -1;
			
 
				 
			
 
				-	if (kernel == NULL ||
			
 
				-	    __machine__create_kernel_maps(machine, kernel) < 0)
			
 
				+	ret = __machine__create_kernel_maps(machine, kernel);
			
 
				+	dso__put(kernel);
			
 
				+	if (ret < 0)
			
 
				 		return -1;
			
 
				 
			
 
				 	if (symbol_conf.use_modules && machine__create_modules(machine) < 0) {
			
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -2326,8 +2326,11 @@ static int get_new_event_name(char *buf, size_t len, const char *base,
 
				 		goto out;
			
 
				 
			
 
				 	if (!allow_suffix) {
			
 
				-		pr_warning("Error: event \"%s\" already exists. "
			
 
				-			   "(Use -f to force duplicates.)\n", buf);
			
 
				+		pr_warning("Error: event \"%s\" already exists.\n"
			
 
				+			   " Hint: Remove existing event by 'perf probe -d'\n"
			
 
				+			   "       or force duplicates by 'perf probe -f'\n"
			
 
				+			   "       or set 'force=yes' in BPF source.\n",
			
 
				+			   buf);
			
 
				 		ret = -EEXIST;
			
 
				 		goto out;
			
 
				 	}
			
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -683,21 +683,24 @@ static int call_probe_finder(Dwarf_Die *sc_die, struct probe_finder *pf)
 
				 	ret = dwarf_getlocation_addr(&fb_attr, pf->addr, &pf->fb_ops, &nops, 1);
			
 
				 	if (ret <= 0 || nops == 0) {
			
 
				 		pf->fb_ops = NULL;
			
 
				+		ret = 0;
			
 
				 #if _ELFUTILS_PREREQ(0, 142)
			
 
				 	} else if (nops == 1 && pf->fb_ops[0].atom == DW_OP_call_frame_cfa &&
			
 
				 		   pf->cfi != NULL) {
			
 
				-		Dwarf_Frame *frame;
			
 
				+		Dwarf_Frame *frame = NULL;
			
 
				 		if (dwarf_cfi_addrframe(pf->cfi, pf->addr, &frame) != 0 ||
			
 
				 		    dwarf_frame_cfa(frame, &pf->fb_ops, &nops) != 0) {
			
 
				 			pr_warning("Failed to get call frame on 0x%jx\n",
			
 
				 				   (uintmax_t)pf->addr);
			
 
				-			return -ENOENT;
			
 
				+			ret = -ENOENT;
			
 
				 		}
			
 
				+		free(frame);
			
 
				 #endif
			
 
				 	}
			
 
				 
			
 
				 	/* Call finder's callback handler */
			
 
				-	ret = pf->callback(sc_die, pf);
			
 
				+	if (ret >= 0)
			
 
				+		ret = pf->callback(sc_die, pf);
			
 
				 
			
 
				 	/* *pf->fb_ops will be cached in libdw. Don't free it. */
			
 
				 	pf->fb_ops = NULL;
			
--- a/tools/perf/util/string.c
+++ b/tools/perf/util/string.c
@@ -342,22 +342,6 @@ char *rtrim(char *s)
 
				 	return s;
			
 
				 }
			
 
				 
			
 
				-/**
			
 
				- * memdup - duplicate region of memory
			
 
				- * @src: memory region to duplicate
			
 
				- * @len: memory region length
			
 
				- */
			
 
				-void *memdup(const void *src, size_t len)
			
 
				-{
			
 
				-	void *p;
			
 
				-
			
 
				-	p = malloc(len);
			
 
				-	if (p)
			
 
				-		memcpy(p, src, len);
			
 
				-
			
 
				-	return p;
			
 
				-}
			
 
				-
			
 
				 char *asprintf_expr_inout_ints(const char *var, bool in, size_t nints, int *ints)
			
 
				 {
			
 
				 	/*
			
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -1042,6 +1042,8 @@ int dso__load_sym(struct dso *dso, struct map *map,
 
				 				}
			
 
				 				curr_dso->symtab_type = dso->symtab_type;
			
 
				 				map_groups__insert(kmaps, curr_map);
			
 
				+				/* kmaps already got it */
			
 
				+				map__put(curr_map);
			
 
				 				dsos__add(&map->groups->machine->dsos, curr_dso);
			
 
				 				dso__set_loaded(curr_dso, map->type);
			
 
				 			} else
			
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -21,7 +21,8 @@ struct callchain_param	callchain_param = {
 
				 	.mode	= CHAIN_GRAPH_ABS,
			
 
				 	.min_percent = 0.5,
			
 
				 	.order  = ORDER_CALLEE,
			
 
				-	.key	= CCKEY_FUNCTION
			
 
				+	.key	= CCKEY_FUNCTION,
			
 
				+	.value	= CCVAL_PERCENT,
			
 
				 };
			
 
				 
			
 
				 /*