瀏覽代碼

Merge tag 'perf-core-for-mingo-4.14-20170725' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes for v4.14:

New features:

- Filter out 'sshd' in the tracer ancestry in 'perf trace' syswide tracing,
  to eliminate tracing loops (Arnaldo Carvalho de Melo)

- Support lookup of symbols in other mount namespaces in 'perf top' (Krister Johansen)

- Initial 'clone' syscall args beautifier in 'perf trace' (Arnaldo Carvalho de Melo)

User visible changes:

- Ignore 'fd' and 'offset' args for MAP_ANONYMOUS in 'perf trace'
  (Arnaldo Carvalho de Melo)

- Process tracing data in 'perf annotate' pipe mode (David Carrillo-Cisneros)

- Make 'perf report --branch-history' work without callgraphs(-g) option
  in perf record (Jin Yao)

- Tag branch type/flag on "to" and tag cycles on "from" in 'perf report' (Jin Yao)

Fixes:

- Fix jvmti linker error when libelf config is disabled (Sudeep Holla)

- Fix cgroups refcount usage (Arnaldo Carvalho de Melo)

- Fix kernel symbol adjustment for s390x (Thomas Richter)

- Fix 'perf report --stdio --show-total-period', it was showing the
  number of samples, not the total period (Taeung Song)

Infrastructure changes:

- Add perf_sample dictionary to tracepoint handlers in 'perf script'
  python, which were already present for other types of events
  (hardware, etc) (Arun Kalyanasundaram)

- Make build fail on vendor events JSON parse error (Andi Kleen)

- Adopt strstarts() from the kernel (Arnaldo Carvalho de Melo)

Arch specific changes:

- Set no_aux_samples for the tracking event in Intel PT (Kan Liang)

- Always set no branch for Intel PT dummy event (Kan Liang)

Trivial changes:

- Simplify some error handlers in 'perf script' (Dan Carpenter)

- Add EXCLUDE_EXTLIBS and EXTRA_PERFLIBS to makefile (David Carrillo-Cisneros)

Signed-off-by: Ingo Molnar <mingo@kernel.org>
Ingo Molnar 8 年之前
父節點
當前提交
ee438ec8f3
共有 43 個文件被更改,包括 798 次插入300 次删除
  1. 10 2
      tools/include/linux/string.h
  2. 52 0
      tools/include/uapi/linux/sched.h
  3. 25 16
      tools/lib/string.c
  4. 1 1
      tools/lib/subcmd/help.c
  5. 9 9
      tools/lib/subcmd/parse-options.c
  6. 4 0
      tools/perf/Documentation/perf-top.txt
  7. 2 0
      tools/perf/MANIFEST
  8. 8 2
      tools/perf/Makefile.perf
  9. 1 0
      tools/perf/arch/s390/util/Build
  10. 22 0
      tools/perf/arch/s390/util/sym-handling.c
  11. 3 0
      tools/perf/arch/x86/util/intel-pt.c
  12. 2 2
      tools/perf/builtin-annotate.c
  13. 2 1
      tools/perf/builtin-config.c
  14. 1 1
      tools/perf/builtin-ftrace.c
  15. 3 3
      tools/perf/builtin-help.c
  16. 15 12
      tools/perf/builtin-report.c
  17. 1 6
      tools/perf/builtin-script.c
  18. 18 2
      tools/perf/builtin-top.c
  19. 98 42
      tools/perf/builtin-trace.c
  20. 1 0
      tools/perf/check-headers.sh
  21. 8 8
      tools/perf/perf.c
  22. 0 4
      tools/perf/pmu-events/README
  23. 14 7
      tools/perf/pmu-events/jevents.c
  24. 1 0
      tools/perf/trace/beauty/Build
  25. 3 0
      tools/perf/trace/beauty/beauty.h
  26. 75 0
      tools/perf/trace/beauty/clone.c
  27. 3 0
      tools/perf/trace/beauty/mmap.c
  28. 2 1
      tools/perf/ui/browser.c
  29. 5 4
      tools/perf/ui/browsers/annotate.c
  30. 2 2
      tools/perf/ui/gtk/annotate.c
  31. 2 1
      tools/perf/ui/stdio/hist.c
  32. 56 43
      tools/perf/util/annotate.c
  33. 13 5
      tools/perf/util/annotate.h
  34. 1 1
      tools/perf/util/bpf-loader.c
  35. 6 5
      tools/perf/util/branch.h
  36. 110 47
      tools/perf/util/callchain.c
  37. 5 3
      tools/perf/util/cgroup.c
  38. 7 6
      tools/perf/util/config.c
  39. 8 0
      tools/perf/util/evsel.c
  40. 2 0
      tools/perf/util/hist.c
  41. 1 1
      tools/perf/util/llvm-utils.c
  42. 12 1
      tools/perf/util/machine.c
  43. 184 62
      tools/perf/util/scripting-engines/trace-event-python.c

+ 10 - 2
tools/include/linux/string.h

@@ -1,8 +1,8 @@
 #ifndef _TOOLS_LINUX_STRING_H_
 #define _TOOLS_LINUX_STRING_H_
 
-
 #include <linux/types.h>	/* for size_t */
+#include <string.h>
 
 void *memdup(const void *src, size_t len);
 
@@ -18,6 +18,14 @@ extern size_t strlcpy(char *dest, const char *src, size_t size);
 
 char *str_error_r(int errnum, char *buf, size_t buflen);
 
-int prefixcmp(const char *str, const char *prefix);
+/**
+ * strstarts - does @str start with @prefix?
+ * @str: string to examine
+ * @prefix: prefix to look for.
+ */
+static inline bool strstarts(const char *str, const char *prefix)
+{
+	return strncmp(str, prefix, strlen(prefix)) == 0;
+}
 
 #endif /* _LINUX_STRING_H_ */

+ 52 - 0
tools/include/uapi/linux/sched.h

@@ -0,0 +1,52 @@
+#ifndef _UAPI_LINUX_SCHED_H
+#define _UAPI_LINUX_SCHED_H
+
+/*
+ * cloning flags:
+ */
+#define CSIGNAL		0x000000ff	/* signal mask to be sent at exit */
+#define CLONE_VM	0x00000100	/* set if VM shared between processes */
+#define CLONE_FS	0x00000200	/* set if fs info shared between processes */
+#define CLONE_FILES	0x00000400	/* set if open files shared between processes */
+#define CLONE_SIGHAND	0x00000800	/* set if signal handlers and blocked signals shared */
+#define CLONE_PTRACE	0x00002000	/* set if we want to let tracing continue on the child too */
+#define CLONE_VFORK	0x00004000	/* set if the parent wants the child to wake it up on mm_release */
+#define CLONE_PARENT	0x00008000	/* set if we want to have the same parent as the cloner */
+#define CLONE_THREAD	0x00010000	/* Same thread group? */
+#define CLONE_NEWNS	0x00020000	/* New mount namespace group */
+#define CLONE_SYSVSEM	0x00040000	/* share system V SEM_UNDO semantics */
+#define CLONE_SETTLS	0x00080000	/* create a new TLS for the child */
+#define CLONE_PARENT_SETTID	0x00100000	/* set the TID in the parent */
+#define CLONE_CHILD_CLEARTID	0x00200000	/* clear the TID in the child */
+#define CLONE_DETACHED		0x00400000	/* Unused, ignored */
+#define CLONE_UNTRACED		0x00800000	/* set if the tracing process can't force CLONE_PTRACE on this clone */
+#define CLONE_CHILD_SETTID	0x01000000	/* set the TID in the child */
+#define CLONE_NEWCGROUP		0x02000000	/* New cgroup namespace */
+#define CLONE_NEWUTS		0x04000000	/* New utsname namespace */
+#define CLONE_NEWIPC		0x08000000	/* New ipc namespace */
+#define CLONE_NEWUSER		0x10000000	/* New user namespace */
+#define CLONE_NEWPID		0x20000000	/* New pid namespace */
+#define CLONE_NEWNET		0x40000000	/* New network namespace */
+#define CLONE_IO		0x80000000	/* Clone io context */
+
+/*
+ * Scheduling policies
+ */
+#define SCHED_NORMAL		0
+#define SCHED_FIFO		1
+#define SCHED_RR		2
+#define SCHED_BATCH		3
+/* SCHED_ISO: reserved but not implemented yet */
+#define SCHED_IDLE		5
+#define SCHED_DEADLINE		6
+
+/* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */
+#define SCHED_RESET_ON_FORK     0x40000000
+
+/*
+ * For the sched_{set,get}attr() calls
+ */
+#define SCHED_FLAG_RESET_ON_FORK	0x01
+#define SCHED_FLAG_RECLAIM		0x02
+
+#endif /* _UAPI_LINUX_SCHED_H */

+ 25 - 16
tools/lib/string.c

@@ -39,27 +39,45 @@ void *memdup(const void *src, size_t len)
  * @s: input string
  * @res: result
  *
- * This routine returns 0 iff the first character is one of 'Yy1Nn0'.
- * Otherwise it will return -EINVAL.  Value pointed to by res is
- * updated upon finding a match.
+ * This routine returns 0 iff the first character is one of 'Yy1Nn0', or
+ * [oO][NnFf] for "on" and "off". Otherwise it will return -EINVAL.  Value
+ * pointed to by res is updated upon finding a match.
  */
 int strtobool(const char *s, bool *res)
 {
+	if (!s)
+		return -EINVAL;
+
 	switch (s[0]) {
 	case 'y':
 	case 'Y':
 	case '1':
 		*res = true;
-		break;
+		return 0;
 	case 'n':
 	case 'N':
 	case '0':
 		*res = false;
-		break;
+		return 0;
+	case 'o':
+	case 'O':
+		switch (s[1]) {
+		case 'n':
+		case 'N':
+			*res = true;
+			return 0;
+		case 'f':
+		case 'F':
+			*res = false;
+			return 0;
+		default:
+			break;
+		}
 	default:
-		return -EINVAL;
+		break;
 	}
-	return 0;
+
+	return -EINVAL;
 }
 
 /**
@@ -87,12 +105,3 @@ size_t __weak strlcpy(char *dest, const char *src, size_t size)
 	}
 	return ret;
 }
-
-int prefixcmp(const char *str, const char *prefix)
-{
-	for (; ; str++, prefix++)
-		if (!*prefix)
-			return 0;
-		else if (*str != *prefix)
-			return (unsigned char)*prefix - (unsigned char)*str;
-}

+ 1 - 1
tools/lib/subcmd/help.c

@@ -171,7 +171,7 @@ static void list_commands_in_dir(struct cmdnames *cmds,
 	while ((de = readdir(dir)) != NULL) {
 		int entlen;
 
-		if (prefixcmp(de->d_name, prefix))
+		if (!strstarts(de->d_name, prefix))
 			continue;
 
 		astrcat(&buf, de->d_name);

+ 9 - 9
tools/lib/subcmd/parse-options.c

@@ -368,7 +368,7 @@ retry:
 			return 0;
 		}
 		if (!rest) {
-			if (!prefixcmp(options->long_name, "no-")) {
+			if (strstarts(options->long_name, "no-")) {
 				/*
 				 * The long name itself starts with "no-", so
 				 * accept the option without "no-" so that users
@@ -381,7 +381,7 @@ retry:
 					goto match;
 				}
 				/* Abbreviated case */
-				if (!prefixcmp(options->long_name + 3, arg)) {
+				if (strstarts(options->long_name + 3, arg)) {
 					flags |= OPT_UNSET;
 					goto is_abbreviated;
 				}
@@ -406,7 +406,7 @@ is_abbreviated:
 				continue;
 			}
 			/* negated and abbreviated very much? */
-			if (!prefixcmp("no-", arg)) {
+			if (strstarts("no-", arg)) {
 				flags |= OPT_UNSET;
 				goto is_abbreviated;
 			}
@@ -416,7 +416,7 @@ is_abbreviated:
 			flags |= OPT_UNSET;
 			rest = skip_prefix(arg + 3, options->long_name);
 			/* abbreviated and negated? */
-			if (!rest && !prefixcmp(options->long_name, arg + 3))
+			if (!rest && strstarts(options->long_name, arg + 3))
 				goto is_abbreviated;
 			if (!rest)
 				continue;
@@ -456,7 +456,7 @@ static void check_typos(const char *arg, const struct option *options)
 	if (strlen(arg) < 3)
 		return;
 
-	if (!prefixcmp(arg, "no-")) {
+	if (strstarts(arg, "no-")) {
 		fprintf(stderr, " Error: did you mean `--%s` (with two dashes ?)", arg);
 		exit(129);
 	}
@@ -464,7 +464,7 @@ static void check_typos(const char *arg, const struct option *options)
 	for (; options->type != OPTION_END; options++) {
 		if (!options->long_name)
 			continue;
-		if (!prefixcmp(options->long_name, arg)) {
+		if (strstarts(options->long_name, arg)) {
 			fprintf(stderr, " Error: did you mean `--%s` (with two dashes ?)", arg);
 			exit(129);
 		}
@@ -933,10 +933,10 @@ opt:
 		if (opts->long_name == NULL)
 			continue;
 
-		if (!prefixcmp(opts->long_name, optstr))
+		if (strstarts(opts->long_name, optstr))
 			print_option_help(opts, 0);
-		if (!prefixcmp("no-", optstr) &&
-		    !prefixcmp(opts->long_name, optstr + 3))
+		if (strstarts("no-", optstr) &&
+		    strstarts(opts->long_name, optstr + 3))
 			print_option_help(opts, 0);
 	}
 

+ 4 - 0
tools/perf/Documentation/perf-top.txt

@@ -237,6 +237,10 @@ Default is to monitor all CPUS.
 --hierarchy::
 	Enable hierarchy output.
 
+--force::
+	Don't do ownership validation.
+
+
 INTERACTIVE PROMPTING KEYS
 --------------------------
 

+ 2 - 0
tools/perf/MANIFEST

@@ -70,6 +70,7 @@ tools/include/linux/hash.h
 tools/include/linux/kernel.h
 tools/include/linux/list.h
 tools/include/linux/log2.h
+tools/include/uapi/asm-generic/fcntl.h
 tools/include/uapi/asm-generic/mman-common.h
 tools/include/uapi/asm-generic/mman.h
 tools/include/uapi/linux/bpf.h
@@ -78,6 +79,7 @@ tools/include/uapi/linux/fcntl.h
 tools/include/uapi/linux/hw_breakpoint.h
 tools/include/uapi/linux/mman.h
 tools/include/uapi/linux/perf_event.h
+tools/include/uapi/linux/sched.h
 tools/include/uapi/linux/stat.h
 tools/include/linux/poison.h
 tools/include/linux/rbtree.h

+ 8 - 2
tools/perf/Makefile.perf

@@ -33,6 +33,11 @@ include ../scripts/utilities.mak
 #
 # Define EXTRA_CFLAGS=-m64 or EXTRA_CFLAGS=-m32 as appropriate for cross-builds.
 #
+# Define EXCLUDE_EXTLIBS=-lmylib to exclude libmylib from the auto-generated
+# EXTLIBS.
+#
+# Define EXTRA_PERFLIBS to pass extra libraries to PERFLIBS.
+#
 # Define NO_DWARF if you do not want debug-info analysis feature at all.
 #
 # Define WERROR=0 to disable treating any warnings as errors.
@@ -352,7 +357,8 @@ ifdef ASCIIDOC8
   export ASCIIDOC8
 endif
 
-LIBS = -Wl,--whole-archive $(PERFLIBS) -Wl,--no-whole-archive -Wl,--start-group $(EXTLIBS) -Wl,--end-group
+EXTLIBS := $(call filter-out,$(EXCLUDE_EXTLIBS),$(EXTLIBS))
+LIBS = -Wl,--whole-archive $(PERFLIBS) $(EXTRA_PERFLIBS) -Wl,--no-whole-archive -Wl,--start-group $(EXTLIBS) -Wl,--end-group
 
 ifeq ($(USE_CLANG), 1)
   CLANGLIBS_LIST = AST Basic CodeGen Driver Frontend Lex Tooling Edit Sema Analysis Parse Serialization
@@ -512,7 +518,7 @@ $(LIBJVMTI_IN): FORCE
 	$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=jvmti obj=jvmti
 
 $(OUTPUT)$(LIBJVMTI): $(LIBJVMTI_IN)
-	$(QUIET_LINK)$(CC) -shared -Wl,-soname -Wl,$(LIBJVMTI) -o $@ $< -lelf -lrt
+	$(QUIET_LINK)$(CC) -shared -Wl,-soname -Wl,$(LIBJVMTI) -o $@ $<
 endif
 
 $(patsubst perf-%,%.o,$(PROGRAMS)): $(wildcard */*.h)

+ 1 - 0
tools/perf/arch/s390/util/Build

@@ -1,4 +1,5 @@
 libperf-y += header.o
+libperf-y += sym-handling.o
 libperf-y += kvm-stat.o
 
 libperf-$(CONFIG_DWARF) += dwarf-regs.o

+ 22 - 0
tools/perf/arch/s390/util/sym-handling.c

@@ -0,0 +1,22 @@
+/*
+ * Architecture specific ELF symbol handling and relocation mapping.
+ *
+ * Copyright 2017 IBM Corp.
+ * Author(s): Thomas Richter <tmricht@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ */
+
+#include "symbol.h"
+
+#ifdef HAVE_LIBELF_SUPPORT
+bool elf__needs_adjust_symbols(GElf_Ehdr ehdr)
+{
+	if (ehdr.e_type == ET_EXEC)
+		return false;
+	return ehdr.e_type == ET_REL || ehdr.e_type == ET_DYN;
+}
+
+#endif

+ 3 - 0
tools/perf/arch/x86/util/intel-pt.c

@@ -701,6 +701,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
 				perf_evsel__set_sample_bit(switch_evsel, TID);
 				perf_evsel__set_sample_bit(switch_evsel, TIME);
 				perf_evsel__set_sample_bit(switch_evsel, CPU);
+				perf_evsel__reset_sample_bit(switch_evsel, BRANCH_STACK);
 
 				opts->record_switch_events = false;
 				ptr->have_sched_switch = 3;
@@ -752,6 +753,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
 		tracking_evsel->attr.freq = 0;
 		tracking_evsel->attr.sample_period = 1;
 
+		tracking_evsel->no_aux_samples = true;
 		if (need_immediate)
 			tracking_evsel->immediate = true;
 
@@ -761,6 +763,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
 			/* And the CPU for switch events */
 			perf_evsel__set_sample_bit(tracking_evsel, CPU);
 		}
+		perf_evsel__reset_sample_bit(tracking_evsel, BRANCH_STACK);
 	}
 
 	/*

+ 2 - 2
tools/perf/builtin-annotate.c

@@ -177,14 +177,13 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel,
 	 */
 	process_branch_stack(sample->branch_stack, al, sample);
 
-	sample->period = 1;
 	sample->weight = 1;
 
 	he = hists__add_entry(hists, al, NULL, NULL, NULL, sample, true);
 	if (he == NULL)
 		return -ENOMEM;
 
-	ret = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
+	ret = hist_entry__inc_addr_samples(he, sample, evsel->idx, al->addr);
 	hists__inc_nr_samples(hists, true);
 	return ret;
 }
@@ -397,6 +396,7 @@ int cmd_annotate(int argc, const char **argv)
 			.namespaces = perf_event__process_namespaces,
 			.attr	= perf_event__process_attr,
 			.build_id = perf_event__process_build_id,
+			.tracing_data   = perf_event__process_tracing_data,
 			.feature	= perf_event__process_feature,
 			.ordered_events = true,
 			.ordering_requires_timestamps = true,

+ 2 - 1
tools/perf/builtin-config.c

@@ -13,6 +13,7 @@
 #include "util/util.h"
 #include "util/debug.h"
 #include "util/config.h"
+#include <linux/string.h>
 
 static bool use_system_config, use_user_config;
 
@@ -79,7 +80,7 @@ static int show_spec_config(struct perf_config_set *set, const char *var)
 		return -1;
 
 	perf_config_items__for_each_entry(&set->sections, section) {
-		if (prefixcmp(var, section->name) != 0)
+		if (!strstarts(var, section->name))
 			continue;
 
 		perf_config_items__for_each_entry(&section->items, item) {

+ 1 - 1
tools/perf/builtin-ftrace.c

@@ -381,7 +381,7 @@ static int perf_ftrace_config(const char *var, const char *value, void *cb)
 {
 	struct perf_ftrace *ftrace = cb;
 
-	if (prefixcmp(var, "ftrace."))
+	if (!strstarts(var, "ftrace."))
 		return 0;
 
 	if (strcmp(var, "ftrace.tracer"))

+ 3 - 3
tools/perf/builtin-help.c

@@ -90,7 +90,7 @@ static int check_emacsclient_version(void)
 	 */
 	finish_command(&ec_process);
 
-	if (prefixcmp(buffer.buf, "emacsclient")) {
+	if (!strstarts(buffer.buf, "emacsclient")) {
 		fprintf(stderr, "Failed to parse emacsclient version.\n");
 		goto out;
 	}
@@ -283,7 +283,7 @@ static int perf_help_config(const char *var, const char *value, void *cb)
 		add_man_viewer(value);
 		return 0;
 	}
-	if (!prefixcmp(var, "man."))
+	if (strstarts(var, "man."))
 		return add_man_viewer_info(var, value);
 
 	return 0;
@@ -313,7 +313,7 @@ static const char *cmd_to_page(const char *perf_cmd)
 
 	if (!perf_cmd)
 		return "perf";
-	else if (!prefixcmp(perf_cmd, "perf"))
+	else if (strstarts(perf_cmd, "perf"))
 		return perf_cmd;
 
 	return asprintf(&s, "perf-%s", perf_cmd) < 0 ? NULL : s;

+ 15 - 12
tools/perf/builtin-report.c

@@ -115,37 +115,38 @@ static int hist_iter__report_callback(struct hist_entry_iter *iter,
 	struct report *rep = arg;
 	struct hist_entry *he = iter->he;
 	struct perf_evsel *evsel = iter->evsel;
+	struct perf_sample *sample = iter->sample;
 	struct mem_info *mi;
 	struct branch_info *bi;
 
 	if (!ui__has_annotation())
 		return 0;
 
-	hist__account_cycles(iter->sample->branch_stack, al, iter->sample,
+	hist__account_cycles(sample->branch_stack, al, sample,
 			     rep->nonany_branch_mode);
 
 	if (sort__mode == SORT_MODE__BRANCH) {
 		bi = he->branch_info;
-		err = addr_map_symbol__inc_samples(&bi->from, evsel->idx);
+		err = addr_map_symbol__inc_samples(&bi->from, sample, evsel->idx);
 		if (err)
 			goto out;
 
-		err = addr_map_symbol__inc_samples(&bi->to, evsel->idx);
+		err = addr_map_symbol__inc_samples(&bi->to, sample, evsel->idx);
 
 	} else if (rep->mem_mode) {
 		mi = he->mem_info;
-		err = addr_map_symbol__inc_samples(&mi->daddr, evsel->idx);
+		err = addr_map_symbol__inc_samples(&mi->daddr, sample, evsel->idx);
 		if (err)
 			goto out;
 
-		err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
+		err = hist_entry__inc_addr_samples(he, sample, evsel->idx, al->addr);
 
 	} else if (symbol_conf.cumulate_callchain) {
 		if (single)
-			err = hist_entry__inc_addr_samples(he, evsel->idx,
+			err = hist_entry__inc_addr_samples(he, sample, evsel->idx,
 							   al->addr);
 	} else {
-		err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
+		err = hist_entry__inc_addr_samples(he, sample, evsel->idx, al->addr);
 	}
 
 out:
@@ -278,10 +279,11 @@ static int report__setup_sample_type(struct report *rep)
 				    "'perf record' without -g?\n");
 			return -EINVAL;
 		}
-		if (symbol_conf.use_callchain) {
-			ui__error("Selected -g or --branch-history but no "
-				  "callchain data. Did\n"
-				  "you call 'perf record' without -g?\n");
+		if (symbol_conf.use_callchain &&
+			!symbol_conf.show_branchflag_count) {
+			ui__error("Selected -g or --branch-history.\n"
+				  "But no callchain or branch data.\n"
+				  "Did you call 'perf record' without -g or -b?\n");
 			return -1;
 		}
 	} else if (!callchain_param.enabled &&
@@ -416,7 +418,8 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist,
 
 		hists__fprintf_nr_sample_events(hists, rep, evname, stdout);
 		hists__fprintf(hists, !quiet, 0, 0, rep->min_percent, stdout,
-			       symbol_conf.use_callchain);
+			       symbol_conf.use_callchain ||
+			       symbol_conf.show_branchflag_count);
 		fprintf(stdout, "\n\n");
 	}
 

+ 1 - 6
tools/perf/builtin-script.c

@@ -2199,16 +2199,11 @@ static struct script_desc *script_desc__findnew(const char *name)
 
 	s = script_desc__new(name);
 	if (!s)
-		goto out_delete_desc;
+		return NULL;
 
 	script_desc__add(s);
 
 	return s;
-
-out_delete_desc:
-	script_desc__delete(s);
-
-	return NULL;
 }
 
 static const char *ends_with(const char *str, const char *suffix)

+ 18 - 2
tools/perf/builtin-top.c

@@ -183,6 +183,7 @@ static void ui__warn_map_erange(struct map *map, struct symbol *sym, u64 ip)
 
 static void perf_top__record_precise_ip(struct perf_top *top,
 					struct hist_entry *he,
+					struct perf_sample *sample,
 					int counter, u64 ip)
 {
 	struct annotation *notes;
@@ -199,7 +200,7 @@ static void perf_top__record_precise_ip(struct perf_top *top,
 	if (pthread_mutex_trylock(&notes->lock))
 		return;
 
-	err = hist_entry__inc_addr_samples(he, counter, ip);
+	err = hist_entry__inc_addr_samples(he, sample, counter, ip);
 
 	pthread_mutex_unlock(&notes->lock);
 
@@ -586,6 +587,13 @@ static void *display_thread_tui(void *arg)
 		.refresh	= top->delay_secs,
 	};
 
+	/* In order to read symbols from other namespaces, perf needs to call
+	 * setns(2).  This isn't permitted if the struct_fs has multiple users.
+	 * unshare(2) the fs so that we may continue to setns into namespaces
+	 * that we're observing.
+	 */
+	unshare(CLONE_FS);
+
 	perf_top__sort_new_samples(top);
 
 	/*
@@ -627,6 +635,13 @@ static void *display_thread(void *arg)
 	struct perf_top *top = arg;
 	int delay_msecs, c;
 
+	/* In order to read symbols from other namespaces, perf needs to call
+	 * setns(2).  This isn't permitted if the struct_fs has multiple users.
+	 * unshare(2) the fs so that we may continue to setns into namespaces
+	 * that we're observing.
+	 */
+	unshare(CLONE_FS);
+
 	display_setup_sig();
 	pthread__unblock_sigwinch();
 repeat:
@@ -671,7 +686,7 @@ static int hist_iter__top_callback(struct hist_entry_iter *iter,
 	struct perf_evsel *evsel = iter->evsel;
 
 	if (perf_hpp_list.sym && single)
-		perf_top__record_precise_ip(top, he, evsel->idx, al->addr);
+		perf_top__record_precise_ip(top, he, iter->sample, evsel->idx, al->addr);
 
 	hist__account_cycles(iter->sample->branch_stack, al, iter->sample,
 		     !(top->record_opts.branch_stack & PERF_SAMPLE_BRANCH_ANY));
@@ -1205,6 +1220,7 @@ int cmd_top(int argc, const char **argv)
 		    "Show raw trace event output (do not use print fmt or plugins)"),
 	OPT_BOOLEAN(0, "hierarchy", &symbol_conf.report_hierarchy,
 		    "Show entries in a hierarchy"),
+	OPT_BOOLEAN(0, "force", &symbol_conf.force, "don't complain, do it"),
 	OPT_END()
 	};
 	const char * const top_usage[] = {

+ 98 - 42
tools/perf/builtin-trace.c

@@ -604,6 +604,7 @@ static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
 struct syscall_arg_fmt {
 	size_t	   (*scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
 	void	   *parm;
+	const char *name;
 	bool	   show_zero;
 };
 
@@ -611,6 +612,7 @@ static struct syscall_fmt {
 	const char *name;
 	const char *alias;
 	struct syscall_arg_fmt arg[6];
+	u8	   nr_args;
 	bool	   errpid;
 	bool	   timeout;
 	bool	   hexret;
@@ -624,7 +626,12 @@ static struct syscall_fmt {
 	  .arg = { [0] = { .scnprintf = SCA_HEX, /* brk */ }, }, },
 	{ .name     = "clock_gettime",
 	  .arg = { [0] = STRARRAY(clk_id, clockid), }, },
-	{ .name	    = "clone",	    .errpid = true, },
+	{ .name	    = "clone",	    .errpid = true, .nr_args = 5,
+	  .arg = { [0] = { .name = "flags",	    .scnprintf = SCA_CLONE_FLAGS, },
+		   [1] = { .name = "child_stack",   .scnprintf = SCA_HEX, },
+		   [2] = { .name = "parent_tidptr", .scnprintf = SCA_HEX, },
+		   [3] = { .name = "child_tidptr",  .scnprintf = SCA_HEX, },
+		   [4] = { .name = "tls",	    .scnprintf = SCA_HEX, }, }, },
 	{ .name	    = "close",
 	  .arg = { [0] = { .scnprintf = SCA_CLOSE_FD, /* fd */ }, }, },
 	{ .name	    = "epoll_ctl",
@@ -1165,22 +1172,34 @@ static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
 	return err;
 }
 
-static int syscall__set_arg_fmts(struct syscall *sc)
+static int syscall__alloc_arg_fmts(struct syscall *sc, int nr_args)
 {
-	struct format_field *field;
-	int idx = 0, len;
+	int idx;
+
+	if (nr_args == 6 && sc->fmt && sc->fmt->nr_args != 0)
+		nr_args = sc->fmt->nr_args;
 
-	sc->arg_fmt = calloc(sc->nr_args, sizeof(*sc->arg_fmt));
+	sc->arg_fmt = calloc(nr_args, sizeof(*sc->arg_fmt));
 	if (sc->arg_fmt == NULL)
 		return -1;
 
-	for (field = sc->args; field; field = field->next, ++idx) {
-		if (sc->fmt) {
+	for (idx = 0; idx < nr_args; ++idx) {
+		if (sc->fmt)
 			sc->arg_fmt[idx] = sc->fmt->arg[idx];
+	}
 
-			if (sc->fmt->arg[idx].scnprintf)
-				continue;
-		}
+	sc->nr_args = nr_args;
+	return 0;
+}
+
+static int syscall__set_arg_fmts(struct syscall *sc)
+{
+	struct format_field *field;
+	int idx = 0, len;
+
+	for (field = sc->args; field; field = field->next, ++idx) {
+		if (sc->fmt && sc->fmt->arg[idx].scnprintf)
+			continue;
 
 		if (strcmp(field->type, "const char *") == 0 &&
 			 (strcmp(field->name, "filename") == 0 ||
@@ -1251,11 +1270,13 @@ static int trace__read_syscall_info(struct trace *trace, int id)
 		sc->tp_format = trace_event__tp_format("syscalls", tp_name);
 	}
 
+	if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ? 6 : sc->tp_format->format.nr_fields))
+		return -1;
+
 	if (IS_ERR(sc->tp_format))
 		return -1;
 
 	sc->args = sc->tp_format->format.fields;
-	sc->nr_args = sc->tp_format->format.nr_fields;
 	/*
 	 * We need to check and discard the first variable '__syscall_nr'
 	 * or 'nr' that mean the syscall number. It is needless here.
@@ -1325,18 +1346,34 @@ out:
  * variable to read it. Most notably this avoids extended load instructions
  * on unaligned addresses
  */
-static unsigned long __syscall_arg__val(unsigned char *args, u8 idx)
+unsigned long syscall_arg__val(struct syscall_arg *arg, u8 idx)
 {
 	unsigned long val;
-	unsigned char *p = args + sizeof(unsigned long) * idx;
+	unsigned char *p = arg->args + sizeof(unsigned long) * idx;
 
 	memcpy(&val, p, sizeof(val));
 	return val;
 }
 
-unsigned long syscall_arg__val(struct syscall_arg *arg, u8 idx)
+static size_t syscall__scnprintf_name(struct syscall *sc, char *bf, size_t size,
+				      struct syscall_arg *arg)
+{
+	if (sc->arg_fmt && sc->arg_fmt[arg->idx].name)
+		return scnprintf(bf, size, "%s: ", sc->arg_fmt[arg->idx].name);
+
+	return scnprintf(bf, size, "arg%d: ", arg->idx);
+}
+
+static size_t syscall__scnprintf_val(struct syscall *sc, char *bf, size_t size,
+				     struct syscall_arg *arg, unsigned long val)
 {
-	return __syscall_arg__val(arg->args, idx);
+	if (sc->arg_fmt && sc->arg_fmt[arg->idx].scnprintf) {
+		arg->val = val;
+		if (sc->arg_fmt[arg->idx].parm)
+			arg->parm = sc->arg_fmt[arg->idx].parm;
+		return sc->arg_fmt[arg->idx].scnprintf(bf, size, arg);
+	}
+	return scnprintf(bf, size, "%ld", val);
 }
 
 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
@@ -1345,6 +1382,14 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
 {
 	size_t printed = 0;
 	unsigned long val;
+	u8 bit = 1;
+	struct syscall_arg arg = {
+		.args	= args,
+		.idx	= 0,
+		.mask	= 0,
+		.trace  = trace,
+		.thread = thread,
+	};
 	struct thread_trace *ttrace = thread__priv(thread);
 
 	/*
@@ -1356,14 +1401,6 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
 
 	if (sc->args != NULL) {
 		struct format_field *field;
-		u8 bit = 1;
-		struct syscall_arg arg = {
-			.args	= args,
-			.idx	= 0,
-			.mask	= 0,
-			.trace  = trace,
-			.thread = thread,
-		};
 
 		for (field = sc->args; field;
 		     field = field->next, ++arg.idx, bit <<= 1) {
@@ -1387,15 +1424,7 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
 
 			printed += scnprintf(bf + printed, size - printed,
 					     "%s%s: ", printed ? ", " : "", field->name);
-			if (sc->arg_fmt && sc->arg_fmt[arg.idx].scnprintf) {
-				arg.val = val;
-				if (sc->arg_fmt[arg.idx].parm)
-					arg.parm = sc->arg_fmt[arg.idx].parm;
-				printed += sc->arg_fmt[arg.idx].scnprintf(bf + printed, size - printed, &arg);
-			} else {
-				printed += scnprintf(bf + printed, size - printed,
-						     "%ld", val);
-			}
+			printed += syscall__scnprintf_val(sc, bf + printed, size - printed, &arg, val);
 		}
 	} else if (IS_ERR(sc->tp_format)) {
 		/*
@@ -1403,14 +1432,17 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
 		 * may end up not having any args, like with gettid(), so only
 		 * print the raw args when we didn't manage to read it.
 		 */
-		int i = 0;
-
-		while (i < 6) {
-			val = __syscall_arg__val(args, i);
-			printed += scnprintf(bf + printed, size - printed,
-					     "%sarg%d: %ld",
-					     printed ? ", " : "", i, val);
-			++i;
+		while (arg.idx < sc->nr_args) {
+			if (arg.mask & bit)
+				goto next_arg;
+			val = syscall_arg__val(&arg, arg.idx);
+			if (printed)
+				printed += scnprintf(bf + printed, size - printed, ", ");
+			printed += syscall__scnprintf_name(sc, bf + printed, size - printed, &arg);
+			printed += syscall__scnprintf_val(sc, bf + printed, size - printed, &arg, val);
+next_arg:
+			++arg.idx;
+			bit <<= 1;
 		}
 	}
 
@@ -1660,7 +1692,7 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
 		if (ret < 0)
 			goto errno_print;
 signed_print:
-		fprintf(trace->output, ") %ld", ret);
+		fprintf(trace->output, ") = %ld", ret);
 	} else if (ret < 0) {
 errno_print: {
 		char bf[STRERR_BUFSIZE];
@@ -2207,6 +2239,30 @@ out_enomem:
 	goto out;
 }
 
+static int trace__set_filter_loop_pids(struct trace *trace)
+{
+	unsigned int nr = 1;
+	pid_t pids[32] = {
+		getpid(),
+	};
+	struct thread *thread = machine__find_thread(trace->host, pids[0], pids[0]);
+
+	while (thread && nr < ARRAY_SIZE(pids)) {
+		struct thread *parent = machine__find_thread(trace->host, thread->ppid, thread->ppid);
+
+		if (parent == NULL)
+			break;
+
+		if (!strcmp(thread__comm_str(parent), "sshd")) {
+			pids[nr++] = parent->tid;
+			break;
+		}
+		thread = parent;
+	}
+
+	return perf_evlist__set_filter_pids(trace->evlist, nr, pids);
+}
+
 static int trace__run(struct trace *trace, int argc, const char **argv)
 {
 	struct perf_evlist *evlist = trace->evlist;
@@ -2330,7 +2386,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 	if (trace->filter_pids.nr > 0)
 		err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
 	else if (thread_map__pid(evlist->threads, 0) == -1)
-		err = perf_evlist__set_filter_pid(evlist, getpid());
+		err = trace__set_filter_loop_pids(trace);
 
 	if (err < 0)
 		goto out_error_mem;

+ 1 - 0
tools/perf/check-headers.sh

@@ -3,6 +3,7 @@
 HEADERS='
 include/uapi/linux/fcntl.h
 include/uapi/linux/perf_event.h
+include/uapi/linux/sched.h
 include/uapi/linux/stat.h
 include/linux/hash.h
 include/uapi/linux/hw_breakpoint.h

+ 8 - 8
tools/perf/perf.c

@@ -89,7 +89,7 @@ struct pager_config {
 static int pager_command_config(const char *var, const char *value, void *data)
 {
 	struct pager_config *c = data;
-	if (!prefixcmp(var, "pager.") && !strcmp(var + 6, c->cmd))
+	if (strstarts(var, "pager.") && !strcmp(var + 6, c->cmd))
 		c->val = perf_config_bool(var, value);
 	return 0;
 }
@@ -108,9 +108,9 @@ static int check_pager_config(const char *cmd)
 static int browser_command_config(const char *var, const char *value, void *data)
 {
 	struct pager_config *c = data;
-	if (!prefixcmp(var, "tui.") && !strcmp(var + 4, c->cmd))
+	if (strstarts(var, "tui.") && !strcmp(var + 4, c->cmd))
 		c->val = perf_config_bool(var, value);
-	if (!prefixcmp(var, "gtk.") && !strcmp(var + 4, c->cmd))
+	if (strstarts(var, "gtk.") && !strcmp(var + 4, c->cmd))
 		c->val = perf_config_bool(var, value) ? 2 : 0;
 	return 0;
 }
@@ -192,7 +192,7 @@ static int handle_options(const char ***argv, int *argc, int *envchanged)
 		/*
 		 * Check remaining flags.
 		 */
-		if (!prefixcmp(cmd, CMD_EXEC_PATH)) {
+		if (strstarts(cmd, CMD_EXEC_PATH)) {
 			cmd += strlen(CMD_EXEC_PATH);
 			if (*cmd == '=')
 				set_argv_exec_path(cmd + 1);
@@ -229,7 +229,7 @@ static int handle_options(const char ***argv, int *argc, int *envchanged)
 				*envchanged = 1;
 			(*argv)++;
 			(*argc)--;
-		} else if (!prefixcmp(cmd, CMD_DEBUGFS_DIR)) {
+		} else if (strstarts(cmd, CMD_DEBUGFS_DIR)) {
 			tracing_path_set(cmd + strlen(CMD_DEBUGFS_DIR));
 			fprintf(stderr, "dir: %s\n", tracing_path);
 			if (envchanged)
@@ -470,14 +470,14 @@ int main(int argc, const char **argv)
 	 * So we just directly call the internal command handler, and
 	 * die if that one cannot handle it.
 	 */
-	if (!prefixcmp(cmd, "perf-")) {
+	if (strstarts(cmd, "perf-")) {
 		cmd += 5;
 		argv[0] = cmd;
 		handle_internal_command(argc, argv);
 		fprintf(stderr, "cannot handle %s internally", cmd);
 		goto out;
 	}
-	if (!prefixcmp(cmd, "trace")) {
+	if (strstarts(cmd, "trace")) {
 #ifdef HAVE_LIBAUDIT_SUPPORT
 		setup_path();
 		argv[0] = "trace";
@@ -495,7 +495,7 @@ int main(int argc, const char **argv)
 	commit_pager_choice();
 
 	if (argc > 0) {
-		if (!prefixcmp(argv[0], "--"))
+		if (strstarts(argv[0], "--"))
 			argv[0] += 2;
 	} else {
 		/* The user didn't specify a command; give them help */

+ 0 - 4
tools/perf/pmu-events/README

@@ -85,10 +85,6 @@ users to specify events by their name:
 
 where 'pm_1plus_ppc_cmpl' is a Power8 PMU event.
 
-In case of errors when processing files in the tools/perf/pmu-events/arch
-directory, 'jevents' tries to create an empty mapping file to allow the perf
-build to succeed even if the PMU event aliases cannot be used.
-
 However some errors in processing may cause the perf build to fail.
 
 Mapfile format

+ 14 - 7
tools/perf/pmu-events/jevents.c

@@ -822,10 +822,6 @@ static int process_one_file(const char *fpath, const struct stat *sb,
  * PMU event tables (see struct pmu_events_map).
  *
  * Write out the PMU events tables and the mapping table to pmu-event.c.
- *
- * If unable to process the JSON or arch files, create an empty mapping
- * table so we can continue to build/use  perf even if we cannot use the
- * PMU event aliases.
  */
 int main(int argc, char *argv[])
 {
@@ -836,6 +832,7 @@ int main(int argc, char *argv[])
 	const char *arch;
 	const char *output_file;
 	const char *start_dirname;
+	struct stat stbuf;
 
 	prog = basename(argv[0]);
 	if (argc < 4) {
@@ -857,11 +854,17 @@ int main(int argc, char *argv[])
 		return 2;
 	}
 
+	sprintf(ldirname, "%s/%s", start_dirname, arch);
+
+	/* If architecture does not have any event lists, bail out */
+	if (stat(ldirname, &stbuf) < 0) {
+		pr_info("%s: Arch %s has no PMU event lists\n", prog, arch);
+		goto empty_map;
+	}
+
 	/* Include pmu-events.h first */
 	fprintf(eventsfp, "#include \"../../pmu-events/pmu-events.h\"\n");
 
-	sprintf(ldirname, "%s/%s", start_dirname, arch);
-
 	/*
 	 * The mapfile allows multiple CPUids to point to the same JSON file,
 	 * so, not sure if there is a need for symlinks within the pmu-events
@@ -878,6 +881,9 @@ int main(int argc, char *argv[])
 	if (rc && verbose) {
 		pr_info("%s: Error walking file tree %s\n", prog, ldirname);
 		goto empty_map;
+	} else if (rc < 0) {
+		/* Make build fail */
+		return 1;
 	} else if (rc) {
 		goto empty_map;
 	}
@@ -892,7 +898,8 @@ int main(int argc, char *argv[])
 
 	if (process_mapfile(eventsfp, mapfile)) {
 		pr_info("%s: Error processing mapfile %s\n", prog, mapfile);
-		goto empty_map;
+		/* Make build fail */
+		return 1;
 	}
 
 	return 0;

+ 1 - 0
tools/perf/trace/beauty/Build

@@ -1,2 +1,3 @@
+libperf-y += clone.o
 libperf-y += fcntl.o
 libperf-y += statx.o

+ 3 - 0
tools/perf/trace/beauty/beauty.h

@@ -66,6 +66,9 @@ size_t syscall_arg__scnprintf_long(char *bf, size_t size, struct syscall_arg *ar
 size_t syscall_arg__scnprintf_pid(char *bf, size_t size, struct syscall_arg *arg);
 #define SCA_PID syscall_arg__scnprintf_pid
 
+size_t syscall_arg__scnprintf_clone_flags(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_CLONE_FLAGS syscall_arg__scnprintf_clone_flags
+
 size_t syscall_arg__scnprintf_fcntl_cmd(char *bf, size_t size, struct syscall_arg *arg);
 #define SCA_FCNTL_CMD syscall_arg__scnprintf_fcntl_cmd
 

+ 75 - 0
tools/perf/trace/beauty/clone.c

@@ -0,0 +1,75 @@
+/*
+ * trace/beauty/clone.c
+ *
+ *  Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Released under the GPL v2. (and only v2, not any later version)
+ */
+
+#include "trace/beauty/beauty.h"
+#include <linux/kernel.h>
+#include <sys/types.h>
+#include <uapi/linux/sched.h>
+
+/*
+ * Format the CLONE_* bits set in 'flags' into 'bf' (capacity 'size') as a
+ * '|'-separated list of flag names, written without the CLONE_ prefix.
+ *
+ * Each recognised bit is cleared after it is printed; whatever is left is
+ * appended as one hexadecimal value.
+ *
+ * Returns the sum of the values returned by the scnprintf() calls.
+ */
+static size_t clone__scnprintf_flags(unsigned long flags, char *bf, size_t size)
+{
+	int printed = 0;
+
+	/* Print one flag name and clear its bit so only unnamed bits remain. */
+#define	P_FLAG(n) \
+	if (flags & CLONE_##n) { \
+		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+		flags &= ~CLONE_##n; \
+	}
+
+	P_FLAG(VM);
+	P_FLAG(FS);
+	P_FLAG(FILES);
+	P_FLAG(SIGHAND);
+	P_FLAG(PTRACE);
+	P_FLAG(VFORK);
+	P_FLAG(PARENT);
+	P_FLAG(THREAD);
+	P_FLAG(NEWNS);
+	P_FLAG(SYSVSEM);
+	P_FLAG(SETTLS);
+	P_FLAG(PARENT_SETTID);
+	P_FLAG(CHILD_CLEARTID);
+	P_FLAG(DETACHED);
+	P_FLAG(UNTRACED);
+	P_FLAG(CHILD_SETTID);
+	P_FLAG(NEWCGROUP);
+	P_FLAG(NEWUTS);
+	P_FLAG(NEWIPC);
+	P_FLAG(NEWUSER);
+	P_FLAG(NEWPID);
+	P_FLAG(NEWNET);
+	P_FLAG(IO);
+#undef P_FLAG
+
+	/* 'flags' is unsigned long, so the conversion needs the 'l' length modifier. */
+	if (flags)
+		printed += scnprintf(bf + printed, size - printed, "%s%#lx", printed ? "|" : "", flags);
+
+	return printed;
+}
+
+/*
+ * Beautifier for the clone 'flags' argument: prints the flag names via
+ * clone__scnprintf_flags() and, depending on which flags are absent, sets
+ * bits in arg->mask for the tid-pointer and tls arguments.
+ *
+ * NOTE(review): presumably a bit set in arg->mask makes the caller skip
+ * printing the argument at that position -- confirm against the code that
+ * consumes arg->mask.
+ */
+size_t syscall_arg__scnprintf_clone_flags(char *bf, size_t size, struct syscall_arg *arg)
+{
+	unsigned long flags = arg->val;
+	/* One bit per clone argument; SCC_FLAGS and SCC_CHILD_STACK are not used below. */
+	enum syscall_clone_args {
+		SCC_FLAGS	  = (1 << 0),
+		SCC_CHILD_STACK	  = (1 << 1),
+		SCC_PARENT_TIDPTR = (1 << 2),
+		SCC_CHILD_TIDPTR  = (1 << 3),
+		SCC_TLS		  = (1 << 4),
+	};
+	/* Without CLONE_PARENT_SETTID the parent tid pointer is masked. */
+	if (!(flags & CLONE_PARENT_SETTID))
+		arg->mask |= SCC_PARENT_TIDPTR;
+
+	/* The child tid pointer is masked unless one of the CHILD_*TID flags is set. */
+	if (!(flags & (CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID)))
+		arg->mask |= SCC_CHILD_TIDPTR;
+
+	/* Without CLONE_SETTLS the tls argument is masked. */
+	if (!(flags & CLONE_SETTLS))
+		arg->mask |= SCC_TLS;
+
+	return clone__scnprintf_flags(flags, bf, size);
+}

+ 3 - 0
tools/perf/trace/beauty/mmap.c

@@ -34,6 +34,9 @@ static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
 {
 	int printed = 0, flags = arg->val;
 
+	if (flags & MAP_ANONYMOUS)
+		arg->mask |= (1 << 4) | (1 << 5); /* Mask args 4 ('fd') and 5 ('offset'), counted from 0: ignored for MAP_ANONYMOUS */
+
 #define	P_MMAP_FLAG(n) \
 	if (flags & MAP_##n) { \
 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \

+ 2 - 1
tools/perf/ui/browser.c

@@ -8,6 +8,7 @@
 #include <linux/compiler.h>
 #include <linux/list.h>
 #include <linux/rbtree.h>
+#include <linux/string.h>
 #include <stdlib.h>
 #include <sys/ttydefaults.h>
 #include "browser.h"
@@ -563,7 +564,7 @@ static int ui_browser__color_config(const char *var, const char *value,
 	int i;
 
 	/* same dir for all commands */
-	if (prefixcmp(var, "colors.") != 0)
+	if (!strstarts(var, "colors."))
 		return 0;
 
 	for (i = 0; ui_browser__colorsets[i].name != NULL; ++i) {

+ 5 - 4
tools/perf/ui/browsers/annotate.c

@@ -13,6 +13,7 @@
 #include <inttypes.h>
 #include <pthread.h>
 #include <linux/kernel.h>
+#include <linux/string.h>
 #include <sys/ttydefaults.h>
 
 struct disasm_line_samples {
@@ -449,14 +450,14 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser,
 		next = disasm__get_next_ip_line(&notes->src->source, pos);
 
 		for (i = 0; i < browser->nr_events; i++) {
-			u64 nr_samples;
+			struct sym_hist_entry sample;
 
 			bpos->samples[i].percent = disasm__calc_percent(notes,
 						evsel->idx + i,
 						pos->offset,
 						next ? next->offset : len,
-						&path, &nr_samples);
-			bpos->samples[i].nr = nr_samples;
+						&path, &sample);
+			bpos->samples[i].nr = sample.nr_samples;
 
 			if (max_percent < bpos->samples[i].percent)
 				max_percent = bpos->samples[i].percent;
@@ -1198,7 +1199,7 @@ static int annotate__config(const char *var, const char *value,
 	struct annotate_config *cfg;
 	const char *name;
 
-	if (prefixcmp(var, "annotate.") != 0)
+	if (!strstarts(var, "annotate."))
 		return 0;
 
 	name = var + 9;

+ 2 - 2
tools/perf/ui/gtk/annotate.c

@@ -34,10 +34,10 @@ static int perf_gtk__get_percent(char *buf, size_t size, struct symbol *sym,
 		return 0;
 
 	symhist = annotation__histogram(symbol__annotation(sym), evidx);
-	if (!symbol_conf.event_group && !symhist->addr[dl->offset])
+	if (!symbol_conf.event_group && !symhist->addr[dl->offset].nr_samples)
 		return 0;
 
-	percent = 100.0 * symhist->addr[dl->offset] / symhist->sum;
+	percent = 100.0 * symhist->addr[dl->offset].nr_samples / symhist->nr_samples;
 
 	markup = perf_gtk__get_percent_color(percent);
 	if (markup)

+ 2 - 1
tools/perf/ui/stdio/hist.c

@@ -1,4 +1,5 @@
 #include <stdio.h>
+#include <linux/string.h>
 
 #include "../../util/util.h"
 #include "../../util/hist.h"
@@ -292,7 +293,7 @@ static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root,
 			 * displayed twice.
 			 */
 			if (!i++ && field_order == NULL &&
-			    sort_order && !prefixcmp(sort_order, "sym"))
+			    sort_order && strstarts(sort_order, "sym"))
 				continue;
 
 			if (!printed) {

+ 56 - 43
tools/perf/util/annotate.c

@@ -610,10 +610,10 @@ int symbol__alloc_hist(struct symbol *sym)
 	size_t sizeof_sym_hist;
 
 	/* Check for overflow when calculating sizeof_sym_hist */
-	if (size > (SIZE_MAX - sizeof(struct sym_hist)) / sizeof(u64))
+	if (size > (SIZE_MAX - sizeof(struct sym_hist)) / sizeof(struct sym_hist_entry))
 		return -1;
 
-	sizeof_sym_hist = (sizeof(struct sym_hist) + size * sizeof(u64));
+	sizeof_sym_hist = (sizeof(struct sym_hist) + size * sizeof(struct sym_hist_entry));
 
 	/* Check for overflow in zalloc argument */
 	if (sizeof_sym_hist > (SIZE_MAX - sizeof(*notes->src))
@@ -697,7 +697,8 @@ static int __symbol__account_cycles(struct annotation *notes,
 }
 
 static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map,
-				      struct annotation *notes, int evidx, u64 addr)
+				      struct annotation *notes, int evidx, u64 addr,
+				      struct perf_sample *sample)
 {
 	unsigned offset;
 	struct sym_hist *h;
@@ -713,12 +714,15 @@ static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map,
 
 	offset = addr - sym->start;
 	h = annotation__histogram(notes, evidx);
-	h->sum++;
-	h->addr[offset]++;
+	h->nr_samples++;
+	h->addr[offset].nr_samples++;
+	h->period += sample->period;
+	h->addr[offset].period += sample->period;
 
 	pr_debug3("%#" PRIx64 " %s: period++ [addr: %#" PRIx64 ", %#" PRIx64
-		  ", evidx=%d] => %" PRIu64 "\n", sym->start, sym->name,
-		  addr, addr - sym->start, evidx, h->addr[offset]);
+		  ", evidx=%d] => nr_samples: %" PRIu64 ", period: %" PRIu64 "\n",
+		  sym->start, sym->name, addr, addr - sym->start, evidx,
+		  h->addr[offset].nr_samples, h->addr[offset].period);
 	return 0;
 }
 
@@ -738,7 +742,8 @@ static struct annotation *symbol__get_annotation(struct symbol *sym, bool cycles
 }
 
 static int symbol__inc_addr_samples(struct symbol *sym, struct map *map,
-				    int evidx, u64 addr)
+				    int evidx, u64 addr,
+				    struct perf_sample *sample)
 {
 	struct annotation *notes;
 
@@ -747,7 +752,7 @@ static int symbol__inc_addr_samples(struct symbol *sym, struct map *map,
 	notes = symbol__get_annotation(sym, false);
 	if (notes == NULL)
 		return -ENOMEM;
-	return __symbol__inc_addr_samples(sym, map, notes, evidx, addr);
+	return __symbol__inc_addr_samples(sym, map, notes, evidx, addr, sample);
 }
 
 static int symbol__account_cycles(u64 addr, u64 start,
@@ -811,14 +816,16 @@ int addr_map_symbol__account_cycles(struct addr_map_symbol *ams,
 	return err;
 }
 
-int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, int evidx)
+int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, struct perf_sample *sample,
+				 int evidx)
 {
-	return symbol__inc_addr_samples(ams->sym, ams->map, evidx, ams->al_addr);
+	return symbol__inc_addr_samples(ams->sym, ams->map, evidx, ams->al_addr, sample);
 }
 
-int hist_entry__inc_addr_samples(struct hist_entry *he, int evidx, u64 ip)
+int hist_entry__inc_addr_samples(struct hist_entry *he, struct perf_sample *sample,
+				 int evidx, u64 ip)
 {
-	return symbol__inc_addr_samples(he->ms.sym, he->ms.map, evidx, ip);
+	return symbol__inc_addr_samples(he->ms.sym, he->ms.map, evidx, ip, sample);
 }
 
 static void disasm_line__init_ins(struct disasm_line *dl, struct arch *arch, struct map *map)
@@ -928,11 +935,12 @@ struct disasm_line *disasm__get_next_ip_line(struct list_head *head, struct disa
 }
 
 double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset,
-			    s64 end, const char **path, u64 *nr_samples)
+			    s64 end, const char **path, struct sym_hist_entry *sample)
 {
 	struct source_line *src_line = notes->src->lines;
 	double percent = 0.0;
-	*nr_samples = 0;
+
+	sample->nr_samples = sample->period = 0;
 
 	if (src_line) {
 		size_t sizeof_src_line = sizeof(*src_line) +
@@ -946,19 +954,23 @@ double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset,
 				*path = src_line->path;
 
 			percent += src_line->samples[evidx].percent;
-			*nr_samples += src_line->samples[evidx].nr;
+			sample->nr_samples += src_line->samples[evidx].nr;
 			offset++;
 		}
 	} else {
 		struct sym_hist *h = annotation__histogram(notes, evidx);
 		unsigned int hits = 0;
+		u64 period = 0;
 
-		while (offset < end)
-			hits += h->addr[offset++];
+		while (offset < end) {
+			/* Read both counters from the same slot, then advance exactly once. */
+			hits += h->addr[offset].nr_samples;
+			period += h->addr[offset].period;
+			++offset;
+		}
 
-		if (h->sum) {
-			*nr_samples = hits;
-			percent = 100.0 * hits / h->sum;
+		if (h->nr_samples) {
+			sample->period	   = period;
+			sample->nr_samples = hits;
+			percent = 100.0 * hits / h->nr_samples;
 		}
 	}
 
@@ -1057,10 +1069,10 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st
 
 	if (dl->offset != -1) {
 		const char *path = NULL;
-		u64 nr_samples;
 		double percent, max_percent = 0.0;
 		double *ppercents = &percent;
-		u64 *psamples = &nr_samples;
+		struct sym_hist_entry sample;
+		struct sym_hist_entry *psamples = &sample;
 		int i, nr_percent = 1;
 		const char *color;
 		struct annotation *notes = symbol__annotation(sym);
@@ -1074,7 +1086,7 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st
 		if (perf_evsel__is_group_event(evsel)) {
 			nr_percent = evsel->nr_members;
 			ppercents = calloc(nr_percent, sizeof(double));
-			psamples = calloc(nr_percent, sizeof(u64));
+			psamples = calloc(nr_percent, sizeof(struct sym_hist_entry));
 			if (ppercents == NULL || psamples == NULL) {
 				return -1;
 			}
@@ -1085,10 +1097,10 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st
 					notes->src->lines ? i : evsel->idx + i,
 					offset,
 					next ? next->offset : (s64) len,
-					&path, &nr_samples);
+					&path, &sample);
 
 			ppercents[i] = percent;
-			psamples[i] = nr_samples;
+			psamples[i] = sample;
 			if (percent > max_percent)
 				max_percent = percent;
 		}
@@ -1126,12 +1138,12 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st
 
 		for (i = 0; i < nr_percent; i++) {
 			percent = ppercents[i];
-			nr_samples = psamples[i];
+			sample = psamples[i];
 			color = get_percent_color(percent);
 
 			if (symbol_conf.show_total_period)
 				color_fprintf(stdout, color, " %7" PRIu64,
-					      nr_samples);
+					      sample.period);
 			else
 				color_fprintf(stdout, color, " %7.2f", percent);
 		}
@@ -1147,7 +1159,7 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st
 		if (ppercents != &percent)
 			free(ppercents);
 
-		if (psamples != &nr_samples)
+		if (psamples != &sample)
 			free(psamples);
 
 	} else if (max_lines && printed >= max_lines)
@@ -1671,19 +1683,19 @@ static int symbol__get_source_line(struct symbol *sym, struct map *map,
 	struct sym_hist *h = annotation__histogram(notes, evidx);
 	struct rb_root tmp_root = RB_ROOT;
 	int nr_pcnt = 1;
-	u64 h_sum = h->sum;
+	u64 nr_samples = h->nr_samples;
 	size_t sizeof_src_line = sizeof(struct source_line);
 
 	if (perf_evsel__is_group_event(evsel)) {
 		for (i = 1; i < evsel->nr_members; i++) {
 			h = annotation__histogram(notes, evidx + i);
-			h_sum += h->sum;
+			nr_samples += h->nr_samples;
 		}
 		nr_pcnt = evsel->nr_members;
 		sizeof_src_line += (nr_pcnt - 1) * sizeof(src_line->samples);
 	}
 
-	if (!h_sum)
+	if (!nr_samples)
 		return 0;
 
 	src_line = notes->src->lines = calloc(len, sizeof_src_line);
@@ -1693,7 +1705,7 @@ static int symbol__get_source_line(struct symbol *sym, struct map *map,
 	start = map__rip_2objdump(map, sym->start);
 
 	for (i = 0; i < len; i++) {
-		u64 offset, nr_samples;
+		u64 offset;
 		double percent_max = 0.0;
 
 		src_line->nr_pcnt = nr_pcnt;
@@ -1702,9 +1714,9 @@ static int symbol__get_source_line(struct symbol *sym, struct map *map,
 			double percent = 0.0;
 
 			h = annotation__histogram(notes, evidx + k);
-			nr_samples = h->addr[i];
-			if (h->sum)
-				percent = 100.0 * nr_samples / h->sum;
+			nr_samples = h->addr[i].nr_samples;
+			if (h->nr_samples)
+				percent = 100.0 * nr_samples / h->nr_samples;
 
 			if (percent > percent_max)
 				percent_max = percent;
@@ -1773,10 +1785,10 @@ static void symbol__annotate_hits(struct symbol *sym, struct perf_evsel *evsel)
 	u64 len = symbol__size(sym), offset;
 
 	for (offset = 0; offset < len; ++offset)
-		if (h->addr[offset] != 0)
+		if (h->addr[offset].nr_samples != 0)
 			printf("%*" PRIx64 ": %" PRIu64 "\n", BITS_PER_LONG / 2,
-			       sym->start + offset, h->addr[offset]);
-	printf("%*s: %" PRIu64 "\n", BITS_PER_LONG / 2, "h->sum", h->sum);
+			       sym->start + offset, h->addr[offset].nr_samples);
+	printf("%*s: %" PRIu64 "\n", BITS_PER_LONG / 2, "h->nr_samples", h->nr_samples);
 }
 
 int symbol__annotate_printf(struct symbol *sym, struct map *map,
@@ -1812,7 +1824,8 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map,
 		width *= evsel->nr_members;
 
 	graph_dotted_len = printf(" %-*.*s|	Source code & Disassembly of %s for %s (%" PRIu64 " samples)\n",
-	       width, width, "Percent", d_filename, evsel_name, h->sum);
+				  width, width, symbol_conf.show_total_period ? "Event count" : "Percent",
+				  d_filename, evsel_name, h->nr_samples);
 
 	printf("%-*.*s----\n",
 	       graph_dotted_len, graph_dotted_len, graph_dotted_line);
@@ -1876,10 +1889,10 @@ void symbol__annotate_decay_histogram(struct symbol *sym, int evidx)
 	struct sym_hist *h = annotation__histogram(notes, evidx);
 	int len = symbol__size(sym), offset;
 
-	h->sum = 0;
+	h->nr_samples = 0;
 	for (offset = 0; offset < len; ++offset) {
-		h->addr[offset] = h->addr[offset] * 7 / 8;
-		h->sum += h->addr[offset];
+		h->addr[offset].nr_samples = h->addr[offset].nr_samples * 7 / 8;
+		h->nr_samples += h->addr[offset].nr_samples;
 	}
 }
 

+ 13 - 5
tools/perf/util/annotate.h

@@ -74,16 +74,22 @@ static inline bool disasm_line__has_offset(const struct disasm_line *dl)
 	return dl->ops.target.offset_avail;
 }
 
+/* Per-offset histogram slot; struct sym_hist holds an array of these in addr[]. */
+struct sym_hist_entry {
+	u64		nr_samples;	/* incremented once per sample accounted to this offset */
+	u64		period;		/* running sum of sample->period for this offset */
+};
+
 void disasm_line__free(struct disasm_line *dl);
 struct disasm_line *disasm__get_next_ip_line(struct list_head *head, struct disasm_line *pos);
 int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw);
 size_t disasm__fprintf(struct list_head *head, FILE *fp);
 double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset,
-			    s64 end, const char **path, u64 *nr_samples);
+			    s64 end, const char **path, struct sym_hist_entry *sample);
 
 struct sym_hist {
-	u64		sum;
-	u64		addr[0];
+	u64		      nr_samples;
+	u64		      period;
+	struct sym_hist_entry addr[0];
 };
 
 struct cyc_hist {
@@ -149,13 +155,15 @@ static inline struct annotation *symbol__annotation(struct symbol *sym)
 	return (void *)sym - symbol_conf.priv_size;
 }
 
-int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, int evidx);
+int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, struct perf_sample *sample,
+				 int evidx);
 
 int addr_map_symbol__account_cycles(struct addr_map_symbol *ams,
 				    struct addr_map_symbol *start,
 				    unsigned cycles);
 
-int hist_entry__inc_addr_samples(struct hist_entry *he, int evidx, u64 addr);
+int hist_entry__inc_addr_samples(struct hist_entry *he, struct perf_sample *sample,
+				 int evidx, u64 addr);
 
 int symbol__alloc_hist(struct symbol *sym);
 void symbol__annotate_zero_histograms(struct symbol *sym);

+ 1 - 1
tools/perf/util/bpf-loader.c

@@ -1246,7 +1246,7 @@ int bpf__config_obj(struct bpf_object *obj,
 	if (!obj || !term || !term->config)
 		return -EINVAL;
 
-	if (!prefixcmp(term->config, "map:")) {
+	if (strstarts(term->config, "map:")) {
 		key_scan_pos = sizeof("map:") - 1;
 		err = bpf__obj_config_map(obj, term, evlist, &key_scan_pos);
 		goto out;

+ 6 - 5
tools/perf/util/branch.h

@@ -5,11 +5,12 @@
 #include "../perf.h"
 
 struct branch_type_stat {
-	u64 counts[PERF_BR_MAX];
-	u64 cond_fwd;
-	u64 cond_bwd;
-	u64 cross_4k;
-	u64 cross_2m;
+	bool	branch_to;
+	u64	counts[PERF_BR_MAX];
+	u64	cond_fwd;
+	u64	cond_bwd;
+	u64	cross_4k;
+	u64	cross_2m;
 };
 
 struct branch_flags;

+ 110 - 47
tools/perf/util/callchain.c

@@ -304,7 +304,7 @@ int perf_callchain_config(const char *var, const char *value)
 {
 	char *endptr;
 
-	if (prefixcmp(var, "call-graph."))
+	if (!strstarts(var, "call-graph."))
 		return 0;
 	var += sizeof("call-graph.") - 1;
 
@@ -563,20 +563,33 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor)
 		if (cursor_node->branch) {
 			call->branch_count = 1;
 
-			if (cursor_node->branch_flags.predicted)
-				call->predicted_count = 1;
-
-			if (cursor_node->branch_flags.abort)
-				call->abort_count = 1;
-
-			call->cycles_count = cursor_node->branch_flags.cycles;
-			call->iter_count = cursor_node->nr_loop_iter;
-			call->samples_count = cursor_node->samples;
-
-			branch_type_count(&call->brtype_stat,
-					  &cursor_node->branch_flags,
-					  cursor_node->branch_from,
-					  cursor_node->ip);
+			if (cursor_node->branch_from) {
+				/*
+				 * branch_from is set with value somewhere else
+				 * to imply it's "to" of a branch.
+				 */
+				call->brtype_stat.branch_to = true;
+
+				if (cursor_node->branch_flags.predicted)
+					call->predicted_count = 1;
+
+				if (cursor_node->branch_flags.abort)
+					call->abort_count = 1;
+
+				branch_type_count(&call->brtype_stat,
+						  &cursor_node->branch_flags,
+						  cursor_node->branch_from,
+						  cursor_node->ip);
+			} else {
+				/*
+				 * It's "from" of a branch
+				 */
+				call->brtype_stat.branch_to = false;
+				call->cycles_count =
+					cursor_node->branch_flags.cycles;
+				call->iter_count = cursor_node->nr_loop_iter;
+				call->samples_count = cursor_node->samples;
+			}
 		}
 
 		list_add_tail(&call->list, &node->val);
@@ -685,20 +698,32 @@ static enum match_result match_chain(struct callchain_cursor_node *node,
 		if (node->branch) {
 			cnode->branch_count++;
 
-			if (node->branch_flags.predicted)
-				cnode->predicted_count++;
-
-			if (node->branch_flags.abort)
-				cnode->abort_count++;
-
-			cnode->cycles_count += node->branch_flags.cycles;
-			cnode->iter_count += node->nr_loop_iter;
-			cnode->samples_count += node->samples;
-
-			branch_type_count(&cnode->brtype_stat,
-					  &node->branch_flags,
-					  node->branch_from,
-					  node->ip);
+			if (node->branch_from) {
+				/*
+				 * It's "to" of a branch
+				 */
+				cnode->brtype_stat.branch_to = true;
+
+				if (node->branch_flags.predicted)
+					cnode->predicted_count++;
+
+				if (node->branch_flags.abort)
+					cnode->abort_count++;
+
+				branch_type_count(&cnode->brtype_stat,
+						  &node->branch_flags,
+						  node->branch_from,
+						  node->ip);
+			} else {
+				/*
+				 * It's "from" of a branch
+				 */
+				cnode->brtype_stat.branch_to = false;
+				cnode->cycles_count +=
+					node->branch_flags.cycles;
+				cnode->iter_count += node->nr_loop_iter;
+				cnode->samples_count += node->samples;
+			}
 		}
 
 		return MATCH_EQ;
@@ -1010,11 +1035,11 @@ int sample__resolve_callchain(struct perf_sample *sample,
 			      struct perf_evsel *evsel, struct addr_location *al,
 			      int max_stack)
 {
-	if (sample->callchain == NULL)
+	if (sample->callchain == NULL && !symbol_conf.show_branchflag_count)
 		return 0;
 
 	if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain ||
-	    perf_hpp_list.parent) {
+	    perf_hpp_list.parent || symbol_conf.show_branchflag_count) {
 		return thread__resolve_callchain(al->thread, cursor, evsel, sample,
 						 parent, al, max_stack);
 	}
@@ -1023,7 +1048,8 @@ int sample__resolve_callchain(struct perf_sample *sample,
 
 int hist_entry__append_callchain(struct hist_entry *he, struct perf_sample *sample)
 {
-	if (!symbol_conf.use_callchain || sample->callchain == NULL)
+	if ((!symbol_conf.use_callchain || sample->callchain == NULL) &&
+		!symbol_conf.show_branchflag_count)
 		return 0;
 	return callchain_append(he->callchain, &callchain_cursor, sample->period);
 }
@@ -1235,27 +1261,26 @@ static int count_pri64_printf(int idx, const char *str, u64 value, char *bf, int
 	return printed;
 }
 
-static int count_float_printf(int idx, const char *str, float value, char *bf, int bfsize)
+static int count_float_printf(int idx, const char *str, float value,
+			      char *bf, int bfsize, float threshold)
 {
 	int printed;
 
+	if (threshold != 0.0 && value < threshold)
+		return 0;
+
 	printed = scnprintf(bf, bfsize, "%s%s:%.1f%%", (idx) ? " " : " (", str, value);
 
 	return printed;
 }
 
-static int counts_str_build(char *bf, int bfsize,
-			     u64 branch_count, u64 predicted_count,
-			     u64 abort_count, u64 cycles_count,
-			     u64 iter_count, u64 samples_count,
-			     struct branch_type_stat *brtype_stat)
+static int branch_to_str(char *bf, int bfsize,
+			 u64 branch_count, u64 predicted_count,
+			 u64 abort_count,
+			 struct branch_type_stat *brtype_stat)
 {
-	u64 cycles;
 	int printed, i = 0;
 
-	if (branch_count == 0)
-		return scnprintf(bf, bfsize, " (calltrace)");
-
 	printed = branch_type_str(brtype_stat, bf, bfsize);
 	if (printed)
 		i++;
@@ -1263,15 +1288,29 @@ static int counts_str_build(char *bf, int bfsize,
 	if (predicted_count < branch_count) {
 		printed += count_float_printf(i++, "predicted",
 				predicted_count * 100.0 / branch_count,
-				bf + printed, bfsize - printed);
+				bf + printed, bfsize - printed, 0.0);
 	}
 
 	if (abort_count) {
 		printed += count_float_printf(i++, "abort",
 				abort_count * 100.0 / branch_count,
-				bf + printed, bfsize - printed);
+				bf + printed, bfsize - printed, 0.1);
 	}
 
+	if (i)
+		printed += scnprintf(bf + printed, bfsize - printed, ")");
+
+	return printed;
+}
+
+static int branch_from_str(char *bf, int bfsize,
+			   u64 branch_count,
+			   u64 cycles_count, u64 iter_count,
+			   u64 samples_count)
+{
+	int printed = 0, i = 0;
+	u64 cycles;
+
 	cycles = cycles_count / branch_count;
 	if (cycles) {
 		printed += count_pri64_printf(i++, "cycles",
@@ -1286,10 +1325,34 @@ static int counts_str_build(char *bf, int bfsize,
 	}
 
 	if (i)
-		return scnprintf(bf + printed, bfsize - printed, ")");
+		printed += scnprintf(bf + printed, bfsize - printed, ")");
 
-	bf[0] = 0;
-	return 0;
+	return printed;
+}
+
+/*
+ * Build the branch counts annotation for one callchain entry into 'bf'
+ * (capacity 'bfsize').
+ *
+ * With branch_count == 0 the text is " (calltrace)". Otherwise the entry is
+ * formatted by branch_to_str() when brtype_stat->branch_to is set, and by
+ * branch_from_str() when it is not.
+ *
+ * Returns the number of characters reported by the formatter used; when that
+ * is zero, bf[0] is set to 0 so the buffer holds an empty string.
+ */
+static int counts_str_build(char *bf, int bfsize,
+			     u64 branch_count, u64 predicted_count,
+			     u64 abort_count, u64 cycles_count,
+			     u64 iter_count, u64 samples_count,
+			     struct branch_type_stat *brtype_stat)
+{
+	int printed;
+
+	if (branch_count == 0)
+		return scnprintf(bf, bfsize, " (calltrace)");
+
+	/* "to" entries carry type/predicted/abort data, "from" entries carry cycles/iterations. */
+	if (brtype_stat->branch_to) {
+		printed = branch_to_str(bf, bfsize, branch_count,
+				predicted_count, abort_count, brtype_stat);
+	} else {
+		printed = branch_from_str(bf, bfsize, branch_count,
+				cycles_count, iter_count, samples_count);
+	}
+
+	if (!printed)
+		bf[0] = 0;
+
+	return printed;
+}
 
 static int callchain_counts_printf(FILE *fp, char *bf, int bfsize,

+ 5 - 3
tools/perf/util/cgroup.c

@@ -98,8 +98,10 @@ static int add_cgroup(struct perf_evlist *evlist, char *str)
 		cgrp = counter->cgrp;
 		if (!cgrp)
 			continue;
-		if (!strcmp(cgrp->name, str))
+		if (!strcmp(cgrp->name, str)) {
+			refcount_inc(&cgrp->refcnt);
 			break;
+		}
 
 		cgrp = NULL;
 	}
@@ -110,6 +112,7 @@ static int add_cgroup(struct perf_evlist *evlist, char *str)
 			return -1;
 
 		cgrp->name = str;
+		refcount_set(&cgrp->refcnt, 1);
 
 		cgrp->fd = open_cgroup(str);
 		if (cgrp->fd == -1) {
@@ -128,12 +131,11 @@ static int add_cgroup(struct perf_evlist *evlist, char *str)
 			goto found;
 		n++;
 	}
-	if (refcount_read(&cgrp->refcnt) == 0)
+	if (refcount_dec_and_test(&cgrp->refcnt))
 		free(cgrp);
 
 	return -1;
 found:
-	refcount_inc(&cgrp->refcnt);
 	counter->cgrp = cgrp;
 	return 0;
 }

+ 7 - 6
tools/perf/util/config.c

@@ -19,6 +19,7 @@
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <unistd.h>
+#include <linux/string.h>
 
 #include "sane_ctype.h"
 
@@ -433,22 +434,22 @@ static int perf_ui_config(const char *var, const char *value)
 int perf_default_config(const char *var, const char *value,
 			void *dummy __maybe_unused)
 {
-	if (!prefixcmp(var, "core."))
+	if (strstarts(var, "core."))
 		return perf_default_core_config(var, value);
 
-	if (!prefixcmp(var, "hist."))
+	if (strstarts(var, "hist."))
 		return perf_hist_config(var, value);
 
-	if (!prefixcmp(var, "ui."))
+	if (strstarts(var, "ui."))
 		return perf_ui_config(var, value);
 
-	if (!prefixcmp(var, "call-graph."))
+	if (strstarts(var, "call-graph."))
 		return perf_callchain_config(var, value);
 
-	if (!prefixcmp(var, "llvm."))
+	if (strstarts(var, "llvm."))
 		return perf_llvm_config(var, value);
 
-	if (!prefixcmp(var, "buildid."))
+	if (strstarts(var, "buildid."))
 		return perf_buildid_config(var, value);
 
 	/* Add other config variables here. */

+ 8 - 0
tools/perf/util/evsel.c

@@ -1671,31 +1671,39 @@ try_fallback:
 	 */
 	if (!perf_missing_features.write_backward && evsel->attr.write_backward) {
 		perf_missing_features.write_backward = true;
+		pr_debug2("switching off write_backward\n");
 		goto out_close;
 	} else if (!perf_missing_features.clockid_wrong && evsel->attr.use_clockid) {
 		perf_missing_features.clockid_wrong = true;
+		pr_debug2("switching off clockid\n");
 		goto fallback_missing_features;
 	} else if (!perf_missing_features.clockid && evsel->attr.use_clockid) {
 		perf_missing_features.clockid = true;
+		pr_debug2("switching off use_clockid\n");
 		goto fallback_missing_features;
 	} else if (!perf_missing_features.cloexec && (flags & PERF_FLAG_FD_CLOEXEC)) {
 		perf_missing_features.cloexec = true;
+		pr_debug2("switching off cloexec flag\n");
 		goto fallback_missing_features;
 	} else if (!perf_missing_features.mmap2 && evsel->attr.mmap2) {
 		perf_missing_features.mmap2 = true;
+		pr_debug2("switching off mmap2\n");
 		goto fallback_missing_features;
 	} else if (!perf_missing_features.exclude_guest &&
 		   (evsel->attr.exclude_guest || evsel->attr.exclude_host)) {
 		perf_missing_features.exclude_guest = true;
+		pr_debug2("switching off exclude_guest, exclude_host\n");
 		goto fallback_missing_features;
 	} else if (!perf_missing_features.sample_id_all) {
 		perf_missing_features.sample_id_all = true;
+		pr_debug2("switching off sample_id_all\n");
 		goto retry_sample_id;
 	} else if (!perf_missing_features.lbr_flags &&
 			(evsel->attr.branch_sample_type &
 			 (PERF_SAMPLE_BRANCH_NO_CYCLES |
 			  PERF_SAMPLE_BRANCH_NO_FLAGS))) {
 		perf_missing_features.lbr_flags = true;
+		pr_debug2("switching off branch sample type no (cycles/flags)\n");
 		goto fallback_missing_features;
 	}
 out_close:

+ 2 - 0
tools/perf/util/hist.c

@@ -1759,6 +1759,8 @@ void perf_evsel__output_resort(struct perf_evsel *evsel, struct ui_progress *pro
 	else
 		use_callchain = symbol_conf.use_callchain;
 
+	use_callchain |= symbol_conf.show_branchflag_count;
+
 	output_resort(evsel__hists(evsel), prog, use_callchain, NULL);
 }
 

+ 1 - 1
tools/perf/util/llvm-utils.c

@@ -33,7 +33,7 @@ struct llvm_param llvm_param = {
 
 int perf_llvm_config(const char *var, const char *value)
 {
-	if (prefixcmp(var, "llvm."))
+	if (!strstarts(var, "llvm."))
 		return 0;
 	var += sizeof("llvm.") - 1;
 

+ 12 - 1
tools/perf/util/machine.c

@@ -1902,13 +1902,16 @@ static int thread__resolve_callchain_sample(struct thread *thread,
 {
 	struct branch_stack *branch = sample->branch_stack;
 	struct ip_callchain *chain = sample->callchain;
-	int chain_nr = chain->nr;
+	int chain_nr = 0;
 	u8 cpumode = PERF_RECORD_MISC_USER;
 	int i, j, err, nr_entries;
 	int skip_idx = -1;
 	int first_call = 0;
 	int nr_loop_iter;
 
+	if (chain)
+		chain_nr = chain->nr;
+
 	if (perf_evsel__has_branch_callstack(evsel)) {
 		err = resolve_lbr_callchain_sample(thread, cursor, sample, parent,
 						   root_al, max_stack);
@@ -1946,6 +1949,10 @@ static int thread__resolve_callchain_sample(struct thread *thread,
 		for (i = 0; i < nr; i++) {
 			if (callchain_param.order == ORDER_CALLEE) {
 				be[i] = branch->entries[i];
+
+				if (chain == NULL)
+					continue;
+
 				/*
 				 * Check for overlap into the callchain.
 				 * The return address is one off compared to
@@ -2000,6 +2007,10 @@ static int thread__resolve_callchain_sample(struct thread *thread,
 			if (err)
 				return err;
 		}
+
+		if (chain_nr == 0)
+			return 0;
+
 		chain_nr -= nr;
 	}
 

+ 184 - 62
tools/perf/util/scripting-engines/trace-event-python.c

@@ -116,6 +116,34 @@ static PyObject *get_handler(const char *handler_name)
 	return handler;
 }
 
+static int get_argument_count(PyObject *handler)
+{
+	int arg_count = 0;	/* returned unchanged when no count is found */
+
+	/*
+	 * Return handler's co_argcount. Its code object is the func_code
+	 * attribute in Python 2, whereas it is __code__ in Python 3.0+.
+	 */
+	PyObject *code_obj = PyObject_GetAttrString(handler,
+		"func_code");
+	if (PyErr_Occurred()) {
+		PyErr_Clear();
+		code_obj = PyObject_GetAttrString(handler,
+			"__code__");
+	}
+	PyErr_Clear();	/* the __code__ lookup may have failed as well */
+	if (code_obj) {
+		PyObject *arg_count_obj = PyObject_GetAttrString(code_obj,
+			"co_argcount");
+		if (arg_count_obj) {
+			arg_count = (int) PyInt_AsLong(arg_count_obj);
+			Py_DECREF(arg_count_obj);
+		}
+		Py_DECREF(code_obj);
+	}
+	return arg_count;
+}
+
 static void call_object(PyObject *handler, PyObject *args, const char *die_msg)
 {
 	PyObject *retval;
@@ -391,13 +419,115 @@ exit:
 	return pylist;
 }
 
+static PyObject *get_sample_value_as_tuple(struct sample_read_value *value)
+{
+	PyObject *t;	/* new (id, value) 2-tuple built from *value */
+
+	t = PyTuple_New(2);
+	if (!t)
+		Py_FatalError("couldn't create Python tuple");
+	PyTuple_SetItem(t, 0, PyLong_FromUnsignedLongLong(value->id));
+	PyTuple_SetItem(t, 1, PyLong_FromUnsignedLongLong(value->value));
+	return t;	/* handed to the caller without a decref here */
+}
+
+static void set_sample_read_in_dict(PyObject *dict_sample,
+					 struct perf_sample *sample,
+					 struct perf_evsel *evsel)
+{
+	u64 read_format = evsel->attr.read_format;
+	PyObject *values;	/* list of (id, value) tuples */
+	unsigned int i;
+
+	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
+		pydict_set_item_string_decref(dict_sample, "time_enabled",
+			PyLong_FromUnsignedLongLong(sample->read.time_enabled));
+	}
+
+	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
+		pydict_set_item_string_decref(dict_sample, "time_running",
+			PyLong_FromUnsignedLongLong(sample->read.time_running));
+	}
+
+	if (read_format & PERF_FORMAT_GROUP)	/* size the list first */
+		values = PyList_New(sample->read.group.nr);
+	else
+		values = PyList_New(1);	/* non-group: only read.one */
+
+	if (!values)
+		Py_FatalError("couldn't create Python list");
+
+	if (read_format & PERF_FORMAT_GROUP) {	/* then fill every slot */
+		for (i = 0; i < sample->read.group.nr; i++) {
+			PyObject *t = get_sample_value_as_tuple(&sample->read.group.values[i]);
+			PyList_SET_ITEM(values, i, t);
+		}
+	} else {
+		PyObject *t = get_sample_value_as_tuple(&sample->read.one);
+		PyList_SET_ITEM(values, 0, t);
+	}
+	pydict_set_item_string_decref(dict_sample, "values", values);
+}
+
+static PyObject *get_perf_sample_dict(struct perf_sample *sample,
+					 struct perf_evsel *evsel,
+					 struct addr_location *al,
+					 PyObject *callchain)
+{
+	PyObject *dict, *dict_sample;	/* dict_sample ends up as dict["sample"] */
+
+	dict = PyDict_New();
+	if (!dict)
+		Py_FatalError("couldn't create Python dictionary");
+
+	dict_sample = PyDict_New();
+	if (!dict_sample)
+		Py_FatalError("couldn't create Python dictionary");
+
+	pydict_set_item_string_decref(dict, "ev_name", PyString_FromString(perf_evsel__name(evsel)));
+	pydict_set_item_string_decref(dict, "attr", PyString_FromStringAndSize(
+			(const char *)&evsel->attr, sizeof(evsel->attr)));
+
+	pydict_set_item_string_decref(dict_sample, "pid",
+			PyInt_FromLong(sample->pid));
+	pydict_set_item_string_decref(dict_sample, "tid",
+			PyInt_FromLong(sample->tid));
+	pydict_set_item_string_decref(dict_sample, "cpu",
+			PyInt_FromLong(sample->cpu));
+	pydict_set_item_string_decref(dict_sample, "ip",
+			PyLong_FromUnsignedLongLong(sample->ip));
+	pydict_set_item_string_decref(dict_sample, "time",
+			PyLong_FromUnsignedLongLong(sample->time));
+	pydict_set_item_string_decref(dict_sample, "period",
+			PyLong_FromUnsignedLongLong(sample->period));
+	set_sample_read_in_dict(dict_sample, sample, evsel);
+	pydict_set_item_string_decref(dict, "sample", dict_sample);
+
+	pydict_set_item_string_decref(dict, "raw_buf", PyString_FromStringAndSize(
+			(const char *)sample->raw_data, sample->raw_size));
+	pydict_set_item_string_decref(dict, "comm",
+			PyString_FromString(thread__comm_str(al->thread)));
+	if (al->map) {	/* "dso" is set only when al->map is non-NULL */
+		pydict_set_item_string_decref(dict, "dso",
+			PyString_FromString(al->map->dso->name));
+	}
+	if (al->sym) {	/* "symbol" is set only when al->sym is non-NULL */
+		pydict_set_item_string_decref(dict, "symbol",
+			PyString_FromString(al->sym->name));
+	}
+
+	pydict_set_item_string_decref(dict, "callchain", callchain);
+
+	return dict;
+}
+
 static void python_process_tracepoint(struct perf_sample *sample,
 				      struct perf_evsel *evsel,
 				      struct addr_location *al)
 {
 	struct event_format *event = evsel->tp_format;
 	PyObject *handler, *context, *t, *obj = NULL, *callchain;
-	PyObject *dict = NULL;
+	PyObject *dict = NULL, *all_entries_dict = NULL;
 	static char handler_name[256];
 	struct format_field *field;
 	unsigned long s, ns;
@@ -407,10 +537,7 @@ static void python_process_tracepoint(struct perf_sample *sample,
 	void *data = sample->raw_data;
 	unsigned long long nsecs = sample->time;
 	const char *comm = thread__comm_str(al->thread);
-
-	t = PyTuple_New(MAX_FIELDS);
-	if (!t)
-		Py_FatalError("couldn't create Python tuple");
+	const char *default_handler_name = "trace_unhandled";
 
 	if (!event) {
 		snprintf(handler_name, sizeof(handler_name),
@@ -427,10 +554,19 @@ static void python_process_tracepoint(struct perf_sample *sample,
 
 	handler = get_handler(handler_name);
 	if (!handler) {
+		handler = get_handler(default_handler_name);
+		if (!handler)
+			return;
 		dict = PyDict_New();
 		if (!dict)
 			Py_FatalError("couldn't create Python dict");
 	}
+
+	t = PyTuple_New(MAX_FIELDS);
+	if (!t)
+		Py_FatalError("couldn't create Python tuple");
+
+
 	s = nsecs / NSEC_PER_SEC;
 	ns = nsecs - s * NSEC_PER_SEC;
 
@@ -444,8 +580,10 @@ static void python_process_tracepoint(struct perf_sample *sample,
 
 	/* ip unwinding */
 	callchain = python_process_callchain(sample, evsel, al);
+	/* Need an additional reference for the perf_sample dict */
+	Py_INCREF(callchain);
 
-	if (handler) {
+	if (!dict) {
 		PyTuple_SetItem(t, n++, PyInt_FromLong(cpu));
 		PyTuple_SetItem(t, n++, PyInt_FromLong(s));
 		PyTuple_SetItem(t, n++, PyInt_FromLong(ns));
@@ -484,26 +622,35 @@ static void python_process_tracepoint(struct perf_sample *sample,
 		} else { /* FIELD_IS_NUMERIC */
 			obj = get_field_numeric_entry(event, field, data);
 		}
-		if (handler)
+		if (!dict)
 			PyTuple_SetItem(t, n++, obj);
 		else
 			pydict_set_item_string_decref(dict, field->name, obj);
 
 	}
 
-	if (!handler)
+	if (dict)
 		PyTuple_SetItem(t, n++, dict);
 
+	if (get_argument_count(handler) == (int) n + 1) {
+		all_entries_dict = get_perf_sample_dict(sample, evsel, al,
+			callchain);
+		PyTuple_SetItem(t, n++,	all_entries_dict);
+	} else {
+		Py_DECREF(callchain);
+	}
+
 	if (_PyTuple_Resize(&t, n) == -1)
 		Py_FatalError("error resizing Python tuple");
 
-	if (handler) {
+	if (!dict) {
 		call_object(handler, t, handler_name);
 	} else {
-		try_call_object("trace_unhandled", t);
+		call_object(handler, t, default_handler_name);
 		Py_DECREF(dict);
 	}
 
+	Py_XDECREF(all_entries_dict);
 	Py_DECREF(t);
 }
 
@@ -795,10 +942,16 @@ static void python_process_general_event(struct perf_sample *sample,
 					 struct perf_evsel *evsel,
 					 struct addr_location *al)
 {
-	PyObject *handler, *t, *dict, *callchain, *dict_sample;
+	PyObject *handler, *t, *dict, *callchain;
 	static char handler_name[64];
 	unsigned n = 0;
 
+	snprintf(handler_name, sizeof(handler_name), "%s", "process_event");
+
+	handler = get_handler(handler_name);
+	if (!handler)
+		return;
+
 	/*
 	 * Use the MAX_FIELDS to make the function expandable, though
 	 * currently there is only one item for the tuple.
@@ -807,61 +960,16 @@ static void python_process_general_event(struct perf_sample *sample,
 	if (!t)
 		Py_FatalError("couldn't create Python tuple");
 
-	dict = PyDict_New();
-	if (!dict)
-		Py_FatalError("couldn't create Python dictionary");
-
-	dict_sample = PyDict_New();
-	if (!dict_sample)
-		Py_FatalError("couldn't create Python dictionary");
-
-	snprintf(handler_name, sizeof(handler_name), "%s", "process_event");
-
-	handler = get_handler(handler_name);
-	if (!handler)
-		goto exit;
-
-	pydict_set_item_string_decref(dict, "ev_name", PyString_FromString(perf_evsel__name(evsel)));
-	pydict_set_item_string_decref(dict, "attr", PyString_FromStringAndSize(
-			(const char *)&evsel->attr, sizeof(evsel->attr)));
-
-	pydict_set_item_string_decref(dict_sample, "pid",
-			PyInt_FromLong(sample->pid));
-	pydict_set_item_string_decref(dict_sample, "tid",
-			PyInt_FromLong(sample->tid));
-	pydict_set_item_string_decref(dict_sample, "cpu",
-			PyInt_FromLong(sample->cpu));
-	pydict_set_item_string_decref(dict_sample, "ip",
-			PyLong_FromUnsignedLongLong(sample->ip));
-	pydict_set_item_string_decref(dict_sample, "time",
-			PyLong_FromUnsignedLongLong(sample->time));
-	pydict_set_item_string_decref(dict_sample, "period",
-			PyLong_FromUnsignedLongLong(sample->period));
-	pydict_set_item_string_decref(dict, "sample", dict_sample);
-
-	pydict_set_item_string_decref(dict, "raw_buf", PyString_FromStringAndSize(
-			(const char *)sample->raw_data, sample->raw_size));
-	pydict_set_item_string_decref(dict, "comm",
-			PyString_FromString(thread__comm_str(al->thread)));
-	if (al->map) {
-		pydict_set_item_string_decref(dict, "dso",
-			PyString_FromString(al->map->dso->name));
-	}
-	if (al->sym) {
-		pydict_set_item_string_decref(dict, "symbol",
-			PyString_FromString(al->sym->name));
-	}
-
 	/* ip unwinding */
 	callchain = python_process_callchain(sample, evsel, al);
-	pydict_set_item_string_decref(dict, "callchain", callchain);
+	dict = get_perf_sample_dict(sample, evsel, al, callchain);
 
 	PyTuple_SetItem(t, n++, dict);
 	if (_PyTuple_Resize(&t, n) == -1)
 		Py_FatalError("error resizing Python tuple");
 
 	call_object(handler, t, handler_name);
-exit:
+
 	Py_DECREF(dict);
 	Py_DECREF(t);
 }
@@ -1259,6 +1367,12 @@ static int python_generate_script(struct pevent *pevent, const char *outfile)
 
 			fprintf(ofp, "%s", f->name);
 		}
+		if (not_first++)
+			fprintf(ofp, ", ");
+		if (++count % 5 == 0)
+			fprintf(ofp, "\n\t\t");
+		fprintf(ofp, "perf_sample_dict");
+
 		fprintf(ofp, "):\n");
 
 		fprintf(ofp, "\t\tprint_header(event_name, common_cpu, "
@@ -1328,6 +1442,9 @@ static int python_generate_script(struct pevent *pevent, const char *outfile)
 
 		fprintf(ofp, ")\n\n");
 
+		fprintf(ofp, "\t\tprint 'Sample: {'+"
+			"get_dict_as_string(perf_sample_dict['sample'], ', ')+'}'\n\n");
+
 		fprintf(ofp, "\t\tfor node in common_callchain:");
 		fprintf(ofp, "\n\t\t\tif 'sym' in node:");
 		fprintf(ofp, "\n\t\t\t\tprint \"\\t[%%x] %%s\" %% (node['ip'], node['sym']['name'])");
@@ -1338,15 +1455,20 @@ static int python_generate_script(struct pevent *pevent, const char *outfile)
 	}
 
 	fprintf(ofp, "def trace_unhandled(event_name, context, "
-		"event_fields_dict):\n");
+		"event_fields_dict, perf_sample_dict):\n");
 
-	fprintf(ofp, "\t\tprint ' '.join(['%%s=%%s'%%(k,str(v))"
-		"for k,v in sorted(event_fields_dict.items())])\n\n");
+	fprintf(ofp, "\t\tprint get_dict_as_string(event_fields_dict)\n");
+	fprintf(ofp, "\t\tprint 'Sample: {'+"
+		"get_dict_as_string(perf_sample_dict['sample'], ', ')+'}'\n\n");
 
 	fprintf(ofp, "def print_header("
 		"event_name, cpu, secs, nsecs, pid, comm):\n"
 		"\tprint \"%%-20s %%5u %%05u.%%09u %%8u %%-20s \" %% \\\n\t"
-		"(event_name, cpu, secs, nsecs, pid, comm),\n");
+		"(event_name, cpu, secs, nsecs, pid, comm),\n\n");
+
+	fprintf(ofp, "def get_dict_as_string(a_dict, delimiter=' '):\n"
+		"\treturn delimiter.join"
+		"(['%%s=%%s'%%(k,str(v))for k,v in sorted(a_dict.items())])\n");
 
 	fclose(ofp);