瀏覽代碼

Merge tag 'perf-core-for-mingo-4.11-20170213' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

New features:

 - Introduce the 'delta-abs' 'perf diff' compute method, that orders the
   histogram entries by the absolute value of the percentage delta for a
   function in two perf.data files, i.e. the functions that changed the
   most (increase or decrease in samples) comes first (Namhyung Kim)

User visible changes:

 - Improve message about tweaking the kernel.perf_event_paranoid setting,
   telling how to make the change permanent by editing /etc/sysctl.conf
   (Arnaldo Carvalho de Melo)

Infrastructure changes:

 - Introduce linux/compiler-gcc.h as a counterpart to the kernel's,
   initially containing the definition of __fallthrough, more to
   come (__maybe_unused, etc) (Arnaldo Carvalho de Melo)

 - Fixes for problems uncovered by building tools/perf with clang, such
   as always true tests of arrays against NULL and variables that sometimes
   were used without being initialized (Arnaldo Carvalho de Melo, Steven Rostedt)

 - Before loading a new ELF, clear global variables set by the
   samples/bpf loader (Mickaël Salaün)

 - Ignore already processed ELF sections in the samples/bpf
   loader (Mickaël Salaün)

 - Fix compile error in the scripting code with some perl5
   versions (Wang YanQing)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Ingo Molnar 8 年之前
父節點
當前提交
277d6f1dca

+ 7 - 0
samples/bpf/bpf_load.c

@@ -277,6 +277,11 @@ int load_bpf_file(char *path)
 	Elf_Data *data, *data_prog, *symbols = NULL;
 	char *shname, *shname_prog;
 
+	/* reset global variables */
+	kern_version = 0;
+	memset(license, 0, sizeof(license));
+	memset(processed_sec, 0, sizeof(processed_sec));
+
 	if (elf_version(EV_CURRENT) == EV_NONE)
 		return 1;
 
@@ -328,6 +333,8 @@ int load_bpf_file(char *path)
 
 	/* load programs that need map fixup (relocations) */
 	for (i = 1; i < ehdr.e_shnum; i++) {
+		if (processed_sec[i])
+			continue;
 
 		if (get_sec(elf, i, &ehdr, &shname, &shdr, &data))
 			continue;

+ 1 - 0
samples/bpf/tracex5_kern.c

@@ -8,6 +8,7 @@
 #include <linux/version.h>
 #include <uapi/linux/bpf.h>
 #include <uapi/linux/seccomp.h>
+#include <uapi/linux/unistd.h>
 #include "bpf_helpers.h"
 
 #define PROG(F) SEC("kprobe/"__stringify(F)) int bpf_func_##F

+ 14 - 0
tools/include/linux/compiler-gcc.h

@@ -0,0 +1,14 @@
+#ifndef _TOOLS_LINUX_COMPILER_H_
+#error "Please don't include <linux/compiler-gcc.h> directly, include <linux/compiler.h> instead."
+#endif
+
+/*
+ * Common definitions for all gcc versions go here.
+ */
+#define GCC_VERSION (__GNUC__ * 10000		\
+		     + __GNUC_MINOR__ * 100	\
+		     + __GNUC_PATCHLEVEL__)
+
+#if GCC_VERSION >= 70000 && !defined(__CHECKER__)
+# define __fallthrough __attribute__ ((fallthrough))
+#endif

+ 5 - 5
tools/include/linux/compiler.h

@@ -1,6 +1,10 @@
 #ifndef _TOOLS_LINUX_COMPILER_H_
 #define _TOOLS_LINUX_COMPILER_H_
 
+#ifdef __GNUC__
+#include <linux/compiler-gcc.h>
+#endif
+
 /* Optimization barrier */
 /* The "volatile" is due to gcc bugs */
 #define barrier() __asm__ __volatile__("": : :"memory")
@@ -128,11 +132,7 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
 
 
 #ifndef __fallthrough
-# if defined(__GNUC__) && __GNUC__ >= 7
-#  define __fallthrough __attribute__ ((fallthrough))
-# else
-#  define __fallthrough
-# endif
+# define __fallthrough
 #endif
 
 #endif /* _TOOLS_LINUX_COMPILER_H */

+ 1 - 0
tools/lib/traceevent/kbuffer-parse.c

@@ -315,6 +315,7 @@ static unsigned int old_update_pointers(struct kbuffer *kbuf)
 		extend += delta;
 		delta = extend;
 		ptr += 4;
+		length = 0;
 		break;
 
 	case OLD_RINGBUF_TYPE_TIME_STAMP:

+ 1 - 1
tools/lib/traceevent/plugin_function.c

@@ -130,7 +130,7 @@ static int function_handler(struct trace_seq *s, struct pevent_record *record,
 	unsigned long long pfunction;
 	const char *func;
 	const char *parent;
-	int index;
+	int index = 0;
 
 	if (pevent_get_field_val(s, event, "ip", record, &function, 1))
 		return trace_seq_putc(s, '!');

+ 12 - 0
tools/perf/Documentation/perf-config.txt

@@ -498,6 +498,18 @@ record.*::
 		But if this option is 'no-cache', it will not update the build-id cache.
 		'skip' skips post-processing and does not update the cache.
 
+diff.*::
+	diff.order::
+		This option sets the number of columns to sort the result.
+		The default is 0, which means sorting by baseline.
+		Setting it to 1 will sort the result by delta (or other
+		compute method selected).
+
+	diff.compute::
+		This options sets the method for computing the diff result.
+		Possible values are 'delta', 'delta-abs', 'ratio' and
+		'wdiff'.  Default is 'delta'.
+
 SEE ALSO
 --------
 linkperf:perf[1]

+ 12 - 3
tools/perf/Documentation/perf-diff.txt

@@ -86,8 +86,9 @@ OPTIONS
 
 -c::
 --compute::
-        Differential computation selection - delta,ratio,wdiff (default is delta).
-        See COMPARISON METHODS section for more info.
+        Differential computation selection - delta, ratio, wdiff, delta-abs
+        (default is delta-abs).  Default can be changed using diff.compute
+        config option.  See COMPARISON METHODS section for more info.
 
 -p::
 --period::
@@ -99,7 +100,11 @@ OPTIONS
 
 -o::
 --order::
-       Specify compute sorting column number.
+       Specify compute sorting column number.  0 means sorting by baseline
+       overhead and 1 (default) means sorting by computed value of column 1
+       (data from the first file other base baseline).  Values more than 1
+       can be used only if enough data files are provided.
+       The default value can be set using the diff.order config option.
 
 --percentage::
 	Determine how to display the overhead percentage of filtered entries.
@@ -181,6 +186,10 @@ with:
     relative to how entries are filtered.  Use --percentage=absolute to
     prevent such fluctuation.
 
+delta-abs
+~~~~~~~~~
+Same as 'delta` method, but sort the result with the absolute values.
+
 ratio
 ~~~~~
 If specified the 'Ratio' column is displayed with value 'r' computed as:

+ 1 - 0
tools/perf/MANIFEST

@@ -61,6 +61,7 @@ tools/include/asm-generic/bitops.h
 tools/include/linux/atomic.h
 tools/include/linux/bitops.h
 tools/include/linux/compiler.h
+tools/include/linux/compiler-gcc.h
 tools/include/linux/coresight-pmu.h
 tools/include/linux/filter.h
 tools/include/linux/hash.h

+ 74 - 4
tools/perf/builtin-diff.c

@@ -17,6 +17,7 @@
 #include "util/symbol.h"
 #include "util/util.h"
 #include "util/data.h"
+#include "util/config.h"
 
 #include <stdlib.h>
 #include <math.h>
@@ -30,6 +31,7 @@ enum {
 	PERF_HPP_DIFF__RATIO,
 	PERF_HPP_DIFF__WEIGHTED_DIFF,
 	PERF_HPP_DIFF__FORMULA,
+	PERF_HPP_DIFF__DELTA_ABS,
 
 	PERF_HPP_DIFF__MAX_INDEX
 };
@@ -64,7 +66,7 @@ static bool force;
 static bool show_period;
 static bool show_formula;
 static bool show_baseline_only;
-static unsigned int sort_compute;
+static unsigned int sort_compute = 1;
 
 static s64 compute_wdiff_w1;
 static s64 compute_wdiff_w2;
@@ -73,19 +75,22 @@ enum {
 	COMPUTE_DELTA,
 	COMPUTE_RATIO,
 	COMPUTE_WEIGHTED_DIFF,
+	COMPUTE_DELTA_ABS,
 	COMPUTE_MAX,
 };
 
 const char *compute_names[COMPUTE_MAX] = {
 	[COMPUTE_DELTA] = "delta",
+	[COMPUTE_DELTA_ABS] = "delta-abs",
 	[COMPUTE_RATIO] = "ratio",
 	[COMPUTE_WEIGHTED_DIFF] = "wdiff",
 };
 
-static int compute;
+static int compute = COMPUTE_DELTA_ABS;
 
 static int compute_2_hpp[COMPUTE_MAX] = {
 	[COMPUTE_DELTA]		= PERF_HPP_DIFF__DELTA,
+	[COMPUTE_DELTA_ABS]	= PERF_HPP_DIFF__DELTA_ABS,
 	[COMPUTE_RATIO]		= PERF_HPP_DIFF__RATIO,
 	[COMPUTE_WEIGHTED_DIFF]	= PERF_HPP_DIFF__WEIGHTED_DIFF,
 };
@@ -111,6 +116,10 @@ static struct header_column {
 		.name  = "Delta",
 		.width = 7,
 	},
+	[PERF_HPP_DIFF__DELTA_ABS] = {
+		.name  = "Delta Abs",
+		.width = 7,
+	},
 	[PERF_HPP_DIFF__RATIO] = {
 		.name  = "Ratio",
 		.width = 14,
@@ -298,6 +307,7 @@ static int formula_fprintf(struct hist_entry *he, struct hist_entry *pair,
 {
 	switch (compute) {
 	case COMPUTE_DELTA:
+	case COMPUTE_DELTA_ABS:
 		return formula_delta(he, pair, buf, size);
 	case COMPUTE_RATIO:
 		return formula_ratio(he, pair, buf, size);
@@ -461,6 +471,7 @@ static void hists__precompute(struct hists *hists)
 
 			switch (compute) {
 			case COMPUTE_DELTA:
+			case COMPUTE_DELTA_ABS:
 				compute_delta(he, pair);
 				break;
 			case COMPUTE_RATIO:
@@ -498,6 +509,13 @@ __hist_entry__cmp_compute(struct hist_entry *left, struct hist_entry *right,
 
 		return cmp_doubles(l, r);
 	}
+	case COMPUTE_DELTA_ABS:
+	{
+		double l = fabs(left->diff.period_ratio_delta);
+		double r = fabs(right->diff.period_ratio_delta);
+
+		return cmp_doubles(l, r);
+	}
 	case COMPUTE_RATIO:
 	{
 		double l = left->diff.period_ratio;
@@ -564,7 +582,7 @@ hist_entry__cmp_compute_idx(struct hist_entry *left, struct hist_entry *right,
 	if (!p_left || !p_right)
 		return p_left ? -1 : 1;
 
-	if (c != COMPUTE_DELTA) {
+	if (c != COMPUTE_DELTA && c != COMPUTE_DELTA_ABS) {
 		/*
 		 * The delta can be computed without the baseline, but
 		 * others are not.  Put those entries which have no
@@ -606,6 +624,15 @@ hist_entry__cmp_delta(struct perf_hpp_fmt *fmt,
 	return hist_entry__cmp_compute(right, left, COMPUTE_DELTA, d->idx);
 }
 
+static int64_t
+hist_entry__cmp_delta_abs(struct perf_hpp_fmt *fmt,
+		      struct hist_entry *left, struct hist_entry *right)
+{
+	struct data__file *d = fmt_to_data_file(fmt);
+
+	return hist_entry__cmp_compute(right, left, COMPUTE_DELTA_ABS, d->idx);
+}
+
 static int64_t
 hist_entry__cmp_ratio(struct perf_hpp_fmt *fmt,
 		      struct hist_entry *left, struct hist_entry *right)
@@ -632,6 +659,14 @@ hist_entry__cmp_delta_idx(struct perf_hpp_fmt *fmt __maybe_unused,
 					   sort_compute);
 }
 
+static int64_t
+hist_entry__cmp_delta_abs_idx(struct perf_hpp_fmt *fmt __maybe_unused,
+			      struct hist_entry *left, struct hist_entry *right)
+{
+	return hist_entry__cmp_compute_idx(right, left, COMPUTE_DELTA_ABS,
+					   sort_compute);
+}
+
 static int64_t
 hist_entry__cmp_ratio_idx(struct perf_hpp_fmt *fmt __maybe_unused,
 			  struct hist_entry *left, struct hist_entry *right)
@@ -775,7 +810,7 @@ static const struct option options[] = {
 	OPT_BOOLEAN('b', "baseline-only", &show_baseline_only,
 		    "Show only items with match in baseline"),
 	OPT_CALLBACK('c', "compute", &compute,
-		     "delta,ratio,wdiff:w1,w2 (default delta)",
+		     "delta,delta-abs,ratio,wdiff:w1,w2 (default delta-abs)",
 		     "Entries differential computation selection",
 		     setup_compute),
 	OPT_BOOLEAN('p', "period", &show_period,
@@ -945,6 +980,7 @@ hpp__entry_pair(struct hist_entry *he, struct hist_entry *pair,
 
 	switch (idx) {
 	case PERF_HPP_DIFF__DELTA:
+	case PERF_HPP_DIFF__DELTA_ABS:
 		if (pair->diff.computed)
 			diff = pair->diff.period_ratio_delta;
 		else
@@ -1118,6 +1154,10 @@ static void data__hpp_register(struct data__file *d, int idx)
 		fmt->color = hpp__color_wdiff;
 		fmt->sort  = hist_entry__cmp_wdiff;
 		break;
+	case PERF_HPP_DIFF__DELTA_ABS:
+		fmt->color = hpp__color_delta;
+		fmt->sort  = hist_entry__cmp_delta_abs;
+		break;
 	default:
 		fmt->sort  = hist_entry__cmp_nop;
 		break;
@@ -1195,6 +1235,9 @@ static int ui_init(void)
 	case COMPUTE_WEIGHTED_DIFF:
 		fmt->sort = hist_entry__cmp_wdiff_idx;
 		break;
+	case COMPUTE_DELTA_ABS:
+		fmt->sort = hist_entry__cmp_delta_abs_idx;
+		break;
 	default:
 		BUG_ON(1);
 	}
@@ -1249,6 +1292,31 @@ static int data_init(int argc, const char **argv)
 	return 0;
 }
 
+static int diff__config(const char *var, const char *value,
+			void *cb __maybe_unused)
+{
+	if (!strcmp(var, "diff.order")) {
+		sort_compute = perf_config_int(var, value);
+		return 0;
+	}
+	if (!strcmp(var, "diff.compute")) {
+		if (!strcmp(value, "delta")) {
+			compute = COMPUTE_DELTA;
+		} else if (!strcmp(value, "delta-abs")) {
+			compute = COMPUTE_DELTA_ABS;
+		} else if (!strcmp(value, "ratio")) {
+			compute = COMPUTE_RATIO;
+		} else if (!strcmp(value, "wdiff")) {
+			compute = COMPUTE_WEIGHTED_DIFF;
+		} else {
+			pr_err("Invalid compute method: %s\n", value);
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
 int cmd_diff(int argc, const char **argv, const char *prefix __maybe_unused)
 {
 	int ret = hists__init();
@@ -1256,6 +1324,8 @@ int cmd_diff(int argc, const char **argv, const char *prefix __maybe_unused)
 	if (ret < 0)
 		return ret;
 
+	perf_config(diff__config, NULL);
+
 	argc = parse_options(argc, argv, options, diff_usage, 0);
 
 	if (symbol__init(NULL) < 0)

+ 2 - 2
tools/perf/builtin-kmem.c

@@ -1065,7 +1065,7 @@ static void __print_page_alloc_result(struct perf_session *session, int n_lines)
 
 		data = rb_entry(next, struct page_stat, node);
 		sym = machine__find_kernel_function(machine, data->callsite, &map);
-		if (sym && sym->name)
+		if (sym)
 			caller = sym->name;
 		else
 			scnprintf(buf, sizeof(buf), "%"PRIx64, data->callsite);
@@ -1107,7 +1107,7 @@ static void __print_page_caller_result(struct perf_session *session, int n_lines
 
 		data = rb_entry(next, struct page_stat, node);
 		sym = machine__find_kernel_function(machine, data->callsite, &map);
-		if (sym && sym->name)
+		if (sym)
 			caller = sym->name;
 		else
 			scnprintf(buf, sizeof(buf), "%"PRIx64, data->callsite);

+ 1 - 1
tools/perf/builtin-record.c

@@ -418,7 +418,7 @@ static int record__mmap(struct record *rec)
 
 static int record__open(struct record *rec)
 {
-	char msg[512];
+	char msg[BUFSIZ];
 	struct perf_evsel *pos;
 	struct perf_evlist *evlist = rec->evlist;
 	struct perf_session *session = rec->session;

+ 1 - 1
tools/perf/builtin-sched.c

@@ -2067,7 +2067,7 @@ static void save_task_callchain(struct perf_sched *sched,
 			break;
 
 		sym = node->sym;
-		if (sym && sym->name) {
+		if (sym) {
 			if (!strcmp(sym->name, "schedule") ||
 			    !strcmp(sym->name, "__schedule") ||
 			    !strcmp(sym->name, "preempt_schedule"))

+ 1 - 1
tools/perf/builtin-stat.c

@@ -533,7 +533,7 @@ static int store_counter_ids(struct perf_evsel *counter)
 static int __run_perf_stat(int argc, const char **argv)
 {
 	int interval = stat_config.interval;
-	char msg[512];
+	char msg[BUFSIZ];
 	unsigned long long t0, t1;
 	struct perf_evsel *counter;
 	struct timespec ts;

+ 1 - 1
tools/perf/builtin-top.c

@@ -859,7 +859,7 @@ static void perf_top__mmap_read(struct perf_top *top)
 
 static int perf_top__start_counters(struct perf_top *top)
 {
-	char msg[512];
+	char msg[BUFSIZ];
 	struct perf_evsel *counter;
 	struct perf_evlist *evlist = top->evlist;
 	struct record_opts *opts = &top->record_opts;

+ 1 - 1
tools/perf/tests/perf-record.c

@@ -66,7 +66,7 @@ int test__PERF_RECORD(int subtest __maybe_unused)
 	if (evlist == NULL) /* Fallback for kernels lacking PERF_COUNT_SW_DUMMY */
 		evlist = perf_evlist__new_default();
 
-	if (evlist == NULL || argv == NULL) {
+	if (evlist == NULL) {
 		pr_debug("Not enough memory to create evlist\n");
 		goto out;
 	}

+ 3 - 1
tools/perf/util/evsel.c

@@ -2469,7 +2469,9 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target,
 		 "  -1: Allow use of (almost) all events by all users\n"
 		 ">= 0: Disallow raw tracepoint access by users without CAP_IOC_LOCK\n"
 		 ">= 1: Disallow CPU event access by users without CAP_SYS_ADMIN\n"
-		 ">= 2: Disallow kernel profiling by users without CAP_SYS_ADMIN",
+		 ">= 2: Disallow kernel profiling by users without CAP_SYS_ADMIN\n\n"
+		 "To make this setting permanent, edit /etc/sysctl.conf too, e.g.:\n\n"
+		 "	kernel.perf_event_paranoid = -1\n" ,
 				 target->system_wide ? "system-wide " : "",
 				 perf_event_paranoid());
 	case ENOENT:

+ 0 - 1
tools/perf/util/evsel_fprintf.c

@@ -168,7 +168,6 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment,
 
 			if (symbol_conf.bt_stop_list &&
 			    node->sym &&
-			    node->sym->name &&
 			    strlist__has_entry(symbol_conf.bt_stop_list,
 					       node->sym->name)) {
 				break;

+ 1 - 1
tools/perf/util/machine.c

@@ -1565,7 +1565,7 @@ int machine__process_event(struct machine *machine, union perf_event *event,
 
 static bool symbol__match_regex(struct symbol *sym, regex_t *regex)
 {
-	if (sym->name && !regexec(regex, sym->name, 0, NULL, 0))
+	if (!regexec(regex, sym->name, 0, NULL, 0))
 		return 1;
 	return 0;
 }

+ 2 - 2
tools/perf/util/map.c

@@ -387,10 +387,10 @@ size_t map__fprintf_dsoname(struct map *map, FILE *fp)
 {
 	const char *dsoname = "[unknown]";
 
-	if (map && map->dso && (map->dso->name || map->dso->long_name)) {
+	if (map && map->dso) {
 		if (symbol_conf.show_kernel_path && map->dso->long_name)
 			dsoname = map->dso->long_name;
-		else if (map->dso->name)
+		else
 			dsoname = map->dso->name;
 	}
 

+ 1 - 1
tools/perf/util/scripting-engines/Build

@@ -1,6 +1,6 @@
 libperf-$(CONFIG_LIBPERL)   += trace-event-perl.o
 libperf-$(CONFIG_LIBPYTHON) += trace-event-python.o
 
-CFLAGS_trace-event-perl.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-undef -Wno-switch-default
+CFLAGS_trace-event-perl.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-nested-externs -Wno-undef -Wno-switch-default
 
 CFLAGS_trace-event-python.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow

+ 2 - 2
tools/perf/util/scripting-engines/trace-event-perl.c

@@ -309,10 +309,10 @@ static SV *perl_process_callchain(struct perf_sample *sample,
 		if (node->map) {
 			struct map *map = node->map;
 			const char *dsoname = "[unknown]";
-			if (map && map->dso && (map->dso->name || map->dso->long_name)) {
+			if (map && map->dso) {
 				if (symbol_conf.show_kernel_path && map->dso->long_name)
 					dsoname = map->dso->long_name;
-				else if (map->dso->name)
+				else
 					dsoname = map->dso->name;
 			}
 			if (!hv_stores(elem, "dso", newSVpv(dsoname,0))) {

+ 1 - 1
tools/perf/util/symbol_fprintf.c

@@ -21,7 +21,7 @@ size_t __symbol__fprintf_symname_offs(const struct symbol *sym,
 	unsigned long offset;
 	size_t length;
 
-	if (sym && sym->name) {
+	if (sym) {
 		length = fprintf(fp, "%s", sym->name);
 		if (al && print_offsets) {
 			if (al->addr < sym->end)