Эх сурвалжийг харах

Merge tag 'perf-core-for-mingo-20161028' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

perf/core improvements and fixes from Arnaldo Carvalho de Melo:

New features:

- Support matching by topic in 'perf list' (Andi Kleen)

User visible:

- Apply cpu color only when there was activity in 'perf sched map' (Namhyung Kim)

- Always show the task's COMM in 'perf sched map -v' (Namhyung Kim)

- Fix hierarchy column counts in the perf hist browser (top, report), avoiding
  showing nothing after pressing the RIGHT key a number of times (Namhyung Kim)

Infrastructure:

- Support cascading options in libsubcmd and use it to share common options in
  'perf sched' subcommands (Namhyung Kim)

- Avoid worker cacheline bouncing in 'perf bench futex' (Davidlohr Bueso)

- Sanitize numeric parameters in 'perf bench futex' (Davidlohr Bueso)

- Update copies of kernel files (Arnaldo Carvalho de Melo)

- Fix scripting (perl, python) setup to avoid leaks (Arnaldo Carvalho de Melo)

- Add missing object file to the python binding linkage list (Arnaldo Carvalho de Melo)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Ingo Molnar 8 жил өмнө
parent
commit
91a79e5fa6

+ 5 - 0
tools/include/uapi/asm-generic/mman-common.h

@@ -72,4 +72,9 @@
 #define MAP_HUGE_SHIFT	26
 #define MAP_HUGE_MASK	0x3f
 
+#define PKEY_DISABLE_ACCESS	0x1
+#define PKEY_DISABLE_WRITE	0x2
+#define PKEY_ACCESS_MASK	(PKEY_DISABLE_ACCESS |\
+				 PKEY_DISABLE_WRITE)
+
 #endif /* __ASM_GENERIC_MMAN_COMMON_H */

+ 14 - 0
tools/lib/subcmd/parse-options.c

@@ -314,12 +314,19 @@ static int get_value(struct parse_opt_ctx_t *p,
 
 static int parse_short_opt(struct parse_opt_ctx_t *p, const struct option *options)
 {
+retry:
 	for (; options->type != OPTION_END; options++) {
 		if (options->short_name == *p->opt) {
 			p->opt = p->opt[1] ? p->opt + 1 : NULL;
 			return get_value(p, options, OPT_SHORT);
 		}
 	}
+
+	if (options->parent) {
+		options = options->parent;
+		goto retry;
+	}
+
 	return -2;
 }
 
@@ -333,6 +340,7 @@ static int parse_long_opt(struct parse_opt_ctx_t *p, const char *arg,
 	if (!arg_end)
 		arg_end = arg + strlen(arg);
 
+retry:
 	for (; options->type != OPTION_END; options++) {
 		const char *rest;
 		int flags = 0;
@@ -426,6 +434,12 @@ match:
 	}
 	if (abbrev_option)
 		return get_value(p, abbrev_option, abbrev_flags);
+
+	if (options->parent) {
+		options = options->parent;
+		goto retry;
+	}
+
 	return -2;
 }
 

+ 2 - 0
tools/lib/subcmd/parse-options.h

@@ -109,11 +109,13 @@ struct option {
 	intptr_t defval;
 	bool *set;
 	void *data;
+	const struct option *parent;
 };
 
 #define check_vtype(v, type) ( BUILD_BUG_ON_ZERO(!__builtin_types_compatible_p(typeof(v), type)) + v )
 
 #define OPT_END()                   { .type = OPTION_END }
+#define OPT_PARENT(p)               { .type = OPTION_END, .parent = (p) }
 #define OPT_ARGUMENT(l, h)          { .type = OPTION_ARGUMENT, .long_name = (l), .help = (h) }
 #define OPT_GROUP(h)                { .type = OPTION_GROUP, .help = (h) }
 #define OPT_BIT(s, l, v, h, b)      { .type = OPTION_BIT, .short_name = (s), .long_name = (l), .value = check_vtype(v, int *), .help = (h), .defval = (b) }

+ 2 - 2
tools/perf/Makefile.perf

@@ -381,10 +381,10 @@ $(PERF_IN): prepare FORCE
         (diff -B ../arch/x86/include/asm/cpufeatures.h ../../arch/x86/include/asm/cpufeatures.h >/dev/null) \
         || echo "Warning: tools/arch/x86/include/asm/cpufeatures.h differs from kernel" >&2 )) || true
 	@(test -f ../../arch/x86/lib/memcpy_64.S && ( \
-        (diff -B ../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memcpy_64.S >/dev/null) \
+        (diff -B -I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" ../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memcpy_64.S >/dev/null) \
         || echo "Warning: tools/arch/x86/lib/memcpy_64.S differs from kernel" >&2 )) || true
 	@(test -f ../../arch/x86/lib/memset_64.S && ( \
-        (diff -B ../arch/x86/lib/memset_64.S ../../arch/x86/lib/memset_64.S >/dev/null) \
+        (diff -B -I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" ../arch/x86/lib/memset_64.S ../../arch/x86/lib/memset_64.S >/dev/null) \
         || echo "Warning: tools/arch/x86/lib/memset_64.S differs from kernel" >&2 )) || true
 	@(test -f ../../arch/arm/include/uapi/asm/perf_regs.h && ( \
         (diff -B ../arch/arm/include/uapi/asm/perf_regs.h ../../arch/arm/include/uapi/asm/perf_regs.h >/dev/null) \

+ 3 - 0
tools/perf/arch/x86/entry/syscalls/syscall_64.tbl

@@ -335,6 +335,9 @@
 326	common	copy_file_range		sys_copy_file_range
 327	64	preadv2			sys_preadv2
 328	64	pwritev2		sys_pwritev2
+329	common	pkey_mprotect		sys_pkey_mprotect
+330	common	pkey_alloc		sys_pkey_alloc
+331	common	pkey_free		sys_pkey_free
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact

+ 9 - 6
tools/perf/bench/futex-hash.c

@@ -39,15 +39,12 @@ static unsigned int threads_starting;
 static struct stats throughput_stats;
 static pthread_cond_t thread_parent, thread_worker;
 
-#define SMP_CACHE_BYTES 256
-#define __cacheline_aligned __attribute__ ((aligned (SMP_CACHE_BYTES)))
-
 struct worker {
 	int tid;
 	u_int32_t *futex;
 	pthread_t thread;
 	unsigned long ops;
-} __cacheline_aligned;
+};
 
 static const struct option options[] = {
 	OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"),
@@ -66,8 +63,9 @@ static const char * const bench_futex_hash_usage[] = {
 static void *workerfn(void *arg)
 {
 	int ret;
-	unsigned int i;
 	struct worker *w = (struct worker *) arg;
+	unsigned int i;
+	unsigned long ops = w->ops; /* avoid cacheline bouncing */
 
 	pthread_mutex_lock(&thread_lock);
 	threads_starting--;
@@ -77,7 +75,7 @@ static void *workerfn(void *arg)
 	pthread_mutex_unlock(&thread_lock);
 
 	do {
-		for (i = 0; i < nfutexes; i++, w->ops++) {
+		for (i = 0; i < nfutexes; i++, ops++) {
 			/*
 			 * We want the futex calls to fail in order to stress
 			 * the hashing of uaddr and not measure other steps,
@@ -91,6 +89,7 @@ static void *workerfn(void *arg)
 		}
 	}  while (!done);
 
+	w->ops = ops;
 	return NULL;
 }
 
@@ -131,6 +130,8 @@ int bench_futex_hash(int argc, const char **argv,
 	}
 
 	ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+	nsecs = futexbench_sanitize_numeric(nsecs);
+	nfutexes = futexbench_sanitize_numeric(nfutexes);
 
 	sigfillset(&act.sa_mask);
 	act.sa_sigaction = toggle_done;
@@ -138,6 +139,8 @@ int bench_futex_hash(int argc, const char **argv,
 
 	if (!nthreads) /* default to the number of CPUs */
 		nthreads = ncpus;
+	else
+		nthreads = futexbench_sanitize_numeric(nthreads);
 
 	worker = calloc(nthreads, sizeof(*worker));
 	if (!worker)

+ 6 - 1
tools/perf/bench/futex-lock-pi.c

@@ -75,6 +75,7 @@ static void toggle_done(int sig __maybe_unused,
 static void *workerfn(void *arg)
 {
 	struct worker *w = (struct worker *) arg;
+	unsigned long ops = w->ops;
 
 	pthread_mutex_lock(&thread_lock);
 	threads_starting--;
@@ -103,9 +104,10 @@ static void *workerfn(void *arg)
 		if (ret && !silent)
 			warn("thread %d: Could not unlock pi-lock for %p (%d)",
 			     w->tid, w->futex, ret);
-		w->ops++; /* account for thread's share of work */
+		ops++; /* account for thread's share of work */
 	}  while (!done);
 
+	w->ops = ops;
 	return NULL;
 }
 
@@ -150,6 +152,7 @@ int bench_futex_lock_pi(int argc, const char **argv,
 		goto err;
 
 	ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+	nsecs = futexbench_sanitize_numeric(nsecs);
 
 	sigfillset(&act.sa_mask);
 	act.sa_sigaction = toggle_done;
@@ -157,6 +160,8 @@ int bench_futex_lock_pi(int argc, const char **argv,
 
 	if (!nthreads)
 		nthreads = ncpus;
+	else
+		nthreads = futexbench_sanitize_numeric(nthreads);
 
 	worker = calloc(nthreads, sizeof(*worker));
 	if (!worker)

+ 2 - 0
tools/perf/bench/futex-requeue.c

@@ -128,6 +128,8 @@ int bench_futex_requeue(int argc, const char **argv,
 
 	if (!nthreads)
 		nthreads = ncpus;
+	else
+		nthreads = futexbench_sanitize_numeric(nthreads);
 
 	worker = calloc(nthreads, sizeof(*worker));
 	if (!worker)

+ 4 - 0
tools/perf/bench/futex-wake-parallel.c

@@ -217,8 +217,12 @@ int bench_futex_wake_parallel(int argc, const char **argv,
 	sigaction(SIGINT, &act, NULL);
 
 	ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+	nwaking_threads = futexbench_sanitize_numeric(nwaking_threads);
+
 	if (!nblocked_threads)
 		nblocked_threads = ncpus;
+	else
+		nblocked_threads = futexbench_sanitize_numeric(nblocked_threads);
 
 	/* some sanity checks */
 	if (nwaking_threads > nblocked_threads || !nwaking_threads)

+ 3 - 0
tools/perf/bench/futex-wake.c

@@ -129,6 +129,7 @@ int bench_futex_wake(int argc, const char **argv,
 	}
 
 	ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+	nwakes = futexbench_sanitize_numeric(nwakes);
 
 	sigfillset(&act.sa_mask);
 	act.sa_sigaction = toggle_done;
@@ -136,6 +137,8 @@ int bench_futex_wake(int argc, const char **argv,
 
 	if (!nthreads)
 		nthreads = ncpus;
+	else
+		nthreads = futexbench_sanitize_numeric(nthreads);
 
 	worker = calloc(nthreads, sizeof(*worker));
 	if (!worker)

+ 4 - 0
tools/perf/bench/futex.h

@@ -7,6 +7,7 @@
 #ifndef _FUTEX_H
 #define _FUTEX_H
 
+#include <stdlib.h>
 #include <unistd.h>
 #include <sys/syscall.h>
 #include <sys/types.h>
@@ -99,4 +100,7 @@ static inline int pthread_attr_setaffinity_np(pthread_attr_t *attr,
 }
 #endif
 
+/* User input sanitation */
+#define futexbench_sanitize_numeric(__n) abs((__n))
+
 #endif /* _FUTEX_H */

+ 21 - 17
tools/perf/builtin-sched.c

@@ -1191,6 +1191,7 @@ static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_
 	int i;
 	int ret;
 	u64 avg;
+	char max_lat_at[32];
 
 	if (!work_list->nb_atoms)
 		return;
@@ -1212,12 +1213,13 @@ static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_
 		printf(" ");
 
 	avg = work_list->total_lat / work_list->nb_atoms;
+	timestamp__scnprintf_usec(work_list->max_lat_at, max_lat_at, sizeof(max_lat_at));
 
-	printf("|%11.3f ms |%9" PRIu64 " | avg:%9.3f ms | max:%9.3f ms | max at: %13.6f s\n",
+	printf("|%11.3f ms |%9" PRIu64 " | avg:%9.3f ms | max:%9.3f ms | max at: %13s s\n",
 	      (double)work_list->total_runtime / NSEC_PER_MSEC,
 		 work_list->nb_atoms, (double)avg / NSEC_PER_MSEC,
 		 (double)work_list->max_lat / NSEC_PER_MSEC,
-		 (double)work_list->max_lat_at / NSEC_PER_SEC);
+		 max_lat_at);
 }
 
 static int pid_cmp(struct work_atoms *l, struct work_atoms *r)
@@ -1402,6 +1404,7 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
 	int cpus_nr;
 	bool new_cpu = false;
 	const char *color = PERF_COLOR_NORMAL;
+	char stimestamp[32];
 
 	BUG_ON(this_cpu >= MAX_CPUS || this_cpu < 0);
 
@@ -1479,7 +1482,7 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
 			cpu_color = COLOR_CPUS;
 
 		if (cpu != this_cpu)
-			color_fprintf(stdout, cpu_color, " ");
+			color_fprintf(stdout, color, " ");
 		else
 			color_fprintf(stdout, cpu_color, "*");
 
@@ -1492,8 +1495,9 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
 	if (sched->map.cpus && !cpu_map__has(sched->map.cpus, this_cpu))
 		goto out;
 
-	color_fprintf(stdout, color, "  %12.6f secs ", (double)timestamp / NSEC_PER_SEC);
-	if (new_shortname) {
+	timestamp__scnprintf_usec(timestamp, stimestamp, sizeof(stimestamp));
+	color_fprintf(stdout, color, "  %12s secs ", stimestamp);
+	if (new_shortname || (verbose && sched_in->tid)) {
 		const char *pid_color = color;
 
 		if (thread__has_color(sched_in))
@@ -1954,6 +1958,15 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
 		.next_shortname2      = '0',
 		.skip_merge           = 0,
 	};
+	const struct option sched_options[] = {
+	OPT_STRING('i', "input", &input_name, "file",
+		    "input file name"),
+	OPT_INCR('v', "verbose", &verbose,
+		    "be more verbose (show symbol address, etc)"),
+	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
+		    "dump raw trace in ASCII"),
+	OPT_END()
+	};
 	const struct option latency_options[] = {
 	OPT_STRING('s', "sort", &sched.sort_order, "key[,key2...]",
 		   "sort by key(s): runtime, switch, avg, max"),
@@ -1965,7 +1978,7 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
 		    "dump raw trace in ASCII"),
 	OPT_BOOLEAN('p', "pids", &sched.skip_merge,
 		    "latency stats per pid instead of per comm"),
-	OPT_END()
+	OPT_PARENT(sched_options)
 	};
 	const struct option replay_options[] = {
 	OPT_UINTEGER('r', "repeat", &sched.replay_repeat,
@@ -1975,16 +1988,7 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
 	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
 		    "dump raw trace in ASCII"),
 	OPT_BOOLEAN('f', "force", &sched.force, "don't complain, do it"),
-	OPT_END()
-	};
-	const struct option sched_options[] = {
-	OPT_STRING('i', "input", &input_name, "file",
-		    "input file name"),
-	OPT_INCR('v', "verbose", &verbose,
-		    "be more verbose (show symbol address, etc)"),
-	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
-		    "dump raw trace in ASCII"),
-	OPT_END()
+	OPT_PARENT(sched_options)
 	};
 	const struct option map_options[] = {
 	OPT_BOOLEAN(0, "compact", &sched.map.comp,
@@ -1995,7 +1999,7 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
                     "highlight given CPUs in map"),
 	OPT_STRING(0, "cpus", &sched.map.cpus_str, "cpus",
                     "display given CPUs in map"),
-	OPT_END()
+	OPT_PARENT(sched_options)
 	};
 	const char * const latency_usage[] = {
 		"perf sched latency [<options>]",

+ 6 - 4
tools/perf/builtin-script.c

@@ -441,7 +441,6 @@ static void print_sample_start(struct perf_sample *sample,
 {
 	struct perf_event_attr *attr = &evsel->attr;
 	unsigned long secs;
-	unsigned long usecs;
 	unsigned long long nsecs;
 
 	if (PRINT_FIELD(COMM)) {
@@ -471,11 +470,14 @@ static void print_sample_start(struct perf_sample *sample,
 		nsecs = sample->time;
 		secs = nsecs / NSEC_PER_SEC;
 		nsecs -= secs * NSEC_PER_SEC;
-		usecs = nsecs / NSEC_PER_USEC;
+
 		if (nanosecs)
 			printf("%5lu.%09llu: ", secs, nsecs);
-		else
-			printf("%5lu.%06lu: ", secs, usecs);
+		else {
+			char sample_time[32];
+			timestamp__scnprintf_usec(sample->time, sample_time, sizeof(sample_time));
+			printf("%12s: ", sample_time);
+		}
 	}
 }
 

+ 14 - 1
tools/perf/ui/browsers/hists.c

@@ -2076,8 +2076,21 @@ void hist_browser__init(struct hist_browser *browser,
 	browser->b.use_navkeypressed	= true;
 	browser->show_headers		= symbol_conf.show_hist_headers;
 
-	hists__for_each_format(hists, fmt)
+	if (symbol_conf.report_hierarchy) {
+		struct perf_hpp_list_node *fmt_node;
+
+		/* count overhead columns (in the first node) */
+		fmt_node = list_first_entry(&hists->hpp_formats,
+					    struct perf_hpp_list_node, list);
+		perf_hpp_list__for_each_format(&fmt_node->hpp, fmt)
+			++browser->b.columns;
+
+		/* add a single column for whole hierarchy sort keys*/
 		++browser->b.columns;
+	} else {
+		hists__for_each_format(hists, fmt)
+			++browser->b.columns;
+	}
 
 	hists__reset_column_width(hists);
 }

+ 1 - 1
tools/perf/util/parse-branch-options.c

@@ -64,7 +64,7 @@ int parse_branch_str(const char *str, __u64 *mode)
 		}
 		if (!br->name) {
 			ret = -1;
-			ui__warning("unknown branch filter %s,"
+			pr_warning("unknown branch filter %s,"
 				    " check man page\n", s);
 			goto error;
 		}

+ 3 - 1
tools/perf/util/pmu.c

@@ -1141,7 +1141,9 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag,
 			if (event_glob != NULL &&
 			    !(strglobmatch_nocase(name, event_glob) ||
 			      (!is_cpu && strglobmatch_nocase(alias->name,
-						       event_glob))))
+						       event_glob)) ||
+			      (alias->topic &&
+			       strglobmatch_nocase(alias->topic, event_glob))))
 				continue;
 
 			if (is_cpu && !name_only && !alias->desc)

+ 1 - 0
tools/perf/util/python-ext-sources

@@ -18,6 +18,7 @@ util/thread_map.c
 util/util.c
 util/xyarray.c
 util/cgroup.c
+util/parse-branch-options.c
 util/rblist.c
 util/counts.c
 util/strlist.c

+ 19 - 20
tools/perf/util/trace-event-scripting.c

@@ -25,6 +25,7 @@
 #include <errno.h>
 
 #include "../perf.h"
+#include "debug.h"
 #include "util.h"
 #include "trace-event.h"
 
@@ -86,16 +87,15 @@ struct scripting_ops python_scripting_unsupported_ops = {
 
 static void register_python_scripting(struct scripting_ops *scripting_ops)
 {
-	int err;
-	err = script_spec_register("Python", scripting_ops);
-	if (err)
-		die("error registering Python script extension");
-
-	err = script_spec_register("py", scripting_ops);
-	if (err)
-		die("error registering py script extension");
-
-	scripting_context = malloc(sizeof(struct scripting_context));
+	if (scripting_context == NULL)
+		scripting_context = malloc(sizeof(*scripting_context));
+
+       if (scripting_context == NULL ||
+	   script_spec_register("Python", scripting_ops) ||
+	   script_spec_register("py", scripting_ops)) {
+		pr_err("Error registering Python script extension: disabling it\n");
+		zfree(&scripting_context);
+	}
 }
 
 #ifdef NO_LIBPYTHON
@@ -150,16 +150,15 @@ struct scripting_ops perl_scripting_unsupported_ops = {
 
 static void register_perl_scripting(struct scripting_ops *scripting_ops)
 {
-	int err;
-	err = script_spec_register("Perl", scripting_ops);
-	if (err)
-		die("error registering Perl script extension");
-
-	err = script_spec_register("pl", scripting_ops);
-	if (err)
-		die("error registering pl script extension");
-
-	scripting_context = malloc(sizeof(struct scripting_context));
+	if (scripting_context == NULL)
+		scripting_context = malloc(sizeof(*scripting_context));
+
+       if (scripting_context == NULL ||
+	   script_spec_register("Perl", scripting_ops) ||
+	   script_spec_register("pl", scripting_ops)) {
+		pr_err("Error registering Perl script extension: disabling it\n");
+		zfree(&scripting_context);
+	}
 }
 
 #ifdef NO_LIBPERL

+ 8 - 0
tools/perf/util/util.c

@@ -433,6 +433,14 @@ int parse_nsec_time(const char *str, u64 *ptime)
 	return 0;
 }
 
+int timestamp__scnprintf_usec(u64 timestamp, char *buf, size_t sz)
+{
+	u64  sec = timestamp / NSEC_PER_SEC;
+	u64 usec = (timestamp % NSEC_PER_SEC) / NSEC_PER_USEC;
+
+	return scnprintf(buf, sz, "%"PRIu64".%06"PRIu64, sec, usec);
+}
+
 unsigned long parse_tag_value(const char *str, struct parse_tag *tags)
 {
 	struct parse_tag *i = tags;

+ 3 - 0
tools/perf/util/util.h

@@ -362,4 +362,7 @@ extern int sched_getcpu(void);
 #endif
 
 int is_printable_array(char *p, unsigned int len);
+
+int timestamp__scnprintf_usec(u64 timestamp, char *buf, size_t sz);
+
 #endif /* GIT_COMPAT_UTIL_H */