瀏覽代碼

Merge tag 'perf-core-for-mingo-4.11-20170111' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

New features:

- Add more triggers to switch the output file (perf.data.TIMESTAMP).

  Now, in addition to switching to a different output file when
  receiving a SIGUSR2, one can also specify file size and time based
  triggers:

       perf record -a --switch-output=signal

  is equivalent to what we had before:

       perf record -a --switch-output

  While we can also ask for the file to be "sliced" by size, taking
  into account that that will happen only when we get woken up by
  the kernel, i.e. one has to take into account the --mmap-pages (the
  size of the perf mmap ring buffer):

       perf record -a --switch-output=2G

  will break the perf.data output into multiple files limited to 2GB
  of samples, right when generating the output.

  For time based samples, alarm() will be used, so to have 1 minute
  limited perf.data output files:

      perf record -a --switch-output=1m

  (Jiri Olsa)

- Remove the need to use -e only for syscalls and --event only for
  tracepoints/HW/SW/etc events, i.e. now one can use:

      perf trace -e nanosleep,futex,sched:sched_switch ./workload

  or:

      perf trace --event nanosleep,futex,sched:sched_switch ./workload

  And have it tracing raw_syscalls:sys_{enter,exit} for the nanosleep
  and futex syscalls, formatting those as strace does while also
  tracing sched:sched_switch, ordering it all into one strace like
  output.

  Using '!' as the first character in the -e/--event argument remains
  a way to negate the list of syscalls, i.e. to trace all syscalls except
  for the ones specified; it doesn't affect the other kinds of events.

  E.g:

  [root@jouet ~] # perf trace -e sched:sched_switch,nanosleep usleep 1
     0.000 ( 0.028 ms): usleep/28150 nanosleep(rqtp: 0x7ffe4201b9f0) ...
     0.028 (         ): sched:sched_switch:usleep:28150 [120] S ==> swapper/0:0 [120])
     0.000 ( 0.065 ms): usleep/28150  ... [continued]: nanosleep()) = 0
  [root@jouet ~]#

  (Arnaldo Carvalho de Melo)

- 'perf kallsyms' toy tool to look for extended symbol information on
  the running kernel and demonstrate the machine/thread/symbol APIs for
  use in other tools, such as 'perf probe' (Arnaldo Carvalho de Melo)

Infrastructure improvements:

- Add missing linux/kernel.h include to subcmd.h (Arnaldo Carvalho de Melo)

- Sync x86's vmx.h with the kernel

- Create libdir directory before installing libperf-jvmti.so (Laura Abbott)

- Fix typo in perf_evlist__start_workload() (Soramichi Akiyama)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Ingo Molnar 8 年之前
父節點
當前提交
f913f3a655

+ 5 - 0
tools/arch/x86/include/uapi/asm/vmx.h

@@ -65,6 +65,8 @@
 #define EXIT_REASON_TPR_BELOW_THRESHOLD 43
 #define EXIT_REASON_APIC_ACCESS         44
 #define EXIT_REASON_EOI_INDUCED         45
+#define EXIT_REASON_GDTR_IDTR           46
+#define EXIT_REASON_LDTR_TR             47
 #define EXIT_REASON_EPT_VIOLATION       48
 #define EXIT_REASON_EPT_MISCONFIG       49
 #define EXIT_REASON_INVEPT              50
@@ -113,6 +115,8 @@
 	{ EXIT_REASON_MCE_DURING_VMENTRY,    "MCE_DURING_VMENTRY" }, \
 	{ EXIT_REASON_TPR_BELOW_THRESHOLD,   "TPR_BELOW_THRESHOLD" }, \
 	{ EXIT_REASON_APIC_ACCESS,           "APIC_ACCESS" }, \
+	{ EXIT_REASON_GDTR_IDTR,	     "GDTR_IDTR" }, \
+	{ EXIT_REASON_LDTR_TR,		     "LDTR_TR" }, \
 	{ EXIT_REASON_EPT_VIOLATION,         "EPT_VIOLATION" }, \
 	{ EXIT_REASON_EPT_MISCONFIG,         "EPT_MISCONFIG" }, \
 	{ EXIT_REASON_INVEPT,                "INVEPT" }, \
@@ -129,6 +133,7 @@
 	{ EXIT_REASON_XRSTORS,               "XRSTORS" }
 
 #define VMX_ABORT_SAVE_GUEST_MSR_FAIL        1
+#define VMX_ABORT_LOAD_HOST_PDPTE_FAIL       2
 #define VMX_ABORT_LOAD_HOST_MSR_FAIL         4
 
 #endif /* _UAPIVMX_H */

+ 1 - 0
tools/lib/subcmd/parse-options.h

@@ -1,6 +1,7 @@
 #ifndef __SUBCMD_PARSE_OPTIONS_H
 #define __SUBCMD_PARSE_OPTIONS_H
 
+#include <linux/kernel.h>
 #include <stdbool.h>
 #include <stdint.h>
 

+ 1 - 0
tools/perf/Build

@@ -7,6 +7,7 @@ perf-y += builtin-help.o
 perf-y += builtin-sched.o
 perf-y += builtin-buildid-list.o
 perf-y += builtin-buildid-cache.o
+perf-y += builtin-kallsyms.o
 perf-y += builtin-list.o
 perf-y += builtin-record.o
 perf-y += builtin-report.o

+ 24 - 0
tools/perf/Documentation/perf-kallsyms.txt

@@ -0,0 +1,24 @@
+perf-kallsyms(1)
+================
+
+NAME
+----
+perf-kallsyms - Searches running kernel for symbols
+
+SYNOPSIS
+--------
+[verse]
+'perf kallsyms' [<options>] symbol_name [symbol_name...]
+
+DESCRIPTION
+-----------
+This command searches the running kernel kallsyms file for the given symbol(s)
+and prints information about it, including the DSO, the kallsyms begin/end
+addresses and the addresses in the ELF kallsyms symbol table (for symbols in
+modules).
+
+OPTIONS
+-------
+-v::
+--verbose::
+	Increase verbosity level, showing details about symbol table loading, etc.

+ 12 - 2
tools/perf/Documentation/perf-record.txt

@@ -421,9 +421,19 @@ Configure all used events to run in user space.
 --timestamp-filename
 Append timestamp to output file name.
 
---switch-output::
+--switch-output[=mode]::
 Generate multiple perf.data files, timestamp prefixed, switching to a new one
-when receiving a SIGUSR2.
+based on 'mode' value:
+  "signal" - when receiving a SIGUSR2 (default value) or
+  <size>   - when reaching the size threshold, size is expected to
+             be a number with appended unit character - B/K/M/G
+  <time>   - when reaching the time threshold, time is expected to
+             be a number with appended unit character - s/m/h/d
+
+             Note: the precision of  the size  threshold  hugely depends
+             on your configuration  - the number and size of  your  ring
+             buffers (-m). It is generally more precise for higher sizes
+             (like >5M), for lower values expect different sizes.
 
 A possible use case is to, given an external event, slice the perf.data file
 that gets then processed, possibly via a perf script, to decide if that

+ 4 - 4
tools/perf/Documentation/perf-trace.txt

@@ -35,7 +35,10 @@ OPTIONS
 
 -e::
 --expr::
-	List of syscalls to show, currently only syscall names.
+--event::
+	List of syscalls and other perf events (tracepoints, HW cache events,
+	etc) to show.
+	See 'perf list' for a complete list of events.
 	Prefixing with ! shows all syscalls but the ones specified.  You may
 	need to escape it.
 
@@ -135,9 +138,6 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs.
 --kernel-syscall-graph::
 	 Show the kernel callchains on the syscall exit path.
 
---event::
-	Trace other events, see 'perf list' for a complete list.
-
 --max-stack::
         Set the stack depth limit when parsing the callchain, anything
         beyond the specified depth will be ignored. Note that at this point

+ 1 - 0
tools/perf/Makefile.perf

@@ -661,6 +661,7 @@ ifndef NO_PERF_READ_VDSOX32
 endif
 ifndef NO_JVMTI
 	$(call QUIET_INSTALL, $(LIBJVMTI)) \
+		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(libdir_SQ)'; \
 		$(INSTALL) $(OUTPUT)$(LIBJVMTI) '$(DESTDIR_SQ)$(libdir_SQ)';
 endif
 	$(call QUIET_INSTALL, libexec) \

+ 1 - 1
tools/perf/builtin-help.c

@@ -434,7 +434,7 @@ int cmd_help(int argc, const char **argv, const char *prefix __maybe_unused)
 	const char * const builtin_help_subcommands[] = {
 		"buildid-cache", "buildid-list", "diff", "evlist", "help", "list",
 		"record", "report", "bench", "stat", "timechart", "top", "annotate",
-		"script", "sched", "kmem", "lock", "kvm", "test", "inject", "mem", "data",
+		"script", "sched", "kallsyms", "kmem", "lock", "kvm", "test", "inject", "mem", "data",
 #ifdef HAVE_LIBELF_SUPPORT
 		"probe",
 #endif

+ 67 - 0
tools/perf/builtin-kallsyms.c

@@ -0,0 +1,67 @@
+/*
+ * builtin-kallsyms.c
+ *
+ * Builtin command: Look for a symbol in the running kernel and its modules
+ *
+ * Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Released under the GPL v2. (and only v2, not any later version)
+ */
+#include "builtin.h"
+#include <linux/compiler.h>
+#include <subcmd/parse-options.h>
+#include "debug.h"
+#include "machine.h"
+#include "symbol.h"
+
+/*
+ * Look up each of the argc names in argv among the kernel functions of a
+ * machine obtained from machine__new_kallsyms() and print one line per name.
+ *
+ * A name that is not found is reported on stdout and does not count as an
+ * error.
+ *
+ * Returns 0 once every name was processed, -1 if the machine could not be
+ * created.
+ */
+static int __cmd_kallsyms(int argc, const char **argv)
+{
+	struct machine *host = machine__new_kallsyms();
+	int idx = 0;
+
+	if (!host) {
+		pr_err("Couldn't read /proc/kallsyms\n");
+		return -1;
+	}
+
+	while (idx < argc) {
+		const char *name = argv[idx++];
+		struct map *map;
+		struct symbol *sym = machine__find_kernel_function_by_name(host, name, &map);
+
+		if (sym != NULL) {
+			/* DSO short/long name, the unmap_ip() range, then the raw symbol range. */
+			printf("%s: %s %s %#" PRIx64 "-%#" PRIx64 " (%#" PRIx64 "-%#" PRIx64")\n",
+				sym->name, map->dso->short_name, map->dso->long_name,
+				map->unmap_ip(map, sym->start), map->unmap_ip(map, sym->end),
+				sym->start, sym->end);
+		} else {
+			printf("%s: not found\n", name);
+		}
+	}
+
+	machine__delete(host);
+	return 0;
+}
+
+/*
+ * Entry point of 'perf kallsyms'.
+ *
+ * Parses the command line, requires at least one symbol name, sets up the
+ * symbol subsystem (symbols sorted by name; the vmlinux path is searched for
+ * only when no vmlinux name was configured) and then looks up every remaining
+ * argument via __cmd_kallsyms().
+ *
+ * Returns the result of __cmd_kallsyms(), or -1 if symbol__init() fails.
+ */
+int cmd_kallsyms(int argc, const char **argv, const char *prefix __maybe_unused)
+{
+	const struct option options[] = {
+	/* Help text kept in line with Documentation/perf-kallsyms.txt. */
+	OPT_INCR('v', "verbose", &verbose, "be more verbose (show symbol table loading details, etc)"),
+	OPT_END()
+	};
+	const char * const kallsyms_usage[] = {
+		/* Every positional argument is looked up, not just the first. */
+		"perf kallsyms [<options>] symbol_name [symbol_name...]",
+		NULL
+	};
+
+	argc = parse_options(argc, argv, options, kallsyms_usage, 0);
+	if (argc < 1)
+		usage_with_options(kallsyms_usage, options);
+
+	symbol_conf.sort_by_name = true;
+	symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
+	if (symbol__init(NULL) < 0)
+		return -1;
+
+	return __cmd_kallsyms(argc, argv);
+}

+ 141 - 13
tools/perf/builtin-record.c

@@ -46,6 +46,15 @@
 #include <asm/bug.h>
 #include <linux/time64.h>
 
+struct switch_output {			/* state of the --switch-output option */
+	bool		 enabled;	/* set by switch_output_setup() once a mode was parsed */
+	bool		 signal;	/* "signal" mode: switch when SIGUSR2 is handled */
+	unsigned long	 size;		/* size mode: threshold compared against rec->bytes_written */
+	unsigned long	 time;		/* time mode: value handed to alarm() */
+	const char	*str;		/* option argument as given on the command line */
+	bool		 set;		/* filled in by the option parser for --switch-output */
+};
+
 struct record {
 	struct perf_tool	tool;
 	struct record_opts	opts;
@@ -62,10 +71,33 @@ struct record {
 	bool			no_buildid_cache_set;
 	bool			buildid_all;
 	bool			timestamp_filename;
-	bool			switch_output;
+	struct switch_output	switch_output;
 	unsigned long long	samples;
 };
 
+static volatile int auxtrace_record__snapshot_started;
+static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
+static DEFINE_TRIGGER(switch_output_trigger);
+
+/* True when signal mode is selected and the switch-output trigger is ready. */
+static bool switch_output_signal(struct record *rec)
+{
+	if (!rec->switch_output.signal)
+		return false;
+
+	return trigger_is_ready(&switch_output_trigger);
+}
+
+/*
+ * True when a size threshold is configured, the switch-output trigger is
+ * ready and the bytes written so far have reached that threshold.
+ */
+static bool switch_output_size(struct record *rec)
+{
+	if (!rec->switch_output.size)
+		return false;
+
+	if (!trigger_is_ready(&switch_output_trigger))
+		return false;
+
+	return rec->bytes_written >= rec->switch_output.size;
+}
+
+/* True when a time threshold is configured and the switch-output trigger is ready. */
+static bool switch_output_time(struct record *rec)
+{
+	if (!rec->switch_output.time)
+		return false;
+
+	return trigger_is_ready(&switch_output_trigger);
+}
+
 static int record__write(struct record *rec, void *bf, size_t size)
 {
 	if (perf_data_file__write(rec->session->file, bf, size) < 0) {
@@ -74,6 +106,10 @@ static int record__write(struct record *rec, void *bf, size_t size)
 	}
 
 	rec->bytes_written += size;
+
+	if (switch_output_size(rec))
+		trigger_hit(&switch_output_trigger);
+
 	return 0;
 }
 
@@ -193,10 +229,6 @@ static volatile int done;
 static volatile int signr = -1;
 static volatile int child_finished;
 
-static volatile int auxtrace_record__snapshot_started;
-static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
-static DEFINE_TRIGGER(switch_output_trigger);
-
 static void sig_handler(int sig)
 {
 	if (sig == SIGCHLD)
@@ -712,6 +744,7 @@ static void workload_exec_failed_signal(int signo __maybe_unused,
 }
 
 static void snapshot_sig_handler(int sig);
+static void alarm_sig_handler(int sig);
 
 int __weak
 perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
@@ -842,11 +875,11 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 	signal(SIGTERM, sig_handler);
 	signal(SIGSEGV, sigsegv_handler);
 
-	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output) {
+	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
 		signal(SIGUSR2, snapshot_sig_handler);
 		if (rec->opts.auxtrace_snapshot_mode)
 			trigger_on(&auxtrace_snapshot_trigger);
-		if (rec->switch_output)
+		if (rec->switch_output.enabled)
 			trigger_on(&switch_output_trigger);
 	} else {
 		signal(SIGUSR2, SIG_IGN);
@@ -1043,6 +1076,10 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 				err = fd;
 				goto out_child;
 			}
+
+			/* re-arm the alarm */
+			if (rec->switch_output.time)
+				alarm(rec->switch_output.time);
 		}
 
 		if (hits == rec->samples) {
@@ -1352,6 +1389,78 @@ out_free:
 	return ret;
 }
 
+/*
+ * Warn when the --switch-output size threshold is smaller than half of the
+ * mmap size computed from the configured number of mmap pages.
+ */
+static void switch_output_size_warn(struct record *rec)
+{
+	u64 wakeup_size = perf_evlist__mmap_size(rec->opts.mmap_pages) / 2;
+	char buf[100];
+
+	if (rec->switch_output.size >= wakeup_size)
+		return;
+
+	unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
+	pr_warning("WARNING: switch-output data size lower than "
+		   "wakeup kernel buffer size (%s) "
+		   "expect bigger perf.data sizes\n", buf);
+}
+
+/*
+ * Turn the --switch-output argument into a mode in rec->switch_output.
+ *
+ * The argument is tried, in this order, as the literal "signal", as a size
+ * (B/K/M/G suffix) and as a time (s/m/h/d suffix). When one of them matches,
+ * timestamped file names are forced on and the feature is marked enabled.
+ *
+ * Returns 0 when the option was not given or was parsed, -1 when the argument
+ * matches none of the modes.
+ */
+static int switch_output_setup(struct record *rec)
+{
+	static struct parse_tag tags_size[] = {
+		{ .tag  = 'B', .mult = 1       },
+		{ .tag  = 'K', .mult = 1 << 10 },
+		{ .tag  = 'M', .mult = 1 << 20 },
+		{ .tag  = 'G', .mult = 1 << 30 },
+		{ .tag  = 0 },
+	};
+	static struct parse_tag tags_time[] = {
+		{ .tag  = 's', .mult = 1        },
+		{ .tag  = 'm', .mult = 60       },
+		{ .tag  = 'h', .mult = 60*60    },
+		{ .tag  = 'd', .mult = 60*60*24 },
+		{ .tag  = 0 },
+	};
+	const unsigned long no_match = (unsigned long) -1;
+	struct switch_output *s = &rec->switch_output;
+	unsigned long val;
+
+	if (!s->set)
+		return 0;
+
+	if (strcmp(s->str, "signal") == 0) {
+		s->signal = true;
+		pr_debug("switch-output with SIGUSR2 signal\n");
+	} else if ((val = parse_tag_value(s->str, tags_size)) != no_match) {
+		s->size = val;
+		pr_debug("switch-output with %s size threshold\n", s->str);
+	} else if ((val = parse_tag_value(s->str, tags_time)) != no_match) {
+		s->time = val;
+		pr_debug("switch-output with %s time threshold (%lu seconds)\n",
+			 s->str, s->time);
+	} else {
+		return -1;
+	}
+
+	rec->timestamp_filename = true;
+	s->enabled              = true;
+
+	/* The size warning is skipped when no_buffering is set. */
+	if (s->size && !rec->opts.no_buffering)
+		switch_output_size_warn(rec);
+
+	return 0;
+}
+
 static const char * const __record_usage[] = {
 	"perf record [<options>] [<command>]",
 	"perf record [<options>] -- <command> [<options>]",
@@ -1519,8 +1628,10 @@ static struct option __record_options[] = {
 		    "Record build-id of all DSOs regardless of hits"),
 	OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
 		    "append timestamp to output filename"),
-	OPT_BOOLEAN(0, "switch-output", &record.switch_output,
-		    "Switch output when receive SIGUSR2"),
+	OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
+			  &record.switch_output.set, "signal,size,time",
+			  "Switch output when receive SIGUSR2 or cross size,time threshold",
+			  "signal"),
 	OPT_BOOLEAN(0, "dry-run", &dry_run,
 		    "Parse options then exit"),
 	OPT_END()
@@ -1578,8 +1689,15 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
 		return -EINVAL;
 	}
 
-	if (rec->switch_output)
-		rec->timestamp_filename = true;
+	if (switch_output_setup(rec)) {
+		parse_options_usage(record_usage, record_options, "switch-output", 0);
+		return -EINVAL;
+	}
+
+	if (rec->switch_output.time) {
+		signal(SIGALRM, alarm_sig_handler);
+		alarm(rec->switch_output.time);
+	}
 
 	if (!rec->itr) {
 		rec->itr = auxtrace_record__init(rec->evlist, &err);
@@ -1629,7 +1747,7 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
 
 	if (rec->no_buildid_cache || rec->no_buildid) {
 		disable_buildid_cache();
-	} else if (rec->switch_output) {
+	} else if (rec->switch_output.enabled) {
 		/*
 		 * In 'perf record --switch-output', disable buildid
 		 * generation by default to reduce data file switching
@@ -1721,6 +1839,8 @@ out:
 
 static void snapshot_sig_handler(int sig __maybe_unused)
 {
+	struct record *rec = &record;
+
 	if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
 		trigger_hit(&auxtrace_snapshot_trigger);
 		auxtrace_record__snapshot_started = 1;
@@ -1728,6 +1848,14 @@ static void snapshot_sig_handler(int sig __maybe_unused)
 			trigger_error(&auxtrace_snapshot_trigger);
 	}
 
-	if (trigger_is_ready(&switch_output_trigger))
+	if (switch_output_signal(rec))
+		trigger_hit(&switch_output_trigger);
+}
+
+/* SIGALRM handler: hit the switch-output trigger when time mode is active and ready. */
+static void alarm_sig_handler(int sig __maybe_unused)
+{
+	if (!switch_output_time(&record))
+		return;
+
+	trigger_hit(&switch_output_trigger);
+}

+ 92 - 28
tools/perf/builtin-trace.c

@@ -40,6 +40,7 @@
 
 #include <libaudit.h> /* FIXME: Still needed for audit_errno_to_name */
 #include <stdlib.h>
+#include <string.h>
 #include <linux/err.h>
 #include <linux/filter.h>
 #include <linux/audit.h>
@@ -2699,6 +2700,91 @@ static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
 		evsel->handler = handler;
 }
 
+/*
+ * XXX: Hackish, just splitting the combined -e+--event (syscalls
+ * (raw_syscalls:{sys_{enter,exit}} + events (tracepoints, HW, SW, etc) to use
+ * existing facilities unchanged (trace->ev_qualifier + parse_options()).
+ *
+ * It'd be better to introduce a parse_options() variant that would return a
+ * list with the terms it didn't match to an event...
+ *
+ * Each comma separated term of str that syscalltbl__id() knows, or that names
+ * a readable file under the strace groups directory, is appended to the
+ * syscall list, which becomes trace->ev_qualifier. Every other term is
+ * appended to a second list that is handed to parse_events_option().
+ * A leading '!' sets trace->not_ev_qualifier.
+ *
+ * str is modified in place while it is scanned (each ',' is temporarily
+ * replaced by a NUL) and restored before returning.
+ *
+ * Returns 0 on success, -1 on allocation or qualifier validation failure,
+ * otherwise the result of parse_events_option() for the non-syscall terms.
+ */
+static int trace__parse_events_option(const struct option *opt, const char *str,
+				      int unset __maybe_unused)
+{
+	struct trace *trace = (struct trace *)opt->value;
+	const char *s = str;
+	char *sep = NULL, *lists[2] = { NULL, NULL, };
+	/*
+	 * +1 for the NUL terminator: when str is a single term, strcpy() below
+	 * copies strlen(str) + 1 bytes into the freshly allocated list.
+	 */
+	int len = strlen(str) + 1, err = -1, list;
+	char *strace_groups_dir = system_path(STRACE_GROUPS_DIR);
+	char group_name[PATH_MAX];
+
+	if (strace_groups_dir == NULL)
+		return -1;
+
+	if (*s == '!') {
+		++s;
+		trace->not_ev_qualifier = true;
+	}
+
+	while (1) {
+		if ((sep = strchr(s, ',')) != NULL)
+			*sep = '\0';
+
+		/* lists[1] collects syscalls/strace groups, lists[0] everything else. */
+		list = 0;
+		if (syscalltbl__id(trace->sctbl, s) >= 0) {
+			list = 1;
+		} else {
+			path__join(group_name, sizeof(group_name), strace_groups_dir, s);
+			if (access(group_name, R_OK) == 0)
+				list = 1;
+		}
+
+		if (lists[list]) {
+			sprintf(lists[list] + strlen(lists[list]), ",%s", s);
+		} else {
+			lists[list] = malloc(len);
+			if (lists[list] == NULL)
+				goto out;
+			strcpy(lists[list], s);
+		}
+
+		if (!sep)
+			break;
+
+		*sep = ',';
+		s = sep + 1;
+	}
+
+	if (lists[1] != NULL) {
+		struct strlist_config slist_config = {
+			.dirname = strace_groups_dir,
+		};
+
+		trace->ev_qualifier = strlist__new(lists[1], &slist_config);
+		if (trace->ev_qualifier == NULL) {
+			fputs("Not enough memory to parse event qualifier", trace->output);
+			goto out;
+		}
+
+		if (trace__validate_ev_qualifier(trace))
+			goto out;
+	}
+
+	err = 0;
+
+	if (lists[0]) {
+		struct option o = OPT_CALLBACK('e', "event", &trace->evlist, "event",
+					       "event selector. use 'perf list' to list available events",
+					       parse_events_option);
+		err = parse_events_option(&o, lists[0], 0);
+	}
+out:
+	/*
+	 * NOTE(review): releasing these assumes system_path() hands back
+	 * caller-owned heap memory and that strlist__new() and
+	 * parse_events_option() copy what they keep - confirm.
+	 */
+	free(strace_groups_dir);
+	free(lists[0]);
+	free(lists[1]);
+
+	/* Only non-NULL here when we bailed out with a comma still overwritten. */
+	if (sep)
+		*sep = ',';
+
+	return err;
+}
+
 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
 {
 	const char *trace_usage[] = {
@@ -2730,15 +2816,15 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
 		.max_stack = UINT_MAX,
 	};
 	const char *output_name = NULL;
-	const char *ev_qualifier_str = NULL;
 	const struct option trace_options[] = {
-	OPT_CALLBACK(0, "event", &trace.evlist, "event",
-		     "event selector. use 'perf list' to list available events",
-		     parse_events_option),
+	OPT_CALLBACK('e', "event", &trace, "event",
+		     "event/syscall selector. use 'perf list' to list available events",
+		     trace__parse_events_option),
 	OPT_BOOLEAN(0, "comm", &trace.show_comm,
 		    "show the thread COMM next to its id"),
 	OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
-	OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"),
+	OPT_CALLBACK(0, "expr", &trace, "expr", "list of syscalls/events to trace",
+		     trace__parse_events_option),
 	OPT_STRING('o', "output", &output_name, "file", "output file name"),
 	OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
 	OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
@@ -2863,7 +2949,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
 		return -1;
 	}
 
-	if (!trace.trace_syscalls && ev_qualifier_str) {
+	if (!trace.trace_syscalls && trace.ev_qualifier) {
 		pr_err("The -e option can't be used with --no-syscalls.\n");
 		goto out;
 	}
@@ -2878,28 +2964,6 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
 
 	trace.open_id = syscalltbl__id(trace.sctbl, "open");
 
-	if (ev_qualifier_str != NULL) {
-		const char *s = ev_qualifier_str;
-		struct strlist_config slist_config = {
-			.dirname = system_path(STRACE_GROUPS_DIR),
-		};
-
-		trace.not_ev_qualifier = *s == '!';
-		if (trace.not_ev_qualifier)
-			++s;
-		trace.ev_qualifier = strlist__new(s, &slist_config);
-		if (trace.ev_qualifier == NULL) {
-			fputs("Not enough memory to parse event qualifier",
-			      trace.output);
-			err = -ENOMEM;
-			goto out_close;
-		}
-
-		err = trace__validate_ev_qualifier(&trace);
-		if (err)
-			goto out_close;
-	}
-
 	err = target__validate(&trace.opts.target);
 	if (err) {
 		target__strerror(&trace.opts.target, err, bf, sizeof(bf));

+ 1 - 0
tools/perf/builtin.h

@@ -23,6 +23,7 @@ int cmd_diff(int argc, const char **argv, const char *prefix);
 int cmd_evlist(int argc, const char **argv, const char *prefix);
 int cmd_help(int argc, const char **argv, const char *prefix);
 int cmd_sched(int argc, const char **argv, const char *prefix);
+int cmd_kallsyms(int argc, const char **argv, const char *prefix);
 int cmd_list(int argc, const char **argv, const char *prefix);
 int cmd_record(int argc, const char **argv, const char *prefix);
 int cmd_report(int argc, const char **argv, const char *prefix);

+ 1 - 0
tools/perf/command-list.txt

@@ -12,6 +12,7 @@ perf-diff			mainporcelain common
 perf-config			mainporcelain common
 perf-evlist			mainporcelain common
 perf-inject			mainporcelain common
+perf-kallsyms			mainporcelain common
 perf-kmem			mainporcelain common
 perf-kvm			mainporcelain common
 perf-list			mainporcelain common

+ 1 - 0
tools/perf/perf.c

@@ -47,6 +47,7 @@ static struct cmd_struct commands[] = {
 	{ "diff",	cmd_diff,	0 },
 	{ "evlist",	cmd_evlist,	0 },
 	{ "help",	cmd_help,	0 },
+	{ "kallsyms",	cmd_kallsyms,	0 },
 	{ "list",	cmd_list,	0 },
 	{ "record",	cmd_record,	0 },
 	{ "report",	cmd_report,	0 },

+ 1 - 0
tools/perf/tests/Build

@@ -44,6 +44,7 @@ perf-y += is_printable_array.o
 perf-y += bitmap.o
 perf-y += perf-hooks.o
 perf-y += clang.o
+perf-y += unit_number__scnprintf.o
 
 $(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build
 	$(call rule_mkdir)

+ 4 - 0
tools/perf/tests/builtin-test.c

@@ -246,6 +246,10 @@ static struct test generic_tests[] = {
 			.get_desc	= test__clang_subtest_get_desc,
 		}
 	},
+	{
+		.desc = "unit_number__scnprintf",
+		.func = test__unit_number__scnprint,
+	},
 	{
 		.func = NULL,
 	},

+ 1 - 0
tools/perf/tests/tests.h

@@ -96,6 +96,7 @@ int test__perf_hooks(int subtest);
 int test__clang(int subtest);
 const char *test__clang_subtest_get_desc(int subtest);
 int test__clang_subtest_get_nr(void);
+int test__unit_number__scnprint(int subtest);
 
 #if defined(__arm__) || defined(__aarch64__)
 #ifdef HAVE_DWARF_UNWIND_SUPPORT

+ 37 - 0
tools/perf/tests/unit_number__scnprintf.c

@@ -0,0 +1,37 @@
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include "tests.h"
+#include "util.h"
+#include "debug.h"
+
+/*
+ * Check unit_number__scnprintf() against a table of values and the strings
+ * they are expected to format to.
+ *
+ * Returns TEST_OK when every entry matches, TEST_FAIL at the first mismatch.
+ */
+int test__unit_number__scnprint(int subtest __maybe_unused)
+{
+	struct {
+		u64		 n;
+		const char	*str;
+	} cases[] = {
+		{ 1,			"1B"	},
+		{ 10*1024,		"10K"	},
+		{ 20*1024*1024,		"20M"	},
+		{ 30*1024*1024*1024ULL,	"30G"	},
+		{ 0,			"0B"	},
+		{ 0,			NULL	},	/* sentinel */
+	};
+	unsigned idx;
+
+	for (idx = 0; cases[idx].str != NULL; idx++) {
+		char buf[100];
+
+		unit_number__scnprintf(buf, sizeof(buf), cases[idx].n);
+
+		pr_debug("n %" PRIu64 ", str '%s', buf '%s'\n",
+			 cases[idx].n, cases[idx].str, buf);
+
+		if (strcmp(cases[idx].str, buf) != 0)
+			return TEST_FAIL;
+	}
+
+	return TEST_OK;
+}

+ 8 - 4
tools/perf/util/evlist.c

@@ -1184,7 +1184,7 @@ unsigned long perf_event_mlock_kb_in_pages(void)
 	return pages;
 }
 
-static size_t perf_evlist__mmap_size(unsigned long pages)
+size_t perf_evlist__mmap_size(unsigned long pages)
 {
 	if (pages == UINT_MAX)
 		pages = perf_event_mlock_kb_in_pages();
@@ -1224,12 +1224,16 @@ static long parse_pages_arg(const char *str, unsigned long min,
 	if (pages == 0 && min == 0) {
 		/* leave number of pages at 0 */
 	} else if (!is_power_of_2(pages)) {
+		char buf[100];
+
 		/* round pages up to next power of 2 */
 		pages = roundup_pow_of_two(pages);
 		if (!pages)
 			return -EINVAL;
-		pr_info("rounding mmap pages size to %lu bytes (%lu pages)\n",
-			pages * page_size, pages);
+
+		unit_number__scnprintf(buf, sizeof(buf), pages * page_size);
+		pr_info("rounding mmap pages size to %s (%lu pages)\n",
+			buf, pages);
 	}
 
 	if (pages > max)
@@ -1797,7 +1801,7 @@ int perf_evlist__start_workload(struct perf_evlist *evlist)
 		 */
 		ret = write(evlist->workload.cork_fd, &bf, 1);
 		if (ret < 0)
-			perror("enable to write to pipe");
+			perror("unable to write to pipe");
 
 		close(evlist->workload.cork_fd);
 		return ret;

+ 2 - 0
tools/perf/util/evlist.h

@@ -218,6 +218,8 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
 		      bool overwrite);
 void perf_evlist__munmap(struct perf_evlist *evlist);
 
+size_t perf_evlist__mmap_size(unsigned long pages);
+
 void perf_evlist__disable(struct perf_evlist *evlist);
 void perf_evlist__enable(struct perf_evlist *evlist);
 void perf_evlist__toggle_enable(struct perf_evlist *evlist);

+ 19 - 0
tools/perf/util/machine.c

@@ -87,6 +87,25 @@ out_delete:
 	return NULL;
 }
 
+/*
+ * Create a host machine and load /proc/kallsyms into it.
+ *
+ * Returns the new machine, or NULL when the host machine could not be created
+ * or the kallsyms load returned a value <= 0 (the machine is deleted then).
+ */
+struct machine *machine__new_kallsyms(void)
+{
+	struct machine *machine = machine__new_host();
+
+	if (machine == NULL)
+		return NULL;
+
+	/*
+	 * FIXME:
+	 * 1) MAP__FUNCTION will go away when we stop loading separate maps for
+	 *    functions and data objects.
+	 * 2) We should switch to machine__load_kallsyms(), i.e. not explicitly
+	 *    ask for not using the kcore parsing code, once this one is fixed
+	 *    to create a map per module.
+	 */
+	if (__machine__load_kallsyms(machine, "/proc/kallsyms", MAP__FUNCTION, true) > 0)
+		return machine;
+
+	machine__delete(machine);
+	return NULL;
+}
+
 static void dsos__purge(struct dsos *dsos)
 {
 	struct dso *pos, *n;

+ 1 - 0
tools/perf/util/machine.h

@@ -129,6 +129,7 @@ char *machine__mmap_name(struct machine *machine, char *bf, size_t size);
 void machines__set_comm_exec(struct machines *machines, bool comm_exec);
 
 struct machine *machine__new_host(void);
+struct machine *machine__new_kallsyms(void);
 int machine__init(struct machine *machine, const char *root_dir, pid_t pid);
 void machine__exit(struct machine *machine);
 void machine__delete_threads(struct machine *machine);

+ 13 - 0
tools/perf/util/util.c

@@ -789,3 +789,16 @@ int is_printable_array(char *p, unsigned int len)
 	}
 	return 1;
 }
+
+/*
+ * Format n into buf (at most size bytes) as an integer followed by one of the
+ * unit characters B, K, M or G.
+ *
+ * n is divided by 1024 for as long as the quotient would be greater than 1,
+ * stopping at G; the division truncates.
+ *
+ * Returns whatever scnprintf() returns.
+ */
+int unit_number__scnprintf(char *buf, size_t size, u64 n)
+{
+	char unit[4] = "BKMG";
+	int step;
+
+	for (step = 0; step < 3 && n / 1024 > 1; step++)
+		n /= 1024;
+
+	return scnprintf(buf, size, "%" PRIu64 "%c", n, unit[step]);
+}

+ 1 - 0
tools/perf/util/util.h

@@ -363,4 +363,5 @@ int is_printable_array(char *p, unsigned int len);
 
 int timestamp__scnprintf_usec(u64 timestamp, char *buf, size_t sz);
 
+int unit_number__scnprintf(char *buf, size_t size, u64 n);
 #endif /* GIT_COMPAT_UTIL_H */