|
@@ -1178,6 +1178,9 @@ fail:
|
|
|
return NULL;
|
|
|
}
|
|
|
|
|
|
+#define TRACE_PFMAJ (1 << 0)
|
|
|
+#define TRACE_PFMIN (1 << 1)
|
|
|
+
|
|
|
struct trace {
|
|
|
struct perf_tool tool;
|
|
|
struct {
|
|
@@ -1212,6 +1215,8 @@ struct trace {
|
|
|
bool summary_only;
|
|
|
bool show_comm;
|
|
|
bool show_tool_stats;
|
|
|
+ bool trace_syscalls;
|
|
|
+ int trace_pgfaults;
|
|
|
};
|
|
|
|
|
|
static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
|
|
@@ -1538,6 +1543,7 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
|
|
|
}
|
|
|
|
|
|
typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
|
|
|
+ union perf_event *event,
|
|
|
struct perf_sample *sample);
|
|
|
|
|
|
static struct syscall *trace__syscall_info(struct trace *trace,
|
|
@@ -1610,6 +1616,7 @@ static void thread__update_stats(struct thread_trace *ttrace,
|
|
|
}
|
|
|
|
|
|
static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
|
|
|
+ union perf_event *event __maybe_unused,
|
|
|
struct perf_sample *sample)
|
|
|
{
|
|
|
char *msg;
|
|
@@ -1658,6 +1665,7 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
|
|
|
}
|
|
|
|
|
|
static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
|
|
|
+ union perf_event *event __maybe_unused,
|
|
|
struct perf_sample *sample)
|
|
|
{
|
|
|
int ret;
|
|
@@ -1735,6 +1743,7 @@ out:
|
|
|
}
|
|
|
|
|
|
static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
|
|
|
+ union perf_event *event __maybe_unused,
|
|
|
struct perf_sample *sample)
|
|
|
{
|
|
|
trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
|
|
@@ -1742,6 +1751,7 @@ static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
|
|
|
}
|
|
|
|
|
|
static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
|
|
|
+ union perf_event *event __maybe_unused,
|
|
|
struct perf_sample *sample)
|
|
|
{
|
|
|
u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
|
|
@@ -1768,6 +1778,68 @@ out_dump:
|
|
|
return 0;
|
|
|
}
|
|
|
|
|
|
+static void print_location(FILE *f, struct perf_sample *sample,
|
|
|
+ struct addr_location *al,
|
|
|
+ bool print_dso, bool print_sym)
|
|
|
+{
|
|
|
+
|
|
|
+ if ((verbose || print_dso) && al->map)
|
|
|
+ fprintf(f, "%s@", al->map->dso->long_name);
|
|
|
+
|
|
|
+ if ((verbose || print_sym) && al->sym)
|
|
|
+ fprintf(f, "%s+0x%lx", al->sym->name,
|
|
|
+ al->addr - al->sym->start);
|
|
|
+ else if (al->map)
|
|
|
+ fprintf(f, "0x%lx", al->addr);
|
|
|
+ else
|
|
|
+ fprintf(f, "0x%lx", sample->addr);
|
|
|
+}
|
|
|
+
|
|
|
+static int trace__pgfault(struct trace *trace,
|
|
|
+ struct perf_evsel *evsel,
|
|
|
+ union perf_event *event,
|
|
|
+ struct perf_sample *sample)
|
|
|
+{
|
|
|
+ struct thread *thread;
|
|
|
+ u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
|
|
|
+ struct addr_location al;
|
|
|
+ char map_type = 'd';
|
|
|
+
|
|
|
+ thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
|
|
|
+
|
|
|
+ thread__find_addr_location(thread, trace->host, cpumode, MAP__FUNCTION,
|
|
|
+ sample->ip, &al);
|
|
|
+
|
|
|
+ trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
|
|
|
+
|
|
|
+ fprintf(trace->output, "%sfault [",
|
|
|
+ evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
|
|
|
+ "maj" : "min");
|
|
|
+
|
|
|
+ print_location(trace->output, sample, &al, false, true);
|
|
|
+
|
|
|
+ fprintf(trace->output, "] => ");
|
|
|
+
|
|
|
+ thread__find_addr_location(thread, trace->host, cpumode, MAP__VARIABLE,
|
|
|
+ sample->addr, &al);
|
|
|
+
|
|
|
+ if (!al.map) {
|
|
|
+ thread__find_addr_location(thread, trace->host, cpumode,
|
|
|
+ MAP__FUNCTION, sample->addr, &al);
|
|
|
+
|
|
|
+ if (al.map)
|
|
|
+ map_type = 'x';
|
|
|
+ else
|
|
|
+ map_type = '?';
|
|
|
+ }
|
|
|
+
|
|
|
+ print_location(trace->output, sample, &al, true, false);
|
|
|
+
|
|
|
+ fprintf(trace->output, " (%c%c)\n", map_type, al.level);
|
|
|
+
|
|
|
+ return 0;
|
|
|
+}
|
|
|
+
|
|
|
static bool skip_sample(struct trace *trace, struct perf_sample *sample)
|
|
|
{
|
|
|
if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
|
|
@@ -1781,7 +1853,7 @@ static bool skip_sample(struct trace *trace, struct perf_sample *sample)
|
|
|
}
|
|
|
|
|
|
static int trace__process_sample(struct perf_tool *tool,
|
|
|
- union perf_event *event __maybe_unused,
|
|
|
+ union perf_event *event,
|
|
|
struct perf_sample *sample,
|
|
|
struct perf_evsel *evsel,
|
|
|
struct machine *machine __maybe_unused)
|
|
@@ -1799,7 +1871,7 @@ static int trace__process_sample(struct perf_tool *tool,
|
|
|
|
|
|
if (handler) {
|
|
|
++trace->nr_events;
|
|
|
- handler(trace, evsel, sample);
|
|
|
+ handler(trace, evsel, event, sample);
|
|
|
}
|
|
|
|
|
|
return err;
|
|
@@ -1826,7 +1898,7 @@ static int parse_target_str(struct trace *trace)
|
|
|
return 0;
|
|
|
}
|
|
|
|
|
|
-static int trace__record(int argc, const char **argv)
|
|
|
+static int trace__record(struct trace *trace, int argc, const char **argv)
|
|
|
{
|
|
|
unsigned int rec_argc, i, j;
|
|
|
const char **rec_argv;
|
|
@@ -1835,34 +1907,54 @@ static int trace__record(int argc, const char **argv)
|
|
|
"-R",
|
|
|
"-m", "1024",
|
|
|
"-c", "1",
|
|
|
- "-e",
|
|
|
};
|
|
|
|
|
|
+ const char * const sc_args[] = { "-e", };
|
|
|
+ unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
|
|
|
+ const char * const majpf_args[] = { "-e", "major-faults" };
|
|
|
+ unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
|
|
|
+ const char * const minpf_args[] = { "-e", "minor-faults" };
|
|
|
+ unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
|
|
|
+
|
|
|
/* +1 is for the event string below */
|
|
|
- rec_argc = ARRAY_SIZE(record_args) + 1 + argc;
|
|
|
+ rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
|
|
|
+ majpf_args_nr + minpf_args_nr + argc;
|
|
|
rec_argv = calloc(rec_argc + 1, sizeof(char *));
|
|
|
|
|
|
if (rec_argv == NULL)
|
|
|
return -ENOMEM;
|
|
|
|
|
|
+ j = 0;
|
|
|
for (i = 0; i < ARRAY_SIZE(record_args); i++)
|
|
|
- rec_argv[i] = record_args[i];
|
|
|
-
|
|
|
- /* event string may be different for older kernels - e.g., RHEL6 */
|
|
|
- if (is_valid_tracepoint("raw_syscalls:sys_enter"))
|
|
|
- rec_argv[i] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
|
|
|
- else if (is_valid_tracepoint("syscalls:sys_enter"))
|
|
|
- rec_argv[i] = "syscalls:sys_enter,syscalls:sys_exit";
|
|
|
- else {
|
|
|
- pr_err("Neither raw_syscalls nor syscalls events exist.\n");
|
|
|
- return -1;
|
|
|
+ rec_argv[j++] = record_args[i];
|
|
|
+
|
|
|
+ if (trace->trace_syscalls) {
|
|
|
+ for (i = 0; i < sc_args_nr; i++)
|
|
|
+ rec_argv[j++] = sc_args[i];
|
|
|
+
|
|
|
+ /* event string may be different for older kernels - e.g., RHEL6 */
|
|
|
+ if (is_valid_tracepoint("raw_syscalls:sys_enter"))
|
|
|
+ rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
|
|
|
+ else if (is_valid_tracepoint("syscalls:sys_enter"))
|
|
|
+ rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
|
|
|
+ else {
|
|
|
+ pr_err("Neither raw_syscalls nor syscalls events exist.\n");
|
|
|
+ return -1;
|
|
|
+ }
|
|
|
}
|
|
|
- i++;
|
|
|
|
|
|
- for (j = 0; j < (unsigned int)argc; j++, i++)
|
|
|
- rec_argv[i] = argv[j];
|
|
|
+ if (trace->trace_pgfaults & TRACE_PFMAJ)
|
|
|
+ for (i = 0; i < majpf_args_nr; i++)
|
|
|
+ rec_argv[j++] = majpf_args[i];
|
|
|
+
|
|
|
+ if (trace->trace_pgfaults & TRACE_PFMIN)
|
|
|
+ for (i = 0; i < minpf_args_nr; i++)
|
|
|
+ rec_argv[j++] = minpf_args[i];
|
|
|
|
|
|
- return cmd_record(i, rec_argv, NULL);
|
|
|
+ for (i = 0; i < (unsigned int)argc; i++)
|
|
|
+ rec_argv[j++] = argv[i];
|
|
|
+
|
|
|
+ return cmd_record(j, rec_argv, NULL);
|
|
|
}
|
|
|
|
|
|
static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
|
|
@@ -1882,6 +1974,30 @@ static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
|
|
|
perf_evlist__add(evlist, evsel);
|
|
|
}
|
|
|
|
|
|
+static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
|
|
|
+ u64 config)
|
|
|
+{
|
|
|
+ struct perf_evsel *evsel;
|
|
|
+ struct perf_event_attr attr = {
|
|
|
+ .type = PERF_TYPE_SOFTWARE,
|
|
|
+ .mmap_data = 1,
|
|
|
+ .sample_period = 1,
|
|
|
+ };
|
|
|
+
|
|
|
+ attr.config = config;
|
|
|
+
|
|
|
+ event_attr_init(&attr);
|
|
|
+
|
|
|
+ evsel = perf_evsel__new(&attr);
|
|
|
+ if (!evsel)
|
|
|
+ return -ENOMEM;
|
|
|
+
|
|
|
+ evsel->handler = trace__pgfault;
|
|
|
+ perf_evlist__add(evlist, evsel);
|
|
|
+
|
|
|
+ return 0;
|
|
|
+}
|
|
|
+
|
|
|
static int trace__run(struct trace *trace, int argc, const char **argv)
|
|
|
{
|
|
|
struct perf_evlist *evlist = perf_evlist__new();
|
|
@@ -1897,10 +2013,21 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
|
|
|
goto out;
|
|
|
}
|
|
|
|
|
|
- if (perf_evlist__add_syscall_newtp(evlist, trace__sys_enter, trace__sys_exit))
|
|
|
+ if (trace->trace_syscalls &&
|
|
|
+ perf_evlist__add_syscall_newtp(evlist, trace__sys_enter,
|
|
|
+ trace__sys_exit))
|
|
|
+ goto out_error_tp;
|
|
|
+
|
|
|
+ if (trace->trace_syscalls)
|
|
|
+ perf_evlist__add_vfs_getname(evlist);
|
|
|
+
|
|
|
+ if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
|
|
|
+ perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ))
|
|
|
goto out_error_tp;
|
|
|
|
|
|
- perf_evlist__add_vfs_getname(evlist);
|
|
|
+ if ((trace->trace_pgfaults & TRACE_PFMIN) &&
|
|
|
+ perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
|
|
|
+ goto out_error_tp;
|
|
|
|
|
|
if (trace->sched &&
|
|
|
perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
|
|
@@ -1982,7 +2109,8 @@ again:
|
|
|
goto next_event;
|
|
|
}
|
|
|
|
|
|
- if (sample.raw_data == NULL) {
|
|
|
+ if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
|
|
|
+ sample.raw_data == NULL) {
|
|
|
fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
|
|
|
perf_evsel__name(evsel), sample.tid,
|
|
|
sample.cpu, sample.raw_size);
|
|
@@ -1990,7 +2118,7 @@ again:
|
|
|
}
|
|
|
|
|
|
handler = evsel->handler;
|
|
|
- handler(trace, evsel, &sample);
|
|
|
+ handler(trace, evsel, event, &sample);
|
|
|
next_event:
|
|
|
perf_evlist__mmap_consume(evlist, i);
|
|
|
|
|
@@ -2093,13 +2221,10 @@ static int trace__replay(struct trace *trace)
|
|
|
if (evsel == NULL)
|
|
|
evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
|
|
|
"syscalls:sys_enter");
|
|
|
- if (evsel == NULL) {
|
|
|
- pr_err("Data file does not have raw_syscalls:sys_enter event\n");
|
|
|
- goto out;
|
|
|
- }
|
|
|
|
|
|
- if (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
|
|
|
- perf_evsel__init_sc_tp_ptr_field(evsel, args)) {
|
|
|
+ if (evsel &&
|
|
|
+ (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
|
|
|
+ perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
|
|
|
pr_err("Error during initialize raw_syscalls:sys_enter event\n");
|
|
|
goto out;
|
|
|
}
|
|
@@ -2109,15 +2234,19 @@ static int trace__replay(struct trace *trace)
|
|
|
if (evsel == NULL)
|
|
|
evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
|
|
|
"syscalls:sys_exit");
|
|
|
- if (evsel == NULL) {
|
|
|
- pr_err("Data file does not have raw_syscalls:sys_exit event\n");
|
|
|
+ if (evsel &&
|
|
|
+ (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
|
|
|
+ perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
|
|
|
+ pr_err("Error during initialize raw_syscalls:sys_exit event\n");
|
|
|
goto out;
|
|
|
}
|
|
|
|
|
|
- if (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
|
|
|
- perf_evsel__init_sc_tp_uint_field(evsel, ret)) {
|
|
|
- pr_err("Error during initialize raw_syscalls:sys_exit event\n");
|
|
|
- goto out;
|
|
|
+ evlist__for_each(session->evlist, evsel) {
|
|
|
+ if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
|
|
|
+ (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
|
|
|
+ evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
|
|
|
+ evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
|
|
|
+ evsel->handler = trace__pgfault;
|
|
|
}
|
|
|
|
|
|
err = parse_target_str(trace);
|
|
@@ -2264,6 +2393,23 @@ static int trace__open_output(struct trace *trace, const char *filename)
|
|
|
return trace->output == NULL ? -errno : 0;
|
|
|
}
|
|
|
|
|
|
+static int parse_pagefaults(const struct option *opt, const char *str,
|
|
|
+ int unset __maybe_unused)
|
|
|
+{
|
|
|
+ int *trace_pgfaults = opt->value;
|
|
|
+
|
|
|
+ if (strcmp(str, "all") == 0)
|
|
|
+ *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
|
|
|
+ else if (strcmp(str, "maj") == 0)
|
|
|
+ *trace_pgfaults |= TRACE_PFMAJ;
|
|
|
+ else if (strcmp(str, "min") == 0)
|
|
|
+ *trace_pgfaults |= TRACE_PFMIN;
|
|
|
+ else
|
|
|
+ return -1;
|
|
|
+
|
|
|
+ return 0;
|
|
|
+}
|
|
|
+
|
|
|
int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
|
|
|
{
|
|
|
const char * const trace_usage[] = {
|
|
@@ -2293,6 +2439,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
|
|
|
},
|
|
|
.output = stdout,
|
|
|
.show_comm = true,
|
|
|
+ .trace_syscalls = true,
|
|
|
};
|
|
|
const char *output_name = NULL;
|
|
|
const char *ev_qualifier_str = NULL;
|
|
@@ -2330,20 +2477,34 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
|
|
|
"Show only syscall summary with statistics"),
|
|
|
OPT_BOOLEAN('S', "with-summary", &trace.summary,
|
|
|
"Show all syscalls and summary with statistics"),
|
|
|
+ OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
|
|
|
+ "Trace pagefaults", parse_pagefaults, "maj"),
|
|
|
+ OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
|
|
|
OPT_END()
|
|
|
};
|
|
|
int err;
|
|
|
char bf[BUFSIZ];
|
|
|
|
|
|
- if ((argc > 1) && (strcmp(argv[1], "record") == 0))
|
|
|
- return trace__record(argc-2, &argv[2]);
|
|
|
+ argc = parse_options(argc, argv, trace_options, trace_usage,
|
|
|
+ PARSE_OPT_STOP_AT_NON_OPTION);
|
|
|
+
|
|
|
+ if (trace.trace_pgfaults) {
|
|
|
+ trace.opts.sample_address = true;
|
|
|
+ trace.opts.sample_time = true;
|
|
|
+ }
|
|
|
|
|
|
- argc = parse_options(argc, argv, trace_options, trace_usage, 0);
|
|
|
+ if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
|
|
|
+ return trace__record(&trace, argc-1, &argv[1]);
|
|
|
|
|
|
/* summary_only implies summary option, but don't overwrite summary if set */
|
|
|
if (trace.summary_only)
|
|
|
trace.summary = trace.summary_only;
|
|
|
|
|
|
+ if (!trace.trace_syscalls && !trace.trace_pgfaults) {
|
|
|
+ pr_err("Please specify something to trace.\n");
|
|
|
+ return -1;
|
|
|
+ }
|
|
|
+
|
|
|
if (output_name != NULL) {
|
|
|
err = trace__open_output(&trace, output_name);
|
|
|
if (err < 0) {
|