Эх сурвалжийг харах

Merge tag 'perf-core-for-mingo-20160303' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes:

User visible changes:

 - Check existence of frontend/backend stalled cycles in 'perf stat' (Andi Kleen)

 - Implement CSV metrics output in 'perf stat' (Andi Kleen)

 - Support metrics in 'perf stat' --per-core/socket mode (Andi Kleen)

 - Avoid installing .o files from tools/lib/ into the python extension (Jiri Olsa)

 - Rename the tracepoint '/format' field that carries the syscall ID from 'nr',
   which is also the name of some syscall arguments, to "__syscall_nr", to
   avoid having multiple fields with the same name, which was breaking the
   python script skeleton generator from perf.data files (Taeung Song)

 - Support converting data from bpf events in 'perf data' (Wang Nan)

 - Fix segfault in 'perf test' hists related entries (Arnaldo Carvalho de Melo)

 - Fix output of %llu for 64 bit values read on 32 bit machines in libtraceevent (Steven Rostedt)

 - Fix time stamp rounding issue in libtraceevent (Chaos.Chen)

Infrastructure changes:

 - Fix setlocale() breakage in the pmu parsing code (Jiri Olsa)

 - Split libtraceevent's pevent_print_event() (Steven Rostedt)

 - Librarize some 'perf record' bits to allow handling multiple perf.data
   files per session (Wang Nan)

 - Ensure a non-zero rc is returned when mmap fails in 'perf record' (Wang Nan)

 - Fix double free on 'command_line' in an error path in 'perf script' (Colin Ian King)

 - Initialize struct sigaction 'sa_flags' field in a 'perf test' entry (Colin Ian King)

 - Fix various build warnings in turbostat, detected with gcc6 (Colin Ian King)

 - Use .s extension for preprocessed assembler code (Masahiro Yamada)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Ingo Molnar 9 жил өмнө
parent
commit
009668520a

+ 9 - 7
kernel/trace/trace_syscalls.c

@@ -186,11 +186,11 @@ print_syscall_exit(struct trace_iterator *iter, int flags,
 
 
 extern char *__bad_type_size(void);
 extern char *__bad_type_size(void);
 
 
-#define SYSCALL_FIELD(type, name)					\
-	sizeof(type) != sizeof(trace.name) ?				\
+#define SYSCALL_FIELD(type, field, name)				\
+	sizeof(type) != sizeof(trace.field) ?				\
 		__bad_type_size() :					\
 		__bad_type_size() :					\
-		#type, #name, offsetof(typeof(trace), name),		\
-		sizeof(trace.name), is_signed_type(type)
+		#type, #name, offsetof(typeof(trace), field),		\
+		sizeof(trace.field), is_signed_type(type)
 
 
 static int __init
 static int __init
 __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len)
 __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len)
@@ -261,7 +261,8 @@ static int __init syscall_enter_define_fields(struct trace_event_call *call)
 	int i;
 	int i;
 	int offset = offsetof(typeof(trace), args);
 	int offset = offsetof(typeof(trace), args);
 
 
-	ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
+	ret = trace_define_field(call, SYSCALL_FIELD(int, nr, __syscall_nr),
+				 FILTER_OTHER);
 	if (ret)
 	if (ret)
 		return ret;
 		return ret;
 
 
@@ -281,11 +282,12 @@ static int __init syscall_exit_define_fields(struct trace_event_call *call)
 	struct syscall_trace_exit trace;
 	struct syscall_trace_exit trace;
 	int ret;
 	int ret;
 
 
-	ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
+	ret = trace_define_field(call, SYSCALL_FIELD(int, nr, __syscall_nr),
+				 FILTER_OTHER);
 	if (ret)
 	if (ret)
 		return ret;
 		return ret;
 
 
-	ret = trace_define_field(call, SYSCALL_FIELD(long, ret),
+	ret = trace_define_field(call, SYSCALL_FIELD(long, ret, ret),
 				 FILTER_OTHER);
 				 FILTER_OTHER);
 
 
 	return ret;
 	return ret;

+ 1 - 1
tools/build/Makefile.build

@@ -85,7 +85,7 @@ $(OUTPUT)%.i: %.c FORCE
 	$(call rule_mkdir)
 	$(call rule_mkdir)
 	$(call if_changed_dep,cc_i_c)
 	$(call if_changed_dep,cc_i_c)
 
 
-$(OUTPUT)%.i: %.S FORCE
+$(OUTPUT)%.s: %.S FORCE
 	$(call rule_mkdir)
 	$(call rule_mkdir)
 	$(call if_changed_dep,cc_i_c)
 	$(call if_changed_dep,cc_i_c)
 
 

+ 113 - 33
tools/lib/traceevent/event-parse.c

@@ -2635,6 +2635,7 @@ process_hex(struct event_format *event, struct print_arg *arg, char **tok)
 
 
 free_field:
 free_field:
 	free_arg(arg->hex.field);
 	free_arg(arg->hex.field);
+	arg->hex.field = NULL;
 out:
 out:
 	*tok = NULL;
 	*tok = NULL;
 	return EVENT_ERROR;
 	return EVENT_ERROR;
@@ -2659,8 +2660,10 @@ process_int_array(struct event_format *event, struct print_arg *arg, char **tok)
 
 
 free_size:
 free_size:
 	free_arg(arg->int_array.count);
 	free_arg(arg->int_array.count);
+	arg->int_array.count = NULL;
 free_field:
 free_field:
 	free_arg(arg->int_array.field);
 	free_arg(arg->int_array.field);
+	arg->int_array.field = NULL;
 out:
 out:
 	*tok = NULL;
 	*tok = NULL;
 	return EVENT_ERROR;
 	return EVENT_ERROR;
@@ -4975,7 +4978,7 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event
 						break;
 						break;
 					}
 					}
 				}
 				}
-				if (pevent->long_size == 8 && ls &&
+				if (pevent->long_size == 8 && ls == 1 &&
 				    sizeof(long) != 8) {
 				    sizeof(long) != 8) {
 					char *p;
 					char *p;
 
 
@@ -5339,41 +5342,45 @@ static bool is_timestamp_in_us(char *trace_clock, bool use_trace_clock)
 	return false;
 	return false;
 }
 }
 
 
-void pevent_print_event(struct pevent *pevent, struct trace_seq *s,
-			struct pevent_record *record, bool use_trace_clock)
+/**
+ * pevent_find_event_by_record - return the event from a given record
+ * @pevent: a handle to the pevent
+ * @record: The record to get the event from
+ *
+ * Returns the associated event for a given record, or NULL if non is
+ * is found.
+ */
+struct event_format *
+pevent_find_event_by_record(struct pevent *pevent, struct pevent_record *record)
 {
 {
-	static const char *spaces = "                    "; /* 20 spaces */
-	struct event_format *event;
-	unsigned long secs;
-	unsigned long usecs;
-	unsigned long nsecs;
-	const char *comm;
-	void *data = record->data;
 	int type;
 	int type;
-	int pid;
-	int len;
-	int p;
-	bool use_usec_format;
-
-	use_usec_format = is_timestamp_in_us(pevent->trace_clock,
-							use_trace_clock);
-	if (use_usec_format) {
-		secs = record->ts / NSECS_PER_SEC;
-		nsecs = record->ts - secs * NSECS_PER_SEC;
-	}
 
 
 	if (record->size < 0) {
 	if (record->size < 0) {
 		do_warning("ug! negative record size %d", record->size);
 		do_warning("ug! negative record size %d", record->size);
-		return;
+		return NULL;
 	}
 	}
 
 
-	type = trace_parse_common_type(pevent, data);
+	type = trace_parse_common_type(pevent, record->data);
 
 
-	event = pevent_find_event(pevent, type);
-	if (!event) {
-		do_warning("ug! no event found for type %d", type);
-		return;
-	}
+	return pevent_find_event(pevent, type);
+}
+
+/**
+ * pevent_print_event_task - Write the event task comm, pid and CPU
+ * @pevent: a handle to the pevent
+ * @s: the trace_seq to write to
+ * @event: the handle to the record's event
+ * @record: The record to get the event from
+ *
+ * Writes the tasks comm, pid and CPU to @s.
+ */
+void pevent_print_event_task(struct pevent *pevent, struct trace_seq *s,
+			     struct event_format *event,
+			     struct pevent_record *record)
+{
+	void *data = record->data;
+	const char *comm;
+	int pid;
 
 
 	pid = parse_common_pid(pevent, data);
 	pid = parse_common_pid(pevent, data);
 	comm = find_cmdline(pevent, pid);
 	comm = find_cmdline(pevent, pid);
@@ -5381,9 +5388,43 @@ void pevent_print_event(struct pevent *pevent, struct trace_seq *s,
 	if (pevent->latency_format) {
 	if (pevent->latency_format) {
 		trace_seq_printf(s, "%8.8s-%-5d %3d",
 		trace_seq_printf(s, "%8.8s-%-5d %3d",
 		       comm, pid, record->cpu);
 		       comm, pid, record->cpu);
-		pevent_data_lat_fmt(pevent, s, record);
 	} else
 	} else
 		trace_seq_printf(s, "%16s-%-5d [%03d]", comm, pid, record->cpu);
 		trace_seq_printf(s, "%16s-%-5d [%03d]", comm, pid, record->cpu);
+}
+
+/**
+ * pevent_print_event_time - Write the event timestamp
+ * @pevent: a handle to the pevent
+ * @s: the trace_seq to write to
+ * @event: the handle to the record's event
+ * @record: The record to get the event from
+ * @use_trace_clock: Set to parse according to the @pevent->trace_clock
+ *
+ * Writes the timestamp of the record into @s.
+ */
+void pevent_print_event_time(struct pevent *pevent, struct trace_seq *s,
+			     struct event_format *event,
+			     struct pevent_record *record,
+			     bool use_trace_clock)
+{
+	unsigned long secs;
+	unsigned long usecs;
+	unsigned long nsecs;
+	int p;
+	bool use_usec_format;
+
+	use_usec_format = is_timestamp_in_us(pevent->trace_clock,
+							use_trace_clock);
+	if (use_usec_format) {
+		secs = record->ts / NSECS_PER_SEC;
+		nsecs = record->ts - secs * NSECS_PER_SEC;
+	}
+
+	if (pevent->latency_format) {
+		trace_seq_printf(s, " %3d", record->cpu);
+		pevent_data_lat_fmt(pevent, s, record);
+	} else
+		trace_seq_printf(s, " [%03d]", record->cpu);
 
 
 	if (use_usec_format) {
 	if (use_usec_format) {
 		if (pevent->flags & PEVENT_NSEC_OUTPUT) {
 		if (pevent->flags & PEVENT_NSEC_OUTPUT) {
@@ -5391,14 +5432,36 @@ void pevent_print_event(struct pevent *pevent, struct trace_seq *s,
 			p = 9;
 			p = 9;
 		} else {
 		} else {
 			usecs = (nsecs + 500) / NSECS_PER_USEC;
 			usecs = (nsecs + 500) / NSECS_PER_USEC;
+			/* To avoid usecs larger than 1 sec */
+			if (usecs >= 1000000) {
+				usecs -= 1000000;
+				secs++;
+			}
 			p = 6;
 			p = 6;
 		}
 		}
 
 
-		trace_seq_printf(s, " %5lu.%0*lu: %s: ",
-					secs, p, usecs, event->name);
+		trace_seq_printf(s, " %5lu.%0*lu:", secs, p, usecs);
 	} else
 	} else
-		trace_seq_printf(s, " %12llu: %s: ",
-					record->ts, event->name);
+		trace_seq_printf(s, " %12llu:", record->ts);
+}
+
+/**
+ * pevent_print_event_data - Write the event data section
+ * @pevent: a handle to the pevent
+ * @s: the trace_seq to write to
+ * @event: the handle to the record's event
+ * @record: The record to get the event from
+ *
+ * Writes the parsing of the record's data to @s.
+ */
+void pevent_print_event_data(struct pevent *pevent, struct trace_seq *s,
+			     struct event_format *event,
+			     struct pevent_record *record)
+{
+	static const char *spaces = "                    "; /* 20 spaces */
+	int len;
+
+	trace_seq_printf(s, " %s: ", event->name);
 
 
 	/* Space out the event names evenly. */
 	/* Space out the event names evenly. */
 	len = strlen(event->name);
 	len = strlen(event->name);
@@ -5408,6 +5471,23 @@ void pevent_print_event(struct pevent *pevent, struct trace_seq *s,
 	pevent_event_info(s, event, record);
 	pevent_event_info(s, event, record);
 }
 }
 
 
+void pevent_print_event(struct pevent *pevent, struct trace_seq *s,
+			struct pevent_record *record, bool use_trace_clock)
+{
+	struct event_format *event;
+
+	event = pevent_find_event_by_record(pevent, record);
+	if (!event) {
+		do_warning("ug! no event found for type %d",
+			   trace_parse_common_type(pevent, record->data));
+		return;
+	}
+
+	pevent_print_event_task(pevent, s, event, record);
+	pevent_print_event_time(pevent, s, event, record, use_trace_clock);
+	pevent_print_event_data(pevent, s, event, record);
+}
+
 static int events_id_cmp(const void *a, const void *b)
 static int events_id_cmp(const void *a, const void *b)
 {
 {
 	struct event_format * const * ea = a;
 	struct event_format * const * ea = a;

+ 13 - 0
tools/lib/traceevent/event-parse.h

@@ -628,6 +628,16 @@ int pevent_register_print_string(struct pevent *pevent, const char *fmt,
 				 unsigned long long addr);
 				 unsigned long long addr);
 int pevent_pid_is_registered(struct pevent *pevent, int pid);
 int pevent_pid_is_registered(struct pevent *pevent, int pid);
 
 
+void pevent_print_event_task(struct pevent *pevent, struct trace_seq *s,
+			     struct event_format *event,
+			     struct pevent_record *record);
+void pevent_print_event_time(struct pevent *pevent, struct trace_seq *s,
+			     struct event_format *event,
+			     struct pevent_record *record,
+			     bool use_trace_clock);
+void pevent_print_event_data(struct pevent *pevent, struct trace_seq *s,
+			     struct event_format *event,
+			     struct pevent_record *record);
 void pevent_print_event(struct pevent *pevent, struct trace_seq *s,
 void pevent_print_event(struct pevent *pevent, struct trace_seq *s,
 			struct pevent_record *record, bool use_trace_clock);
 			struct pevent_record *record, bool use_trace_clock);
 
 
@@ -694,6 +704,9 @@ struct event_format *pevent_find_event(struct pevent *pevent, int id);
 struct event_format *
 struct event_format *
 pevent_find_event_by_name(struct pevent *pevent, const char *sys, const char *name);
 pevent_find_event_by_name(struct pevent *pevent, const char *sys, const char *name);
 
 
+struct event_format *
+pevent_find_event_by_record(struct pevent *pevent, struct pevent_record *record);
+
 void pevent_data_lat_fmt(struct pevent *pevent,
 void pevent_data_lat_fmt(struct pevent *pevent,
 			 struct trace_seq *s, struct pevent_record *record);
 			 struct trace_seq *s, struct pevent_record *record);
 int pevent_data_type(struct pevent *pevent, struct pevent_record *rec);
 int pevent_data_type(struct pevent *pevent, struct pevent_record *rec);

+ 1 - 0
tools/perf/arch/x86/tests/rdpmc.c

@@ -103,6 +103,7 @@ static int __test__rdpmc(void)
 
 
 	sigfillset(&sa.sa_mask);
 	sigfillset(&sa.sa_mask);
 	sa.sa_sigaction = segfault_handler;
 	sa.sa_sigaction = segfault_handler;
+	sa.sa_flags = 0;
 	sigaction(SIGSEGV, &sa, NULL);
 	sigaction(SIGSEGV, &sa, NULL);
 
 
 	fd = sys_perf_event_open(&attr, 0, -1, -1,
 	fd = sys_perf_event_open(&attr, 0, -1, -1,

+ 99 - 69
tools/perf/builtin-record.c

@@ -33,6 +33,7 @@
 #include "util/parse-regs-options.h"
 #include "util/parse-regs-options.h"
 #include "util/llvm-utils.h"
 #include "util/llvm-utils.h"
 #include "util/bpf-loader.h"
 #include "util/bpf-loader.h"
+#include "asm/bug.h"
 
 
 #include <unistd.h>
 #include <unistd.h>
 #include <sched.h>
 #include <sched.h>
@@ -323,7 +324,10 @@ try_again:
 		} else {
 		} else {
 			pr_err("failed to mmap with %d (%s)\n", errno,
 			pr_err("failed to mmap with %d (%s)\n", errno,
 				strerror_r(errno, msg, sizeof(msg)));
 				strerror_r(errno, msg, sizeof(msg)));
-			rc = -errno;
+			if (errno)
+				rc = -errno;
+			else
+				rc = -EINVAL;
 		}
 		}
 		goto out;
 		goto out;
 	}
 	}
@@ -467,6 +471,29 @@ static void record__init_features(struct record *rec)
 	perf_header__clear_feat(&session->header, HEADER_STAT);
 	perf_header__clear_feat(&session->header, HEADER_STAT);
 }
 }
 
 
+static void
+record__finish_output(struct record *rec)
+{
+	struct perf_data_file *file = &rec->file;
+	int fd = perf_data_file__fd(file);
+
+	if (file->is_pipe)
+		return;
+
+	rec->session->header.data_size += rec->bytes_written;
+	file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR);
+
+	if (!rec->no_buildid) {
+		process_buildids(rec);
+
+		if (rec->buildid_all)
+			dsos__hit_all(rec->session);
+	}
+	perf_session__write_header(rec->session, rec->evlist, fd, true);
+
+	return;
+}
+
 static volatile int workload_exec_errno;
 static volatile int workload_exec_errno;
 
 
 /*
 /*
@@ -485,6 +512,74 @@ static void workload_exec_failed_signal(int signo __maybe_unused,
 
 
 static void snapshot_sig_handler(int sig);
 static void snapshot_sig_handler(int sig);
 
 
+static int record__synthesize(struct record *rec)
+{
+	struct perf_session *session = rec->session;
+	struct machine *machine = &session->machines.host;
+	struct perf_data_file *file = &rec->file;
+	struct record_opts *opts = &rec->opts;
+	struct perf_tool *tool = &rec->tool;
+	int fd = perf_data_file__fd(file);
+	int err = 0;
+
+	if (file->is_pipe) {
+		err = perf_event__synthesize_attrs(tool, session,
+						   process_synthesized_event);
+		if (err < 0) {
+			pr_err("Couldn't synthesize attrs.\n");
+			goto out;
+		}
+
+		if (have_tracepoints(&rec->evlist->entries)) {
+			/*
+			 * FIXME err <= 0 here actually means that
+			 * there were no tracepoints so its not really
+			 * an error, just that we don't need to
+			 * synthesize anything.  We really have to
+			 * return this more properly and also
+			 * propagate errors that now are calling die()
+			 */
+			err = perf_event__synthesize_tracing_data(tool,	fd, rec->evlist,
+								  process_synthesized_event);
+			if (err <= 0) {
+				pr_err("Couldn't record tracing data.\n");
+				goto out;
+			}
+			rec->bytes_written += err;
+		}
+	}
+
+	if (rec->opts.full_auxtrace) {
+		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
+					session, process_synthesized_event);
+		if (err)
+			goto out;
+	}
+
+	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
+						 machine);
+	WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
+			   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
+			   "Check /proc/kallsyms permission or run as root.\n");
+
+	err = perf_event__synthesize_modules(tool, process_synthesized_event,
+					     machine);
+	WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
+			   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
+			   "Check /proc/modules permission or run as root.\n");
+
+	if (perf_guest) {
+		machines__process_guests(&session->machines,
+					 perf_event__synthesize_guest_os, tool);
+	}
+
+	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
+					    process_synthesized_event, opts->sample_address,
+					    opts->proc_map_timeout);
+out:
+	return err;
+}
+
 static int __cmd_record(struct record *rec, int argc, const char **argv)
 static int __cmd_record(struct record *rec, int argc, const char **argv)
 {
 {
 	int err;
 	int err;
@@ -579,63 +674,8 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 
 
 	machine = &session->machines.host;
 	machine = &session->machines.host;
 
 
-	if (file->is_pipe) {
-		err = perf_event__synthesize_attrs(tool, session,
-						   process_synthesized_event);
-		if (err < 0) {
-			pr_err("Couldn't synthesize attrs.\n");
-			goto out_child;
-		}
-
-		if (have_tracepoints(&rec->evlist->entries)) {
-			/*
-			 * FIXME err <= 0 here actually means that
-			 * there were no tracepoints so its not really
-			 * an error, just that we don't need to
-			 * synthesize anything.  We really have to
-			 * return this more properly and also
-			 * propagate errors that now are calling die()
-			 */
-			err = perf_event__synthesize_tracing_data(tool,	fd, rec->evlist,
-								  process_synthesized_event);
-			if (err <= 0) {
-				pr_err("Couldn't record tracing data.\n");
-				goto out_child;
-			}
-			rec->bytes_written += err;
-		}
-	}
-
-	if (rec->opts.full_auxtrace) {
-		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
-					session, process_synthesized_event);
-		if (err)
-			goto out_delete_session;
-	}
-
-	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
-						 machine);
-	if (err < 0)
-		pr_err("Couldn't record kernel reference relocation symbol\n"
-		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
-		       "Check /proc/kallsyms permission or run as root.\n");
-
-	err = perf_event__synthesize_modules(tool, process_synthesized_event,
-					     machine);
+	err = record__synthesize(rec);
 	if (err < 0)
 	if (err < 0)
-		pr_err("Couldn't record kernel module information.\n"
-		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
-		       "Check /proc/modules permission or run as root.\n");
-
-	if (perf_guest) {
-		machines__process_guests(&session->machines,
-					 perf_event__synthesize_guest_os, tool);
-	}
-
-	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
-					    process_synthesized_event, opts->sample_address,
-					    opts->proc_map_timeout);
-	if (err != 0)
 		goto out_child;
 		goto out_child;
 
 
 	if (rec->realtime_prio) {
 	if (rec->realtime_prio) {
@@ -771,18 +811,8 @@ out_child:
 	/* this will be recalculated during process_buildids() */
 	/* this will be recalculated during process_buildids() */
 	rec->samples = 0;
 	rec->samples = 0;
 
 
-	if (!err && !file->is_pipe) {
-		rec->session->header.data_size += rec->bytes_written;
-		file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR);
-
-		if (!rec->no_buildid) {
-			process_buildids(rec);
-
-			if (rec->buildid_all)
-				dsos__hit_all(rec->session);
-		}
-		perf_session__write_header(rec->session, rec->evlist, fd, true);
-	}
+	if (!err)
+		record__finish_output(rec);
 
 
 	if (!err && !quiet) {
 	if (!err && !quiet) {
 		char samples[128];
 		char samples[128];

+ 145 - 13
tools/perf/builtin-stat.c

@@ -739,6 +739,9 @@ struct outstate {
 	FILE *fh;
 	FILE *fh;
 	bool newline;
 	bool newline;
 	const char *prefix;
 	const char *prefix;
+	int  nfields;
+	int  id, nr;
+	struct perf_evsel *evsel;
 };
 };
 
 
 #define METRIC_LEN  35
 #define METRIC_LEN  35
@@ -754,12 +757,9 @@ static void do_new_line_std(struct outstate *os)
 {
 {
 	fputc('\n', os->fh);
 	fputc('\n', os->fh);
 	fputs(os->prefix, os->fh);
 	fputs(os->prefix, os->fh);
+	aggr_printout(os->evsel, os->id, os->nr);
 	if (stat_config.aggr_mode == AGGR_NONE)
 	if (stat_config.aggr_mode == AGGR_NONE)
 		fprintf(os->fh, "        ");
 		fprintf(os->fh, "        ");
-	if (stat_config.aggr_mode == AGGR_CORE)
-		fprintf(os->fh, "                  ");
-	if (stat_config.aggr_mode == AGGR_SOCKET)
-		fprintf(os->fh, "            ");
 	fprintf(os->fh, "                                                 ");
 	fprintf(os->fh, "                                                 ");
 }
 }
 
 
@@ -789,6 +789,44 @@ static void print_metric_std(void *ctx, const char *color, const char *fmt,
 	fprintf(out, " %-*s", METRIC_LEN - n - 1, unit);
 	fprintf(out, " %-*s", METRIC_LEN - n - 1, unit);
 }
 }
 
 
+static void new_line_csv(void *ctx)
+{
+	struct outstate *os = ctx;
+	int i;
+
+	fputc('\n', os->fh);
+	if (os->prefix)
+		fprintf(os->fh, "%s%s", os->prefix, csv_sep);
+	aggr_printout(os->evsel, os->id, os->nr);
+	for (i = 0; i < os->nfields; i++)
+		fputs(csv_sep, os->fh);
+}
+
+static void print_metric_csv(void *ctx,
+			     const char *color __maybe_unused,
+			     const char *fmt, const char *unit, double val)
+{
+	struct outstate *os = ctx;
+	FILE *out = os->fh;
+	char buf[64], *vals, *ends;
+
+	if (unit == NULL || fmt == NULL) {
+		fprintf(out, "%s%s%s%s", csv_sep, csv_sep, csv_sep, csv_sep);
+		return;
+	}
+	snprintf(buf, sizeof(buf), fmt, val);
+	vals = buf;
+	while (isspace(*vals))
+		vals++;
+	ends = vals;
+	while (isdigit(*ends) || *ends == '.')
+		ends++;
+	*ends = 0;
+	while (isspace(*unit))
+		unit++;
+	fprintf(out, "%s%s%s%s", csv_sep, vals, csv_sep, unit);
+}
+
 static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 {
 {
 	FILE *output = stat_config.output;
 	FILE *output = stat_config.output;
@@ -817,6 +855,28 @@ static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
 		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
 }
 }
 
 
+static int first_shadow_cpu(struct perf_evsel *evsel, int id)
+{
+	int i;
+
+	if (!aggr_get_id)
+		return 0;
+
+	if (stat_config.aggr_mode == AGGR_NONE)
+		return id;
+
+	if (stat_config.aggr_mode == AGGR_GLOBAL)
+		return 0;
+
+	for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) {
+		int cpu2 = perf_evsel__cpus(evsel)->map[i];
+
+		if (aggr_get_id(evsel_list->cpus, cpu2) == id)
+			return cpu2;
+	}
+	return 0;
+}
+
 static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 {
 {
 	FILE *output = stat_config.output;
 	FILE *output = stat_config.output;
@@ -853,13 +913,32 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval,
 	struct perf_stat_output_ctx out;
 	struct perf_stat_output_ctx out;
 	struct outstate os = {
 	struct outstate os = {
 		.fh = stat_config.output,
 		.fh = stat_config.output,
-		.prefix = prefix ? prefix : ""
+		.prefix = prefix ? prefix : "",
+		.id = id,
+		.nr = nr,
+		.evsel = counter,
 	};
 	};
 	print_metric_t pm = print_metric_std;
 	print_metric_t pm = print_metric_std;
 	void (*nl)(void *);
 	void (*nl)(void *);
 
 
 	nl = new_line_std;
 	nl = new_line_std;
 
 
+	if (csv_output) {
+		static int aggr_fields[] = {
+			[AGGR_GLOBAL] = 0,
+			[AGGR_THREAD] = 1,
+			[AGGR_NONE] = 1,
+			[AGGR_SOCKET] = 2,
+			[AGGR_CORE] = 2,
+		};
+
+		pm = print_metric_csv;
+		nl = new_line_csv;
+		os.nfields = 3;
+		os.nfields += aggr_fields[stat_config.aggr_mode];
+		if (counter->cgrp)
+			os.nfields++;
+	}
 	if (run == 0 || ena == 0 || counter->counts->scaled == -1) {
 	if (run == 0 || ena == 0 || counter->counts->scaled == -1) {
 		aggr_printout(counter, id, nr);
 		aggr_printout(counter, id, nr);
 
 
@@ -880,7 +959,12 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval,
 			fprintf(stat_config.output, "%s%s",
 			fprintf(stat_config.output, "%s%s",
 				csv_sep, counter->cgrp->name);
 				csv_sep, counter->cgrp->name);
 
 
+		if (!csv_output)
+			pm(&os, NULL, NULL, "", 0);
+		print_noise(counter, noise);
 		print_running(run, ena);
 		print_running(run, ena);
+		if (csv_output)
+			pm(&os, NULL, NULL, "", 0);
 		return;
 		return;
 	}
 	}
 
 
@@ -893,14 +977,41 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval,
 	out.new_line = nl;
 	out.new_line = nl;
 	out.ctx = &os;
 	out.ctx = &os;
 
 
-	if (!csv_output)
-		perf_stat__print_shadow_stats(counter, uval,
-				stat_config.aggr_mode == AGGR_GLOBAL ? 0 :
-				cpu_map__id_to_cpu(id),
+	if (csv_output) {
+		print_noise(counter, noise);
+		print_running(run, ena);
+	}
+
+	perf_stat__print_shadow_stats(counter, uval,
+				first_shadow_cpu(counter, id),
 				&out);
 				&out);
+	if (!csv_output) {
+		print_noise(counter, noise);
+		print_running(run, ena);
+	}
+}
+
+static void aggr_update_shadow(void)
+{
+	int cpu, s2, id, s;
+	u64 val;
+	struct perf_evsel *counter;
 
 
-	print_noise(counter, noise);
-	print_running(run, ena);
+	for (s = 0; s < aggr_map->nr; s++) {
+		id = aggr_map->map[s];
+		evlist__for_each(evsel_list, counter) {
+			val = 0;
+			for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
+				s2 = aggr_get_id(evsel_list->cpus, cpu);
+				if (s2 != id)
+					continue;
+				val += perf_counts(counter->counts, cpu, 0)->val;
+			}
+			val = val * counter->scale;
+			perf_stat__update_shadow_stats(counter, &val,
+						       first_shadow_cpu(counter, id));
+		}
+	}
 }
 }
 
 
 static void print_aggr(char *prefix)
 static void print_aggr(char *prefix)
@@ -914,6 +1025,8 @@ static void print_aggr(char *prefix)
 	if (!(aggr_map || aggr_get_id))
 	if (!(aggr_map || aggr_get_id))
 		return;
 		return;
 
 
+	aggr_update_shadow();
+
 	for (s = 0; s < aggr_map->nr; s++) {
 	for (s = 0; s < aggr_map->nr; s++) {
 		id = aggr_map->map[s];
 		id = aggr_map->map[s];
 		evlist__for_each(evsel_list, counter) {
 		evlist__for_each(evsel_list, counter) {
@@ -1441,7 +1554,7 @@ static int perf_stat_init_aggr_mode_file(struct perf_stat *st)
  */
  */
 static int add_default_attributes(void)
 static int add_default_attributes(void)
 {
 {
-	struct perf_event_attr default_attrs[] = {
+	struct perf_event_attr default_attrs0[] = {
 
 
   { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK		},
   { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK		},
   { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES	},
   { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES	},
@@ -1449,8 +1562,14 @@ static int add_default_attributes(void)
   { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS		},
   { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS		},
 
 
   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES		},
   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES		},
+};
+	struct perf_event_attr frontend_attrs[] = {
   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND	},
   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND	},
+};
+	struct perf_event_attr backend_attrs[] = {
   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND	},
   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND	},
+};
+	struct perf_event_attr default_attrs1[] = {
   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS		},
   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS		},
   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS	},
   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS	},
   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES		},
   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES		},
@@ -1567,7 +1686,19 @@ static int add_default_attributes(void)
 	}
 	}
 
 
 	if (!evsel_list->nr_entries) {
 	if (!evsel_list->nr_entries) {
-		if (perf_evlist__add_default_attrs(evsel_list, default_attrs) < 0)
+		if (perf_evlist__add_default_attrs(evsel_list, default_attrs0) < 0)
+			return -1;
+		if (pmu_have_event("cpu", "stalled-cycles-frontend")) {
+			if (perf_evlist__add_default_attrs(evsel_list,
+						frontend_attrs) < 0)
+				return -1;
+		}
+		if (pmu_have_event("cpu", "stalled-cycles-backend")) {
+			if (perf_evlist__add_default_attrs(evsel_list,
+						backend_attrs) < 0)
+				return -1;
+		}
+		if (perf_evlist__add_default_attrs(evsel_list, default_attrs1) < 0)
 			return -1;
 			return -1;
 	}
 	}
 
 
@@ -1835,6 +1966,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 	argc = parse_options_subcommand(argc, argv, stat_options, stat_subcommands,
 	argc = parse_options_subcommand(argc, argv, stat_options, stat_subcommands,
 					(const char **) stat_usage,
 					(const char **) stat_usage,
 					PARSE_OPT_STOP_AT_NON_OPTION);
 					PARSE_OPT_STOP_AT_NON_OPTION);
+	perf_stat__init_shadow_stats();
 
 
 	if (csv_sep) {
 	if (csv_sep) {
 		csv_output = true;
 		csv_output = true;

+ 6 - 2
tools/perf/builtin-trace.c

@@ -1725,8 +1725,12 @@ static int trace__read_syscall_info(struct trace *trace, int id)
 
 
 	sc->args = sc->tp_format->format.fields;
 	sc->args = sc->tp_format->format.fields;
 	sc->nr_args = sc->tp_format->format.nr_fields;
 	sc->nr_args = sc->tp_format->format.nr_fields;
-	/* drop nr field - not relevant here; does not exist on older kernels */
-	if (sc->args && strcmp(sc->args->name, "nr") == 0) {
+	/*
+	 * We need to check and discard the first variable '__syscall_nr'
+	 * or 'nr' that mean the syscall number. It is needless here.
+	 * So drop '__syscall_nr' or 'nr' field but does not exist on older kernels.
+	 */
+	if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
 		sc->args = sc->args->next;
 		sc->args = sc->args->next;
 		--sc->nr_args;
 		--sc->nr_args;
 	}
 	}

+ 117 - 1
tools/perf/util/data-convert-bt.c

@@ -352,6 +352,84 @@ static int add_tracepoint_values(struct ctf_writer *cw,
 	return ret;
 	return ret;
 }
 }
 
 
+/*
+ * Store the raw payload of a BPF output sample in @event as two fields:
+ * 'raw_len', the number of u32 words, and 'raw_data', a sequence holding
+ * those words.  Bytes past the last whole u32 are not converted and are
+ * reported with a warning.
+ *
+ * Returns 0 on success and non-zero on failure.  Every field and field-type
+ * reference obtained here is released before returning.
+ */
+static int
+add_bpf_output_values(struct bt_ctf_event_class *event_class,
+		      struct bt_ctf_event *event,
+		      struct perf_sample *sample)
+{
+	struct bt_ctf_field_type *len_type, *seq_type;
+	struct bt_ctf_field *len_field, *seq_field;
+	unsigned int raw_size = sample->raw_size;
+	unsigned int nr_elements = raw_size / sizeof(u32);
+	unsigned int nr_trailing = raw_size % sizeof(u32);
+	unsigned int i;
+	int ret;
+
+	/*
+	 * Only whole u32 words are converted.  The number of skipped bytes is
+	 * the remainder; computing it as nr_elements * sizeof(u32) - raw_size
+	 * would wrap around in unsigned arithmetic.
+	 */
+	if (nr_trailing)
+		pr_warning("Incorrect raw_size (%u) in bpf output event, skip %u bytes\n",
+			   raw_size, nr_trailing);
+
+	len_type = bt_ctf_event_class_get_field_by_name(event_class, "raw_len");
+	len_field = bt_ctf_field_create(len_type);
+	if (!len_field) {
+		pr_err("failed to create 'raw_len' for bpf output event\n");
+		ret = -1;
+		goto put_len_type;
+	}
+
+	ret = bt_ctf_field_unsigned_integer_set_value(len_field, nr_elements);
+	if (ret) {
+		pr_err("failed to set field value for raw_len\n");
+		goto put_len_field;
+	}
+	ret = bt_ctf_event_set_payload(event, "raw_len", len_field);
+	if (ret) {
+		pr_err("failed to set payload to raw_len\n");
+		goto put_len_field;
+	}
+
+	seq_type = bt_ctf_event_class_get_field_by_name(event_class, "raw_data");
+	seq_field = bt_ctf_field_create(seq_type);
+	if (!seq_field) {
+		pr_err("failed to create 'raw_data' for bpf output event\n");
+		ret = -1;
+		goto put_seq_type;
+	}
+
+	/* The sequence takes its element count from the 'raw_len' field. */
+	ret = bt_ctf_field_sequence_set_length(seq_field, len_field);
+	if (ret) {
+		pr_err("failed to set length of 'raw_data'\n");
+		goto put_seq_field;
+	}
+
+	for (i = 0; i < nr_elements; i++) {
+		struct bt_ctf_field *elem_field =
+			bt_ctf_field_sequence_get_field(seq_field, i);
+
+		ret = bt_ctf_field_unsigned_integer_set_value(elem_field,
+				((u32 *)(sample->raw_data))[i]);
+
+		/* Drop the element reference before looking at the result. */
+		bt_ctf_field_put(elem_field);
+		if (ret) {
+			pr_err("failed to set raw_data[%u]\n", i);
+			goto put_seq_field;
+		}
+	}
+
+	ret = bt_ctf_event_set_payload(event, "raw_data", seq_field);
+	if (ret)
+		pr_err("failed to set payload for raw_data\n");
+
+	/* Success falls through the same unwinding labels as the error paths. */
+put_seq_field:
+	bt_ctf_field_put(seq_field);
+put_seq_type:
+	bt_ctf_field_type_put(seq_type);
+put_len_field:
+	bt_ctf_field_put(len_field);
+put_len_type:
+	bt_ctf_field_type_put(len_type);
+	return ret;
+}
+
 static int add_generic_values(struct ctf_writer *cw,
 static int add_generic_values(struct ctf_writer *cw,
 			      struct bt_ctf_event *event,
 			      struct bt_ctf_event *event,
 			      struct perf_evsel *evsel,
 			      struct perf_evsel *evsel,
@@ -597,6 +675,12 @@ static int process_sample_event(struct perf_tool *tool,
 			return -1;
 			return -1;
 	}
 	}
 
 
+	if (perf_evsel__is_bpf_output(evsel)) {
+		ret = add_bpf_output_values(event_class, event, sample);
+		if (ret)
+			return -1;
+	}
+
 	cs = ctf_stream(cw, get_sample_cpu(cw, sample, evsel));
 	cs = ctf_stream(cw, get_sample_cpu(cw, sample, evsel));
 	if (cs) {
 	if (cs) {
 		if (is_flush_needed(cs))
 		if (is_flush_needed(cs))
@@ -744,6 +828,25 @@ static int add_tracepoint_types(struct ctf_writer *cw,
 	return ret;
 	return ret;
 }
 }
 
 
+/*
+ * Declare the payload layout of a BPF output event on @class: a u32
+ * 'raw_len' field followed by 'raw_data', a sequence of hex-formatted u32
+ * values whose length is given by 'raw_len'.
+ *
+ * Returns 0 on success and non-zero on failure.
+ */
+static int add_bpf_output_types(struct ctf_writer *cw,
+				struct bt_ctf_event_class *class)
+{
+	struct bt_ctf_field_type *len_type = cw->data.u32;
+	struct bt_ctf_field_type *seq_base_type = cw->data.u32_hex;
+	struct bt_ctf_field_type *seq_type;
+	int ret;
+
+	ret = bt_ctf_event_class_add_field(class, len_type, "raw_len");
+	if (ret)
+		return ret;
+
+	seq_type = bt_ctf_field_type_sequence_create(seq_base_type, "raw_len");
+	if (!seq_type)
+		return -1;
+
+	ret = bt_ctf_event_class_add_field(class, seq_type, "raw_data");
+
+	/*
+	 * The event class takes its own reference on the type when the field
+	 * is added, so the reference from the create call must be dropped
+	 * here whether or not the add succeeded; otherwise it is leaked.
+	 */
+	bt_ctf_field_type_put(seq_type);
+	return ret;
+}
+
 static int add_generic_types(struct ctf_writer *cw, struct perf_evsel *evsel,
 static int add_generic_types(struct ctf_writer *cw, struct perf_evsel *evsel,
 			     struct bt_ctf_event_class *event_class)
 			     struct bt_ctf_event_class *event_class)
 {
 {
@@ -755,7 +858,8 @@ static int add_generic_types(struct ctf_writer *cw, struct perf_evsel *evsel,
 	 *                              ctf event header
 	 *                              ctf event header
 	 *   PERF_SAMPLE_READ         - TODO
 	 *   PERF_SAMPLE_READ         - TODO
 	 *   PERF_SAMPLE_CALLCHAIN    - TODO
 	 *   PERF_SAMPLE_CALLCHAIN    - TODO
-	 *   PERF_SAMPLE_RAW          - tracepoint fields are handled separately
+	 *   PERF_SAMPLE_RAW          - tracepoint fields and BPF output
+	 *                              are handled separately
 	 *   PERF_SAMPLE_BRANCH_STACK - TODO
 	 *   PERF_SAMPLE_BRANCH_STACK - TODO
 	 *   PERF_SAMPLE_REGS_USER    - TODO
 	 *   PERF_SAMPLE_REGS_USER    - TODO
 	 *   PERF_SAMPLE_STACK_USER   - TODO
 	 *   PERF_SAMPLE_STACK_USER   - TODO
@@ -824,6 +928,12 @@ static int add_event(struct ctf_writer *cw, struct perf_evsel *evsel)
 			goto err;
 			goto err;
 	}
 	}
 
 
+	if (perf_evsel__is_bpf_output(evsel)) {
+		ret = add_bpf_output_types(cw, event_class);
+		if (ret)
+			goto err;
+	}
+
 	ret = bt_ctf_stream_class_add_event_class(cw->stream_class, event_class);
 	ret = bt_ctf_stream_class_add_event_class(cw->stream_class, event_class);
 	if (ret) {
 	if (ret) {
 		pr("Failed to add event class into stream.\n");
 		pr("Failed to add event class into stream.\n");
@@ -970,6 +1080,12 @@ static struct bt_ctf_field_type *create_int_type(int size, bool sign, bool hex)
 	    bt_ctf_field_type_integer_set_base(type, BT_CTF_INTEGER_BASE_HEXADECIMAL))
 	    bt_ctf_field_type_integer_set_base(type, BT_CTF_INTEGER_BASE_HEXADECIMAL))
 		goto err;
 		goto err;
 
 
+#if __BYTE_ORDER == __BIG_ENDIAN
+	bt_ctf_field_type_set_byte_order(type, BT_CTF_BYTE_ORDER_BIG_ENDIAN);
+#else
+	bt_ctf_field_type_set_byte_order(type, BT_CTF_BYTE_ORDER_LITTLE_ENDIAN);
+#endif
+
 	pr2("Created type: INTEGER %d-bit %ssigned %s\n",
 	pr2("Created type: INTEGER %d-bit %ssigned %s\n",
 	    size, sign ? "un" : "", hex ? "hex" : "");
 	    size, sign ? "un" : "", hex ? "hex" : "");
 	return type;
 	return type;

+ 13 - 0
tools/perf/util/pmu.c

@@ -123,6 +123,17 @@ static int perf_pmu__parse_scale(struct perf_pmu_alias *alias, char *dir, char *
 	 */
 	 */
 	lc = setlocale(LC_NUMERIC, NULL);
 	lc = setlocale(LC_NUMERIC, NULL);
 
 
+	/*
+	 * The lc string may be allocated in static storage,
+	 * so get a dynamic copy to make it survive setlocale
+	 * call below.
+	 */
+	lc = strdup(lc);
+	if (!lc) {
+		ret = -ENOMEM;
+		goto error;
+	}
+
 	/*
 	/*
 	 * force to C locale to ensure kernel
 	 * force to C locale to ensure kernel
 	 * scale string is converted correctly.
 	 * scale string is converted correctly.
@@ -135,6 +146,8 @@ static int perf_pmu__parse_scale(struct perf_pmu_alias *alias, char *dir, char *
 	/* restore locale */
 	/* restore locale */
 	setlocale(LC_NUMERIC, lc);
 	setlocale(LC_NUMERIC, lc);
 
 
+	free((char *) lc);
+
 	ret = 0;
 	ret = 0;
 error:
 error:
 	close(fd);
 	close(fd);

+ 2 - 2
tools/perf/util/scripting-engines/trace-event-python.c

@@ -1094,8 +1094,6 @@ static int python_start_script(const char *script, int argc, const char **argv)
 		goto error;
 		goto error;
 	}
 	}
 
 
-	free(command_line);
-
 	set_table_handlers(tables);
 	set_table_handlers(tables);
 
 
 	if (tables->db_export_mode) {
 	if (tables->db_export_mode) {
@@ -1104,6 +1102,8 @@ static int python_start_script(const char *script, int argc, const char **argv)
 			goto error;
 			goto error;
 	}
 	}
 
 
+	free(command_line);
+
 	return err;
 	return err;
 error:
 error:
 	Py_Finalize();
 	Py_Finalize();

+ 4 - 0
tools/perf/util/setup.py

@@ -22,6 +22,7 @@ cflags = getenv('CFLAGS', '').split()
 # switch off several checks (need to be at the end of cflags list)
 # switch off several checks (need to be at the end of cflags list)
 cflags += ['-fno-strict-aliasing', '-Wno-write-strings', '-Wno-unused-parameter' ]
 cflags += ['-fno-strict-aliasing', '-Wno-write-strings', '-Wno-unused-parameter' ]
 
 
+src_perf  = getenv('srctree') + '/tools/perf'
 build_lib = getenv('PYTHON_EXTBUILD_LIB')
 build_lib = getenv('PYTHON_EXTBUILD_LIB')
 build_tmp = getenv('PYTHON_EXTBUILD_TMP')
 build_tmp = getenv('PYTHON_EXTBUILD_TMP')
 libtraceevent = getenv('LIBTRACEEVENT')
 libtraceevent = getenv('LIBTRACEEVENT')
@@ -30,6 +31,9 @@ libapikfs = getenv('LIBAPI')
 ext_sources = [f.strip() for f in file('util/python-ext-sources')
 ext_sources = [f.strip() for f in file('util/python-ext-sources')
 				if len(f.strip()) > 0 and f[0] != '#']
 				if len(f.strip()) > 0 and f[0] != '#']
 
 
+# use full paths with source files
+ext_sources = map(lambda x: '%s/%s' % (src_perf, x) , ext_sources)
+
 perf = Extension('perf',
 perf = Extension('perf',
 		  sources = ext_sources,
 		  sources = ext_sources,
 		  include_dirs = ['util/include'],
 		  include_dirs = ['util/include'],

+ 22 - 15
tools/perf/util/sort.c

@@ -2635,25 +2635,14 @@ out:
 	return ret;
 	return ret;
 }
 }
 
 
-int setup_sorting(struct perf_evlist *evlist)
+static void evlist__set_hists_nr_sort_keys(struct perf_evlist *evlist)
 {
 {
-	int err;
-	struct hists *hists;
 	struct perf_evsel *evsel;
 	struct perf_evsel *evsel;
-	struct perf_hpp_fmt *fmt;
-
-	err = __setup_sorting(evlist);
-	if (err < 0)
-		return err;
-
-	if (parent_pattern != default_parent_pattern) {
-		err = sort_dimension__add("parent", evlist);
-		if (err < 0)
-			return err;
-	}
 
 
 	evlist__for_each(evlist, evsel) {
 	evlist__for_each(evlist, evsel) {
-		hists = evsel__hists(evsel);
+		struct perf_hpp_fmt *fmt;
+		struct hists *hists = evsel__hists(evsel);
+
 		hists->nr_sort_keys = perf_hpp_list.nr_sort_keys;
 		hists->nr_sort_keys = perf_hpp_list.nr_sort_keys;
 
 
 		/*
 		/*
@@ -2667,6 +2656,24 @@ int setup_sorting(struct perf_evlist *evlist)
 				hists->nr_sort_keys--;
 				hists->nr_sort_keys--;
 		}
 		}
 	}
 	}
+}
+
+int setup_sorting(struct perf_evlist *evlist)
+{
+	int err;
+
+	err = __setup_sorting(evlist);
+	if (err < 0)
+		return err;
+
+	if (parent_pattern != default_parent_pattern) {
+		err = sort_dimension__add("parent", evlist);
+		if (err < 0)
+			return err;
+	}
+
+	if (evlist != NULL)
+		evlist__set_hists_nr_sort_keys(evlist);
 
 
 	reset_dimensions();
 	reset_dimensions();
 
 

+ 16 - 2
tools/perf/util/stat-shadow.c

@@ -2,6 +2,7 @@
 #include "evsel.h"
 #include "evsel.h"
 #include "stat.h"
 #include "stat.h"
 #include "color.h"
 #include "color.h"
+#include "pmu.h"
 
 
 enum {
 enum {
 	CTX_BIT_USER	= 1 << 0,
 	CTX_BIT_USER	= 1 << 0,
@@ -14,6 +15,13 @@ enum {
 
 
 #define NUM_CTX CTX_BIT_MAX
 #define NUM_CTX CTX_BIT_MAX
 
 
+/*
+ * AGGR_GLOBAL: Use CPU 0
+ * AGGR_SOCKET: Use first CPU of socket
+ * AGGR_CORE: Use first CPU of core
+ * AGGR_NONE: Use matching CPU
+ * AGGR_THREAD: Not supported?
+ */
 static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
 static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
 static struct stats runtime_cycles_stats[NUM_CTX][MAX_NR_CPUS];
 static struct stats runtime_cycles_stats[NUM_CTX][MAX_NR_CPUS];
 static struct stats runtime_stalled_cycles_front_stats[NUM_CTX][MAX_NR_CPUS];
 static struct stats runtime_stalled_cycles_front_stats[NUM_CTX][MAX_NR_CPUS];
@@ -28,9 +36,15 @@ static struct stats runtime_dtlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
 static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS];
 static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS];
 static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS];
 static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS];
 static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS];
 static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS];
+static bool have_frontend_stalled;
 
 
 struct stats walltime_nsecs_stats;
 struct stats walltime_nsecs_stats;
 
 
+/*
+ * Record whether the "cpu" PMU has a "stalled-cycles-frontend" event, so the
+ * shadow-stat printing code can consult the cached answer.
+ */
+void perf_stat__init_shadow_stats(void)
+{
+	have_frontend_stalled =
+		pmu_have_event("cpu", "stalled-cycles-frontend");
+}
+
 static int evsel_context(struct perf_evsel *evsel)
 static int evsel_context(struct perf_evsel *evsel)
 {
 {
 	int ctx = 0;
 	int ctx = 0;
@@ -310,13 +324,13 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 		total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]);
 		total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]);
 		total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu]));
 		total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu]));
 
 
-		out->new_line(ctxp);
 		if (total && avg) {
 		if (total && avg) {
+			out->new_line(ctxp);
 			ratio = total / avg;
 			ratio = total / avg;
 			print_metric(ctxp, NULL, "%7.2f ",
 			print_metric(ctxp, NULL, "%7.2f ",
 					"stalled cycles per insn",
 					"stalled cycles per insn",
 					ratio);
 					ratio);
-		} else {
+		} else if (have_frontend_stalled) {
 			print_metric(ctxp, NULL, NULL,
 			print_metric(ctxp, NULL, NULL,
 				     "stalled cycles per insn", 0);
 				     "stalled cycles per insn", 0);
 		}
 		}

+ 1 - 0
tools/perf/util/stat.h

@@ -72,6 +72,7 @@ typedef void (*print_metric_t)(void *ctx, const char *color, const char *unit,
 			       const char *fmt, double val);
 			       const char *fmt, double val);
 typedef void (*new_line_t )(void *ctx);
 typedef void (*new_line_t )(void *ctx);
 
 
+void perf_stat__init_shadow_stats(void);
 void perf_stat__reset_shadow_stats(void);
 void perf_stat__reset_shadow_stats(void);
 void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count,
 void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count,
 				    int cpu);
 				    int cpu);

+ 4 - 4
tools/power/x86/turbostat/turbostat.c

@@ -1970,7 +1970,7 @@ int has_config_tdp(unsigned int family, unsigned int model)
 }
 }
 
 
 static void
 static void
-dump_cstate_pstate_config_info(family, model)
+dump_cstate_pstate_config_info(unsigned int family, unsigned int model)
 {
 {
 	if (!do_nhm_platform_info)
 	if (!do_nhm_platform_info)
 		return;
 		return;
@@ -2142,7 +2142,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
 #define	RAPL_POWER_GRANULARITY	0x7FFF	/* 15 bit power granularity */
 #define	RAPL_POWER_GRANULARITY	0x7FFF	/* 15 bit power granularity */
 #define	RAPL_TIME_GRANULARITY	0x3F /* 6 bit time granularity */
 #define	RAPL_TIME_GRANULARITY	0x3F /* 6 bit time granularity */
 
 
-double get_tdp(model)
+double get_tdp(unsigned int model)
 {
 {
 	unsigned long long msr;
 	unsigned long long msr;
 
 
@@ -2256,7 +2256,7 @@ void rapl_probe(unsigned int family, unsigned int model)
 	return;
 	return;
 }
 }
 
 
-void perf_limit_reasons_probe(family, model)
+void perf_limit_reasons_probe(unsigned int family, unsigned int model)
 {
 {
 	if (!genuine_intel)
 	if (!genuine_intel)
 		return;
 		return;
@@ -2792,7 +2792,7 @@ void process_cpuid()
 	perf_limit_reasons_probe(family, model);
 	perf_limit_reasons_probe(family, model);
 
 
 	if (debug)
 	if (debug)
-		dump_cstate_pstate_config_info();
+		dump_cstate_pstate_config_info(family, model);
 
 
 	if (has_skl_msrs(family, model))
 	if (has_skl_msrs(family, model))
 		calculate_tsc_tweak();
 		calculate_tsc_tweak();