8 gadi atpakaļ · 47414424c5
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -33,6 +33,7 @@
 
				 #include <stdint.h>
			
 
				 #include <limits.h>
			
 
				 #include <linux/string.h>
			
 
				+#include <linux/time64.h>
			
 
				 
			
 
				 #include <netinet/in.h>
			
 
				 #include "event-parse.h"
			
@@ -5191,17 +5192,43 @@ struct event_format *pevent_data_event_from_type(struct pevent *pevent, int type
 
				 }
			
 
				 
			
 
				 /**
			
 
				- * pevent_data_pid - parse the PID from raw data
			
 
				+ * pevent_data_pid - parse the PID from record
			
 
				  * @pevent: a handle to the pevent
			
 
				  * @rec: the record to parse
			
 
				  *
			
 
				- * This returns the PID from a raw data.
			
 
				+ * This returns the PID from a record.
			
 
				  */
			
 
				 int pevent_data_pid(struct pevent *pevent, struct pevent_record *rec)
			
 
				 {
			
 
				 	return parse_common_pid(pevent, rec->data);
			
 
				 }
			
 
				 
			
 
				+/**
			
 
				+ * pevent_data_prempt_count - parse the preempt count from the record
			
 
				+ * @pevent: a handle to the pevent
			
 
				+ * @rec: the record to parse
			
 
				+ *
			
 
				+ * This returns the preempt count from a record.
			
 
				+ */
			
 
				+int pevent_data_prempt_count(struct pevent *pevent, struct pevent_record *rec)
			
 
				+{
			
 
				+	return parse_common_pc(pevent, rec->data);
			
 
				+}
			
 
				+
			
 
				+/**
			
 
				+ * pevent_data_flags - parse the latency flags from the record
			
 
				+ * @pevent: a handle to the pevent
			
 
				+ * @rec: the record to parse
			
 
				+ *
			
 
				+ * This returns the latency flags from a record.
			
 
				+ *
			
 
				+ *  Use trace_flag_type enum for the flags (see event-parse.h).
			
 
				+ */
			
 
				+int pevent_data_flags(struct pevent *pevent, struct pevent_record *rec)
			
 
				+{
			
 
				+	return parse_common_flags(pevent, rec->data);
			
 
				+}
			
 
				+
			
 
				 /**
			
 
				  * pevent_data_comm_from_pid - return the command line from PID
			
 
				  * @pevent: a handle to the pevent
			
@@ -5424,8 +5451,8 @@ void pevent_print_event_time(struct pevent *pevent, struct trace_seq *s,
 
				 	use_usec_format = is_timestamp_in_us(pevent->trace_clock,
			
 
				 							use_trace_clock);
			
 
				 	if (use_usec_format) {
			
 
				-		secs = record->ts / NSECS_PER_SEC;
			
 
				-		nsecs = record->ts - secs * NSECS_PER_SEC;
			
 
				+		secs = record->ts / NSEC_PER_SEC;
			
 
				+		nsecs = record->ts - secs * NSEC_PER_SEC;
			
 
				 	}
			
 
				 
			
 
				 	if (pevent->latency_format) {
			
@@ -5437,10 +5464,10 @@ void pevent_print_event_time(struct pevent *pevent, struct trace_seq *s,
 
				 			usecs = nsecs;
			
 
				 			p = 9;
			
 
				 		} else {
			
 
				-			usecs = (nsecs + 500) / NSECS_PER_USEC;
			
 
				+			usecs = (nsecs + 500) / NSEC_PER_USEC;
			
 
				 			/* To avoid usecs larger than 1 sec */
			
 
				-			if (usecs >= 1000000) {
			
 
				-				usecs -= 1000000;
			
 
				+			if (usecs >= USEC_PER_SEC) {
			
 
				+				usecs -= USEC_PER_SEC;
			
 
				 				secs++;
			
 
				 			}
			
 
				 			p = 6;
			
--- a/tools/lib/traceevent/event-parse.h
+++ b/tools/lib/traceevent/event-parse.h
@@ -172,9 +172,6 @@ struct pevent_plugin_option {
 
				 #define PEVENT_PLUGIN_OPTIONS_NAME MAKE_STR(PEVENT_PLUGIN_OPTIONS)
			
 
				 #define PEVENT_PLUGIN_ALIAS_NAME MAKE_STR(PEVENT_PLUGIN_ALIAS)
			
 
				 
			
 
				-#define NSECS_PER_SEC		1000000000ULL
			
 
				-#define NSECS_PER_USEC		1000ULL
			
 
				-
			
 
				 enum format_flags {
			
 
				 	FIELD_IS_ARRAY		= 1,
			
 
				 	FIELD_IS_POINTER	= 2,
			
@@ -712,6 +709,8 @@ void pevent_data_lat_fmt(struct pevent *pevent,
 
				 int pevent_data_type(struct pevent *pevent, struct pevent_record *rec);
			
 
				 struct event_format *pevent_data_event_from_type(struct pevent *pevent, int type);
			
 
				 int pevent_data_pid(struct pevent *pevent, struct pevent_record *rec);
			
 
				+int pevent_data_prempt_count(struct pevent *pevent, struct pevent_record *rec);
			
 
				+int pevent_data_flags(struct pevent *pevent, struct pevent_record *rec);
			
 
				 const char *pevent_data_comm_from_pid(struct pevent *pevent, int pid);
			
 
				 struct cmdline;
			
 
				 struct cmdline *pevent_data_pid_from_comm(struct pevent *pevent, const char *comm,
			
--- a/tools/perf/Documentation/perf-c2c.txt
+++ b/tools/perf/Documentation/perf-c2c.txt
@@ -100,6 +100,14 @@ REPORT OPTIONS
 
				 --show-all::
			
 
				 	Show all captured HITM lines, with no regard to HITM % 0.0005 limit.
			
 
				 
			
 
				+-f::
			
 
				+--force::
			
 
				+	Don't do ownership validation.
			
 
				+
			
 
				+-d::
			
 
				+--display::
			
 
				+	Siwtch to HITM type (rmt, lcl) to display and sort on. Total HITMs as default.
			
 
				+
			
 
				 C2C RECORD
			
 
				 ----------
			
 
				 The perf c2c record command setup options related to HITM cacheline analysis
			
--- a/tools/perf/Documentation/perf-sched.txt
+++ b/tools/perf/Documentation/perf-sched.txt
@@ -8,11 +8,11 @@ perf-sched - Tool to trace/measure scheduler properties (latencies)
 
				 SYNOPSIS
			
 
				 --------
			
 
				 [verse]
			
 
				-'perf sched' {record|latency|map|replay|script}
			
 
				+'perf sched' {record|latency|map|replay|script|timehist}
			
 
				 
			
 
				 DESCRIPTION
			
 
				 -----------
			
 
				-There are five variants of perf sched:
			
 
				+There are several variants of 'perf sched':
			
 
				 
			
 
				   'perf sched record <command>' to record the scheduling events
			
 
				   of an arbitrary workload.
			
@@ -36,6 +36,30 @@ There are five variants of perf sched:
 
				   are running on a CPU. A '*' denotes the CPU that had the event, and
			
 
				   a dot signals an idle CPU.
			
 
				 
			
 
				+  'perf sched timehist' provides an analysis of scheduling events.
			
 
				+    
			
 
				+    Example usage:
			
 
				+        perf sched record -- sleep 1
			
 
				+        perf sched timehist
			
 
				+    
			
 
				+   By default it shows the individual schedule events, including the wait
			
 
				+   time (time between sched-out and next sched-in events for the task), the
			
 
				+   task scheduling delay (time between wakeup and actually running) and run
			
 
				+   time for the task:
			
 
				+    
			
 
				+                time    cpu  task name             wait time  sch delay   run time
			
 
				+                             [tid/pid]                (msec)     (msec)     (msec)
			
 
				+      -------------- ------  --------------------  ---------  ---------  ---------
			
 
				+        79371.874569 [0011]  gcc[31949]                0.014      0.000      1.148
			
 
				+        79371.874591 [0010]  gcc[31951]                0.000      0.000      0.024
			
 
				+        79371.874603 [0010]  migration/10[59]          3.350      0.004      0.011
			
 
				+        79371.874604 [0011]  <idle>                    1.148      0.000      0.035
			
 
				+        79371.874723 [0005]  <idle>                    0.016      0.000      1.383
			
 
				+        79371.874746 [0005]  gcc[31949]                0.153      0.078      0.022
			
 
				+    ...
			
 
				+    
			
 
				+   Times are in msec.usec.
			
 
				+
			
 
				 OPTIONS
			
 
				 -------
			
 
				 -i::
			
@@ -66,6 +90,44 @@ OPTIONS for 'perf sched map'
 
				 --color-pids::
			
 
				 	Highlight the given pids.
			
 
				 
			
 
				+OPTIONS for 'perf sched timehist'
			
 
				+---------------------------------
			
 
				+-k::
			
 
				+--vmlinux=<file>::
			
 
				+    vmlinux pathname
			
 
				+
			
 
				+--kallsyms=<file>::
			
 
				+    kallsyms pathname
			
 
				+
			
 
				+-g::
			
 
				+--no-call-graph::
			
 
				+	Do not display call chains if present.
			
 
				+
			
 
				+--max-stack::
			
 
				+	Maximum number of functions to display in backtrace, default 5.
			
 
				+
			
 
				+-s::
			
 
				+--summary::
			
 
				+    Show only a summary of scheduling by thread with min, max, and average
			
 
				+    run times (in sec) and relative stddev.
			
 
				+
			
 
				+-S::
			
 
				+--with-summary::
			
 
				+    Show all scheduling events followed by a summary by thread with min,
			
 
				+    max, and average run times (in sec) and relative stddev.
			
 
				+
			
 
				+--symfs=<directory>::
			
 
				+    Look for files with symbols relative to this directory.
			
 
				+
			
 
				+-V::
			
 
				+--cpu-visual::
			
 
				+	Show visual aid for sched switches by CPU: 'i' marks idle time,
			
 
				+	's' are scheduler events.
			
 
				+
			
 
				+-w::
			
 
				+--wakeups::
			
 
				+	Show wakeup events.
			
 
				+
			
 
				 SEE ALSO
			
 
				 --------
			
 
				 linkperf:perf-record[1]
			
--- a/tools/perf/arch/arm/annotate/instructions.c
+++ b/tools/perf/arch/arm/annotate/instructions.c
@@ -0,0 +1,90 @@
 
				+static struct ins arm__instructions[] = {
			
 
				+	{ .name = "add",	.ops = &mov_ops,  },
			
 
				+	{ .name = "addl",	.ops = &mov_ops,  },
			
 
				+	{ .name = "addq",	.ops = &mov_ops,  },
			
 
				+	{ .name = "addw",	.ops = &mov_ops,  },
			
 
				+	{ .name = "and",	.ops = &mov_ops,  },
			
 
				+	{ .name = "b",		.ops = &jump_ops, }, // might also be a call
			
 
				+	{ .name = "bcc",	.ops = &jump_ops, },
			
 
				+	{ .name = "bcs",	.ops = &jump_ops, },
			
 
				+	{ .name = "beq",	.ops = &jump_ops, },
			
 
				+	{ .name = "bge",	.ops = &jump_ops, },
			
 
				+	{ .name = "bgt",	.ops = &jump_ops, },
			
 
				+	{ .name = "bhi",	.ops = &jump_ops, },
			
 
				+	{ .name = "bl",		.ops = &call_ops, },
			
 
				+	{ .name = "bls",	.ops = &jump_ops, },
			
 
				+	{ .name = "blt",	.ops = &jump_ops, },
			
 
				+	{ .name = "blx",	.ops = &call_ops, },
			
 
				+	{ .name = "bne",	.ops = &jump_ops, },
			
 
				+	{ .name = "bts",	.ops = &mov_ops,  },
			
 
				+	{ .name = "call",	.ops = &call_ops, },
			
 
				+	{ .name = "callq",	.ops = &call_ops, },
			
 
				+	{ .name = "cmp",	.ops = &mov_ops,  },
			
 
				+	{ .name = "cmpb",	.ops = &mov_ops,  },
			
 
				+	{ .name = "cmpl",	.ops = &mov_ops,  },
			
 
				+	{ .name = "cmpq",	.ops = &mov_ops,  },
			
 
				+	{ .name = "cmpw",	.ops = &mov_ops,  },
			
 
				+	{ .name = "cmpxch",	.ops = &mov_ops,  },
			
 
				+	{ .name = "dec",	.ops = &dec_ops,  },
			
 
				+	{ .name = "decl",	.ops = &dec_ops,  },
			
 
				+	{ .name = "imul",	.ops = &mov_ops,  },
			
 
				+	{ .name = "inc",	.ops = &dec_ops,  },
			
 
				+	{ .name = "incl",	.ops = &dec_ops,  },
			
 
				+	{ .name = "ja",		.ops = &jump_ops, },
			
 
				+	{ .name = "jae",	.ops = &jump_ops, },
			
 
				+	{ .name = "jb",		.ops = &jump_ops, },
			
 
				+	{ .name = "jbe",	.ops = &jump_ops, },
			
 
				+	{ .name = "jc",		.ops = &jump_ops, },
			
 
				+	{ .name = "jcxz",	.ops = &jump_ops, },
			
 
				+	{ .name = "je",		.ops = &jump_ops, },
			
 
				+	{ .name = "jecxz",	.ops = &jump_ops, },
			
 
				+	{ .name = "jg",		.ops = &jump_ops, },
			
 
				+	{ .name = "jge",	.ops = &jump_ops, },
			
 
				+	{ .name = "jl",		.ops = &jump_ops, },
			
 
				+	{ .name = "jle",	.ops = &jump_ops, },
			
 
				+	{ .name = "jmp",	.ops = &jump_ops, },
			
 
				+	{ .name = "jmpq",	.ops = &jump_ops, },
			
 
				+	{ .name = "jna",	.ops = &jump_ops, },
			
 
				+	{ .name = "jnae",	.ops = &jump_ops, },
			
 
				+	{ .name = "jnb",	.ops = &jump_ops, },
			
 
				+	{ .name = "jnbe",	.ops = &jump_ops, },
			
 
				+	{ .name = "jnc",	.ops = &jump_ops, },
			
 
				+	{ .name = "jne",	.ops = &jump_ops, },
			
 
				+	{ .name = "jng",	.ops = &jump_ops, },
			
 
				+	{ .name = "jnge",	.ops = &jump_ops, },
			
 
				+	{ .name = "jnl",	.ops = &jump_ops, },
			
 
				+	{ .name = "jnle",	.ops = &jump_ops, },
			
 
				+	{ .name = "jno",	.ops = &jump_ops, },
			
 
				+	{ .name = "jnp",	.ops = &jump_ops, },
			
 
				+	{ .name = "jns",	.ops = &jump_ops, },
			
 
				+	{ .name = "jnz",	.ops = &jump_ops, },
			
 
				+	{ .name = "jo",		.ops = &jump_ops, },
			
 
				+	{ .name = "jp",		.ops = &jump_ops, },
			
 
				+	{ .name = "jpe",	.ops = &jump_ops, },
			
 
				+	{ .name = "jpo",	.ops = &jump_ops, },
			
 
				+	{ .name = "jrcxz",	.ops = &jump_ops, },
			
 
				+	{ .name = "js",		.ops = &jump_ops, },
			
 
				+	{ .name = "jz",		.ops = &jump_ops, },
			
 
				+	{ .name = "lea",	.ops = &mov_ops,  },
			
 
				+	{ .name = "lock",	.ops = &lock_ops, },
			
 
				+	{ .name = "mov",	.ops = &mov_ops,  },
			
 
				+	{ .name = "movb",	.ops = &mov_ops,  },
			
 
				+	{ .name = "movdqa",	.ops = &mov_ops,  },
			
 
				+	{ .name = "movl",	.ops = &mov_ops,  },
			
 
				+	{ .name = "movq",	.ops = &mov_ops,  },
			
 
				+	{ .name = "movslq",	.ops = &mov_ops,  },
			
 
				+	{ .name = "movzbl",	.ops = &mov_ops,  },
			
 
				+	{ .name = "movzwl",	.ops = &mov_ops,  },
			
 
				+	{ .name = "nop",	.ops = &nop_ops,  },
			
 
				+	{ .name = "nopl",	.ops = &nop_ops,  },
			
 
				+	{ .name = "nopw",	.ops = &nop_ops,  },
			
 
				+	{ .name = "or",		.ops = &mov_ops,  },
			
 
				+	{ .name = "orl",	.ops = &mov_ops,  },
			
 
				+	{ .name = "test",	.ops = &mov_ops,  },
			
 
				+	{ .name = "testb",	.ops = &mov_ops,  },
			
 
				+	{ .name = "testl",	.ops = &mov_ops,  },
			
 
				+	{ .name = "xadd",	.ops = &mov_ops,  },
			
 
				+	{ .name = "xbeginl",	.ops = &jump_ops, },
			
 
				+	{ .name = "xbeginq",	.ops = &jump_ops, },
			
 
				+	{ .name = "retq",	.ops = &ret_ops,  },
			
 
				+};
			
--- a/tools/perf/arch/x86/annotate/instructions.c
+++ b/tools/perf/arch/x86/annotate/instructions.c
@@ -0,0 +1,78 @@
 
				+static struct ins x86__instructions[] = {
			
 
				+	{ .name = "add",	.ops = &mov_ops,  },
			
 
				+	{ .name = "addl",	.ops = &mov_ops,  },
			
 
				+	{ .name = "addq",	.ops = &mov_ops,  },
			
 
				+	{ .name = "addw",	.ops = &mov_ops,  },
			
 
				+	{ .name = "and",	.ops = &mov_ops,  },
			
 
				+	{ .name = "bts",	.ops = &mov_ops,  },
			
 
				+	{ .name = "call",	.ops = &call_ops, },
			
 
				+	{ .name = "callq",	.ops = &call_ops, },
			
 
				+	{ .name = "cmp",	.ops = &mov_ops,  },
			
 
				+	{ .name = "cmpb",	.ops = &mov_ops,  },
			
 
				+	{ .name = "cmpl",	.ops = &mov_ops,  },
			
 
				+	{ .name = "cmpq",	.ops = &mov_ops,  },
			
 
				+	{ .name = "cmpw",	.ops = &mov_ops,  },
			
 
				+	{ .name = "cmpxch",	.ops = &mov_ops,  },
			
 
				+	{ .name = "dec",	.ops = &dec_ops,  },
			
 
				+	{ .name = "decl",	.ops = &dec_ops,  },
			
 
				+	{ .name = "imul",	.ops = &mov_ops,  },
			
 
				+	{ .name = "inc",	.ops = &dec_ops,  },
			
 
				+	{ .name = "incl",	.ops = &dec_ops,  },
			
 
				+	{ .name = "ja",		.ops = &jump_ops, },
			
 
				+	{ .name = "jae",	.ops = &jump_ops, },
			
 
				+	{ .name = "jb",		.ops = &jump_ops, },
			
 
				+	{ .name = "jbe",	.ops = &jump_ops, },
			
 
				+	{ .name = "jc",		.ops = &jump_ops, },
			
 
				+	{ .name = "jcxz",	.ops = &jump_ops, },
			
 
				+	{ .name = "je",		.ops = &jump_ops, },
			
 
				+	{ .name = "jecxz",	.ops = &jump_ops, },
			
 
				+	{ .name = "jg",		.ops = &jump_ops, },
			
 
				+	{ .name = "jge",	.ops = &jump_ops, },
			
 
				+	{ .name = "jl",		.ops = &jump_ops, },
			
 
				+	{ .name = "jle",	.ops = &jump_ops, },
			
 
				+	{ .name = "jmp",	.ops = &jump_ops, },
			
 
				+	{ .name = "jmpq",	.ops = &jump_ops, },
			
 
				+	{ .name = "jna",	.ops = &jump_ops, },
			
 
				+	{ .name = "jnae",	.ops = &jump_ops, },
			
 
				+	{ .name = "jnb",	.ops = &jump_ops, },
			
 
				+	{ .name = "jnbe",	.ops = &jump_ops, },
			
 
				+	{ .name = "jnc",	.ops = &jump_ops, },
			
 
				+	{ .name = "jne",	.ops = &jump_ops, },
			
 
				+	{ .name = "jng",	.ops = &jump_ops, },
			
 
				+	{ .name = "jnge",	.ops = &jump_ops, },
			
 
				+	{ .name = "jnl",	.ops = &jump_ops, },
			
 
				+	{ .name = "jnle",	.ops = &jump_ops, },
			
 
				+	{ .name = "jno",	.ops = &jump_ops, },
			
 
				+	{ .name = "jnp",	.ops = &jump_ops, },
			
 
				+	{ .name = "jns",	.ops = &jump_ops, },
			
 
				+	{ .name = "jnz",	.ops = &jump_ops, },
			
 
				+	{ .name = "jo",		.ops = &jump_ops, },
			
 
				+	{ .name = "jp",		.ops = &jump_ops, },
			
 
				+	{ .name = "jpe",	.ops = &jump_ops, },
			
 
				+	{ .name = "jpo",	.ops = &jump_ops, },
			
 
				+	{ .name = "jrcxz",	.ops = &jump_ops, },
			
 
				+	{ .name = "js",		.ops = &jump_ops, },
			
 
				+	{ .name = "jz",		.ops = &jump_ops, },
			
 
				+	{ .name = "lea",	.ops = &mov_ops,  },
			
 
				+	{ .name = "lock",	.ops = &lock_ops, },
			
 
				+	{ .name = "mov",	.ops = &mov_ops,  },
			
 
				+	{ .name = "movb",	.ops = &mov_ops,  },
			
 
				+	{ .name = "movdqa",	.ops = &mov_ops,  },
			
 
				+	{ .name = "movl",	.ops = &mov_ops,  },
			
 
				+	{ .name = "movq",	.ops = &mov_ops,  },
			
 
				+	{ .name = "movslq",	.ops = &mov_ops,  },
			
 
				+	{ .name = "movzbl",	.ops = &mov_ops,  },
			
 
				+	{ .name = "movzwl",	.ops = &mov_ops,  },
			
 
				+	{ .name = "nop",	.ops = &nop_ops,  },
			
 
				+	{ .name = "nopl",	.ops = &nop_ops,  },
			
 
				+	{ .name = "nopw",	.ops = &nop_ops,  },
			
 
				+	{ .name = "or",		.ops = &mov_ops,  },
			
 
				+	{ .name = "orl",	.ops = &mov_ops,  },
			
 
				+	{ .name = "test",	.ops = &mov_ops,  },
			
 
				+	{ .name = "testb",	.ops = &mov_ops,  },
			
 
				+	{ .name = "testl",	.ops = &mov_ops,  },
			
 
				+	{ .name = "xadd",	.ops = &mov_ops,  },
			
 
				+	{ .name = "xbeginl",	.ops = &jump_ops, },
			
 
				+	{ .name = "xbeginq",	.ops = &jump_ops, },
			
 
				+	{ .name = "retq",	.ops = &ret_ops,  },
			
 
				+};
			
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -91,6 +91,19 @@ struct perf_c2c {
 
				 enum {
			
 
				 	DISPLAY_LCL,
			
 
				 	DISPLAY_RMT,
			
 
				+	DISPLAY_TOT,
			
 
				+	DISPLAY_MAX,
			
 
				+};
			
 
				+
			
 
				+static const char *display_str[DISPLAY_MAX] = {
			
 
				+	[DISPLAY_LCL] = "Local",
			
 
				+	[DISPLAY_RMT] = "Remote",
			
 
				+	[DISPLAY_TOT] = "Total",
			
 
				+};
			
 
				+
			
 
				+static const struct option c2c_options[] = {
			
 
				+	OPT_INCR('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"),
			
 
				+	OPT_END()
			
 
				 };
			
 
				 
			
 
				 static struct perf_c2c c2c;
			
@@ -745,6 +758,10 @@ static double percent_hitm(struct c2c_hist_entry *c2c_he)
 
				 	case DISPLAY_LCL:
			
 
				 		st  = stats->lcl_hitm;
			
 
				 		tot = total->lcl_hitm;
			
 
				+		break;
			
 
				+	case DISPLAY_TOT:
			
 
				+		st  = stats->tot_hitm;
			
 
				+		tot = total->tot_hitm;
			
 
				 	default:
			
 
				 		break;
			
 
				 	}
			
@@ -1044,6 +1061,9 @@ node_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp,
 
				 				break;
			
 
				 			case DISPLAY_LCL:
			
 
				 				DISPLAY_HITM(lcl_hitm);
			
 
				+				break;
			
 
				+			case DISPLAY_TOT:
			
 
				+				DISPLAY_HITM(tot_hitm);
			
 
				 			default:
			
 
				 				break;
			
 
				 			}
			
@@ -1351,6 +1371,7 @@ static struct c2c_dimension dim_tot_loads = {
 
				 static struct c2c_header percent_hitm_header[] = {
			
 
				 	[DISPLAY_LCL] = HEADER_BOTH("Lcl", "Hitm"),
			
 
				 	[DISPLAY_RMT] = HEADER_BOTH("Rmt", "Hitm"),
			
 
				+	[DISPLAY_TOT] = HEADER_BOTH("Tot", "Hitm"),
			
 
				 };
			
 
				 
			
 
				 static struct c2c_dimension dim_percent_hitm = {
			
@@ -1794,6 +1815,9 @@ static bool he__display(struct hist_entry *he, struct c2c_stats *stats)
 
				 		break;
			
 
				 	case DISPLAY_RMT:
			
 
				 		FILTER_HITM(rmt_hitm);
			
 
				+		break;
			
 
				+	case DISPLAY_TOT:
			
 
				+		FILTER_HITM(tot_hitm);
			
 
				 	default:
			
 
				 		break;
			
 
				 	};
			
@@ -1809,8 +1833,9 @@ static inline int valid_hitm_or_store(struct hist_entry *he)
 
				 	bool has_hitm;
			
 
				 
			
 
				 	c2c_he = container_of(he, struct c2c_hist_entry, he);
			
 
				-	has_hitm = c2c.display == DISPLAY_LCL ?
			
 
				-		   c2c_he->stats.lcl_hitm : c2c_he->stats.rmt_hitm;
			
 
				+	has_hitm = c2c.display == DISPLAY_TOT ? c2c_he->stats.tot_hitm :
			
 
				+		   c2c.display == DISPLAY_LCL ? c2c_he->stats.lcl_hitm :
			
 
				+						c2c_he->stats.rmt_hitm;
			
 
				 	return has_hitm || c2c_he->stats.store;
			
 
				 }
			
 
				 
			
@@ -2095,7 +2120,7 @@ static void print_c2c_info(FILE *out, struct perf_session *session)
 
				 		first = false;
			
 
				 	}
			
 
				 	fprintf(out, "  Cachelines sort on                : %s HITMs\n",
			
 
				-		c2c.display == DISPLAY_LCL ? "Local" : "Remote");
			
 
				+		display_str[c2c.display]);
			
 
				 	fprintf(out, "  Cacheline data grouping           : %s\n", c2c.cl_sort);
			
 
				 }
			
 
				 
			
@@ -2250,7 +2275,7 @@ static int perf_c2c_browser__title(struct hist_browser *browser,
 
				 		  "Shared Data Cache Line Table     "
			
 
				 		  "(%lu entries, sorted on %s HITMs)",
			
 
				 		  browser->nr_non_filtered_entries,
			
 
				-		  c2c.display == DISPLAY_LCL ? "local" : "remote");
			
 
				+		  display_str[c2c.display]);
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
@@ -2387,9 +2412,11 @@ static int setup_callchain(struct perf_evlist *evlist)
 
				 
			
 
				 static int setup_display(const char *str)
			
 
				 {
			
 
				-	const char *display = str ?: "rmt";
			
 
				+	const char *display = str ?: "tot";
			
 
				 
			
 
				-	if (!strcmp(display, "rmt"))
			
 
				+	if (!strcmp(display, "tot"))
			
 
				+		c2c.display = DISPLAY_TOT;
			
 
				+	else if (!strcmp(display, "rmt"))
			
 
				 		c2c.display = DISPLAY_RMT;
			
 
				 	else if (!strcmp(display, "lcl"))
			
 
				 		c2c.display = DISPLAY_LCL;
			
@@ -2474,6 +2501,8 @@ static int setup_coalesce(const char *coalesce, bool no_source)
 
				 		return -1;
			
 
				 
			
 
				 	if (asprintf(&c2c.cl_resort, "offset,%s",
			
 
				+		     c2c.display == DISPLAY_TOT ?
			
 
				+		     "tot_hitm" :
			
 
				 		     c2c.display == DISPLAY_RMT ?
			
 
				 		     "rmt_hitm,lcl_hitm" :
			
 
				 		     "lcl_hitm,rmt_hitm") < 0)
			
@@ -2496,11 +2525,9 @@ static int perf_c2c__report(int argc, const char **argv)
 
				 	const char *display = NULL;
			
 
				 	const char *coalesce = NULL;
			
 
				 	bool no_source = false;
			
 
				-	const struct option c2c_options[] = {
			
 
				+	const struct option options[] = {
			
 
				 	OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
			
 
				 		   "file", "vmlinux pathname"),
			
 
				-	OPT_INCR('v', "verbose", &verbose,
			
 
				-		 "be more verbose (show counter open errors, etc)"),
			
 
				 	OPT_STRING('i', "input", &input_name, "file",
			
 
				 		   "the input file to process"),
			
 
				 	OPT_INCR('N', "node-info", &c2c.node_info,
			
@@ -2520,32 +2547,28 @@ static int perf_c2c__report(int argc, const char **argv)
 
				 			     "print_type,threshold[,print_limit],order,sort_key[,branch],value",
			
 
				 			     callchain_help, &parse_callchain_opt,
			
 
				 			     callchain_default_opt),
			
 
				-	OPT_STRING('d', "display", &display, NULL, "lcl,rmt"),
			
 
				+	OPT_STRING('d', "display", &display, "Switch HITM output type", "lcl,rmt"),
			
 
				 	OPT_STRING('c', "coalesce", &coalesce, "coalesce fields",
			
 
				 		   "coalesce fields: pid,tid,iaddr,dso"),
			
 
				+	OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"),
			
 
				+	OPT_PARENT(c2c_options),
			
 
				 	OPT_END()
			
 
				 	};
			
 
				 	int err = 0;
			
 
				 
			
 
				-	argc = parse_options(argc, argv, c2c_options, report_c2c_usage,
			
 
				+	argc = parse_options(argc, argv, options, report_c2c_usage,
			
 
				 			     PARSE_OPT_STOP_AT_NON_OPTION);
			
 
				 	if (argc)
			
 
				-		usage_with_options(report_c2c_usage, c2c_options);
			
 
				+		usage_with_options(report_c2c_usage, options);
			
 
				 
			
 
				 	if (c2c.stats_only)
			
 
				 		c2c.use_stdio = true;
			
 
				 
			
 
				-	if (c2c.use_stdio)
			
 
				-		use_browser = 0;
			
 
				-	else
			
 
				-		use_browser = 1;
			
 
				-
			
 
				-	setup_browser(false);
			
 
				-
			
 
				 	if (!input_name || !strlen(input_name))
			
 
				 		input_name = "perf.data";
			
 
				 
			
 
				-	file.path = input_name;
			
 
				+	file.path  = input_name;
			
 
				+	file.force = symbol_conf.force;
			
 
				 
			
 
				 	err = setup_display(display);
			
 
				 	if (err)
			
@@ -2568,6 +2591,7 @@ static int perf_c2c__report(int argc, const char **argv)
 
				 		pr_debug("No memory for session\n");
			
 
				 		goto out;
			
 
				 	}
			
 
				+
			
 
				 	err = setup_nodes(session);
			
 
				 	if (err) {
			
 
				 		pr_err("Failed setup nodes\n");
			
@@ -2587,6 +2611,13 @@ static int perf_c2c__report(int argc, const char **argv)
 
				 		goto out_session;
			
 
				 	}
			
 
				 
			
 
				+	if (c2c.use_stdio)
			
 
				+		use_browser = 0;
			
 
				+	else
			
 
				+		use_browser = 1;
			
 
				+
			
 
				+	setup_browser(false);
			
 
				+
			
 
				 	err = perf_session__process_events(session);
			
 
				 	if (err) {
			
 
				 		pr_err("failed to process sample\n");
			
@@ -2605,6 +2636,7 @@ static int perf_c2c__report(int argc, const char **argv)
 
				 			"tot_loads,"
			
 
				 			"ld_fbhit,ld_l1hit,ld_l2hit,"
			
 
				 			"ld_lclhit,ld_rmthit",
			
 
				+			c2c.display == DISPLAY_TOT ? "tot_hitm" :
			
 
				 			c2c.display == DISPLAY_LCL ? "lcl_hitm" : "rmt_hitm"
			
 
				 			);
			
 
				 
			
@@ -2655,11 +2687,10 @@ static int perf_c2c__record(int argc, const char **argv)
 
				 	OPT_CALLBACK('e', "event", &event_set, "event",
			
 
				 		     "event selector. Use 'perf mem record -e list' to list available events",
			
 
				 		     parse_record_events),
			
 
				-	OPT_INCR('v', "verbose", &verbose,
			
 
				-		 "be more verbose (show counter open errors, etc)"),
			
 
				 	OPT_BOOLEAN('u', "all-user", &all_user, "collect only user level data"),
			
 
				 	OPT_BOOLEAN('k', "all-kernel", &all_kernel, "collect only kernel level data"),
			
 
				 	OPT_UINTEGER('l', "ldlat", &perf_mem_events__loads_ldlat, "setup mem-loads latency"),
			
 
				+	OPT_PARENT(c2c_options),
			
 
				 	OPT_END()
			
 
				 	};
			
 
				 
			
@@ -2731,11 +2762,6 @@ static int perf_c2c__record(int argc, const char **argv)
 
				 
			
 
				 int cmd_c2c(int argc, const char **argv, const char *prefix __maybe_unused)
			
 
				 {
			
 
				-	const struct option c2c_options[] = {
			
 
				-	OPT_INCR('v', "verbose", &verbose, "be more verbose"),
			
 
				-	OPT_END()
			
 
				-	};
			
 
				-
			
 
				 	argc = parse_options(argc, argv, c2c_options, c2c_usage,
			
 
				 			     PARSE_OPT_STOP_AT_NON_OPTION);
			
 
				 
			
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -13,12 +13,15 @@
 
				 #include "util/cloexec.h"
			
 
				 #include "util/thread_map.h"
			
 
				 #include "util/color.h"
			
 
				+#include "util/stat.h"
			
 
				+#include "util/callchain.h"
			
 
				 
			
 
				 #include <subcmd/parse-options.h>
			
 
				 #include "util/trace-event.h"
			
 
				 
			
 
				 #include "util/debug.h"
			
 
				 
			
 
				+#include <linux/log2.h>
			
 
				 #include <sys/prctl.h>
			
 
				 #include <sys/resource.h>
			
 
				 
			
@@ -192,8 +195,40 @@ struct perf_sched {
 
				 	bool force;
			
 
				 	bool skip_merge;
			
 
				 	struct perf_sched_map map;
			
 
				+
			
 
				+	/* options for timehist command */
			
 
				+	bool		summary;
			
 
				+	bool		summary_only;
			
 
				+	bool		show_callchain;
			
 
				+	unsigned int	max_stack;
			
 
				+	bool		show_cpu_visual;
			
 
				+	bool		show_wakeups;
			
 
				+	u64		skipped_samples;
			
 
				+};
			
 
				+
			
 
				+/* per thread run time data */
			
 
				+struct thread_runtime {
			
 
				+	u64 last_time;      /* time of previous sched in/out event */
			
 
				+	u64 dt_run;         /* run time */
			
 
				+	u64 dt_wait;        /* time between CPU access (off cpu) */
			
 
				+	u64 dt_delay;       /* time between wakeup and sched-in */
			
 
				+	u64 ready_to_run;   /* time of wakeup */
			
 
				+
			
 
				+	struct stats run_stats;
			
 
				+	u64 total_run_time;
			
 
				 };
			
 
				 
			
 
				+/* per event run time data */
			
 
				+struct evsel_runtime {
			
 
				+	u64 *last_time; /* time this event was last seen per cpu */
			
 
				+	u32 ncpu;       /* highest cpu slot allocated */
			
 
				+};
			
 
				+
			
 
				+/* track idle times per cpu */
			
 
				+static struct thread **idle_threads;
			
 
				+static int idle_max_cpu;
			
 
				+static char idle_comm[] = "<idle>";
			
 
				+
			
 
				 static u64 get_nsecs(void)
			
 
				 {
			
 
				 	struct timespec ts;
			
@@ -1654,6 +1689,837 @@ out_delete:
 
				 	return rc;
			
 
				 }
			
 
				 
			
 
				+/*
			
 
				+ * scheduling times are printed as msec.usec
			
 
				+ */
			
 
				+static inline void print_sched_time(unsigned long long nsecs, int width)
			
 
				+{
			
 
				+	unsigned long msecs;
			
 
				+	unsigned long usecs;
			
 
				+
			
 
				+	msecs  = nsecs / NSEC_PER_MSEC;
			
 
				+	nsecs -= msecs * NSEC_PER_MSEC;
			
 
				+	usecs  = nsecs / NSEC_PER_USEC;
			
 
				+	printf("%*lu.%03lu ", width, msecs, usecs);
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * returns runtime data for event, allocating memory for it the
			
 
				+ * first time it is used.
			
 
				+ */
			
 
				+static struct evsel_runtime *perf_evsel__get_runtime(struct perf_evsel *evsel)
			
 
				+{
			
 
				+	struct evsel_runtime *r = evsel->priv;
			
 
				+
			
 
				+	if (r == NULL) {
			
 
				+		r = zalloc(sizeof(struct evsel_runtime));
			
 
				+		evsel->priv = r;
			
 
				+	}
			
 
				+
			
 
				+	return r;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * save last time event was seen per cpu
			
 
				+ */
			
 
				+static void perf_evsel__save_time(struct perf_evsel *evsel,
			
 
				+				  u64 timestamp, u32 cpu)
			
 
				+{
			
 
				+	struct evsel_runtime *r = perf_evsel__get_runtime(evsel);
			
 
				+
			
 
				+	if (r == NULL)
			
 
				+		return;
			
 
				+
			
 
				+	if ((cpu >= r->ncpu) || (r->last_time == NULL)) {
			
 
				+		int i, n = __roundup_pow_of_two(cpu+1);
			
 
				+		void *p = r->last_time;
			
 
				+
			
 
				+		p = realloc(r->last_time, n * sizeof(u64));
			
 
				+		if (!p)
			
 
				+			return;
			
 
				+
			
 
				+		r->last_time = p;
			
 
				+		for (i = r->ncpu; i < n; ++i)
			
 
				+			r->last_time[i] = (u64) 0;
			
 
				+
			
 
				+		r->ncpu = n;
			
 
				+	}
			
 
				+
			
 
				+	r->last_time[cpu] = timestamp;
			
 
				+}
			
 
				+
			
 
				+/* returns last time this event was seen on the given cpu */
			
 
				+static u64 perf_evsel__get_time(struct perf_evsel *evsel, u32 cpu)
			
 
				+{
			
 
				+	struct evsel_runtime *r = perf_evsel__get_runtime(evsel);
			
 
				+
			
 
				+	if ((r == NULL) || (r->last_time == NULL) || (cpu >= r->ncpu))
			
 
				+		return 0;
			
 
				+
			
 
				+	return r->last_time[cpu];
			
 
				+}
			
 
				+
			
 
				+static int comm_width = 20;
			
 
				+
			
 
				+static char *timehist_get_commstr(struct thread *thread)
			
 
				+{
			
 
				+	static char str[32];
			
 
				+	const char *comm = thread__comm_str(thread);
			
 
				+	pid_t tid = thread->tid;
			
 
				+	pid_t pid = thread->pid_;
			
 
				+	int n;
			
 
				+
			
 
				+	if (pid == 0)
			
 
				+		n = scnprintf(str, sizeof(str), "%s", comm);
			
 
				+
			
 
				+	else if (tid != pid)
			
 
				+		n = scnprintf(str, sizeof(str), "%s[%d/%d]", comm, tid, pid);
			
 
				+
			
 
				+	else
			
 
				+		n = scnprintf(str, sizeof(str), "%s[%d]", comm, tid);
			
 
				+
			
 
				+	if (n > comm_width)
			
 
				+		comm_width = n;
			
 
				+
			
 
				+	return str;
			
 
				+}
			
 
				+
			
 
				+static void timehist_header(struct perf_sched *sched)
			
 
				+{
			
 
				+	u32 ncpus = sched->max_cpu + 1;
			
 
				+	u32 i, j;
			
 
				+
			
 
				+	printf("%15s %6s ", "time", "cpu");
			
 
				+
			
 
				+	if (sched->show_cpu_visual) {
			
 
				+		printf(" ");
			
 
				+		for (i = 0, j = 0; i < ncpus; ++i) {
			
 
				+			printf("%x", j++);
			
 
				+			if (j > 15)
			
 
				+				j = 0;
			
 
				+		}
			
 
				+		printf(" ");
			
 
				+	}
			
 
				+
			
 
				+	printf(" %-20s  %9s  %9s  %9s",
			
 
				+		"task name", "wait time", "sch delay", "run time");
			
 
				+
			
 
				+	printf("\n");
			
 
				+
			
 
				+	/*
			
 
				+	 * units row
			
 
				+	 */
			
 
				+	printf("%15s %-6s ", "", "");
			
 
				+
			
 
				+	if (sched->show_cpu_visual)
			
 
				+		printf(" %*s ", ncpus, "");
			
 
				+
			
 
				+	printf(" %-20s  %9s  %9s  %9s\n", "[tid/pid]", "(msec)", "(msec)", "(msec)");
			
 
				+
			
 
				+	/*
			
 
				+	 * separator
			
 
				+	 */
			
 
				+	printf("%.15s %.6s ", graph_dotted_line, graph_dotted_line);
			
 
				+
			
 
				+	if (sched->show_cpu_visual)
			
 
				+		printf(" %.*s ", ncpus, graph_dotted_line);
			
 
				+
			
 
				+	printf(" %.20s  %.9s  %.9s  %.9s",
			
 
				+		graph_dotted_line, graph_dotted_line, graph_dotted_line,
			
 
				+		graph_dotted_line);
			
 
				+
			
 
				+	printf("\n");
			
 
				+}
			
 
				+
			
 
				+static void timehist_print_sample(struct perf_sched *sched,
			
 
				+				  struct perf_sample *sample,
			
 
				+				  struct addr_location *al,
			
 
				+				  struct thread *thread)
			
 
				+{
			
 
				+	struct thread_runtime *tr = thread__priv(thread);
			
 
				+	u32 max_cpus = sched->max_cpu + 1;
			
 
				+	char tstr[64];
			
 
				+
			
 
				+	timestamp__scnprintf_usec(sample->time, tstr, sizeof(tstr));
			
 
				+	printf("%15s [%04d] ", tstr, sample->cpu);
			
 
				+
			
 
				+	if (sched->show_cpu_visual) {
			
 
				+		u32 i;
			
 
				+		char c;
			
 
				+
			
 
				+		printf(" ");
			
 
				+		for (i = 0; i < max_cpus; ++i) {
			
 
				+			/* flag idle times with 'i'; others are sched events */
			
 
				+			if (i == sample->cpu)
			
 
				+				c = (thread->tid == 0) ? 'i' : 's';
			
 
				+			else
			
 
				+				c = ' ';
			
 
				+			printf("%c", c);
			
 
				+		}
			
 
				+		printf(" ");
			
 
				+	}
			
 
				+
			
 
				+	printf(" %-*s ", comm_width, timehist_get_commstr(thread));
			
 
				+
			
 
				+	print_sched_time(tr->dt_wait, 6);
			
 
				+	print_sched_time(tr->dt_delay, 6);
			
 
				+	print_sched_time(tr->dt_run, 6);
			
 
				+
			
 
				+	if (sched->show_wakeups)
			
 
				+		printf("  %-*s", comm_width, "");
			
 
				+
			
 
				+	if (thread->tid == 0)
			
 
				+		goto out;
			
 
				+
			
 
				+	if (sched->show_callchain)
			
 
				+		printf("  ");
			
 
				+
			
 
				+	sample__fprintf_sym(sample, al, 0,
			
 
				+			    EVSEL__PRINT_SYM | EVSEL__PRINT_ONELINE |
			
 
				+			    EVSEL__PRINT_CALLCHAIN_ARROW,
			
 
				+			    &callchain_cursor, stdout);
			
 
				+
			
 
				+out:
			
 
				+	printf("\n");
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Explanation of delta-time stats:
			
 
				+ *
			
 
				+ *            t = time of current schedule out event
			
 
				+ *        tprev = time of previous sched out event
			
 
				+ *                also time of schedule-in event for current task
			
 
				+ *    last_time = time of last sched change event for current task
			
 
				+ *                (i.e, time process was last scheduled out)
			
 
				+ * ready_to_run = time of wakeup for current task
			
 
				+ *
			
 
				+ * -----|------------|------------|------------|------
			
 
				+ *    last         ready        tprev          t
			
 
				+ *    time         to run
			
 
				+ *
			
 
				+ *      |-------- dt_wait --------|
			
 
				+ *                   |- dt_delay -|-- dt_run --|
			
 
				+ *
			
 
				+ *   dt_run = run time of current task
			
 
				+ *  dt_wait = time between last schedule out event for task and tprev
			
 
				+ *            represents time spent off the cpu
			
 
				+ * dt_delay = time between wakeup and schedule-in of task
			
 
				+ */
			
 
				+
			
 
				+static void timehist_update_runtime_stats(struct thread_runtime *r,
			
 
				+					 u64 t, u64 tprev)
			
 
				+{
			
 
				+	r->dt_delay   = 0;
			
 
				+	r->dt_wait    = 0;
			
 
				+	r->dt_run     = 0;
			
 
				+	if (tprev) {
			
 
				+		r->dt_run = t - tprev;
			
 
				+		if (r->ready_to_run) {
			
 
				+			if (r->ready_to_run > tprev)
			
 
				+				pr_debug("time travel: wakeup time for task > previous sched_switch event\n");
			
 
				+			else
			
 
				+				r->dt_delay = tprev - r->ready_to_run;
			
 
				+		}
			
 
				+
			
 
				+		if (r->last_time > tprev)
			
 
				+			pr_debug("time travel: last sched out time for task > previous sched_switch event\n");
			
 
				+		else if (r->last_time)
			
 
				+			r->dt_wait = tprev - r->last_time;
			
 
				+	}
			
 
				+
			
 
				+	update_stats(&r->run_stats, r->dt_run);
			
 
				+	r->total_run_time += r->dt_run;
			
 
				+}
			
 
				+
			
 
				+static bool is_idle_sample(struct perf_sched *sched,
			
 
				+			   struct perf_sample *sample,
			
 
				+			   struct perf_evsel *evsel,
			
 
				+			   struct machine *machine)
			
 
				+{
			
 
				+	struct thread *thread;
			
 
				+	struct callchain_cursor *cursor = &callchain_cursor;
			
 
				+
			
 
				+	/* pid 0 == swapper == idle task */
			
 
				+	if (sample->pid == 0)
			
 
				+		return true;
			
 
				+
			
 
				+	if (strcmp(perf_evsel__name(evsel), "sched:sched_switch") == 0) {
			
 
				+		if (perf_evsel__intval(evsel, sample, "prev_pid") == 0)
			
 
				+			return true;
			
 
				+	}
			
 
				+
			
 
				+	/* want main thread for process - has maps */
			
 
				+	thread = machine__findnew_thread(machine, sample->pid, sample->pid);
			
 
				+	if (thread == NULL) {
			
 
				+		pr_debug("Failed to get thread for pid %d.\n", sample->pid);
			
 
				+		return false;
			
 
				+	}
			
 
				+
			
 
				+	if (!symbol_conf.use_callchain || sample->callchain == NULL)
			
 
				+		return false;
			
 
				+
			
 
				+	if (thread__resolve_callchain(thread, cursor, evsel, sample,
			
 
				+				      NULL, NULL, sched->max_stack) != 0) {
			
 
				+		if (verbose)
			
 
				+			error("Failed to resolve callchain. Skipping\n");
			
 
				+
			
 
				+		return false;
			
 
				+	}
			
 
				+	callchain_cursor_commit(cursor);
			
 
				+	return false;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Track idle stats per cpu by maintaining a local thread
			
 
				+ * struct for the idle task on each cpu.
			
 
				+ */
			
 
				+static int init_idle_threads(int ncpu)
			
 
				+{
			
 
				+	int i;
			
 
				+
			
 
				+	idle_threads = zalloc(ncpu * sizeof(struct thread *));
			
 
				+	if (!idle_threads)
			
 
				+		return -ENOMEM;
			
 
				+
			
 
				+	idle_max_cpu = ncpu - 1;
			
 
				+
			
 
				+	/* allocate the actual thread struct if needed */
			
 
				+	for (i = 0; i < ncpu; ++i) {
			
 
				+		idle_threads[i] = thread__new(0, 0);
			
 
				+		if (idle_threads[i] == NULL)
			
 
				+			return -ENOMEM;
			
 
				+
			
 
				+		thread__set_comm(idle_threads[i], idle_comm, 0);
			
 
				+	}
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+static void free_idle_threads(void)
			
 
				+{
			
 
				+	int i;
			
 
				+
			
 
				+	if (idle_threads == NULL)
			
 
				+		return;
			
 
				+
			
 
				+	for (i = 0; i <= idle_max_cpu; ++i) {
			
 
				+		if ((idle_threads[i]))
			
 
				+			thread__delete(idle_threads[i]);
			
 
				+	}
			
 
				+
			
 
				+	free(idle_threads);
			
 
				+}
			
 
				+
			
 
				+static struct thread *get_idle_thread(int cpu)
			
 
				+{
			
 
				+	/*
			
 
				+	 * expand/allocate array of pointers to local thread
			
 
				+	 * structs if needed
			
 
				+	 */
			
 
				+	if ((cpu >= idle_max_cpu) || (idle_threads == NULL)) {
			
 
				+		int i, j = __roundup_pow_of_two(cpu+1);
			
 
				+		void *p;
			
 
				+
			
 
				+		p = realloc(idle_threads, j * sizeof(struct thread *));
			
 
				+		if (!p)
			
 
				+			return NULL;
			
 
				+
			
 
				+		idle_threads = (struct thread **) p;
			
 
				+		i = idle_max_cpu ? idle_max_cpu + 1 : 0;
			
 
				+		for (; i < j; ++i)
			
 
				+			idle_threads[i] = NULL;
			
 
				+
			
 
				+		idle_max_cpu = j;
			
 
				+	}
			
 
				+
			
 
				+	/* allocate a new thread struct if needed */
			
 
				+	if (idle_threads[cpu] == NULL) {
			
 
				+		idle_threads[cpu] = thread__new(0, 0);
			
 
				+
			
 
				+		if (idle_threads[cpu]) {
			
 
				+			idle_threads[cpu]->tid = 0;
			
 
				+			thread__set_comm(idle_threads[cpu], idle_comm, 0);
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	return idle_threads[cpu];
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * handle runtime stats saved per thread
			
 
				+ */
			
 
				+static struct thread_runtime *thread__init_runtime(struct thread *thread)
			
 
				+{
			
 
				+	struct thread_runtime *r;
			
 
				+
			
 
				+	r = zalloc(sizeof(struct thread_runtime));
			
 
				+	if (!r)
			
 
				+		return NULL;
			
 
				+
			
 
				+	init_stats(&r->run_stats);
			
 
				+	thread__set_priv(thread, r);
			
 
				+
			
 
				+	return r;
			
 
				+}
			
 
				+
			
 
				+static struct thread_runtime *thread__get_runtime(struct thread *thread)
			
 
				+{
			
 
				+	struct thread_runtime *tr;
			
 
				+
			
 
				+	tr = thread__priv(thread);
			
 
				+	if (tr == NULL) {
			
 
				+		tr = thread__init_runtime(thread);
			
 
				+		if (tr == NULL)
			
 
				+			pr_debug("Failed to malloc memory for runtime data.\n");
			
 
				+	}
			
 
				+
			
 
				+	return tr;
			
 
				+}
			
 
				+
			
 
				+static struct thread *timehist_get_thread(struct perf_sched *sched,
			
 
				+					  struct perf_sample *sample,
			
 
				+					  struct machine *machine,
			
 
				+					  struct perf_evsel *evsel)
			
 
				+{
			
 
				+	struct thread *thread;
			
 
				+
			
 
				+	if (is_idle_sample(sched, sample, evsel, machine)) {
			
 
				+		thread = get_idle_thread(sample->cpu);
			
 
				+		if (thread == NULL)
			
 
				+			pr_err("Failed to get idle thread for cpu %d.\n", sample->cpu);
			
 
				+
			
 
				+	} else {
			
 
				+		thread = machine__findnew_thread(machine, sample->pid, sample->tid);
			
 
				+		if (thread == NULL) {
			
 
				+			pr_debug("Failed to get thread for tid %d. skipping sample.\n",
			
 
				+				 sample->tid);
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	return thread;
			
 
				+}
			
 
				+
			
 
				+static bool timehist_skip_sample(struct perf_sched *sched,
			
 
				+				 struct thread *thread)
			
 
				+{
			
 
				+	bool rc = false;
			
 
				+
			
 
				+	if (thread__is_filtered(thread)) {
			
 
				+		rc = true;
			
 
				+		sched->skipped_samples++;
			
 
				+	}
			
 
				+
			
 
				+	return rc;
			
 
				+}
			
 
				+
			
 
				+static void timehist_print_wakeup_event(struct perf_sched *sched,
			
 
				+					struct perf_sample *sample,
			
 
				+					struct machine *machine,
			
 
				+					struct thread *awakened)
			
 
				+{
			
 
				+	struct thread *thread;
			
 
				+	char tstr[64];
			
 
				+
			
 
				+	thread = machine__findnew_thread(machine, sample->pid, sample->tid);
			
 
				+	if (thread == NULL)
			
 
				+		return;
			
 
				+
			
 
				+	/* show wakeup unless both awakee and awaker are filtered */
			
 
				+	if (timehist_skip_sample(sched, thread) &&
			
 
				+	    timehist_skip_sample(sched, awakened)) {
			
 
				+		return;
			
 
				+	}
			
 
				+
			
 
				+	timestamp__scnprintf_usec(sample->time, tstr, sizeof(tstr));
			
 
				+	printf("%15s [%04d] ", tstr, sample->cpu);
			
 
				+	if (sched->show_cpu_visual)
			
 
				+		printf(" %*s ", sched->max_cpu + 1, "");
			
 
				+
			
 
				+	printf(" %-*s ", comm_width, timehist_get_commstr(thread));
			
 
				+
			
 
				+	/* dt spacer */
			
 
				+	printf("  %9s  %9s  %9s ", "", "", "");
			
 
				+
			
 
				+	printf("awakened: %s", timehist_get_commstr(awakened));
			
 
				+
			
 
				+	printf("\n");
			
 
				+}
			
 
				+
			
 
				+static int timehist_sched_wakeup_event(struct perf_tool *tool,
			
 
				+				       union perf_event *event __maybe_unused,
			
 
				+				       struct perf_evsel *evsel,
			
 
				+				       struct perf_sample *sample,
			
 
				+				       struct machine *machine)
			
 
				+{
			
 
				+	struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
			
 
				+	struct thread *thread;
			
 
				+	struct thread_runtime *tr = NULL;
			
 
				+	/* want pid of awakened task not pid in sample */
			
 
				+	const u32 pid = perf_evsel__intval(evsel, sample, "pid");
			
 
				+
			
 
				+	thread = machine__findnew_thread(machine, 0, pid);
			
 
				+	if (thread == NULL)
			
 
				+		return -1;
			
 
				+
			
 
				+	tr = thread__get_runtime(thread);
			
 
				+	if (tr == NULL)
			
 
				+		return -1;
			
 
				+
			
 
				+	if (tr->ready_to_run == 0)
			
 
				+		tr->ready_to_run = sample->time;
			
 
				+
			
 
				+	/* show wakeups if requested */
			
 
				+	if (sched->show_wakeups)
			
 
				+		timehist_print_wakeup_event(sched, sample, machine, thread);
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+static int timehist_sched_change_event(struct perf_tool *tool,
			
 
				+				       union perf_event *event,
			
 
				+				       struct perf_evsel *evsel,
			
 
				+				       struct perf_sample *sample,
			
 
				+				       struct machine *machine)
			
 
				+{
			
 
				+	struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
			
 
				+	struct addr_location al;
			
 
				+	struct thread *thread;
			
 
				+	struct thread_runtime *tr = NULL;
			
 
				+	u64 tprev;
			
 
				+	int rc = 0;
			
 
				+
			
 
				+	if (machine__resolve(machine, &al, sample) < 0) {
			
 
				+		pr_err("problem processing %d event. skipping it\n",
			
 
				+		       event->header.type);
			
 
				+		rc = -1;
			
 
				+		goto out;
			
 
				+	}
			
 
				+
			
 
				+	thread = timehist_get_thread(sched, sample, machine, evsel);
			
 
				+	if (thread == NULL) {
			
 
				+		rc = -1;
			
 
				+		goto out;
			
 
				+	}
			
 
				+
			
 
				+	if (timehist_skip_sample(sched, thread))
			
 
				+		goto out;
			
 
				+
			
 
				+	tr = thread__get_runtime(thread);
			
 
				+	if (tr == NULL) {
			
 
				+		rc = -1;
			
 
				+		goto out;
			
 
				+	}
			
 
				+
			
 
				+	tprev = perf_evsel__get_time(evsel, sample->cpu);
			
 
				+
			
 
				+	timehist_update_runtime_stats(tr, sample->time, tprev);
			
 
				+	if (!sched->summary_only)
			
 
				+		timehist_print_sample(sched, sample, &al, thread);
			
 
				+
			
 
				+out:
			
 
				+	if (tr) {
			
 
				+		/* time of this sched_switch event becomes last time task seen */
			
 
				+		tr->last_time = sample->time;
			
 
				+
			
 
				+		/* sched out event for task so reset ready to run time */
			
 
				+		tr->ready_to_run = 0;
			
 
				+	}
			
 
				+
			
 
				+	perf_evsel__save_time(evsel, sample->time, sample->cpu);
			
 
				+
			
 
				+	return rc;
			
 
				+}
			
 
				+
			
 
				+static int timehist_sched_switch_event(struct perf_tool *tool,
			
 
				+			     union perf_event *event,
			
 
				+			     struct perf_evsel *evsel,
			
 
				+			     struct perf_sample *sample,
			
 
				+			     struct machine *machine __maybe_unused)
			
 
				+{
			
 
				+	return timehist_sched_change_event(tool, event, evsel, sample, machine);
			
 
				+}
			
 
				+
			
 
				+static int process_lost(struct perf_tool *tool __maybe_unused,
			
 
				+			union perf_event *event,
			
 
				+			struct perf_sample *sample,
			
 
				+			struct machine *machine __maybe_unused)
			
 
				+{
			
 
				+	char tstr[64];
			
 
				+
			
 
				+	timestamp__scnprintf_usec(sample->time, tstr, sizeof(tstr));
			
 
				+	printf("%15s ", tstr);
			
 
				+	printf("lost %" PRIu64 " events on cpu %d\n", event->lost.lost, sample->cpu);
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+
			
 
				+static void print_thread_runtime(struct thread *t,
			
 
				+				 struct thread_runtime *r)
			
 
				+{
			
 
				+	double mean = avg_stats(&r->run_stats);
			
 
				+	float stddev;
			
 
				+
			
 
				+	printf("%*s   %5d  %9" PRIu64 " ",
			
 
				+	       comm_width, timehist_get_commstr(t), t->ppid,
			
 
				+	       (u64) r->run_stats.n);
			
 
				+
			
 
				+	print_sched_time(r->total_run_time, 8);
			
 
				+	stddev = rel_stddev_stats(stddev_stats(&r->run_stats), mean);
			
 
				+	print_sched_time(r->run_stats.min, 6);
			
 
				+	printf(" ");
			
 
				+	print_sched_time((u64) mean, 6);
			
 
				+	printf(" ");
			
 
				+	print_sched_time(r->run_stats.max, 6);
			
 
				+	printf("  ");
			
 
				+	printf("%5.2f", stddev);
			
 
				+	printf("\n");
			
 
				+}
			
 
				+
			
 
				+struct total_run_stats {
			
 
				+	u64  sched_count;
			
 
				+	u64  task_count;
			
 
				+	u64  total_run_time;
			
 
				+};
			
 
				+
			
 
				+static int __show_thread_runtime(struct thread *t, void *priv)
			
 
				+{
			
 
				+	struct total_run_stats *stats = priv;
			
 
				+	struct thread_runtime *r;
			
 
				+
			
 
				+	if (thread__is_filtered(t))
			
 
				+		return 0;
			
 
				+
			
 
				+	r = thread__priv(t);
			
 
				+	if (r && r->run_stats.n) {
			
 
				+		stats->task_count++;
			
 
				+		stats->sched_count += r->run_stats.n;
			
 
				+		stats->total_run_time += r->total_run_time;
			
 
				+		print_thread_runtime(t, r);
			
 
				+	}
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+static int show_thread_runtime(struct thread *t, void *priv)
			
 
				+{
			
 
				+	if (t->dead)
			
 
				+		return 0;
			
 
				+
			
 
				+	return __show_thread_runtime(t, priv);
			
 
				+}
			
 
				+
			
 
				+static int show_deadthread_runtime(struct thread *t, void *priv)
			
 
				+{
			
 
				+	if (!t->dead)
			
 
				+		return 0;
			
 
				+
			
 
				+	return __show_thread_runtime(t, priv);
			
 
				+}
			
 
				+
			
 
				+static void timehist_print_summary(struct perf_sched *sched,
			
 
				+				   struct perf_session *session)
			
 
				+{
			
 
				+	struct machine *m = &session->machines.host;
			
 
				+	struct total_run_stats totals;
			
 
				+	u64 task_count;
			
 
				+	struct thread *t;
			
 
				+	struct thread_runtime *r;
			
 
				+	int i;
			
 
				+
			
 
				+	memset(&totals, 0, sizeof(totals));
			
 
				+
			
 
				+	if (comm_width < 30)
			
 
				+		comm_width = 30;
			
 
				+
			
 
				+	printf("\nRuntime summary\n");
			
 
				+	printf("%*s  parent   sched-in  ", comm_width, "comm");
			
 
				+	printf("   run-time    min-run     avg-run     max-run  stddev\n");
			
 
				+	printf("%*s            (count)  ", comm_width, "");
			
 
				+	printf("     (msec)     (msec)      (msec)      (msec)       %%\n");
			
 
				+	printf("%.105s\n", graph_dotted_line);
			
 
				+
			
 
				+	machine__for_each_thread(m, show_thread_runtime, &totals);
			
 
				+	task_count = totals.task_count;
			
 
				+	if (!task_count)
			
 
				+		printf("<no still running tasks>\n");
			
 
				+
			
 
				+	printf("\nTerminated tasks:\n");
			
 
				+	machine__for_each_thread(m, show_deadthread_runtime, &totals);
			
 
				+	if (task_count == totals.task_count)
			
 
				+		printf("<no terminated tasks>\n");
			
 
				+
			
 
				+	/* CPU idle stats not tracked when samples were skipped */
			
 
				+	if (sched->skipped_samples)
			
 
				+		return;
			
 
				+
			
 
				+	printf("\nIdle stats:\n");
			
 
				+	for (i = 0; i <= idle_max_cpu; ++i) {
			
 
				+		t = idle_threads[i];
			
 
				+		if (!t)
			
 
				+			continue;
			
 
				+
			
 
				+		r = thread__priv(t);
			
 
				+		if (r && r->run_stats.n) {
			
 
				+			totals.sched_count += r->run_stats.n;
			
 
				+			printf("    CPU %2d idle for ", i);
			
 
				+			print_sched_time(r->total_run_time, 6);
			
 
				+			printf(" msec\n");
			
 
				+		} else
			
 
				+			printf("    CPU %2d idle entire time window\n", i);
			
 
				+	}
			
 
				+
			
 
				+	printf("\n"
			
 
				+	       "    Total number of unique tasks: %" PRIu64 "\n"
			
 
				+	       "Total number of context switches: %" PRIu64 "\n"
			
 
				+	       "           Total run time (msec): ",
			
 
				+	       totals.task_count, totals.sched_count);
			
 
				+
			
 
				+	print_sched_time(totals.total_run_time, 2);
			
 
				+	printf("\n");
			
 
				+}
			
 
				+
			
 
				+typedef int (*sched_handler)(struct perf_tool *tool,
			
 
				+			  union perf_event *event,
			
 
				+			  struct perf_evsel *evsel,
			
 
				+			  struct perf_sample *sample,
			
 
				+			  struct machine *machine);
			
 
				+
			
 
				+static int perf_timehist__process_sample(struct perf_tool *tool,
			
 
				+					 union perf_event *event,
			
 
				+					 struct perf_sample *sample,
			
 
				+					 struct perf_evsel *evsel,
			
 
				+					 struct machine *machine)
			
 
				+{
			
 
				+	struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
			
 
				+	int err = 0;
			
 
				+	int this_cpu = sample->cpu;
			
 
				+
			
 
				+	if (this_cpu > sched->max_cpu)
			
 
				+		sched->max_cpu = this_cpu;
			
 
				+
			
 
				+	if (evsel->handler != NULL) {
			
 
				+		sched_handler f = evsel->handler;
			
 
				+
			
 
				+		err = f(tool, event, evsel, sample, machine);
			
 
				+	}
			
 
				+
			
 
				+	return err;
			
 
				+}
			
 
				+
			
 
				+static int timehist_check_attr(struct perf_sched *sched,
			
 
				+			       struct perf_evlist *evlist)
			
 
				+{
			
 
				+	struct perf_evsel *evsel;
			
 
				+	struct evsel_runtime *er;
			
 
				+
			
 
				+	list_for_each_entry(evsel, &evlist->entries, node) {
			
 
				+		er = perf_evsel__get_runtime(evsel);
			
 
				+		if (er == NULL) {
			
 
				+			pr_err("Failed to allocate memory for evsel runtime data\n");
			
 
				+			return -1;
			
 
				+		}
			
 
				+
			
 
				+		if (sched->show_callchain &&
			
 
				+		    !(evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN)) {
			
 
				+			pr_info("Samples do not have callchains.\n");
			
 
				+			sched->show_callchain = 0;
			
 
				+			symbol_conf.use_callchain = 0;
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+static int perf_sched__timehist(struct perf_sched *sched)
			
 
				+{
			
 
				+	const struct perf_evsel_str_handler handlers[] = {
			
 
				+		{ "sched:sched_switch",       timehist_sched_switch_event, },
			
 
				+		{ "sched:sched_wakeup",	      timehist_sched_wakeup_event, },
			
 
				+		{ "sched:sched_wakeup_new",   timehist_sched_wakeup_event, },
			
 
				+	};
			
 
				+	struct perf_data_file file = {
			
 
				+		.path = input_name,
			
 
				+		.mode = PERF_DATA_MODE_READ,
			
 
				+	};
			
 
				+
			
 
				+	struct perf_session *session;
			
 
				+	struct perf_evlist *evlist;
			
 
				+	int err = -1;
			
 
				+
			
 
				+	/*
			
 
				+	 * event handlers for timehist option
			
 
				+	 */
			
 
				+	sched->tool.sample	 = perf_timehist__process_sample;
			
 
				+	sched->tool.mmap	 = perf_event__process_mmap;
			
 
				+	sched->tool.comm	 = perf_event__process_comm;
			
 
				+	sched->tool.exit	 = perf_event__process_exit;
			
 
				+	sched->tool.fork	 = perf_event__process_fork;
			
 
				+	sched->tool.lost	 = process_lost;
			
 
				+	sched->tool.attr	 = perf_event__process_attr;
			
 
				+	sched->tool.tracing_data = perf_event__process_tracing_data;
			
 
				+	sched->tool.build_id	 = perf_event__process_build_id;
			
 
				+
			
 
				+	sched->tool.ordered_events = true;
			
 
				+	sched->tool.ordering_requires_timestamps = true;
			
 
				+
			
 
				+	symbol_conf.use_callchain = sched->show_callchain;
			
 
				+
			
 
				+	session = perf_session__new(&file, false, &sched->tool);
			
 
				+	if (session == NULL)
			
 
				+		return -ENOMEM;
			
 
				+
			
 
				+	evlist = session->evlist;
			
 
				+
			
 
				+	symbol__init(&session->header.env);
			
 
				+
			
 
				+	if (timehist_check_attr(sched, evlist) != 0)
			
 
				+		goto out;
			
 
				+
			
 
				+	setup_pager();
			
 
				+
			
 
				+	/* setup per-evsel handlers */
			
 
				+	if (perf_session__set_tracepoints_handlers(session, handlers))
			
 
				+		goto out;
			
 
				+
			
 
				+	if (!perf_session__has_traces(session, "record -R"))
			
 
				+		goto out;
			
 
				+
			
 
				+	/* pre-allocate struct for per-CPU idle stats */
			
 
				+	sched->max_cpu = session->header.env.nr_cpus_online;
			
 
				+	if (sched->max_cpu == 0)
			
 
				+		sched->max_cpu = 4;
			
 
				+	if (init_idle_threads(sched->max_cpu))
			
 
				+		goto out;
			
 
				+
			
 
				+	/* summary_only implies summary option, but don't overwrite summary if set */
			
 
				+	if (sched->summary_only)
			
 
				+		sched->summary = sched->summary_only;
			
 
				+
			
 
				+	if (!sched->summary_only)
			
 
				+		timehist_header(sched);
			
 
				+
			
 
				+	err = perf_session__process_events(session);
			
 
				+	if (err) {
			
 
				+		pr_err("Failed to process events, error %d", err);
			
 
				+		goto out;
			
 
				+	}
			
 
				+
			
 
				+	sched->nr_events      = evlist->stats.nr_events[0];
			
 
				+	sched->nr_lost_events = evlist->stats.total_lost;
			
 
				+	sched->nr_lost_chunks = evlist->stats.nr_events[PERF_RECORD_LOST];
			
 
				+
			
 
				+	if (sched->summary)
			
 
				+		timehist_print_summary(sched, session);
			
 
				+
			
 
				+out:
			
 
				+	free_idle_threads();
			
 
				+	perf_session__delete(session);
			
 
				+
			
 
				+	return err;
			
 
				+}
			
 
				+
			
 
				+
			
 
				 static void print_bad_events(struct perf_sched *sched)
			
 
				 {
			
 
				 	if (sched->nr_unordered_timestamps && sched->nr_timestamps) {
			
@@ -1957,6 +2823,8 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 		.next_shortname1      = 'A',
			
 
				 		.next_shortname2      = '0',
			
 
				 		.skip_merge           = 0,
			
 
				+		.show_callchain	      = 1,
			
 
				+		.max_stack            = 5,
			
 
				 	};
			
 
				 	const struct option sched_options[] = {
			
 
				 	OPT_STRING('i', "input", &input_name, "file",
			
@@ -1970,8 +2838,6 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 	const struct option latency_options[] = {
			
 
				 	OPT_STRING('s', "sort", &sched.sort_order, "key[,key2...]",
			
 
				 		   "sort by key(s): runtime, switch, avg, max"),
			
 
				-	OPT_INCR('v', "verbose", &verbose,
			
 
				-		    "be more verbose (show symbol address, etc)"),
			
 
				 	OPT_INTEGER('C', "CPU", &sched.profile_cpu,
			
 
				 		    "CPU to profile on"),
			
 
				 	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
			
@@ -1983,8 +2849,6 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 	const struct option replay_options[] = {
			
 
				 	OPT_UINTEGER('r', "repeat", &sched.replay_repeat,
			
 
				 		     "repeat the workload replay N times (-1: infinite)"),
			
 
				-	OPT_INCR('v', "verbose", &verbose,
			
 
				-		    "be more verbose (show symbol address, etc)"),
			
 
				 	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
			
 
				 		    "dump raw trace in ASCII"),
			
 
				 	OPT_BOOLEAN('f', "force", &sched.force, "don't complain, do it"),
			
@@ -2001,6 +2865,26 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
 
				                     "display given CPUs in map"),
			
 
				 	OPT_PARENT(sched_options)
			
 
				 	};
			
 
				+	const struct option timehist_options[] = {
			
 
				+	OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
			
 
				+		   "file", "vmlinux pathname"),
			
 
				+	OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name,
			
 
				+		   "file", "kallsyms pathname"),
			
 
				+	OPT_BOOLEAN('g', "call-graph", &sched.show_callchain,
			
 
				+		    "Display call chains if present (default on)"),
			
 
				+	OPT_UINTEGER(0, "max-stack", &sched.max_stack,
			
 
				+		   "Maximum number of functions to display backtrace."),
			
 
				+	OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
			
 
				+		    "Look for files with symbols relative to this directory"),
			
 
				+	OPT_BOOLEAN('s', "summary", &sched.summary_only,
			
 
				+		    "Show only syscall summary with statistics"),
			
 
				+	OPT_BOOLEAN('S', "with-summary", &sched.summary,
			
 
				+		    "Show all syscalls and summary with statistics"),
			
 
				+	OPT_BOOLEAN('w', "wakeups", &sched.show_wakeups, "Show wakeup events"),
			
 
				+	OPT_BOOLEAN('V', "cpu-visual", &sched.show_cpu_visual, "Add CPU visual"),
			
 
				+	OPT_PARENT(sched_options)
			
 
				+	};
			
 
				+
			
 
				 	const char * const latency_usage[] = {
			
 
				 		"perf sched latency [<options>]",
			
 
				 		NULL
			
@@ -2013,8 +2897,13 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 		"perf sched map [<options>]",
			
 
				 		NULL
			
 
				 	};
			
 
				+	const char * const timehist_usage[] = {
			
 
				+		"perf sched timehist [<options>]",
			
 
				+		NULL
			
 
				+	};
			
 
				 	const char *const sched_subcommands[] = { "record", "latency", "map",
			
 
				-						  "replay", "script", NULL };
			
 
				+						  "replay", "script",
			
 
				+						  "timehist", NULL };
			
 
				 	const char *sched_usage[] = {
			
 
				 		NULL,
			
 
				 		NULL
			
@@ -2077,6 +2966,21 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 				usage_with_options(replay_usage, replay_options);
			
 
				 		}
			
 
				 		return perf_sched__replay(&sched);
			
 
				+	} else if (!strcmp(argv[0], "timehist")) {
			
 
				+		if (argc) {
			
 
				+			argc = parse_options(argc, argv, timehist_options,
			
 
				+					     timehist_usage, 0);
			
 
				+			if (argc)
			
 
				+				usage_with_options(timehist_usage, timehist_options);
			
 
				+		}
			
 
				+		if (sched.show_wakeups && sched.summary_only) {
			
 
				+			pr_err(" Error: -s and -w are mutually exclusive.\n");
			
 
				+			parse_options_usage(timehist_usage, timehist_options, "s", true);
			
 
				+			parse_options_usage(NULL, timehist_options, "w", true);
			
 
				+			return -EINVAL;
			
 
				+		}
			
 
				+
			
 
				+		return perf_sched__timehist(&sched);
			
 
				 	} else {
			
 
				 		usage_with_options(sched_usage, sched_options);
			
 
				 	}
			
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -130,7 +130,7 @@ static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he)
 
				 		return err;
			
 
				 	}
			
 
				 
			
 
				-	err = symbol__disassemble(sym, map, 0);
			
 
				+	err = symbol__disassemble(sym, map, NULL, 0);
			
 
				 	if (err == 0) {
			
 
				 out_assign:
			
 
				 		top->sym_filter_entry = he;
			
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -1050,7 +1050,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map,
 
				 		  (nr_pcnt - 1);
			
 
				 	}
			
 
				 
			
 
				-	err = symbol__disassemble(sym, map, sizeof_bdl);
			
 
				+	err = symbol__disassemble(sym, map, perf_evsel__env_arch(evsel), sizeof_bdl);
			
 
				 	if (err) {
			
 
				 		char msg[BUFSIZ];
			
 
				 		symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg));
			
--- a/tools/perf/ui/gtk/annotate.c
+++ b/tools/perf/ui/gtk/annotate.c
@@ -167,7 +167,7 @@ static int symbol__gtk_annotate(struct symbol *sym, struct map *map,
 
				 	if (map->dso->annotate_warned)
			
 
				 		return -1;
			
 
				 
			
 
				-	err = symbol__disassemble(sym, map, 0);
			
 
				+	err = symbol__disassemble(sym, map, perf_evsel__env_arch(evsel), 0);
			
 
				 	if (err) {
			
 
				 		char msg[BUFSIZ];
			
 
				 		symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg));
			
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -18,17 +18,61 @@
 
				 #include "annotate.h"
			
 
				 #include "evsel.h"
			
 
				 #include "block-range.h"
			
 
				+#include "arch/common.h"
			
 
				 #include <regex.h>
			
 
				 #include <pthread.h>
			
 
				 #include <linux/bitops.h>
			
 
				+#include <sys/utsname.h>
			
 
				 
			
 
				 const char 	*disassembler_style;
			
 
				 const char	*objdump_path;
			
 
				 static regex_t	 file_lineno;
			
 
				 
			
 
				-static struct ins *ins__find(const char *name);
			
 
				+static struct ins *ins__find(struct arch *arch, const char *name);
			
 
				 static int disasm_line__parse(char *line, char **namep, char **rawp);
			
 
				 
			
 
				+struct arch {
			
 
				+	const char	*name;
			
 
				+	struct ins	*instructions;
			
 
				+	size_t		nr_instructions;
			
 
				+	bool		sorted_instructions;
			
 
				+	struct		{
			
 
				+		char comment_char;
			
 
				+		char skip_functions_char;
			
 
				+	} objdump;
			
 
				+};
			
 
				+
			
 
				+static struct ins_ops call_ops;
			
 
				+static struct ins_ops dec_ops;
			
 
				+static struct ins_ops jump_ops;
			
 
				+static struct ins_ops mov_ops;
			
 
				+static struct ins_ops nop_ops;
			
 
				+static struct ins_ops lock_ops;
			
 
				+static struct ins_ops ret_ops;
			
 
				+
			
 
				+#include "arch/arm/annotate/instructions.c"
			
 
				+#include "arch/x86/annotate/instructions.c"
			
 
				+
			
 
				+static struct arch architectures[] = {
			
 
				+	{
			
 
				+		.name = "arm",
			
 
				+		.instructions = arm__instructions,
			
 
				+		.nr_instructions = ARRAY_SIZE(arm__instructions),
			
 
				+		.objdump =  {
			
 
				+			.comment_char = ';',
			
 
				+			.skip_functions_char = '+',
			
 
				+		},
			
 
				+	},
			
 
				+	{
			
 
				+		.name = "x86",
			
 
				+		.instructions = x86__instructions,
			
 
				+		.nr_instructions = ARRAY_SIZE(x86__instructions),
			
 
				+		.objdump =  {
			
 
				+			.comment_char = '#',
			
 
				+		},
			
 
				+	},
			
 
				+};
			
 
				+
			
 
				 static void ins__delete(struct ins_operands *ops)
			
 
				 {
			
 
				 	if (ops == NULL)
			
@@ -54,7 +98,7 @@ int ins__scnprintf(struct ins *ins, char *bf, size_t size,
 
				 	return ins__raw_scnprintf(ins, bf, size, ops);
			
 
				 }
			
 
				 
			
 
				-static int call__parse(struct ins_operands *ops, struct map *map)
			
 
				+static int call__parse(struct arch *arch, struct ins_operands *ops, struct map *map)
			
 
				 {
			
 
				 	char *endptr, *tok, *name;
			
 
				 
			
@@ -66,10 +110,9 @@ static int call__parse(struct ins_operands *ops, struct map *map)
 
				 
			
 
				 	name++;
			
 
				 
			
 
				-#ifdef __arm__
			
 
				-	if (strchr(name, '+'))
			
 
				+	if (arch->objdump.skip_functions_char &&
			
 
				+	    strchr(name, arch->objdump.skip_functions_char))
			
 
				 		return -1;
			
 
				-#endif
			
 
				 
			
 
				 	tok = strchr(name, '>');
			
 
				 	if (tok == NULL)
			
@@ -118,7 +161,7 @@ bool ins__is_call(const struct ins *ins)
 
				 	return ins->ops == &call_ops;
			
 
				 }
			
 
				 
			
 
				-static int jump__parse(struct ins_operands *ops, struct map *map __maybe_unused)
			
 
				+static int jump__parse(struct arch *arch __maybe_unused, struct ins_operands *ops, struct map *map __maybe_unused)
			
 
				 {
			
 
				 	const char *s = strchr(ops->raw, '+');
			
 
				 
			
@@ -173,7 +216,7 @@ static int comment__symbol(char *raw, char *comment, u64 *addrp, char **namep)
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-static int lock__parse(struct ins_operands *ops, struct map *map)
			
 
				+static int lock__parse(struct arch *arch, struct ins_operands *ops, struct map *map)
			
 
				 {
			
 
				 	char *name;
			
 
				 
			
@@ -184,7 +227,7 @@ static int lock__parse(struct ins_operands *ops, struct map *map)
 
				 	if (disasm_line__parse(ops->raw, &name, &ops->locked.ops->raw) < 0)
			
 
				 		goto out_free_ops;
			
 
				 
			
 
				-	ops->locked.ins = ins__find(name);
			
 
				+	ops->locked.ins = ins__find(arch, name);
			
 
				 	free(name);
			
 
				 
			
 
				 	if (ops->locked.ins == NULL)
			
@@ -194,7 +237,7 @@ static int lock__parse(struct ins_operands *ops, struct map *map)
 
				 		return 0;
			
 
				 
			
 
				 	if (ops->locked.ins->ops->parse &&
			
 
				-	    ops->locked.ins->ops->parse(ops->locked.ops, map) < 0)
			
 
				+	    ops->locked.ins->ops->parse(arch, ops->locked.ops, map) < 0)
			
 
				 		goto out_free_ops;
			
 
				 
			
 
				 	return 0;
			
@@ -237,7 +280,7 @@ static struct ins_ops lock_ops = {
 
				 	.scnprintf = lock__scnprintf,
			
 
				 };
			
 
				 
			
 
				-static int mov__parse(struct ins_operands *ops, struct map *map __maybe_unused)
			
 
				+static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map *map __maybe_unused)
			
 
				 {
			
 
				 	char *s = strchr(ops->raw, ','), *target, *comment, prev;
			
 
				 
			
@@ -252,11 +295,7 @@ static int mov__parse(struct ins_operands *ops, struct map *map __maybe_unused)
 
				 		return -1;
			
 
				 
			
 
				 	target = ++s;
			
 
				-#ifdef __arm__
			
 
				-	comment = strchr(s, ';');
			
 
				-#else
			
 
				-	comment = strchr(s, '#');
			
 
				-#endif
			
 
				+	comment = strchr(s, arch->objdump.comment_char);
			
 
				 
			
 
				 	if (comment != NULL)
			
 
				 		s = comment - 1;
			
@@ -304,7 +343,7 @@ static struct ins_ops mov_ops = {
 
				 	.scnprintf = mov__scnprintf,
			
 
				 };
			
 
				 
			
 
				-static int dec__parse(struct ins_operands *ops, struct map *map __maybe_unused)
			
 
				+static int dec__parse(struct arch *arch __maybe_unused, struct ins_operands *ops, struct map *map __maybe_unused)
			
 
				 {
			
 
				 	char *target, *comment, *s, prev;
			
 
				 
			
@@ -364,99 +403,6 @@ bool ins__is_ret(const struct ins *ins)
 
				 	return ins->ops == &ret_ops;
			
 
				 }
			
 
				 
			
 
				-static struct ins instructions[] = {
			
 
				-	{ .name = "add",   .ops  = &mov_ops, },
			
 
				-	{ .name = "addl",  .ops  = &mov_ops, },
			
 
				-	{ .name = "addq",  .ops  = &mov_ops, },
			
 
				-	{ .name = "addw",  .ops  = &mov_ops, },
			
 
				-	{ .name = "and",   .ops  = &mov_ops, },
			
 
				-#ifdef __arm__
			
 
				-	{ .name = "b",     .ops  = &jump_ops, }, // might also be a call
			
 
				-	{ .name = "bcc",   .ops  = &jump_ops, },
			
 
				-	{ .name = "bcs",   .ops  = &jump_ops, },
			
 
				-	{ .name = "beq",   .ops  = &jump_ops, },
			
 
				-	{ .name = "bge",   .ops  = &jump_ops, },
			
 
				-	{ .name = "bgt",   .ops  = &jump_ops, },
			
 
				-	{ .name = "bhi",   .ops  = &jump_ops, },
			
 
				-	{ .name = "bl",    .ops  = &call_ops, },
			
 
				-	{ .name = "bls",   .ops  = &jump_ops, },
			
 
				-	{ .name = "blt",   .ops  = &jump_ops, },
			
 
				-	{ .name = "blx",   .ops  = &call_ops, },
			
 
				-	{ .name = "bne",   .ops  = &jump_ops, },
			
 
				-#endif
			
 
				-	{ .name = "bts",   .ops  = &mov_ops, },
			
 
				-	{ .name = "call",  .ops  = &call_ops, },
			
 
				-	{ .name = "callq", .ops  = &call_ops, },
			
 
				-	{ .name = "cmp",   .ops  = &mov_ops, },
			
 
				-	{ .name = "cmpb",  .ops  = &mov_ops, },
			
 
				-	{ .name = "cmpl",  .ops  = &mov_ops, },
			
 
				-	{ .name = "cmpq",  .ops  = &mov_ops, },
			
 
				-	{ .name = "cmpw",  .ops  = &mov_ops, },
			
 
				-	{ .name = "cmpxch", .ops  = &mov_ops, },
			
 
				-	{ .name = "dec",   .ops  = &dec_ops, },
			
 
				-	{ .name = "decl",  .ops  = &dec_ops, },
			
 
				-	{ .name = "imul",  .ops  = &mov_ops, },
			
 
				-	{ .name = "inc",   .ops  = &dec_ops, },
			
 
				-	{ .name = "incl",  .ops  = &dec_ops, },
			
 
				-	{ .name = "ja",	   .ops  = &jump_ops, },
			
 
				-	{ .name = "jae",   .ops  = &jump_ops, },
			
 
				-	{ .name = "jb",	   .ops  = &jump_ops, },
			
 
				-	{ .name = "jbe",   .ops  = &jump_ops, },
			
 
				-	{ .name = "jc",	   .ops  = &jump_ops, },
			
 
				-	{ .name = "jcxz",  .ops  = &jump_ops, },
			
 
				-	{ .name = "je",	   .ops  = &jump_ops, },
			
 
				-	{ .name = "jecxz", .ops  = &jump_ops, },
			
 
				-	{ .name = "jg",	   .ops  = &jump_ops, },
			
 
				-	{ .name = "jge",   .ops  = &jump_ops, },
			
 
				-	{ .name = "jl",    .ops  = &jump_ops, },
			
 
				-	{ .name = "jle",   .ops  = &jump_ops, },
			
 
				-	{ .name = "jmp",   .ops  = &jump_ops, },
			
 
				-	{ .name = "jmpq",  .ops  = &jump_ops, },
			
 
				-	{ .name = "jna",   .ops  = &jump_ops, },
			
 
				-	{ .name = "jnae",  .ops  = &jump_ops, },
			
 
				-	{ .name = "jnb",   .ops  = &jump_ops, },
			
 
				-	{ .name = "jnbe",  .ops  = &jump_ops, },
			
 
				-	{ .name = "jnc",   .ops  = &jump_ops, },
			
 
				-	{ .name = "jne",   .ops  = &jump_ops, },
			
 
				-	{ .name = "jng",   .ops  = &jump_ops, },
			
 
				-	{ .name = "jnge",  .ops  = &jump_ops, },
			
 
				-	{ .name = "jnl",   .ops  = &jump_ops, },
			
 
				-	{ .name = "jnle",  .ops  = &jump_ops, },
			
 
				-	{ .name = "jno",   .ops  = &jump_ops, },
			
 
				-	{ .name = "jnp",   .ops  = &jump_ops, },
			
 
				-	{ .name = "jns",   .ops  = &jump_ops, },
			
 
				-	{ .name = "jnz",   .ops  = &jump_ops, },
			
 
				-	{ .name = "jo",	   .ops  = &jump_ops, },
			
 
				-	{ .name = "jp",	   .ops  = &jump_ops, },
			
 
				-	{ .name = "jpe",   .ops  = &jump_ops, },
			
 
				-	{ .name = "jpo",   .ops  = &jump_ops, },
			
 
				-	{ .name = "jrcxz", .ops  = &jump_ops, },
			
 
				-	{ .name = "js",	   .ops  = &jump_ops, },
			
 
				-	{ .name = "jz",	   .ops  = &jump_ops, },
			
 
				-	{ .name = "lea",   .ops  = &mov_ops, },
			
 
				-	{ .name = "lock",  .ops  = &lock_ops, },
			
 
				-	{ .name = "mov",   .ops  = &mov_ops, },
			
 
				-	{ .name = "movb",  .ops  = &mov_ops, },
			
 
				-	{ .name = "movdqa",.ops  = &mov_ops, },
			
 
				-	{ .name = "movl",  .ops  = &mov_ops, },
			
 
				-	{ .name = "movq",  .ops  = &mov_ops, },
			
 
				-	{ .name = "movslq", .ops  = &mov_ops, },
			
 
				-	{ .name = "movzbl", .ops  = &mov_ops, },
			
 
				-	{ .name = "movzwl", .ops  = &mov_ops, },
			
 
				-	{ .name = "nop",   .ops  = &nop_ops, },
			
 
				-	{ .name = "nopl",  .ops  = &nop_ops, },
			
 
				-	{ .name = "nopw",  .ops  = &nop_ops, },
			
 
				-	{ .name = "or",    .ops  = &mov_ops, },
			
 
				-	{ .name = "orl",   .ops  = &mov_ops, },
			
 
				-	{ .name = "test",  .ops  = &mov_ops, },
			
 
				-	{ .name = "testb", .ops  = &mov_ops, },
			
 
				-	{ .name = "testl", .ops  = &mov_ops, },
			
 
				-	{ .name = "xadd",  .ops  = &mov_ops, },
			
 
				-	{ .name = "xbeginl", .ops  = &jump_ops, },
			
 
				-	{ .name = "xbeginq", .ops  = &jump_ops, },
			
 
				-	{ .name = "retq",  .ops  = &ret_ops, },
			
 
				-};
			
 
				-
			
 
				 static int ins__key_cmp(const void *name, const void *insp)
			
 
				 {
			
 
				 	const struct ins *ins = insp;
			
@@ -472,24 +418,58 @@ static int ins__cmp(const void *a, const void *b)
 
				 	return strcmp(ia->name, ib->name);
			
 
				 }
			
 
				 
			
 
				-static void ins__sort(void)
			
 
				+static void ins__sort(struct arch *arch)
			
 
				 {
			
 
				-	const int nmemb = ARRAY_SIZE(instructions);
			
 
				+	const int nmemb = arch->nr_instructions;
			
 
				 
			
 
				-	qsort(instructions, nmemb, sizeof(struct ins), ins__cmp);
			
 
				+	qsort(arch->instructions, nmemb, sizeof(struct ins), ins__cmp);
			
 
				 }
			
 
				 
			
 
				-static struct ins *ins__find(const char *name)
			
 
				+static struct ins *ins__find(struct arch *arch, const char *name)
			
 
				 {
			
 
				-	const int nmemb = ARRAY_SIZE(instructions);
			
 
				+	const int nmemb = arch->nr_instructions;
			
 
				+
			
 
				+	if (!arch->sorted_instructions) {
			
 
				+		ins__sort(arch);
			
 
				+		arch->sorted_instructions = true;
			
 
				+	}
			
 
				+
			
 
				+	return bsearch(name, arch->instructions, nmemb, sizeof(struct ins), ins__key_cmp);
			
 
				+}
			
 
				+
			
 
				+static int arch__key_cmp(const void *name, const void *archp)
			
 
				+{
			
 
				+	const struct arch *arch = archp;
			
 
				+
			
 
				+	return strcmp(name, arch->name);
			
 
				+}
			
 
				+
			
 
				+static int arch__cmp(const void *a, const void *b)
			
 
				+{
			
 
				+	const struct arch *aa = a;
			
 
				+	const struct arch *ab = b;
			
 
				+
			
 
				+	return strcmp(aa->name, ab->name);
			
 
				+}
			
 
				+
			
 
				+static void arch__sort(void)
			
 
				+{
			
 
				+	const int nmemb = ARRAY_SIZE(architectures);
			
 
				+
			
 
				+	qsort(architectures, nmemb, sizeof(struct arch), arch__cmp);
			
 
				+}
			
 
				+
			
 
				+static struct arch *arch__find(const char *name)
			
 
				+{
			
 
				+	const int nmemb = ARRAY_SIZE(architectures);
			
 
				 	static bool sorted;
			
 
				 
			
 
				 	if (!sorted) {
			
 
				-		ins__sort();
			
 
				+		arch__sort();
			
 
				 		sorted = true;
			
 
				 	}
			
 
				 
			
 
				-	return bsearch(name, instructions, nmemb, sizeof(struct ins), ins__key_cmp);
			
 
				+	return bsearch(name, architectures, nmemb, sizeof(struct arch), arch__key_cmp);
			
 
				 }
			
 
				 
			
 
				 int symbol__alloc_hist(struct symbol *sym)
			
@@ -709,9 +689,9 @@ int hist_entry__inc_addr_samples(struct hist_entry *he, int evidx, u64 ip)
 
				 	return symbol__inc_addr_samples(he->ms.sym, he->ms.map, evidx, ip);
			
 
				 }
			
 
				 
			
 
				-static void disasm_line__init_ins(struct disasm_line *dl, struct map *map)
			
 
				+static void disasm_line__init_ins(struct disasm_line *dl, struct arch *arch, struct map *map)
			
 
				 {
			
 
				-	dl->ins = ins__find(dl->name);
			
 
				+	dl->ins = ins__find(arch, dl->name);
			
 
				 
			
 
				 	if (dl->ins == NULL)
			
 
				 		return;
			
@@ -719,7 +699,7 @@ static void disasm_line__init_ins(struct disasm_line *dl, struct map *map)
 
				 	if (!dl->ins->ops)
			
 
				 		return;
			
 
				 
			
 
				-	if (dl->ins->ops->parse && dl->ins->ops->parse(&dl->ops, map) < 0)
			
 
				+	if (dl->ins->ops->parse && dl->ins->ops->parse(arch, &dl->ops, map) < 0)
			
 
				 		dl->ins = NULL;
			
 
				 }
			
 
				 
			
@@ -762,6 +742,7 @@ out_free_name:
 
				 
			
 
				 static struct disasm_line *disasm_line__new(s64 offset, char *line,
			
 
				 					    size_t privsize, int line_nr,
			
 
				+					    struct arch *arch,
			
 
				 					    struct map *map)
			
 
				 {
			
 
				 	struct disasm_line *dl = zalloc(sizeof(*dl) + privsize);
			
@@ -777,7 +758,7 @@ static struct disasm_line *disasm_line__new(s64 offset, char *line,
 
				 			if (disasm_line__parse(dl->line, &dl->name, &dl->ops.raw) < 0)
			
 
				 				goto out_free_line;
			
 
				 
			
 
				-			disasm_line__init_ins(dl, map);
			
 
				+			disasm_line__init_ins(dl, arch, map);
			
 
				 		}
			
 
				 	}
			
 
				 
			
@@ -1087,6 +1068,7 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st
 
				  * The ops.raw part will be parsed further according to type of the instruction.
			
 
				  */
			
 
				 static int symbol__parse_objdump_line(struct symbol *sym, struct map *map,
			
 
				+				      struct arch *arch,
			
 
				 				      FILE *file, size_t privsize,
			
 
				 				      int *line_nr)
			
 
				 {
			
@@ -1149,7 +1131,7 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map,
 
				 			parsed_line = tmp2 + 1;
			
 
				 	}
			
 
				 
			
 
				-	dl = disasm_line__new(offset, parsed_line, privsize, *line_nr, map);
			
 
				+	dl = disasm_line__new(offset, parsed_line, privsize, *line_nr, arch, map);
			
 
				 	free(line);
			
 
				 	(*line_nr)++;
			
 
				 
			
@@ -1280,10 +1262,23 @@ fallback:
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-int symbol__disassemble(struct symbol *sym, struct map *map, size_t privsize)
			
 
				+static const char *annotate__norm_arch(const char *arch_name)
			
 
				+{
			
 
				+	struct utsname uts;
			
 
				+
			
 
				+	if (!arch_name) { /* Assume we are annotating locally. */
			
 
				+		if (uname(&uts) < 0)
			
 
				+			return NULL;
			
 
				+		arch_name = uts.machine;
			
 
				+	}
			
 
				+	return normalize_arch((char *)arch_name);
			
 
				+}
			
 
				+
			
 
				+int symbol__disassemble(struct symbol *sym, struct map *map, const char *arch_name, size_t privsize)
			
 
				 {
			
 
				 	struct dso *dso = map->dso;
			
 
				 	char command[PATH_MAX * 2];
			
 
				+	struct arch *arch = NULL;
			
 
				 	FILE *file;
			
 
				 	char symfs_filename[PATH_MAX];
			
 
				 	struct kcore_extract kce;
			
@@ -1297,6 +1292,14 @@ int symbol__disassemble(struct symbol *sym, struct map *map, size_t privsize)
 
				 	if (err)
			
 
				 		return err;
			
 
				 
			
 
				+	arch_name = annotate__norm_arch(arch_name);
			
 
				+	if (!arch_name)
			
 
				+		return -1;
			
 
				+
			
 
				+	arch = arch__find(arch_name);
			
 
				+	if (arch == NULL)
			
 
				+		return -ENOTSUP;
			
 
				+
			
 
				 	pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__,
			
 
				 		 symfs_filename, sym->name, map->unmap_ip(map, sym->start),
			
 
				 		 map->unmap_ip(map, sym->end));
			
@@ -1395,7 +1398,7 @@ int symbol__disassemble(struct symbol *sym, struct map *map, size_t privsize)
 
				 
			
 
				 	nline = 0;
			
 
				 	while (!feof(file)) {
			
 
				-		if (symbol__parse_objdump_line(sym, map, file, privsize,
			
 
				+		if (symbol__parse_objdump_line(sym, map, arch, file, privsize,
			
 
				 			    &lineno) < 0)
			
 
				 			break;
			
 
				 		nline++;
			
@@ -1793,7 +1796,7 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map,
 
				 	struct rb_root source_line = RB_ROOT;
			
 
				 	u64 len;
			
 
				 
			
 
				-	if (symbol__disassemble(sym, map, 0) < 0)
			
 
				+	if (symbol__disassemble(sym, map, perf_evsel__env_arch(evsel), 0) < 0)
			
 
				 		return -1;
			
 
				 
			
 
				 	len = symbol__size(sym);
			
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -34,9 +34,11 @@ struct ins_operands {
 
				 	};
			
 
				 };
			
 
				 
			
 
				+struct arch;
			
 
				+
			
 
				 struct ins_ops {
			
 
				 	void (*free)(struct ins_operands *ops);
			
 
				-	int (*parse)(struct ins_operands *ops, struct map *map);
			
 
				+	int (*parse)(struct arch *arch, struct ins_operands *ops, struct map *map);
			
 
				 	int (*scnprintf)(struct ins *ins, char *bf, size_t size,
			
 
				 			 struct ins_operands *ops);
			
 
				 };
			
@@ -156,7 +158,7 @@ int hist_entry__inc_addr_samples(struct hist_entry *he, int evidx, u64 addr);
 
				 int symbol__alloc_hist(struct symbol *sym);
			
 
				 void symbol__annotate_zero_histograms(struct symbol *sym);
			
 
				 
			
 
				-int symbol__disassemble(struct symbol *sym, struct map *map, size_t privsize);
			
 
				+int symbol__disassemble(struct symbol *sym, struct map *map, const char *arch_name, size_t privsize);
			
 
				 
			
 
				 enum symbol_disassemble_errno {
			
 
				 	SYMBOL_ANNOTATE_ERRNO__SUCCESS		= 0,
			
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1481,7 +1481,7 @@ retry_sample_id:
 
				 
			
 
				 			group_fd = get_group_fd(evsel, cpu, thread);
			
 
				 retry_open:
			
 
				-			pr_debug2("sys_perf_event_open: pid %d  cpu %d  group_fd %d  flags %#lx\n",
			
 
				+			pr_debug2("sys_perf_event_open: pid %d  cpu %d  group_fd %d  flags %#lx",
			
 
				 				  pid, cpus->map[cpu], group_fd, flags);
			
 
				 
			
 
				 			FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr,
			
@@ -1490,11 +1490,13 @@ retry_open:
 
				 								     group_fd, flags);
			
 
				 			if (FD(evsel, cpu, thread) < 0) {
			
 
				 				err = -errno;
			
 
				-				pr_debug2("sys_perf_event_open failed, error %d\n",
			
 
				+				pr_debug2("\nsys_perf_event_open failed, error %d\n",
			
 
				 					  err);
			
 
				 				goto try_fallback;
			
 
				 			}
			
 
				 
			
 
				+			pr_debug2(" = %d\n", FD(evsel, cpu, thread));
			
 
				+
			
 
				 			if (evsel->bpf_fd >= 0) {
			
 
				 				int evt_fd = FD(evsel, cpu, thread);
			
 
				 				int bpf_fd = evsel->bpf_fd;
			
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -391,6 +391,7 @@ int perf_evsel__fprintf(struct perf_evsel *evsel,
 
				 #define EVSEL__PRINT_ONELINE		(1<<4)
			
 
				 #define EVSEL__PRINT_SRCLINE		(1<<5)
			
 
				 #define EVSEL__PRINT_UNKNOWN_AS_ADDR	(1<<6)
			
 
				+#define EVSEL__PRINT_CALLCHAIN_ARROW	(1<<7)
			
 
				 
			
 
				 struct callchain_cursor;
			
 
				 
			
--- a/tools/perf/util/evsel_fprintf.c
+++ b/tools/perf/util/evsel_fprintf.c
@@ -108,7 +108,9 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment,
 
				 	int print_oneline = print_opts & EVSEL__PRINT_ONELINE;
			
 
				 	int print_srcline = print_opts & EVSEL__PRINT_SRCLINE;
			
 
				 	int print_unknown_as_addr = print_opts & EVSEL__PRINT_UNKNOWN_AS_ADDR;
			
 
				+	int print_arrow = print_opts & EVSEL__PRINT_CALLCHAIN_ARROW;
			
 
				 	char s = print_oneline ? ' ' : '\t';
			
 
				+	bool first = true;
			
 
				 
			
 
				 	if (sample->callchain) {
			
 
				 		struct addr_location node_al;
			
@@ -124,6 +126,9 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment,
 
				 
			
 
				 			printed += fprintf(fp, "%-*.*s", left_alignment, left_alignment, " ");
			
 
				 
			
 
				+			if (print_arrow && !first)
			
 
				+				printed += fprintf(fp, " <-");
			
 
				+
			
 
				 			if (print_ip)
			
 
				 				printed += fprintf(fp, "%c%16" PRIx64, s, node->ip);
			
 
				 
			
@@ -137,7 +142,8 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment,
 
				 
			
 
				 				if (print_symoffset) {
			
 
				 					printed += __symbol__fprintf_symname_offs(node->sym, &node_al,
			
 
				-										  print_unknown_as_addr, fp);
			
 
				+										  print_unknown_as_addr,
			
 
				+										  true, fp);
			
 
				 				} else {
			
 
				 					printed += __symbol__fprintf_symname(node->sym, &node_al,
			
 
				 									     print_unknown_as_addr, fp);
			
@@ -157,6 +163,7 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment,
 
				 				printed += fprintf(fp, "\n");
			
 
				 
			
 
				 			callchain_cursor_advance(cursor);
			
 
				+			first = false;
			
 
				 		}
			
 
				 	}
			
 
				 
			
@@ -188,7 +195,8 @@ int sample__fprintf_sym(struct perf_sample *sample, struct addr_location *al,
 
				 			printed += fprintf(fp, " ");
			
 
				 			if (print_symoffset) {
			
 
				 				printed += __symbol__fprintf_symname_offs(al->sym, al,
			
 
				-									  print_unknown_as_addr, fp);
			
 
				+									  print_unknown_as_addr,
			
 
				+									  true, fp);
			
 
				 			} else {
			
 
				 				printed += __symbol__fprintf_symname(al->sym, al,
			
 
				 								     print_unknown_as_addr, fp);
			
--- a/tools/perf/util/mem-events.c
+++ b/tools/perf/util/mem-events.c
@@ -280,6 +280,12 @@ int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi)
 
				 	u64 lock   = data_src->mem_lock;
			
 
				 	int err = 0;
			
 
				 
			
 
				+#define HITM_INC(__f)		\
			
 
				+do {				\
			
 
				+	stats->__f++;		\
			
 
				+	stats->tot_hitm++;	\
			
 
				+} while (0)
			
 
				+
			
 
				 #define P(a, b) PERF_MEM_##a##_##b
			
 
				 
			
 
				 	stats->nr_entries++;
			
@@ -303,7 +309,7 @@ int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi)
 
				 			if (lvl & P(LVL, L2 )) stats->ld_l2hit++;
			
 
				 			if (lvl & P(LVL, L3 )) {
			
 
				 				if (snoop & P(SNOOP, HITM))
			
 
				-					stats->lcl_hitm++;
			
 
				+					HITM_INC(lcl_hitm);
			
 
				 				else
			
 
				 					stats->ld_llchit++;
			
 
				 			}
			
@@ -331,7 +337,7 @@ int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi)
 
				 			if (snoop & P(SNOOP, HIT))
			
 
				 				stats->rmt_hit++;
			
 
				 			else if (snoop & P(SNOOP, HITM))
			
 
				-				stats->rmt_hitm++;
			
 
				+				HITM_INC(rmt_hitm);
			
 
				 		}
			
 
				 
			
 
				 		if ((lvl & P(LVL, MISS)))
			
@@ -364,6 +370,7 @@ int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi)
 
				 	}
			
 
				 
			
 
				 #undef P
			
 
				+#undef HITM_INC
			
 
				 	return err;
			
 
				 }
			
 
				 
			
@@ -390,6 +397,7 @@ void c2c_add_stats(struct c2c_stats *stats, struct c2c_stats *add)
 
				 	stats->ld_llchit	+= add->ld_llchit;
			
 
				 	stats->lcl_hitm		+= add->lcl_hitm;
			
 
				 	stats->rmt_hitm		+= add->rmt_hitm;
			
 
				+	stats->tot_hitm		+= add->tot_hitm;
			
 
				 	stats->rmt_hit		+= add->rmt_hit;
			
 
				 	stats->lcl_dram		+= add->lcl_dram;
			
 
				 	stats->rmt_dram		+= add->rmt_dram;
			
--- a/tools/perf/util/mem-events.h
+++ b/tools/perf/util/mem-events.h
@@ -59,6 +59,7 @@ struct c2c_stats {
 
				 	u32	ld_llchit;           /* count of loads that hit LLC */
			
 
				 	u32	lcl_hitm;            /* count of loads with local HITM  */
			
 
				 	u32	rmt_hitm;            /* count of loads with remote HITM */
			
 
				+	u32	tot_hitm;            /* count of loads with local and remote HITM */
			
 
				 	u32	rmt_hit;             /* count of loads with remote hit clean; */
			
 
				 	u32	lcl_dram;            /* count of loads miss to local DRAM */
			
 
				 	u32	rmt_dram;            /* count of loads miss to remote DRAM */
			
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -282,7 +282,8 @@ int symbol__annotation_init(void);
 
				 struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name);
			
 
				 size_t __symbol__fprintf_symname_offs(const struct symbol *sym,
			
 
				 				      const struct addr_location *al,
			
 
				-				      bool unknown_as_addr, FILE *fp);
			
 
				+				      bool unknown_as_addr,
			
 
				+				      bool print_offsets, FILE *fp);
			
 
				 size_t symbol__fprintf_symname_offs(const struct symbol *sym,
			
 
				 				    const struct addr_location *al, FILE *fp);
			
 
				 size_t __symbol__fprintf_symname(const struct symbol *sym,
			
--- a/tools/perf/util/symbol_fprintf.c
+++ b/tools/perf/util/symbol_fprintf.c
@@ -15,14 +15,15 @@ size_t symbol__fprintf(struct symbol *sym, FILE *fp)
 
				 
			
 
				 size_t __symbol__fprintf_symname_offs(const struct symbol *sym,
			
 
				 				      const struct addr_location *al,
			
 
				-				      bool unknown_as_addr, FILE *fp)
			
 
				+				      bool unknown_as_addr,
			
 
				+				      bool print_offsets, FILE *fp)
			
 
				 {
			
 
				 	unsigned long offset;
			
 
				 	size_t length;
			
 
				 
			
 
				 	if (sym && sym->name) {
			
 
				 		length = fprintf(fp, "%s", sym->name);
			
 
				-		if (al) {
			
 
				+		if (al && print_offsets) {
			
 
				 			if (al->addr < sym->end)
			
 
				 				offset = al->addr - sym->start;
			
 
				 			else
			
@@ -40,19 +41,19 @@ size_t symbol__fprintf_symname_offs(const struct symbol *sym,
 
				 				    const struct addr_location *al,
			
 
				 				    FILE *fp)
			
 
				 {
			
 
				-	return __symbol__fprintf_symname_offs(sym, al, false, fp);
			
 
				+	return __symbol__fprintf_symname_offs(sym, al, false, true, fp);
			
 
				 }
			
 
				 
			
 
				 size_t __symbol__fprintf_symname(const struct symbol *sym,
			
 
				 				 const struct addr_location *al,
			
 
				 				 bool unknown_as_addr, FILE *fp)
			
 
				 {
			
 
				-	return __symbol__fprintf_symname_offs(sym, al, unknown_as_addr, fp);
			
 
				+	return __symbol__fprintf_symname_offs(sym, al, unknown_as_addr, false, fp);
			
 
				 }
			
 
				 
			
 
				 size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp)
			
 
				 {
			
 
				-	return __symbol__fprintf_symname_offs(sym, NULL, false, fp);
			
 
				+	return __symbol__fprintf_symname_offs(sym, NULL, false, false, fp);
			
 
				 }
			
 
				 
			
 
				 size_t dso__fprintf_symbols_by_name(struct dso *dso,