Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

User-visible changes:

  - Replace CTRL+z with 'f' as hotkey for enable/disable events (Arnaldo Carvalho de Melo)

  - Do not exit when 'f' is pressed in 'report' mode (Arnaldo Carvalho de Melo)

  - Tell the user how to unfreeze events after pressing 'f' in 'perf top' (Arnaldo Carvalho de Melo)

  - React to unassigned hotkey presses in 'top/report' (Arnaldo Carvalho de Melo)

  - Display total number of samples with --show-total-period in 'annotate' (Martin Liška)

  - Add timeout to make procfs mmap processing more robust (Kan Liang)

  - Fix sort__sym_cmp to also compare end of symbol (Yannick Brosseau)

Infrastructure changes:

  - Ensure thread-stack is flushed (Adrian Hunter)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Ingo Molnar, 10 years ago
parent
commit a9a3cd900f

+ 4 - 0
include/uapi/linux/perf_event.h

@@ -565,6 +565,10 @@ struct perf_event_mmap_page {
 #define PERF_RECORD_MISC_GUEST_KERNEL		(4 << 0)
 #define PERF_RECORD_MISC_GUEST_USER		(5 << 0)
 
+/*
+ * Indicates that /proc/PID/maps parsing was truncated by a timeout.
+ */
+#define PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT	(1 << 12)
 /*
  * PERF_RECORD_MISC_MMAP_DATA and PERF_RECORD_MISC_COMM_EXEC are used on
  * different events so can reuse the same bit position.
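
A hedged aside (not part of this merge): with the new misc bit defined, a consumer of the perf event stream can tell whether an MMAP2 record belongs to a thread whose /proc/PID/maps parsing was cut short by the --proc-map-timeout limit. perf itself does this in util/session.c further down to count such records; the helper below is only an illustrative sketch against the uapi header, and its name is made up.

	#include <stdbool.h>
	#include <linux/perf_event.h>

	/* Hypothetical helper: did this record come from a truncated maps parse? */
	static bool mmap2_was_truncated(const struct perf_event_header *hdr)
	{
		return hdr->type == PERF_RECORD_MMAP2 &&
		       (hdr->misc & PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT);
	}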

+ 6 - 0
tools/perf/Documentation/perf-kvm.txt

@@ -151,6 +151,12 @@ STAT LIVE OPTIONS
        Show events other than HLT (x86 only) or Wait state (s390 only)
        that take longer than duration usecs.
 
+--proc-map-timeout::
+	Processing the /proc/XXX/maps file of a pre-existing thread may
+	take a long time because the file can be huge. A timeout is
+	needed in such cases.
+	This option sets the timeout limit. The default value is 500 ms.
+
 SEE ALSO
 --------
 linkperf:perf-top[1], linkperf:perf-record[1], linkperf:perf-report[1],

+ 5 - 0
tools/perf/Documentation/perf-record.txt

@@ -271,6 +271,11 @@ AUX area tracing event. Optionally the number of bytes to capture per
 snapshot can be specified. In Snapshot Mode, trace data is captured only when
 signal SIGUSR2 is received.
 
+--proc-map-timeout::
+Processing the /proc/XXX/maps file of a pre-existing thread may take a long
+time because the file can be huge. A timeout is needed in such cases.
+This option sets the timeout limit. The default value is 500 ms.
+
 SEE ALSO
 --------
 linkperf:perf-stat[1], linkperf:perf-list[1]

+ 6 - 0
tools/perf/Documentation/perf-top.txt

@@ -201,6 +201,12 @@ Default is to monitor all CPUS.
 	Force each column width to the provided list, for large terminal
 	readability.  0 means no limit (default behavior).
 
+--proc-map-timeout::
+	Processing the /proc/XXX/maps file of a pre-existing thread may
+	take a long time because the file can be huge. A timeout is
+	needed in such cases.
+	This option sets the timeout limit. The default value is 500 ms.
+
 
 INTERACTIVE PROMPTING KEYS
 --------------------------

+ 5 - 0
tools/perf/Documentation/perf-trace.txt

@@ -121,6 +121,11 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs.
 --event::
 	Trace other events, see 'perf list' for a complete list.
 
+--proc-map-timeout::
+	Processing the /proc/XXX/maps file of a pre-existing thread may take a
+	long time because the file can be huge. A timeout is needed in such cases.
+	This option sets the timeout limit. The default value is 500 ms.
+
 PAGEFAULTS
 ----------
 

+ 2 - 0
tools/perf/builtin-annotate.c

@@ -329,6 +329,8 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused)
 		   "objdump binary to use for disassembly and annotations"),
 	OPT_BOOLEAN(0, "group", &symbol_conf.event_group,
 		    "Show event group information together"),
+	OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
+		    "Show a column with the sum of periods"),
 	OPT_END()
 	};
 	int ret = hists__init();

+ 4 - 1
tools/perf/builtin-kvm.c

@@ -1311,6 +1311,8 @@ static int kvm_events_live(struct perf_kvm_stat *kvm,
 			"show events other than"
 			" HLT (x86 only) or Wait state (s390 only)"
 			" that take longer than duration usecs"),
+		OPT_UINTEGER(0, "proc-map-timeout", &kvm->opts.proc_map_timeout,
+				"per thread proc mmap processing timeout in ms"),
 		OPT_END()
 	};
 	const char * const live_usage[] = {
@@ -1338,6 +1340,7 @@ static int kvm_events_live(struct perf_kvm_stat *kvm,
 	kvm->opts.target.uses_mmap = false;
 	kvm->opts.target.uid_str = NULL;
 	kvm->opts.target.uid = UINT_MAX;
+	kvm->opts.proc_map_timeout = 500;
 
 	symbol__init(NULL);
 	disable_buildid_cache();
@@ -1393,7 +1396,7 @@ static int kvm_events_live(struct perf_kvm_stat *kvm,
 	perf_session__set_id_hdr_size(kvm->session);
 	ordered_events__set_copy_on_queue(&kvm->session->ordered_events, true);
 	machine__synthesize_threads(&kvm->session->machines.host, &kvm->opts.target,
-				    kvm->evlist->threads, false);
+				    kvm->evlist->threads, false, kvm->opts.proc_map_timeout);
 	err = kvm_live_open_events(kvm);
 	if (err)
 		goto out;

+ 5 - 1
tools/perf/builtin-record.c

@@ -598,7 +598,8 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 	}
 
 	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
-					    process_synthesized_event, opts->sample_address);
+					    process_synthesized_event, opts->sample_address,
+					    opts->proc_map_timeout);
 	if (err != 0)
 		goto out_child;
 
@@ -959,6 +960,7 @@ static struct record record = {
 			.uses_mmap   = true,
 			.default_per_cpu = true,
 		},
+		.proc_map_timeout     = 500,
 	},
 	.tool = {
 		.sample		= process_sample_event,
@@ -1066,6 +1068,8 @@ struct option __record_options[] = {
 	parse_clockid),
 	OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
 			  "opts", "AUX area tracing Snapshot Mode", ""),
+	OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout,
+			"per thread proc mmap processing timeout in ms"),
 	OPT_END()
 };
 

+ 12 - 3
tools/perf/builtin-top.c

@@ -591,7 +591,7 @@ static void *display_thread_tui(void *arg)
 							top->min_percent,
 							&top->session->header.env);
 
-		if (key != CTRL('z'))
+		if (key != 'f')
 			break;
 
 		perf_evlist__toggle_enable(top->evlist);
@@ -599,7 +599,13 @@ static void *display_thread_tui(void *arg)
 		 * No need to refresh, resort/decay histogram entries
 		 * if we are not collecting samples:
 		 */
-		hbt.refresh = top->evlist->enabled ? top->delay_secs : 0;
+		if (top->evlist->enabled) {
+			hbt.refresh = top->delay_secs;
+			help = "Press 'f' to disable the events or 'h' to see other hotkeys";
+		} else {
+			help = "Press 'f' again to re-enable the events";
+			hbt.refresh = 0;
+		}
 	}
 
 	done = 1;
@@ -971,7 +977,7 @@ static int __cmd_top(struct perf_top *top)
 		goto out_delete;
 
 	machine__synthesize_threads(&top->session->machines.host, &opts->target,
-				    top->evlist->threads, false);
+				    top->evlist->threads, false, opts->proc_map_timeout);
 	ret = perf_top__start_counters(top);
 	if (ret)
 		goto out_delete;
@@ -1081,6 +1087,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
 			.target		= {
 				.uses_mmap   = true,
 			},
+			.proc_map_timeout    = 500,
 		},
 		.max_stack	     = PERF_MAX_STACK_DEPTH,
 		.sym_pcnt_filter     = 5,
@@ -1180,6 +1187,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
 	OPT_STRING('w', "column-widths", &symbol_conf.col_width_list_str,
 		   "width[,width...]",
 		   "don't try to adjust column width, use these fixed values"),
+	OPT_UINTEGER(0, "proc-map-timeout", &opts->proc_map_timeout,
+			"per thread proc mmap processing timeout in ms"),
 	OPT_END()
 	};
 	const char * const top_usage[] = {

+ 5 - 1
tools/perf/builtin-trace.c

@@ -1518,7 +1518,8 @@ static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
 		return -ENOMEM;
 
 	err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
-					    evlist->threads, trace__tool_process, false);
+					    evlist->threads, trace__tool_process, false,
+					    trace->opts.proc_map_timeout);
 	if (err)
 		symbol__exit();
 
@@ -2747,6 +2748,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
 			.user_interval = ULLONG_MAX,
 			.no_buffering  = true,
 			.mmap_pages    = UINT_MAX,
+			.proc_map_timeout  = 500,
 		},
 		.output = stdout,
 		.show_comm = true,
@@ -2796,6 +2798,8 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
 		     "Trace pagefaults", parse_pagefaults, "maj"),
 	OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
 	OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
+	OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
+			"per thread proc mmap processing timeout in ms"),
 	OPT_END()
 	};
 	const char * const trace_subcommands[] = { "record", NULL };

+ 1 - 0
tools/perf/perf.h

@@ -69,6 +69,7 @@ struct record_opts {
 	unsigned     initial_delay;
 	bool         use_clockid;
 	clockid_t    clockid;
+	unsigned int proc_map_timeout;
 };
 
 struct option;

+ 1 - 1
tools/perf/tests/code-reading.c

@@ -451,7 +451,7 @@ static int do_test_code_reading(bool try_kcore)
 	}
 
 	ret = perf_event__synthesize_thread_map(NULL, threads,
-						perf_event__process, machine, false);
+						perf_event__process, machine, false, 500);
 	if (ret < 0) {
 		pr_debug("perf_event__synthesize_thread_map failed\n");
 		goto out_err;

+ 1 - 1
tools/perf/tests/dwarf-unwind.c

@@ -28,7 +28,7 @@ static int init_live_machine(struct machine *machine)
 	pid_t pid = getpid();
 
 	return perf_event__synthesize_mmap_events(NULL, &event, pid, pid,
-						  mmap_handler, machine, true);
+						  mmap_handler, machine, true, 500);
 }
 
 #define MAX_STACK 8

+ 2 - 2
tools/perf/tests/mmap-thread-lookup.c

@@ -129,7 +129,7 @@ static int synth_all(struct machine *machine)
 {
 	return perf_event__synthesize_threads(NULL,
 					      perf_event__process,
-					      machine, 0);
+					      machine, 0, 500);
 }
 
 static int synth_process(struct machine *machine)
@@ -141,7 +141,7 @@ static int synth_process(struct machine *machine)
 
 	err = perf_event__synthesize_thread_map(NULL, map,
 						perf_event__process,
-						machine, 0);
+						machine, 0, 500);
 
 	thread_map__delete(map);
 	return err;

+ 43 - 17
tools/perf/ui/browsers/annotate.c

@@ -11,16 +11,21 @@
 #include "../../util/evsel.h"
 #include <pthread.h>
 
+struct disasm_line_samples {
+	double		percent;
+	u64		nr;
+};
+
 struct browser_disasm_line {
-	struct rb_node	rb_node;
-	u32		idx;
-	int		idx_asm;
-	int		jump_sources;
+	struct rb_node			rb_node;
+	u32				idx;
+	int				idx_asm;
+	int				jump_sources;
 	/*
 	 * actual length of this array is saved on the nr_events field
 	 * of the struct annotate_browser
 	 */
-	double		percent[1];
+	struct disasm_line_samples	samples[1];
 };
 
 static struct annotate_browser_opt {
@@ -28,7 +33,8 @@ static struct annotate_browser_opt {
 	     use_offset,
 	     jump_arrows,
 	     show_linenr,
-	     show_nr_jumps;
+	     show_nr_jumps,
+	     show_total_period;
 } annotate_browser__opts = {
 	.use_offset	= true,
 	.jump_arrows	= true,
@@ -105,15 +111,20 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int
 	char bf[256];
 
 	for (i = 0; i < ab->nr_events; i++) {
-		if (bdl->percent[i] > percent_max)
-			percent_max = bdl->percent[i];
+		if (bdl->samples[i].percent > percent_max)
+			percent_max = bdl->samples[i].percent;
 	}
 
 	if (dl->offset != -1 && percent_max != 0.0) {
 		for (i = 0; i < ab->nr_events; i++) {
-			ui_browser__set_percent_color(browser, bdl->percent[i],
+			ui_browser__set_percent_color(browser,
+						      bdl->samples[i].percent,
 						      current_entry);
-			slsmg_printf("%6.2f ", bdl->percent[i]);
+			if (annotate_browser__opts.show_total_period)
+				slsmg_printf("%6" PRIu64 " ",
+					     bdl->samples[i].nr);
+			else
+				slsmg_printf("%6.2f ", bdl->samples[i].percent);
 		}
 	} else {
 		ui_browser__set_percent_color(browser, 0, current_entry);
@@ -273,9 +284,9 @@ static int disasm__cmp(struct browser_disasm_line *a,
 	int i;
 
 	for (i = 0; i < nr_pcnt; i++) {
-		if (a->percent[i] == b->percent[i])
+		if (a->samples[i].percent == b->samples[i].percent)
 			continue;
-		return a->percent[i] < b->percent[i];
+		return a->samples[i].percent < b->samples[i].percent;
 	}
 	return 0;
 }
@@ -366,14 +377,17 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser,
 		next = disasm__get_next_ip_line(&notes->src->source, pos);
 
 		for (i = 0; i < browser->nr_events; i++) {
-			bpos->percent[i] = disasm__calc_percent(notes,
+			u64 nr_samples;
+
+			bpos->samples[i].percent = disasm__calc_percent(notes,
 						evsel->idx + i,
 						pos->offset,
 						next ? next->offset : len,
-					        &path);
+						&path, &nr_samples);
+			bpos->samples[i].nr = nr_samples;
 
-			if (max_percent < bpos->percent[i])
-				max_percent = bpos->percent[i];
+			if (max_percent < bpos->samples[i].percent)
+				max_percent = bpos->samples[i].percent;
 		}
 
 		if (max_percent < 0.01) {
@@ -737,6 +751,7 @@ static int annotate_browser__run(struct annotate_browser *browser,
 		"n             Search next string\n"
 		"o             Toggle disassembler output/simplified view\n"
 		"s             Toggle source code view\n"
+		"t             Toggle total period view\n"
 		"/             Search string\n"
 		"k             Toggle line numbers\n"
 		"r             Run available scripts\n"
@@ -812,6 +827,11 @@ show_sup_ins:
 				ui_helpline__puts("Actions are only available for 'callq', 'retq' & jump instructions.");
 			}
 			continue;
+		case 't':
+			annotate_browser__opts.show_total_period =
+			  !annotate_browser__opts.show_total_period;
+			annotate_browser__update_addr_width(browser);
+			continue;
 		case K_LEFT:
 		case K_ESC:
 		case 'q':
@@ -832,6 +852,10 @@ out:
 int map_symbol__tui_annotate(struct map_symbol *ms, struct perf_evsel *evsel,
 			     struct hist_browser_timer *hbt)
 {
+	/* Set default value for show_total_period.  */
+	annotate_browser__opts.show_total_period =
+	  symbol_conf.show_total_period;
+
 	return symbol__tui_annotate(ms->sym, ms->map, evsel, hbt);
 }
 
@@ -929,7 +953,8 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map,
 
 	if (perf_evsel__is_group_event(evsel)) {
 		nr_pcnt = evsel->nr_members;
-		sizeof_bdl += sizeof(double) * (nr_pcnt - 1);
+		sizeof_bdl += sizeof(struct disasm_line_samples) *
+		  (nr_pcnt - 1);
 	}
 
 	if (symbol__annotate(sym, map, sizeof_bdl) < 0) {
@@ -1006,6 +1031,7 @@ static struct annotate_config {
 	ANNOTATE_CFG(show_linenr),
 	ANNOTATE_CFG(show_nr_jumps),
 	ANNOTATE_CFG(use_offset),
+	ANNOTATE_CFG(show_total_period),
 };
 
 #undef ANNOTATE_CFG

+ 9 - 6
tools/perf/ui/browsers/hists.c

@@ -424,7 +424,7 @@ static void ui_browser__warn_lost_events(struct ui_browser *browser)
 		"Or reduce the sampling frequency.");
 }
 
-static int hist_browser__run(struct hist_browser *browser)
+static int hist_browser__run(struct hist_browser *browser, const char *help)
 {
 	int key;
 	char title[160];
@@ -436,8 +436,7 @@ static int hist_browser__run(struct hist_browser *browser)
 
 	hists__browser_title(browser->hists, hbt, title, sizeof(title));
 
-	if (ui_browser__show(&browser->b, title,
-			     "Press '?' for help on key bindings") < 0)
+	if (ui_browser__show(&browser->b, title, help) < 0)
 		return -1;
 
 	while (1) {
@@ -1736,7 +1735,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 	"t             Zoom into current Thread\n"
 	"V             Verbose (DSO names in callchains, etc)\n"
 	"z             Toggle zeroing of samples\n"
-	"CTRL+z        Enable/Disable events\n"
+	"f             Enable/Disable events\n"
 	"/             Filter symbol by name";
 
 	if (browser == NULL)
@@ -1773,7 +1772,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 
 		nr_options = 0;
 
-		key = hist_browser__run(browser);
+		key = hist_browser__run(browser, helpline);
 
 		if (browser->he_selection != NULL) {
 			thread = hist_browser__selected_thread(browser);
@@ -1901,9 +1900,13 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 			/* Fall thru */
 		case 'q':
 		case CTRL('c'):
-		case CTRL('z'):
 			goto out_free_stack;
+		case 'f':
+			if (!is_report_browser(hbt))
+				goto out_free_stack;
+			/* Fall thru */
 		default:
+			helpline = "Press '?' for help on key bindings";
 			continue;
 		}
 

+ 35 - 17
tools/perf/util/annotate.c

@@ -654,14 +654,15 @@ struct disasm_line *disasm__get_next_ip_line(struct list_head *head, struct disa
 }
 
 double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset,
-			    s64 end, const char **path)
+			    s64 end, const char **path, u64 *nr_samples)
 {
 	struct source_line *src_line = notes->src->lines;
 	double percent = 0.0;
+	*nr_samples = 0;
 
 	if (src_line) {
 		size_t sizeof_src_line = sizeof(*src_line) +
-				sizeof(src_line->p) * (src_line->nr_pcnt - 1);
+				sizeof(src_line->samples) * (src_line->nr_pcnt - 1);
 
 		while (offset < end) {
 			src_line = (void *)notes->src->lines +
@@ -670,7 +671,8 @@ double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset,
 			if (*path == NULL)
 				*path = src_line->path;
 
-			percent += src_line->p[evidx].percent;
+			percent += src_line->samples[evidx].percent;
+			*nr_samples += src_line->samples[evidx].nr;
 			offset++;
 		}
 	} else {
@@ -680,8 +682,10 @@ double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset,
 		while (offset < end)
 			hits += h->addr[offset++];
 
-		if (h->sum)
+		if (h->sum) {
+			*nr_samples = hits;
 			percent = 100.0 * hits / h->sum;
+		}
 	}
 
 	return percent;
@@ -696,8 +700,10 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st
 
 	if (dl->offset != -1) {
 		const char *path = NULL;
+		u64 nr_samples;
 		double percent, max_percent = 0.0;
 		double *ppercents = &percent;
+		u64 *psamples = &nr_samples;
 		int i, nr_percent = 1;
 		const char *color;
 		struct annotation *notes = symbol__annotation(sym);
@@ -710,8 +716,10 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st
 		if (perf_evsel__is_group_event(evsel)) {
 			nr_percent = evsel->nr_members;
 			ppercents = calloc(nr_percent, sizeof(double));
-			if (ppercents == NULL)
+			psamples = calloc(nr_percent, sizeof(u64));
+			if (ppercents == NULL || psamples == NULL) {
 				return -1;
+			}
 		}
 
 		for (i = 0; i < nr_percent; i++) {
@@ -719,9 +727,10 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st
 					notes->src->lines ? i : evsel->idx + i,
 					offset,
 					next ? next->offset : (s64) len,
-					&path);
+					&path, &nr_samples);
 
 			ppercents[i] = percent;
+			psamples[i] = nr_samples;
 			if (percent > max_percent)
 				max_percent = percent;
 		}
@@ -759,8 +768,14 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st
 
 		for (i = 0; i < nr_percent; i++) {
 			percent = ppercents[i];
+			nr_samples = psamples[i];
 			color = get_percent_color(percent);
-			color_fprintf(stdout, color, " %7.2f", percent);
+
+			if (symbol_conf.show_total_period)
+				color_fprintf(stdout, color, " %7" PRIu64,
+					      nr_samples);
+			else
+				color_fprintf(stdout, color, " %7.2f", percent);
 		}
 
 		printf(" :	");
@@ -770,6 +785,9 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st
 		if (ppercents != &percent)
 			free(ppercents);
 
+		if (psamples != &nr_samples)
+			free(psamples);
+
 	} else if (max_lines && printed >= max_lines)
 		return 1;
 	else {
@@ -1103,7 +1121,7 @@ static void insert_source_line(struct rb_root *root, struct source_line *src_lin
 		ret = strcmp(iter->path, src_line->path);
 		if (ret == 0) {
 			for (i = 0; i < src_line->nr_pcnt; i++)
-				iter->p[i].percent_sum += src_line->p[i].percent;
+				iter->samples[i].percent_sum += src_line->samples[i].percent;
 			return;
 		}
 
@@ -1114,7 +1132,7 @@ static void insert_source_line(struct rb_root *root, struct source_line *src_lin
 	}
 
 	for (i = 0; i < src_line->nr_pcnt; i++)
-		src_line->p[i].percent_sum = src_line->p[i].percent;
+		src_line->samples[i].percent_sum = src_line->samples[i].percent;
 
 	rb_link_node(&src_line->node, parent, p);
 	rb_insert_color(&src_line->node, root);
@@ -1125,9 +1143,9 @@ static int cmp_source_line(struct source_line *a, struct source_line *b)
 	int i;
 
 	for (i = 0; i < a->nr_pcnt; i++) {
-		if (a->p[i].percent_sum == b->p[i].percent_sum)
+		if (a->samples[i].percent_sum == b->samples[i].percent_sum)
 			continue;
-		return a->p[i].percent_sum > b->p[i].percent_sum;
+		return a->samples[i].percent_sum > b->samples[i].percent_sum;
 	}
 
 	return 0;
@@ -1179,7 +1197,7 @@ static void symbol__free_source_line(struct symbol *sym, int len)
 	int i;
 
 	sizeof_src_line = sizeof(*src_line) +
-			  (sizeof(src_line->p) * (src_line->nr_pcnt - 1));
+			  (sizeof(src_line->samples) * (src_line->nr_pcnt - 1));
 
 	for (i = 0; i < len; i++) {
 		free_srcline(src_line->path);
@@ -1211,7 +1229,7 @@ static int symbol__get_source_line(struct symbol *sym, struct map *map,
 			h_sum += h->sum;
 		}
 		nr_pcnt = evsel->nr_members;
-		sizeof_src_line += (nr_pcnt - 1) * sizeof(src_line->p);
+		sizeof_src_line += (nr_pcnt - 1) * sizeof(src_line->samples);
 	}
 
 	if (!h_sum)
@@ -1231,10 +1249,10 @@ static int symbol__get_source_line(struct symbol *sym, struct map *map,
 
 		for (k = 0; k < nr_pcnt; k++) {
 			h = annotation__histogram(notes, evidx + k);
-			src_line->p[k].percent = 100.0 * h->addr[i] / h->sum;
+			src_line->samples[k].percent = 100.0 * h->addr[i] / h->sum;
 
-			if (src_line->p[k].percent > percent_max)
-				percent_max = src_line->p[k].percent;
+			if (src_line->samples[k].percent > percent_max)
+				percent_max = src_line->samples[k].percent;
 		}
 
 		if (percent_max <= 0.5)
@@ -1274,7 +1292,7 @@ static void print_summary(struct rb_root *root, const char *filename)
 
 		src_line = rb_entry(node, struct source_line, node);
 		for (i = 0; i < src_line->nr_pcnt; i++) {
-			percent = src_line->p[i].percent_sum;
+			percent = src_line->samples[i].percent_sum;
 			color = get_percent_color(percent);
 			color_fprintf(stdout, color, " %7.2f", percent);
 

+ 4 - 3
tools/perf/util/annotate.h

@@ -72,23 +72,24 @@ struct disasm_line *disasm__get_next_ip_line(struct list_head *head, struct disa
 int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw);
 size_t disasm__fprintf(struct list_head *head, FILE *fp);
 double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset,
-			    s64 end, const char **path);
+			    s64 end, const char **path, u64 *nr_samples);
 
 struct sym_hist {
 	u64		sum;
 	u64		addr[0];
 };
 
-struct source_line_percent {
+struct source_line_samples {
 	double		percent;
 	double		percent_sum;
+	double          nr;
 };
 
 struct source_line {
 	struct rb_node	node;
 	char		*path;
 	int		nr_pcnt;
-	struct source_line_percent p[1];
+	struct source_line_samples samples[1];
 };
 
 /** struct annotated_source - symbols with hits have this attached as in sannotation
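
A hedged note on the struct above (not code from this merge): samples[1] is a variable-length tail, so a source_line carrying data for nr_pcnt events is allocated with room for nr_pcnt - 1 extra entries, which is exactly the sizeof_src_line arithmetic in util/annotate.c. A standalone sketch of that allocation pattern, with stand-in types:

	#include <stdlib.h>

	struct line_samples {
		double percent;
		double percent_sum;
		double nr;
	};

	struct line {
		int nr_pcnt;
		struct line_samples samples[1];	/* really nr_pcnt entries */
	};

	static struct line *line__new(int nr_pcnt)
	{
		/* one extra line_samples per event beyond the first */
		size_t size = sizeof(struct line) +
			      sizeof(struct line_samples) * (nr_pcnt - 1);
		struct line *l = calloc(1, size);

		if (l)
			l->nr_pcnt = nr_pcnt;
		return l;
	}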

+ 37 - 9
tools/perf/util/event.c

@@ -218,10 +218,14 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
 				       pid_t pid, pid_t tgid,
 				       perf_event__handler_t process,
 				       struct machine *machine,
-				       bool mmap_data)
+				       bool mmap_data,
+				       unsigned int proc_map_timeout)
 {
 	char filename[PATH_MAX];
 	FILE *fp;
+	unsigned long long t;
+	bool truncation = false;
+	unsigned long long timeout = proc_map_timeout * 1000000ULL;
 	int rc = 0;
 
 	if (machine__is_default_guest(machine))
@@ -240,6 +244,7 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
 	}
 
 	event->header.type = PERF_RECORD_MMAP2;
+	t = rdclock();
 
 	while (1) {
 		char bf[BUFSIZ];
@@ -253,6 +258,15 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
 		if (fgets(bf, sizeof(bf), fp) == NULL)
 			break;
 
+		if ((rdclock() - t) > timeout) {
+			pr_warning("Reading %s timed out. "
+				   "You may want to increase "
+				   "the timeout limit with --proc-map-timeout\n",
+				   filename);
+			truncation = true;
+			goto out;
+		}
+
 		/* ensure null termination since stack will be reused. */
 		strcpy(execname, "");
 
@@ -301,6 +315,10 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
 			event->header.misc |= PERF_RECORD_MISC_MMAP_DATA;
 		}
 
+out:
+		if (truncation)
+			event->header.misc |= PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT;
+
 		if (!strcmp(execname, ""))
 			strcpy(execname, anonstr);
 
@@ -319,6 +337,9 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
 			rc = -1;
 			break;
 		}
+
+		if (truncation)
+			break;
 	}
 
 	fclose(fp);
@@ -386,7 +407,9 @@ static int __event__synthesize_thread(union perf_event *comm_event,
 				      pid_t pid, int full,
 					  perf_event__handler_t process,
 				      struct perf_tool *tool,
-				      struct machine *machine, bool mmap_data)
+				      struct machine *machine,
+				      bool mmap_data,
+				      unsigned int proc_map_timeout)
 {
 	char filename[PATH_MAX];
 	DIR *tasks;
@@ -403,7 +426,8 @@ static int __event__synthesize_thread(union perf_event *comm_event,
 			return -1;
 
 		return perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid,
-							  process, machine, mmap_data);
+							  process, machine, mmap_data,
+							  proc_map_timeout);
 	}
 
 	if (machine__is_default_guest(machine))
@@ -444,7 +468,7 @@ static int __event__synthesize_thread(union perf_event *comm_event,
 		if (_pid == pid) {
 			/* process the parent's maps too */
 			rc = perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid,
-						process, machine, mmap_data);
+						process, machine, mmap_data, proc_map_timeout);
 			if (rc)
 				break;
 		}
@@ -458,7 +482,8 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool,
 				      struct thread_map *threads,
 				      perf_event__handler_t process,
 				      struct machine *machine,
-				      bool mmap_data)
+				      bool mmap_data,
+				      unsigned int proc_map_timeout)
 {
 	union perf_event *comm_event, *mmap_event, *fork_event;
 	int err = -1, thread, j;
@@ -481,7 +506,7 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool,
 					       fork_event,
 					       threads->map[thread], 0,
 					       process, tool, machine,
-					       mmap_data)) {
+					       mmap_data, proc_map_timeout)) {
 			err = -1;
 			break;
 		}
@@ -507,7 +532,7 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool,
 						       fork_event,
 						       comm_event->comm.pid, 0,
 						       process, tool, machine,
-						       mmap_data)) {
+						       mmap_data, proc_map_timeout)) {
 				err = -1;
 				break;
 			}
@@ -524,7 +549,9 @@ out:
 
 int perf_event__synthesize_threads(struct perf_tool *tool,
 				   perf_event__handler_t process,
-				   struct machine *machine, bool mmap_data)
+				   struct machine *machine,
+				   bool mmap_data,
+				   unsigned int proc_map_timeout)
 {
 	DIR *proc;
 	char proc_path[PATH_MAX];
@@ -564,7 +591,8 @@ int perf_event__synthesize_threads(struct perf_tool *tool,
  		 * one thread couldn't be synthesized.
  		 */
 		__event__synthesize_thread(comm_event, mmap_event, fork_event, pid,
-					   1, process, tool, machine, mmap_data);
+					   1, process, tool, machine, mmap_data,
+					   proc_map_timeout);
 	}
 
 	err = 0;
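
A hedged, self-contained sketch of the timeout scheme the hunks above add to perf_event__synthesize_mmap_events(): take a monotonic timestamp before the /proc/PID/maps read loop and bail out, marking the result truncated, once the elapsed time exceeds the per-thread budget. perf uses its rdclock() helper; clock_gettime() stands in here so the sketch compiles on its own.

	#include <stdio.h>
	#include <stdbool.h>
	#include <time.h>

	static unsigned long long monotonic_ns(void)
	{
		struct timespec ts;

		clock_gettime(CLOCK_MONOTONIC, &ts);
		return ts.tv_sec * 1000000000ULL + ts.tv_nsec;
	}

	/* Returns true if reading was cut short; the caller would set the misc bit. */
	static bool read_maps_with_timeout(FILE *fp, unsigned int timeout_ms)
	{
		unsigned long long start = monotonic_ns();
		unsigned long long budget = timeout_ms * 1000000ULL;
		char line[BUFSIZ];

		while (fgets(line, sizeof(line), fp)) {
			if (monotonic_ns() - start > budget)
				return true;
			/* parse one maps line here */
		}
		return false;
	}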

+ 7 - 3
tools/perf/util/event.h

@@ -265,6 +265,7 @@ struct events_stats {
 	u32 nr_unknown_id;
 	u32 nr_unprocessable_samples;
 	u32 nr_auxtrace_errors[PERF_AUXTRACE_ERROR_MAX];
+	u32 nr_proc_map_timeout;
 };
 
 struct attr_event {
@@ -383,10 +384,12 @@ typedef int (*perf_event__handler_t)(struct perf_tool *tool,
 int perf_event__synthesize_thread_map(struct perf_tool *tool,
 				      struct thread_map *threads,
 				      perf_event__handler_t process,
-				      struct machine *machine, bool mmap_data);
+				      struct machine *machine, bool mmap_data,
+				      unsigned int proc_map_timeout);
 int perf_event__synthesize_threads(struct perf_tool *tool,
 				   perf_event__handler_t process,
-				   struct machine *machine, bool mmap_data);
+				   struct machine *machine, bool mmap_data,
+				   unsigned int proc_map_timeout);
 int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
 				       perf_event__handler_t process,
 				       struct machine *machine);
@@ -468,7 +471,8 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
 				       pid_t pid, pid_t tgid,
 				       perf_event__handler_t process,
 				       struct machine *machine,
-				       bool mmap_data);
+				       bool mmap_data,
+				       unsigned int proc_map_timeout);
 
 size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp);
 size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp);

+ 25 - 3
tools/perf/util/machine.c

@@ -1890,14 +1890,36 @@ int machine__for_each_thread(struct machine *machine,
 	return rc;
 }
 
+int machines__for_each_thread(struct machines *machines,
+			      int (*fn)(struct thread *thread, void *p),
+			      void *priv)
+{
+	struct rb_node *nd;
+	int rc = 0;
+
+	rc = machine__for_each_thread(&machines->host, fn, priv);
+	if (rc != 0)
+		return rc;
+
+	for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) {
+		struct machine *machine = rb_entry(nd, struct machine, rb_node);
+
+		rc = machine__for_each_thread(machine, fn, priv);
+		if (rc != 0)
+			return rc;
+	}
+	return rc;
+}
+
 int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool,
 				  struct target *target, struct thread_map *threads,
-				  perf_event__handler_t process, bool data_mmap)
+				  perf_event__handler_t process, bool data_mmap,
+				  unsigned int proc_map_timeout)
 {
 	if (target__has_task(target))
-		return perf_event__synthesize_thread_map(tool, threads, process, machine, data_mmap);
+		return perf_event__synthesize_thread_map(tool, threads, process, machine, data_mmap, proc_map_timeout);
 	else if (target__has_cpu(target))
-		return perf_event__synthesize_threads(tool, process, machine, data_mmap);
+		return perf_event__synthesize_threads(tool, process, machine, data_mmap, proc_map_timeout);
 	/* command specified */
 	return 0;
 }

+ 9 - 3
tools/perf/util/machine.h

@@ -216,16 +216,22 @@ size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp);
 int machine__for_each_thread(struct machine *machine,
 			     int (*fn)(struct thread *thread, void *p),
 			     void *priv);
+int machines__for_each_thread(struct machines *machines,
+			      int (*fn)(struct thread *thread, void *p),
+			      void *priv);
 
 int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool,
 				  struct target *target, struct thread_map *threads,
-				  perf_event__handler_t process, bool data_mmap);
+				  perf_event__handler_t process, bool data_mmap,
+				  unsigned int proc_map_timeout);
 static inline
 int machine__synthesize_threads(struct machine *machine, struct target *target,
-				struct thread_map *threads, bool data_mmap)
+				struct thread_map *threads, bool data_mmap,
+				unsigned int proc_map_timeout)
 {
 	return __machine__synthesize_threads(machine, NULL, target, threads,
-					     perf_event__process, data_mmap);
+					     perf_event__process, data_mmap,
+					     proc_map_timeout);
 }
 
 pid_t machine__get_current_tid(struct machine *machine, int cpu);
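
A hedged usage sketch for the new iterator (not from this merge): machines__for_each_thread() walks every thread of the host machine and of all guest machines with one callback, stopping early if the callback returns non-zero. The in-tree user added by this series is perf_session__flush_thread_stacks() in util/session.c; the counting callback below is hypothetical and assumes perf's internal headers.

	#include "machine.h"
	#include "thread.h"

	/* Hypothetical callback: count every thread across host and guests. */
	static int count_thread(struct thread *thread, void *priv)
	{
		int *nr = priv;

		(void)thread;
		(*nr)++;
		return 0;	/* non-zero would abort the walk */
	}

	static int machines__count_threads(struct machines *machines, int *nr)
	{
		*nr = 0;
		return machines__for_each_thread(machines, count_thread, nr);
	}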

+ 33 - 0
tools/perf/util/session.c

@@ -16,6 +16,7 @@
 #include "perf_regs.h"
 #include "asm/bug.h"
 #include "auxtrace.h"
+#include "thread-stack.h"
 
 static int perf_session__deliver_event(struct perf_session *session,
 				       union perf_event *event,
@@ -1063,6 +1064,8 @@ static int machines__deliver_event(struct machines *machines,
 	case PERF_RECORD_MMAP:
 		return tool->mmap(tool, event, sample, machine);
 	case PERF_RECORD_MMAP2:
+		if (event->header.misc & PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT)
+			++evlist->stats.nr_proc_map_timeout;
 		return tool->mmap2(tool, event, sample, machine);
 	case PERF_RECORD_COMM:
 		return tool->comm(tool, event, sample, machine);
@@ -1359,6 +1362,30 @@ static void perf_session__warn_about_errors(const struct perf_session *session)
 		ui__warning("%u out of order events recorded.\n", oe->nr_unordered_events);
 
 	events_stats__auxtrace_error_warn(stats);
+
+	if (stats->nr_proc_map_timeout != 0) {
+		ui__warning("%d map information files for pre-existing threads were\n"
+			    "not processed; if there are samples for their addresses,\n"
+			    "those samples will not be resolved. You can find out which\n"
+			    "threads were affected by running with -v and redirecting\n"
+			    "the output to a file.\n"
+			    "Is the time limit for processing proc maps too short?\n"
+			    "Increase it with --proc-map-timeout\n",
+			    stats->nr_proc_map_timeout);
+	}
+}
+
+static int perf_session__flush_thread_stack(struct thread *thread,
+					    void *p __maybe_unused)
+{
+	return thread_stack__flush(thread);
+}
+
+static int perf_session__flush_thread_stacks(struct perf_session *session)
+{
+	return machines__for_each_thread(&session->machines,
+					 perf_session__flush_thread_stack,
+					 NULL);
 }
 
 volatile int session_done;
@@ -1450,6 +1477,9 @@ done:
 	if (err)
 		goto out_err;
 	err = auxtrace__flush_events(session, tool);
+	if (err)
+		goto out_err;
+	err = perf_session__flush_thread_stacks(session);
 out_err:
 	free(buf);
 	perf_session__warn_about_errors(session);
@@ -1600,6 +1630,9 @@ out:
 	if (err)
 		goto out_err;
 	err = auxtrace__flush_events(session, tool);
+	if (err)
+		goto out_err;
+	err = perf_session__flush_thread_stacks(session);
 out_err:
 	ui_progress__finish();
 	perf_session__warn_about_errors(session);

+ 3 - 5
tools/perf/util/sort.c

@@ -182,18 +182,16 @@ static int64_t _sort__addr_cmp(u64 left_ip, u64 right_ip)
 
 static int64_t _sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r)
 {
-	u64 ip_l, ip_r;
-
 	if (!sym_l || !sym_r)
 		return cmp_null(sym_l, sym_r);
 
 	if (sym_l == sym_r)
 		return 0;
 
-	ip_l = sym_l->start;
-	ip_r = sym_r->start;
+	if (sym_l->start != sym_r->start)
+		return (int64_t)(sym_r->start - sym_l->start);
 
-	return (int64_t)(ip_r - ip_l);
+	return (int64_t)(sym_r->end - sym_l->end);
 }
 
 static int64_t

+ 13 - 5
tools/perf/util/thread-stack.c

@@ -219,7 +219,7 @@ static int thread_stack__call_return(struct thread *thread,
 	return crp->process(&cr, crp->data);
 }
 
-static int thread_stack__flush(struct thread *thread, struct thread_stack *ts)
+static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts)
 {
 	struct call_return_processor *crp = ts->crp;
 	int err;
@@ -242,6 +242,14 @@ static int thread_stack__flush(struct thread *thread, struct thread_stack *ts)
 	return 0;
 }
 
+int thread_stack__flush(struct thread *thread)
+{
+	if (thread->ts)
+		return __thread_stack__flush(thread, thread->ts);
+
+	return 0;
+}
+
 int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
 			u64 to_ip, u16 insn_len, u64 trace_nr)
 {
@@ -264,7 +272,7 @@ int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
 	 */
 	if (trace_nr != thread->ts->trace_nr) {
 		if (thread->ts->trace_nr)
-			thread_stack__flush(thread, thread->ts);
+			__thread_stack__flush(thread, thread->ts);
 		thread->ts->trace_nr = trace_nr;
 	}
 
@@ -297,7 +305,7 @@ void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr)
 
 	if (trace_nr != thread->ts->trace_nr) {
 		if (thread->ts->trace_nr)
-			thread_stack__flush(thread, thread->ts);
+			__thread_stack__flush(thread, thread->ts);
 		thread->ts->trace_nr = trace_nr;
 	}
 }
@@ -305,7 +313,7 @@ void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr)
 void thread_stack__free(struct thread *thread)
 {
 	if (thread->ts) {
-		thread_stack__flush(thread, thread->ts);
+		__thread_stack__flush(thread, thread->ts);
 		zfree(&thread->ts->stack);
 		zfree(&thread->ts);
 	}
@@ -689,7 +697,7 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
 
 	/* Flush stack on exec */
 	if (ts->comm != comm && thread->pid_ == thread->tid) {
-		err = thread_stack__flush(thread, ts);
+		err = __thread_stack__flush(thread, ts);
 		if (err)
 			return err;
 		ts->comm = comm;

+ 1 - 0
tools/perf/util/thread-stack.h

@@ -96,6 +96,7 @@ int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
 void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr);
 void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
 			  size_t sz, u64 ip);
+int thread_stack__flush(struct thread *thread);
 void thread_stack__free(struct thread *thread);
 
 struct call_return_processor *