Browse Source

Merge tag 'perf-core-for-mingo-20160224' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements from Arnaldo Carvalho de Melo:

User visible changes:

  - Hierarchy histogram mode for 'perf top' and 'perf report', showing multiple
    levels, one per --sort entry: (Namhyung Kim)

    On a mostly idle system:

    # perf top --hierarchy -s comm,dso

    Then expand some levels and use 'P' to take a snapshot:

    # cat perf.hist.0
    -  92.32%         perf
          58.20%         perf
          22.29%         libc-2.22.so
           5.97%         [kernel]
           4.18%         libelf-0.165.so
           1.69%         [unknown]
    -   4.71%         qemu-system-x86
           3.10%         [kernel]
           1.60%         qemu-system-x86_64 (deleted)
    +   2.97%         swapper
    #

  - Check availability of memory events in 'perf mem': (Jiri Olsa)

    On an Intel Broadwell machine:

    # perf mem record -e list
    ldlat-loads : available
    ldlat-stores: available
    #

  - Decode data_src values (e.g. perf.data files generated by 'perf mem record')
    in 'perf script': (Jiri Olsa)

    # perf script
      perf 693 [1] 4.088652: 1 cpu/mem-loads,ldlat=30/P: ffff88007d0b0f40 68100142 L1 hit|SNP None|TLB L1 or L2 hit|LCK No <SNIP>
                                                                          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  - Print bpf-output events in 'perf script': (Wang Nan)

    # perf record -e bpf-output/no-inherit,name=evt/ -e ./test_bpf_output_3.c/map:channel.event=evt/ usleep 1000
    # perf script
       usleep  4882 21384.532523:   evt:  ffffffff810e97d1 sys_nanosleep ([kernel.kallsyms])
        BPF output: 0000: 52 61 69 73 65 20 61 20  Raise a
                    0008: 42 50 46 20 65 76 65 6e  BPF even
                    0010: 74 21 00 00              t!..
        BPF string: "Raise a BPF event!"
    #

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Ingo Molnar 9 years ago
parent
commit
06466212a6

+ 3 - 0
tools/perf/Documentation/perf-report.txt

@@ -401,6 +401,9 @@ include::itrace.txt[]
 --raw-trace::
 	When displaying traceevent output, do not use print fmt or plugins.
 
+--hierarchy::
+	Enable hierarchical output.
+
 include::callchain-overhead-calculation.txt[]
 
 SEE ALSO

+ 3 - 0
tools/perf/Documentation/perf-top.txt

@@ -233,6 +233,9 @@ Default is to monitor all CPUS.
 --raw-trace::
 	When displaying traceevent output, do not use print fmt or plugins.
 
+--hierarchy::
+	Enable hierarchy output.
+
 INTERACTIVE PROMPTING KEYS
 --------------------------
 

+ 1 - 0
tools/perf/Documentation/tips.txt

@@ -27,3 +27,4 @@ Skip collecing build-id when recording: perf record -B
 To change sampling frequency to 100 Hz: perf record -F 100
 See assembly instructions with percentage: perf annotate <symbol>
 If you prefer Intel style assembly, try: perf annotate -M intel
+For hierarchical output, try: perf report --hierarchy

+ 17 - 5
tools/perf/builtin-mem.c

@@ -40,10 +40,11 @@ static int parse_record_events(const struct option *opt,
 	for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
 		struct perf_mem_event *e = &perf_mem_events[j];
 
-		fprintf(stderr, "%-20s%s",
-			e->tag, verbose ? "" : "\n");
-		if (verbose)
-			fprintf(stderr, " [%s]\n", e->name);
+		fprintf(stderr, "%-13s%-*s%s\n",
+			e->tag,
+			verbose ? 25 : 0,
+			verbose ? perf_mem_events__name(j) : "",
+			e->supported ? ": available" : "");
 	}
 	exit(0);
 }
@@ -92,8 +93,14 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
 		if (!perf_mem_events[j].record)
 			continue;
 
+		if (!perf_mem_events[j].supported) {
+			pr_err("failed: event '%s' not supported\n",
+			       perf_mem_events__name(j));
+			return -1;
+		}
+
 		rec_argv[i++] = "-e";
-		rec_argv[i++] = perf_mem_events[j].name;
+		rec_argv[i++] = perf_mem_events__name(j);
 	};
 
 	for (j = 0; j < argc; j++, i++)
@@ -355,6 +362,11 @@ int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused)
 		NULL
 	};
 
+	if (perf_mem_events__init()) {
+		pr_err("failed: memory events not supported\n");
+		return -1;
+	}
+
 	argc = parse_options_subcommand(argc, argv, mem_options, mem_subcommands,
 					mem_usage, PARSE_OPT_STOP_AT_NON_OPTION);
 

+ 17 - 0
tools/perf/builtin-report.c

@@ -811,6 +811,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
 		    "only show processor socket that match with this filter"),
 	OPT_BOOLEAN(0, "raw-trace", &symbol_conf.raw_trace,
 		    "Show raw trace event output (do not use print fmt or plugins)"),
+	OPT_BOOLEAN(0, "hierarchy", &symbol_conf.report_hierarchy,
+		    "Show entries in a hierarchy"),
 	OPT_END()
 	};
 	struct perf_data_file file = {
@@ -920,6 +922,21 @@ repeat:
 		symbol_conf.cumulate_callchain = false;
 	}
 
+	if (symbol_conf.report_hierarchy) {
+		/* disable incompatible options */
+		symbol_conf.event_group = false;
+		symbol_conf.cumulate_callchain = false;
+
+		if (field_order) {
+			pr_err("Error: --hierarchy and --fields options cannot be used together\n");
+			parse_options_usage(report_usage, options, "F", 1);
+			parse_options_usage(NULL, options, "hierarchy", 0);
+			goto error;
+		}
+
+		sort__need_collapse = true;
+	}
+
 	/* Force tty output for header output and per-thread stat. */
 	if (report.header || report.header_only || report.show_threads)
 		use_browser = 0;

+ 107 - 6
tools/perf/builtin-script.c

@@ -23,6 +23,7 @@
 #include "util/stat.h"
 #include <linux/bitmap.h>
 #include "asm/bug.h"
+#include "util/mem-events.h"
 
 static char const		*script_name;
 static char const		*generate_script_lang;
@@ -60,6 +61,7 @@ enum perf_output_field {
 	PERF_OUTPUT_BRSTACKSYM	    = 1U << 16,
 	PERF_OUTPUT_DATA_SRC	    = 1U << 17,
 	PERF_OUTPUT_WEIGHT	    = 1U << 18,
+	PERF_OUTPUT_BPF_OUTPUT	    = 1U << 19,
 };
 
 struct output_option {
@@ -85,6 +87,7 @@ struct output_option {
 	{.str = "brstacksym", .field = PERF_OUTPUT_BRSTACKSYM},
 	{.str = "data_src", .field = PERF_OUTPUT_DATA_SRC},
 	{.str = "weight",   .field = PERF_OUTPUT_WEIGHT},
+	{.str = "bpf-output",   .field = PERF_OUTPUT_BPF_OUTPUT},
 };
 
 /* default set to maintain compatibility with current format */
@@ -105,7 +108,7 @@ static struct {
 			      PERF_OUTPUT_SYM | PERF_OUTPUT_DSO |
 			      PERF_OUTPUT_PERIOD,
 
-		.invalid_fields = PERF_OUTPUT_TRACE,
+		.invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT,
 	},
 
 	[PERF_TYPE_SOFTWARE] = {
@@ -115,7 +118,7 @@ static struct {
 			      PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
 			      PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP |
 			      PERF_OUTPUT_SYM | PERF_OUTPUT_DSO |
-			      PERF_OUTPUT_PERIOD,
+			      PERF_OUTPUT_PERIOD | PERF_OUTPUT_BPF_OUTPUT,
 
 		.invalid_fields = PERF_OUTPUT_TRACE,
 	},
@@ -125,7 +128,7 @@ static struct {
 
 		.fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID |
 				  PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
-				  PERF_OUTPUT_EVNAME | PERF_OUTPUT_TRACE,
+				  PERF_OUTPUT_EVNAME | PERF_OUTPUT_TRACE
 	},
 
 	[PERF_TYPE_RAW] = {
@@ -138,7 +141,7 @@ static struct {
 			      PERF_OUTPUT_PERIOD |  PERF_OUTPUT_ADDR |
 			      PERF_OUTPUT_DATA_SRC | PERF_OUTPUT_WEIGHT,
 
-		.invalid_fields = PERF_OUTPUT_TRACE,
+		.invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT,
 	},
 
 	[PERF_TYPE_BREAKPOINT] = {
@@ -150,7 +153,7 @@ static struct {
 			      PERF_OUTPUT_SYM | PERF_OUTPUT_DSO |
 			      PERF_OUTPUT_PERIOD,
 
-		.invalid_fields = PERF_OUTPUT_TRACE,
+		.invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT,
 	},
 };
 
@@ -623,6 +626,84 @@ static void print_sample_flags(u32 flags)
 	printf("  %-4s ", str);
 }
 
+struct printer_data {
+	int line_no;
+	bool hit_nul;
+	bool is_printable;
+};
+
+static void
+print_sample_bpf_output_printer(enum binary_printer_ops op,
+				unsigned int val,
+				void *extra)
+{
+	unsigned char ch = (unsigned char)val;
+	struct printer_data *printer_data = extra;
+
+	switch (op) {
+	case BINARY_PRINT_DATA_BEGIN:
+		printf("\n");
+		break;
+	case BINARY_PRINT_LINE_BEGIN:
+		printf("%17s", !printer_data->line_no ? "BPF output:" :
+						        "           ");
+		break;
+	case BINARY_PRINT_ADDR:
+		printf(" %04x:", val);
+		break;
+	case BINARY_PRINT_NUM_DATA:
+		printf(" %02x", val);
+		break;
+	case BINARY_PRINT_NUM_PAD:
+		printf("   ");
+		break;
+	case BINARY_PRINT_SEP:
+		printf("  ");
+		break;
+	case BINARY_PRINT_CHAR_DATA:
+		if (printer_data->hit_nul && ch)
+			printer_data->is_printable = false;
+
+		if (!isprint(ch)) {
+			printf("%c", '.');
+
+			if (!printer_data->is_printable)
+				break;
+
+			if (ch == '\0')
+				printer_data->hit_nul = true;
+			else
+				printer_data->is_printable = false;
+		} else {
+			printf("%c", ch);
+		}
+		break;
+	case BINARY_PRINT_CHAR_PAD:
+		printf(" ");
+		break;
+	case BINARY_PRINT_LINE_END:
+		printf("\n");
+		printer_data->line_no++;
+		break;
+	case BINARY_PRINT_DATA_END:
+	default:
+		break;
+	}
+}
+
+static void print_sample_bpf_output(struct perf_sample *sample)
+{
+	unsigned int nr_bytes = sample->raw_size;
+	struct printer_data printer_data = {0, false, true};
+
+	print_binary(sample->raw_data, nr_bytes, 8,
+		     print_sample_bpf_output_printer, &printer_data);
+
+	if (printer_data.is_printable && printer_data.hit_nul)
+		printf("%17s \"%s\"\n", "BPF string:",
+		       (char *)(sample->raw_data));
+}
+
 struct perf_script {
 	struct perf_tool	tool;
 	struct perf_session	*session;
@@ -649,6 +730,23 @@ static int perf_evlist__max_name_len(struct perf_evlist *evlist)
 	return max;
 }
 
+static size_t data_src__printf(u64 data_src)
+{
+	struct mem_info mi = { .data_src.val = data_src };
+	char decode[100];
+	char out[100];
+	static int maxlen;
+	int len;
+
+	perf_script__meminfo_scnprintf(decode, 100, &mi);
+
+	len = scnprintf(out, 100, "%16" PRIx64 " %s", data_src, decode);
+	if (maxlen < len)
+		maxlen = len;
+
+	return printf("%-*s", maxlen, out);
+}
+
 static void process_event(struct perf_script *script, union perf_event *event,
 			  struct perf_sample *sample, struct perf_evsel *evsel,
 			  struct addr_location *al)
@@ -689,7 +787,7 @@ static void process_event(struct perf_script *script, union perf_event *event,
 		print_sample_addr(event, sample, thread, attr);
 
 	if (PRINT_FIELD(DATA_SRC))
-		printf("%16" PRIx64, sample->data_src);
+		data_src__printf(sample->data_src);
 
 	if (PRINT_FIELD(WEIGHT))
 		printf("%16" PRIu64, sample->weight);
@@ -713,6 +811,9 @@ static void process_event(struct perf_script *script, union perf_event *event,
 	else if (PRINT_FIELD(BRSTACKSYM))
 		print_sample_brstacksym(event, sample, thread, attr);
 
+	if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT))
+		print_sample_bpf_output(sample);
+
 	printf("\n");
 }
 

+ 15 - 0
tools/perf/builtin-top.c

@@ -1214,6 +1214,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
 		     parse_branch_stack),
 	OPT_BOOLEAN(0, "raw-trace", &symbol_conf.raw_trace,
 		    "Show raw trace event output (do not use print fmt or plugins)"),
+	OPT_BOOLEAN(0, "hierarchy", &symbol_conf.report_hierarchy,
+		    "Show entries in a hierarchy"),
 	OPT_END()
 	};
 	const char * const top_usage[] = {
@@ -1241,6 +1243,19 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
 		goto out_delete_evlist;
 	}
 
+	if (symbol_conf.report_hierarchy) {
+		/* disable incompatible options */
+		symbol_conf.event_group = false;
+		symbol_conf.cumulate_callchain = false;
+
+		if (field_order) {
+			pr_err("Error: --hierarchy and --fields options cannot be used together\n");
+			parse_options_usage(top_usage, options, "fields", 0);
+			parse_options_usage(NULL, options, "hierarchy", 0);
+			goto out_delete_evlist;
+		}
+	}
+
 	sort__mode = SORT_MODE__TOP;
 	/* display thread wants entries to be collapsed in a different tree */
 	sort__need_collapse = 1;

+ 460 - 44
tools/perf/ui/browsers/hists.c

@@ -32,6 +32,7 @@ struct hist_browser {
 	bool		     show_headers;
 	float		     min_pcnt;
 	u64		     nr_non_filtered_entries;
+	u64		     nr_hierarchy_entries;
 	u64		     nr_callchain_rows;
 };
 
@@ -58,11 +59,11 @@ static int hist_browser__get_folding(struct hist_browser *browser)
 
 	for (nd = rb_first(&hists->entries);
 	     (nd = hists__filter_entries(nd, browser->min_pcnt)) != NULL;
-	     nd = rb_next(nd)) {
+	     nd = rb_hierarchy_next(nd)) {
 		struct hist_entry *he =
 			rb_entry(nd, struct hist_entry, rb_node);
 
-		if (he->unfolded)
+		if (he->leaf && he->unfolded)
 			unfolded_rows += he->nr_rows;
 	}
 	return unfolded_rows;
@@ -72,7 +73,9 @@ static u32 hist_browser__nr_entries(struct hist_browser *hb)
 {
 	u32 nr_entries;
 
-	if (hist_browser__has_filter(hb))
+	if (symbol_conf.report_hierarchy)
+		nr_entries = hb->nr_hierarchy_entries;
+	else if (hist_browser__has_filter(hb))
 		nr_entries = hb->nr_non_filtered_entries;
 	else
 		nr_entries = hb->hists->nr_entries;
@@ -247,6 +250,35 @@ static int callchain__count_rows(struct rb_root *chain)
 	return n;
 }
 
+static int hierarchy_count_rows(struct hist_browser *hb, struct hist_entry *he,
+				bool include_children)
+{
+	int count = 0;
+	struct rb_node *node;
+	struct hist_entry *child;
+
+	if (he->leaf)
+		return callchain__count_rows(&he->sorted_chain);
+
+	node = rb_first(&he->hroot_out);
+	while (node) {
+		float percent;
+
+		child = rb_entry(node, struct hist_entry, rb_node);
+		percent = hist_entry__get_percent_limit(child);
+
+		if (!child->filtered && percent >= hb->min_pcnt) {
+			count++;
+
+			if (include_children && child->unfolded)
+				count += hierarchy_count_rows(hb, child, true);
+		}
+
+		node = rb_next(node);
+	}
+	return count;
+}
+
 static bool hist_entry__toggle_fold(struct hist_entry *he)
 {
 	if (!he)
@@ -326,11 +358,17 @@ static void callchain__init_have_children(struct rb_root *root)
 
 static void hist_entry__init_have_children(struct hist_entry *he)
 {
-	if (!he->init_have_children) {
+	if (he->init_have_children)
+		return;
+
+	if (he->leaf) {
 		he->has_children = !RB_EMPTY_ROOT(&he->sorted_chain);
 		callchain__init_have_children(&he->sorted_chain);
-		he->init_have_children = true;
+	} else {
+		he->has_children = !RB_EMPTY_ROOT(&he->hroot_out);
 	}
+
+	he->init_have_children = true;
 }
 
 static bool hist_browser__toggle_fold(struct hist_browser *browser)
@@ -349,17 +387,41 @@ static bool hist_browser__toggle_fold(struct hist_browser *browser)
 		has_children = callchain_list__toggle_fold(cl);
 
 	if (has_children) {
+		int child_rows = 0;
+
 		hist_entry__init_have_children(he);
 		browser->b.nr_entries -= he->nr_rows;
-		browser->nr_callchain_rows -= he->nr_rows;
 
-		if (he->unfolded)
-			he->nr_rows = callchain__count_rows(&he->sorted_chain);
+		if (he->leaf)
+			browser->nr_callchain_rows -= he->nr_rows;
 		else
+			browser->nr_hierarchy_entries -= he->nr_rows;
+
+		if (symbol_conf.report_hierarchy)
+			child_rows = hierarchy_count_rows(browser, he, true);
+
+		if (he->unfolded) {
+			if (he->leaf)
+				he->nr_rows = callchain__count_rows(&he->sorted_chain);
+			else
+				he->nr_rows = hierarchy_count_rows(browser, he, false);
+
+			/* account grand children */
+			if (symbol_conf.report_hierarchy)
+				browser->b.nr_entries += child_rows - he->nr_rows;
+		} else {
+			if (symbol_conf.report_hierarchy)
+				browser->b.nr_entries -= child_rows - he->nr_rows;
+
 			he->nr_rows = 0;
+		}
 
 		browser->b.nr_entries += he->nr_rows;
-		browser->nr_callchain_rows += he->nr_rows;
+
+		if (he->leaf)
+			browser->nr_callchain_rows += he->nr_rows;
+		else
+			browser->nr_hierarchy_entries += he->nr_rows;
 
 		return true;
 	}
@@ -422,13 +484,38 @@ static int callchain__set_folding(struct rb_root *chain, bool unfold)
 	return n;
 }
 
-static void hist_entry__set_folding(struct hist_entry *he, bool unfold)
+static int hierarchy_set_folding(struct hist_browser *hb, struct hist_entry *he,
+				 bool unfold __maybe_unused)
+{
+	float percent;
+	struct rb_node *nd;
+	struct hist_entry *child;
+	int n = 0;
+
+	for (nd = rb_first(&he->hroot_out); nd; nd = rb_next(nd)) {
+		child = rb_entry(nd, struct hist_entry, rb_node);
+		percent = hist_entry__get_percent_limit(child);
+		if (!child->filtered && percent >= hb->min_pcnt)
+			n++;
+	}
+
+	return n;
+}
+
+static void hist_entry__set_folding(struct hist_entry *he,
+				    struct hist_browser *hb, bool unfold)
 {
 	hist_entry__init_have_children(he);
 	he->unfolded = unfold ? he->has_children : false;
 
 	if (he->has_children) {
-		int n = callchain__set_folding(&he->sorted_chain, unfold);
+		int n;
+
+		if (he->leaf)
+			n = callchain__set_folding(&he->sorted_chain, unfold);
+		else
+			n = hierarchy_set_folding(hb, he, unfold);
+
 		he->nr_rows = unfold ? n : 0;
 	} else
 		he->nr_rows = 0;
@@ -438,19 +525,32 @@ static void
 __hist_browser__set_folding(struct hist_browser *browser, bool unfold)
 {
 	struct rb_node *nd;
-	struct hists *hists = browser->hists;
+	struct hist_entry *he;
+	double percent;
 
-	for (nd = rb_first(&hists->entries);
-	     (nd = hists__filter_entries(nd, browser->min_pcnt)) != NULL;
-	     nd = rb_next(nd)) {
-		struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node);
-		hist_entry__set_folding(he, unfold);
-		browser->nr_callchain_rows += he->nr_rows;
+	nd = rb_first(&browser->hists->entries);
+	while (nd) {
+		he = rb_entry(nd, struct hist_entry, rb_node);
+
+		/* set folding state even if it's currently folded */
+		nd = __rb_hierarchy_next(nd, HMD_FORCE_CHILD);
+
+		hist_entry__set_folding(he, browser, unfold);
+
+		percent = hist_entry__get_percent_limit(he);
+		if (he->filtered || percent < browser->min_pcnt)
+			continue;
+
+		if (!he->depth || unfold)
+			browser->nr_hierarchy_entries++;
+		if (he->leaf)
+			browser->nr_callchain_rows += he->nr_rows;
 	}
 }
 
 static void hist_browser__set_folding(struct hist_browser *browser, bool unfold)
 {
+	browser->nr_hierarchy_entries = 0;
 	browser->nr_callchain_rows = 0;
 	__hist_browser__set_folding(browser, unfold);
 
@@ -1160,6 +1260,158 @@ static int hist_browser__show_entry(struct hist_browser *browser,
 	return printed;
 }
 
+static int hist_browser__show_hierarchy_entry(struct hist_browser *browser,
+					      struct hist_entry *entry,
+					      unsigned short row,
+					      int level, int nr_sort_keys)
+{
+	int printed = 0;
+	int width = browser->b.width;
+	char folded_sign = ' ';
+	bool current_entry = ui_browser__is_current_entry(&browser->b, row);
+	off_t row_offset = entry->row_offset;
+	bool first = true;
+	struct perf_hpp_fmt *fmt;
+	struct hpp_arg arg = {
+		.b		= &browser->b,
+		.current_entry	= current_entry,
+	};
+	int column = 0;
+	int hierarchy_indent = (nr_sort_keys - 1) * HIERARCHY_INDENT;
+
+	if (current_entry) {
+		browser->he_selection = entry;
+		browser->selection = &entry->ms;
+	}
+
+	hist_entry__init_have_children(entry);
+	folded_sign = hist_entry__folded(entry);
+	arg.folded_sign = folded_sign;
+
+	if (entry->leaf && row_offset) {
+		row_offset--;
+		goto show_callchain;
+	}
+
+	hist_browser__gotorc(browser, row, 0);
+
+	if (current_entry && browser->b.navkeypressed)
+		ui_browser__set_color(&browser->b, HE_COLORSET_SELECTED);
+	else
+		ui_browser__set_color(&browser->b, HE_COLORSET_NORMAL);
+
+	ui_browser__write_nstring(&browser->b, "", level * HIERARCHY_INDENT);
+	width -= level * HIERARCHY_INDENT;
+
+	hists__for_each_format(entry->hists, fmt) {
+		char s[2048];
+		struct perf_hpp hpp = {
+			.buf		= s,
+			.size		= sizeof(s),
+			.ptr		= &arg,
+		};
+
+		if (perf_hpp__should_skip(fmt, entry->hists) ||
+		    column++ < browser->b.horiz_scroll)
+			continue;
+
+		if (perf_hpp__is_sort_entry(fmt) ||
+		    perf_hpp__is_dynamic_entry(fmt))
+			break;
+
+		if (current_entry && browser->b.navkeypressed) {
+			ui_browser__set_color(&browser->b,
+					      HE_COLORSET_SELECTED);
+		} else {
+			ui_browser__set_color(&browser->b,
+					      HE_COLORSET_NORMAL);
+		}
+
+		if (first) {
+			ui_browser__printf(&browser->b, "%c", folded_sign);
+			width--;
+			first = false;
+		} else {
+			ui_browser__printf(&browser->b, "  ");
+			width -= 2;
+		}
+
+		if (fmt->color) {
+			int ret = fmt->color(fmt, &hpp, entry);
+			hist_entry__snprintf_alignment(entry, &hpp, fmt, ret);
+			/*
+			 * fmt->color() already used ui_browser to
+			 * print the non alignment bits, skip it (+ret):
+			 */
+			ui_browser__printf(&browser->b, "%s", s + ret);
+		} else {
+			int ret = fmt->entry(fmt, &hpp, entry);
+			hist_entry__snprintf_alignment(entry, &hpp, fmt, ret);
+			ui_browser__printf(&browser->b, "%s", s);
+		}
+		width -= hpp.buf - s;
+	}
+
+	ui_browser__write_nstring(&browser->b, "", hierarchy_indent);
+	width -= hierarchy_indent;
+
+	if (column >= browser->b.horiz_scroll) {
+		char s[2048];
+		struct perf_hpp hpp = {
+			.buf		= s,
+			.size		= sizeof(s),
+			.ptr		= &arg,
+		};
+
+		if (current_entry && browser->b.navkeypressed) {
+			ui_browser__set_color(&browser->b,
+					      HE_COLORSET_SELECTED);
+		} else {
+			ui_browser__set_color(&browser->b,
+					      HE_COLORSET_NORMAL);
+		}
+
+		ui_browser__write_nstring(&browser->b, "", 2);
+		width -= 2;
+
+		/*
+		 * No need to call hist_entry__snprintf_alignment()
+		 * since this fmt is always the last column in the
+		 * hierarchy mode.
+		 */
+		fmt = entry->fmt;
+		if (fmt->color) {
+			width -= fmt->color(fmt, &hpp, entry);
+		} else {
+			width -= fmt->entry(fmt, &hpp, entry);
+			ui_browser__printf(&browser->b, "%s", s);
+		}
+	}
+
+	/* The scroll bar isn't being used */
+	if (!browser->b.navkeypressed)
+		width += 1;
+
+	ui_browser__write_nstring(&browser->b, "", width);
+
+	++row;
+	++printed;
+
+show_callchain:
+	if (entry->leaf && folded_sign == '-' && row != browser->b.rows) {
+		struct callchain_print_arg carg = {
+			.row_offset = row_offset,
+		};
+
+		printed += hist_browser__show_callchain(browser, entry,
+					level + 1, row,
+					hist_browser__show_callchain_entry, &carg,
+					hist_browser__check_output_full);
+	}
+
+	return printed;
+}
+
 static int advance_hpp_check(struct perf_hpp *hpp, int inc)
 {
 	advance_hpp(hpp, inc);
@@ -1199,11 +1451,80 @@ static int hists_browser__scnprintf_headers(struct hist_browser *browser, char *
 	return ret;
 }
 
+static int hists_browser__scnprintf_hierarchy_headers(struct hist_browser *browser, char *buf, size_t size)
+{
+	struct hists *hists = browser->hists;
+	struct perf_hpp dummy_hpp = {
+		.buf    = buf,
+		.size   = size,
+	};
+	struct perf_hpp_fmt *fmt;
+	size_t ret = 0;
+	int column = 0;
+	int nr_sort_keys = hists->hpp_list->nr_sort_keys;
+	bool first = true;
+
+	ret = scnprintf(buf, size, " ");
+	if (advance_hpp_check(&dummy_hpp, ret))
+		return ret;
+
+	hists__for_each_format(hists, fmt) {
+		if (column++ < browser->b.horiz_scroll)
+			continue;
+
+		if (perf_hpp__is_sort_entry(fmt) || perf_hpp__is_dynamic_entry(fmt))
+			break;
+
+		ret = fmt->header(fmt, &dummy_hpp, hists_to_evsel(hists));
+		if (advance_hpp_check(&dummy_hpp, ret))
+			break;
+
+		ret = scnprintf(dummy_hpp.buf, dummy_hpp.size, "  ");
+		if (advance_hpp_check(&dummy_hpp, ret))
+			break;
+	}
+
+	ret = scnprintf(dummy_hpp.buf, dummy_hpp.size, "%*s",
+			(nr_sort_keys - 1) * HIERARCHY_INDENT, "");
+	if (advance_hpp_check(&dummy_hpp, ret))
+		return ret;
+
+	hists__for_each_format(hists, fmt) {
+		if (!perf_hpp__is_sort_entry(fmt) && !perf_hpp__is_dynamic_entry(fmt))
+			continue;
+		if (perf_hpp__should_skip(fmt, hists))
+			continue;
+
+		if (first) {
+			first = false;
+		} else {
+			ret = scnprintf(dummy_hpp.buf, dummy_hpp.size, " / ");
+			if (advance_hpp_check(&dummy_hpp, ret))
+				break;
+		}
+
+		ret = fmt->header(fmt, &dummy_hpp, hists_to_evsel(hists));
+		dummy_hpp.buf[ret] = '\0';
+		rtrim(dummy_hpp.buf);
+
+		ret = strlen(dummy_hpp.buf);
+		if (advance_hpp_check(&dummy_hpp, ret))
+			break;
+	}
+
+	return ret;
+}
+
 static void hist_browser__show_headers(struct hist_browser *browser)
 {
 	char headers[1024];
 
-	hists_browser__scnprintf_headers(browser, headers, sizeof(headers));
+	if (symbol_conf.report_hierarchy)
+		hists_browser__scnprintf_hierarchy_headers(browser, headers,
+							   sizeof(headers));
+	else
+		hists_browser__scnprintf_headers(browser, headers,
+						 sizeof(headers));
 	ui_browser__gotorc(&browser->b, 0, 0);
 	ui_browser__set_color(&browser->b, HE_COLORSET_ROOT);
 	ui_browser__write_nstring(&browser->b, headers, browser->b.width + 1);
@@ -1225,6 +1546,7 @@ static unsigned int hist_browser__refresh(struct ui_browser *browser)
 	u16 header_offset = 0;
 	struct rb_node *nd;
 	struct hist_browser *hb = container_of(browser, struct hist_browser, b);
+	int nr_sort = hb->hists->hpp_list->nr_sort_keys;
 
 	if (hb->show_headers) {
 		hist_browser__show_headers(hb);
@@ -1235,18 +1557,28 @@ static unsigned int hist_browser__refresh(struct ui_browser *browser)
 	hb->he_selection = NULL;
 	hb->selection = NULL;
 
-	for (nd = browser->top; nd; nd = rb_next(nd)) {
+	for (nd = browser->top; nd; nd = rb_hierarchy_next(nd)) {
 		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
 		float percent;
 
-		if (h->filtered)
+		if (h->filtered) {
+			/* let it move to sibling */
+			h->unfolded = false;
 			continue;
+		}
 
 		percent = hist_entry__get_percent_limit(h);
 		if (percent < hb->min_pcnt)
 			continue;
 
-		row += hist_browser__show_entry(hb, h, row);
+		if (symbol_conf.report_hierarchy) {
+			row += hist_browser__show_hierarchy_entry(hb, h, row,
+								  h->depth,
+								  nr_sort);
+		} else {
+			row += hist_browser__show_entry(hb, h, row);
+		}
+
 		if (row == browser->rows)
 			break;
 	}
@@ -1264,7 +1596,14 @@ static struct rb_node *hists__filter_entries(struct rb_node *nd,
 		if (!h->filtered && percent >= min_pcnt)
 			return nd;
 
-		nd = rb_next(nd);
+		/*
+		 * If it's filtered, its all children also were filtered.
+		 * So move to sibling node.
+		 */
+		if (rb_next(nd))
+			nd = rb_next(nd);
+		else
+			nd = rb_hierarchy_next(nd);
 	}
 
 	return NULL;
@@ -1280,7 +1619,7 @@ static struct rb_node *hists__filter_prev_entries(struct rb_node *nd,
 		if (!h->filtered && percent >= min_pcnt)
 			return nd;
 
-		nd = rb_prev(nd);
+		nd = rb_hierarchy_prev(nd);
 	}
 
 	return NULL;
@@ -1310,8 +1649,8 @@ static void ui_browser__hists_seek(struct ui_browser *browser,
 		nd = browser->top;
 		goto do_offset;
 	case SEEK_END:
-		nd = hists__filter_prev_entries(rb_last(browser->entries),
-						hb->min_pcnt);
+		nd = rb_hierarchy_last(rb_last(browser->entries));
+		nd = hists__filter_prev_entries(nd, hb->min_pcnt);
 		first = false;
 		break;
 	default:
@@ -1345,7 +1684,7 @@ do_offset:
 	if (offset > 0) {
 		do {
 			h = rb_entry(nd, struct hist_entry, rb_node);
-			if (h->unfolded) {
+			if (h->unfolded && h->leaf) {
 				u16 remaining = h->nr_rows - h->row_offset;
 				if (offset > remaining) {
 					offset -= remaining;
@@ -1357,7 +1696,8 @@ do_offset:
 					break;
 				}
 			}
-			nd = hists__filter_entries(rb_next(nd), hb->min_pcnt);
+			nd = hists__filter_entries(rb_hierarchy_next(nd),
+						   hb->min_pcnt);
 			if (nd == NULL)
 				break;
 			--offset;
@@ -1366,7 +1706,7 @@ do_offset:
 	} else if (offset < 0) {
 		while (1) {
 			h = rb_entry(nd, struct hist_entry, rb_node);
-			if (h->unfolded) {
+			if (h->unfolded && h->leaf) {
 				if (first) {
 					if (-offset > h->row_offset) {
 						offset += h->row_offset;
@@ -1390,7 +1730,7 @@ do_offset:
 				}
 			}
 
-			nd = hists__filter_prev_entries(rb_prev(nd),
+			nd = hists__filter_prev_entries(rb_hierarchy_prev(nd),
 							hb->min_pcnt);
 			if (nd == NULL)
 				break;
@@ -1403,7 +1743,7 @@ do_offset:
 				 * row_offset at its last entry.
 				 */
 				h = rb_entry(nd, struct hist_entry, rb_node);
-				if (h->unfolded)
+				if (h->unfolded && h->leaf)
 					h->row_offset = h->nr_rows;
 				break;
 			}
@@ -1417,13 +1757,14 @@ do_offset:
 }
 
 static int hist_browser__fprintf_callchain(struct hist_browser *browser,
-					   struct hist_entry *he, FILE *fp)
+					   struct hist_entry *he, FILE *fp,
+					   int level)
 {
 	struct callchain_print_arg arg  = {
 		.fp = fp,
 	};
 
-	hist_browser__show_callchain(browser, he, 1, 0,
+	hist_browser__show_callchain(browser, he, level, 0,
 				     hist_browser__fprintf_callchain_entry, &arg,
 				     hist_browser__check_dump_full);
 	return arg.printed;
@@ -1466,7 +1807,65 @@ static int hist_browser__fprintf_entry(struct hist_browser *browser,
 	printed += fprintf(fp, "%s\n", s);
 
 	if (folded_sign == '-')
-		printed += hist_browser__fprintf_callchain(browser, he, fp);
+		printed += hist_browser__fprintf_callchain(browser, he, fp, 1);
+
+	return printed;
+}
+
+
+static int hist_browser__fprintf_hierarchy_entry(struct hist_browser *browser,
+						 struct hist_entry *he,
+						 FILE *fp, int level,
+						 int nr_sort_keys)
+{
+	char s[8192];
+	int printed = 0;
+	char folded_sign = ' ';
+	struct perf_hpp hpp = {
+		.buf = s,
+		.size = sizeof(s),
+	};
+	struct perf_hpp_fmt *fmt;
+	bool first = true;
+	int ret;
+	int hierarchy_indent = (nr_sort_keys + 1) * HIERARCHY_INDENT;
+
+	printed = fprintf(fp, "%*s", level * HIERARCHY_INDENT, "");
+
+	folded_sign = hist_entry__folded(he);
+	printed += fprintf(fp, "%c", folded_sign);
+
+	hists__for_each_format(he->hists, fmt) {
+		if (perf_hpp__should_skip(fmt, he->hists))
+			continue;
+
+		if (perf_hpp__is_sort_entry(fmt) ||
+		    perf_hpp__is_dynamic_entry(fmt))
+			break;
+
+		if (!first) {
+			ret = scnprintf(hpp.buf, hpp.size, "  ");
+			advance_hpp(&hpp, ret);
+		} else
+			first = false;
+
+		ret = fmt->entry(fmt, &hpp, he);
+		advance_hpp(&hpp, ret);
+	}
+
+	ret = scnprintf(hpp.buf, hpp.size, "%*s", hierarchy_indent, "");
+	advance_hpp(&hpp, ret);
+
+	fmt = he->fmt;
+	ret = fmt->entry(fmt, &hpp, he);
+	advance_hpp(&hpp, ret);
+
+	printed += fprintf(fp, "%s\n", rtrim(s));
+
+	if (he->leaf && folded_sign == '-') {
+		printed += hist_browser__fprintf_callchain(browser, he, fp,
+							   he->depth + 1);
+	}
 
 	return printed;
 }
@@ -1476,12 +1875,22 @@ static int hist_browser__fprintf(struct hist_browser *browser, FILE *fp)
 	struct rb_node *nd = hists__filter_entries(rb_first(browser->b.entries),
 						   browser->min_pcnt);
 	int printed = 0;
+	int nr_sort = browser->hists->hpp_list->nr_sort_keys;
 
 	while (nd) {
 		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
 
-		printed += hist_browser__fprintf_entry(browser, h, fp);
-		nd = hists__filter_entries(rb_next(nd), browser->min_pcnt);
+		if (symbol_conf.report_hierarchy) {
+			printed += hist_browser__fprintf_hierarchy_entry(browser,
+									 h, fp,
+									 h->depth,
+									 nr_sort);
+		} else {
+			printed += hist_browser__fprintf_entry(browser, h, fp);
+		}
+
+		nd = hists__filter_entries(rb_hierarchy_next(nd),
+					   browser->min_pcnt);
 	}
 
 	return printed;
@@ -2025,17 +2434,18 @@ static void hist_browser__update_nr_entries(struct hist_browser *hb)
 	u64 nr_entries = 0;
 	struct rb_node *nd = rb_first(&hb->hists->entries);
 
-	if (hb->min_pcnt == 0) {
+	if (hb->min_pcnt == 0 && !symbol_conf.report_hierarchy) {
 		hb->nr_non_filtered_entries = hb->hists->nr_non_filtered_entries;
 		return;
 	}
 
 	while ((nd = hists__filter_entries(nd, hb->min_pcnt)) != NULL) {
 		nr_entries++;
-		nd = rb_next(nd);
+		nd = rb_hierarchy_next(nd);
 	}
 
 	hb->nr_non_filtered_entries = nr_entries;
+	hb->nr_hierarchy_entries = nr_entries;
 }
 
 static void hist_browser__update_percent_limit(struct hist_browser *hb,
@@ -2048,12 +2458,12 @@ static void hist_browser__update_percent_limit(struct hist_browser *hb,
 
 	hb->min_pcnt = callchain_param.min_percent = percent;
 
-	if (!symbol_conf.use_callchain)
-		return;
-
 	while ((nd = hists__filter_entries(nd, hb->min_pcnt)) != NULL) {
 		he = rb_entry(nd, struct hist_entry, rb_node);
 
+		if (!he->leaf || !symbol_conf.use_callchain)
+			goto next;
+
 		if (callchain_param.mode == CHAIN_GRAPH_REL) {
 			total = he->stat.period;
 
@@ -2066,11 +2476,17 @@ static void hist_browser__update_percent_limit(struct hist_browser *hb,
 		callchain_param.sort(&he->sorted_chain, he->callchain,
 				     min_callchain_hits, &callchain_param);
 
+next:
+		/*
+		 * Tentatively set unfolded so that the rb_hierarchy_next()
+		 * can toggle children of folded entries too.
+		 */
+		he->unfolded = he->has_children;
+		nd = rb_hierarchy_next(nd);
+
 		/* force to re-evaluate folding state of callchains */
 		he->init_have_children = false;
-		hist_entry__set_folding(he, false);
-
-		nd = rb_next(nd);
+		hist_entry__set_folding(he, hb, false);
 	}
 }
 

+ 162 - 1
tools/perf/ui/gtk/hists.c

@@ -396,6 +396,164 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists,
 	gtk_container_add(GTK_CONTAINER(window), view);
 }
 
+/*
+ * Append the entries found under @root to @store as children of @parent,
+ * recursing into the children (hroot_out) of every non-leaf entry.
+ *
+ * Entries that are filtered, or whose percent limit is below @min_pcnt, are
+ * skipped and their children are not visited.  @hpp supplies the buffer the
+ * format callbacks print into.
+ */
+static void perf_gtk__add_hierarchy_entries(struct hists *hists,
+					    struct rb_root *root,
+					    GtkTreeStore *store,
+					    GtkTreeIter *parent,
+					    struct perf_hpp *hpp,
+					    float min_pcnt)
+{
+	int col_idx = 0;
+	struct rb_node *node;
+	struct hist_entry *he;
+	struct perf_hpp_fmt *fmt;
+	u64 total = hists__total_period(hists);
+
+	for (node = rb_first(root); node; node = rb_next(node)) {
+		GtkTreeIter iter;
+		float percent;
+
+		he = rb_entry(node, struct hist_entry, rb_node);
+		if (he->filtered)
+			continue;
+
+		percent = hist_entry__get_percent_limit(he);
+		if (percent < min_pcnt)
+			continue;
+
+		gtk_tree_store_append(store, &iter, parent);
+
+		/* leading columns: every format up to the first sort key */
+		col_idx = 0;
+		hists__for_each_format(hists, fmt) {
+			if (perf_hpp__is_sort_entry(fmt) ||
+			    perf_hpp__is_dynamic_entry(fmt))
+				break;
+
+			if (fmt->color)
+				fmt->color(fmt, hpp, he);
+			else
+				fmt->entry(fmt, hpp, he);
+
+			gtk_tree_store_set(store, &iter, col_idx++, hpp->buf, -1);
+		}
+
+		/* last column: output of the format saved in this entry */
+		fmt = he->fmt;
+		if (fmt->color)
+			fmt->color(fmt, hpp, he);
+		else
+			fmt->entry(fmt, hpp, he);
+
+		gtk_tree_store_set(store, &iter, col_idx, rtrim(hpp->buf), -1);
+
+		if (!he->leaf) {
+			perf_gtk__add_hierarchy_entries(hists, &he->hroot_out,
+							store, &iter, hpp,
+							min_pcnt);
+		}
+
+		if (symbol_conf.use_callchain && he->leaf) {
+			/* relative graph mode: the entry's own period is the total */
+			if (callchain_param.mode == CHAIN_GRAPH_REL)
+				total = symbol_conf.cumulate_callchain ?
+					he->stat_acc->period : he->stat.period;
+
+			perf_gtk__add_callchain(&he->sorted_chain, store, &iter,
+						col_idx, total);
+		}
+	}
+
+}
+
+/*
+ * Build the GTK tree view used in hierarchy mode and add it to @window.
+ *
+ * The view has one column per non-sort format followed by a single column
+ * shared by all sort keys; its title is the sort key headers joined with
+ * " / ".  Entries below @min_pcnt are left out by
+ * perf_gtk__add_hierarchy_entries().
+ */
+static void perf_gtk__show_hierarchy(GtkWidget *window, struct hists *hists,
+				     float min_pcnt)
+{
+	struct perf_hpp_fmt *fmt;
+	GType col_types[MAX_COLUMNS];
+	GtkCellRenderer *renderer;
+	GtkTreeStore *store;
+	GtkWidget *view;
+	int col_idx;
+	int nr_cols = 0;
+	char s[512];
+	char buf[512];
+	bool first = true;
+	struct perf_hpp hpp = {
+		.buf		= s,
+		.size		= sizeof(s),
+	};
+
+	/* one string column per non-sort format, plus the merged sort column */
+	hists__for_each_format(hists, fmt) {
+		if (perf_hpp__is_sort_entry(fmt) ||
+		    perf_hpp__is_dynamic_entry(fmt))
+			break;
+
+		col_types[nr_cols++] = G_TYPE_STRING;
+	}
+	col_types[nr_cols++] = G_TYPE_STRING;
+
+	store = gtk_tree_store_newv(nr_cols, col_types);
+	view = gtk_tree_view_new();
+	renderer = gtk_cell_renderer_text_new();
+
+	col_idx = 0;
+	hists__for_each_format(hists, fmt) {
+		if (perf_hpp__is_sort_entry(fmt) ||
+		    perf_hpp__is_dynamic_entry(fmt))
+			break;
+
+		gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view),
+							    -1, fmt->name,
+							    renderer, "markup",
+							    col_idx++, NULL);
+	}
+
+	/* construct merged column header since sort keys share single column */
+	buf[0] = '\0';
+	hists__for_each_format(hists, fmt) {
+		if (!perf_hpp__is_sort_entry(fmt) &&
+		    !perf_hpp__is_dynamic_entry(fmt))
+			continue;
+		if (perf_hpp__should_skip(fmt, hists))
+			continue;
+
+		/*
+		 * Each header can be as long as the hpp buffer, so bound every
+		 * append by the room left in buf; an over-long title is
+		 * truncated instead of overflowing the stack buffer.
+		 */
+		if (first)
+			first = false;
+		else
+			strncat(buf, " / ", sizeof(buf) - strlen(buf) - 1);
+
+		fmt->header(fmt, &hpp, hists_to_evsel(hists));
+		strncat(buf, rtrim(hpp.buf), sizeof(buf) - strlen(buf) - 1);
+	}
+
+	gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view),
+						    -1, buf,
+						    renderer, "markup",
+						    col_idx++, NULL);
+
+	for (col_idx = 0; col_idx < nr_cols; col_idx++) {
+		GtkTreeViewColumn *column;
+
+		column = gtk_tree_view_get_column(GTK_TREE_VIEW(view), col_idx);
+		gtk_tree_view_column_set_resizable(column, TRUE);
+
+		if (col_idx == 0) {
+			gtk_tree_view_set_expander_column(GTK_TREE_VIEW(view),
+							  column);
+		}
+	}
+
+	gtk_tree_view_set_model(GTK_TREE_VIEW(view), GTK_TREE_MODEL(store));
+	g_object_unref(GTK_TREE_MODEL(store));
+
+	perf_gtk__add_hierarchy_entries(hists, &hists->entries, store,
+					NULL, &hpp, min_pcnt);
+
+	gtk_tree_view_set_rules_hint(GTK_TREE_VIEW(view), TRUE);
+
+	g_signal_connect(view, "row-activated",
+			 G_CALLBACK(on_row_activated), NULL);
+	gtk_container_add(GTK_CONTAINER(window), view);
+}
+
 int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist,
 				  const char *help,
 				  struct hist_browser_timer *hbt __maybe_unused,
@@ -463,7 +621,10 @@ int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist,
 							GTK_POLICY_AUTOMATIC,
 							GTK_POLICY_AUTOMATIC);
 
-		perf_gtk__show_hists(scrolled_window, hists, min_pcnt);
+		if (symbol_conf.report_hierarchy)
+			perf_gtk__show_hierarchy(scrolled_window, hists, min_pcnt);
+		else
+			perf_gtk__show_hists(scrolled_window, hists, min_pcnt);
 
 		tab_label = gtk_label_new(evname);
 

+ 3 - 0
tools/perf/ui/hist.c

@@ -514,6 +514,9 @@ void perf_hpp_list__column_register(struct perf_hpp_list *list,
 void perf_hpp_list__register_sort_field(struct perf_hpp_list *list,
 					struct perf_hpp_fmt *format)
 {
+	/* only sort and dynamic entries are counted in nr_sort_keys */
+	if (perf_hpp__is_sort_entry(format) || perf_hpp__is_dynamic_entry(format))
+		list->nr_sort_keys++;
+
 	list_add_tail(&format->sort_list, &list->sorts);
 }
 

+ 183 - 1
tools/perf/ui/stdio/hist.c

@@ -410,6 +410,76 @@ static int hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp)
 	return hpp->buf - start;
 }
 
+/*
+ * Print one entry in hierarchy mode to @fp.
+ *
+ * The line is assembled in @hpp's buffer: indentation proportional to the
+ * entry's depth, the non-sort columns, then either the field separator or
+ * padding derived from @nr_sort_key, and finally the output of the format
+ * saved in the entry.  Callchains are printed for leaf entries when
+ * callchains are enabled; @hists provides the total period for them.
+ *
+ * Returns the sum of the fprintf()/callchain print results, or 0 when the
+ * entry is suppressed by symbol_conf.exclude_other.
+ */
+static int hist_entry__hierarchy_fprintf(struct hist_entry *he,
+					 struct perf_hpp *hpp,
+					 int nr_sort_key, struct hists *hists,
+					 FILE *fp)
+{
+	const char *sep = symbol_conf.field_sep;
+	struct perf_hpp_fmt *fmt;
+	char *buf = hpp->buf;
+	int ret, printed = 0;
+	bool first = true;
+
+	if (symbol_conf.exclude_other && !he->parent)
+		return 0;
+
+	ret = scnprintf(hpp->buf, hpp->size, "%*s", he->depth * HIERARCHY_INDENT, "");
+	advance_hpp(hpp, ret);
+
+	hists__for_each_format(he->hists, fmt) {
+		if (perf_hpp__is_sort_entry(fmt) || perf_hpp__is_dynamic_entry(fmt))
+			break;
+
+		/*
+		 * If there's no field_sep, we still need
+		 * to display initial '  '.
+		 */
+		if (!sep || !first) {
+			ret = scnprintf(hpp->buf, hpp->size, "%s", sep ?: "  ");
+			advance_hpp(hpp, ret);
+		} else
+			first = false;
+
+		if (perf_hpp__use_color() && fmt->color)
+			ret = fmt->color(fmt, hpp, he);
+		else
+			ret = fmt->entry(fmt, hpp, he);
+
+		ret = hist_entry__snprintf_alignment(he, hpp, fmt, ret);
+		advance_hpp(hpp, ret);
+	}
+
+	if (sep)
+		ret = scnprintf(hpp->buf, hpp->size, "%s", sep);
+	else
+		ret = scnprintf(hpp->buf, hpp->size, "%*s",
+				(nr_sort_key - 1) * HIERARCHY_INDENT + 2, "");
+	advance_hpp(hpp, ret);
+
+	/*
+	 * No need to call hist_entry__snprintf_alignment() since this
+	 * fmt is always the last column in the hierarchy mode.
+	 */
+	fmt = he->fmt;
+	if (perf_hpp__use_color() && fmt->color)
+		fmt->color(fmt, hpp, he);
+	else
+		fmt->entry(fmt, hpp, he);
+
+	/* buf still points at the start of the line; hpp->buf has advanced */
+	printed += fprintf(fp, "%s\n", buf);
+
+	if (symbol_conf.use_callchain && he->leaf) {
+		u64 total = hists__total_period(hists);
+
+		printed += hist_entry_callchain__fprintf(he, total, 0, fp);
+	}
+
+	return printed;
+}
+
 static int hist_entry__fprintf(struct hist_entry *he, size_t size,
 			       struct hists *hists,
 			       char *bf, size_t bfsz, FILE *fp)
@@ -424,6 +494,13 @@ static int hist_entry__fprintf(struct hist_entry *he, size_t size,
 	if (size == 0 || size > bfsz)
 		size = hpp.size = bfsz;
 
+	if (symbol_conf.report_hierarchy) {
+		int nr_sort = hists->hpp_list->nr_sort_keys;
+
+		return hist_entry__hierarchy_fprintf(he, &hpp, nr_sort,
+						     hists, fp);
+	}
+
 	hist_entry__snprintf(he, &hpp);
 
 	ret = fprintf(fp, "%s\n", bf);
@@ -434,6 +511,106 @@ static int hist_entry__fprintf(struct hist_entry *he, size_t size,
 	return ret;
 }
 
+/*
+ * Print the first (@nr_sort - 1) * HIERARCHY_INDENT characters of @line to
+ * @fp.  Nothing is printed (and 0 is returned) when a field separator @sep
+ * is in use or when @nr_sort is less than 1.  Otherwise the fprintf() result
+ * is returned.
+ */
+static int print_hierarchy_indent(const char *sep, int nr_sort,
+				  const char *line, FILE *fp)
+{
+	if (sep != NULL || nr_sort < 1)
+		return 0;
+
+	return fprintf(fp, "%-.*s", (nr_sort - 1) * HIERARCHY_INDENT, line);
+}
+
+/*
+ * Print the column header used in hierarchy mode to @fp: a line with the
+ * headers of the non-sort columns followed by the sort key headers joined
+ * with " / ", then a line of dots underneath, then a line holding only "#".
+ * @hpp is the buffer the header callbacks print into and @sep is the field
+ * separator (may be NULL).
+ *
+ * Always returns 2.
+ */
+static int print_hierarchy_header(struct hists *hists, struct perf_hpp *hpp,
+				  const char *sep, FILE *fp)
+{
+	bool first = true;
+	int nr_sort;
+	unsigned width = 0;
+	unsigned header_width = 0;
+	struct perf_hpp_fmt *fmt;
+
+	nr_sort = hists->hpp_list->nr_sort_keys;
+
+	/* preserve max indent depth for column headers */
+	print_hierarchy_indent(sep, nr_sort, spaces, fp);
+
+	hists__for_each_format(hists, fmt) {
+		if (perf_hpp__is_sort_entry(fmt) || perf_hpp__is_dynamic_entry(fmt))
+			break;
+
+		if (!first)
+			fprintf(fp, "%s", sep ?: "  ");
+		else
+			first = false;
+
+		fmt->header(fmt, hpp, hists_to_evsel(hists));
+		fprintf(fp, "%s", hpp->buf);
+	}
+
+	/* combine sort headers with ' / ' */
+	first = true;
+	hists__for_each_format(hists, fmt) {
+		if (!perf_hpp__is_sort_entry(fmt) && !perf_hpp__is_dynamic_entry(fmt))
+			continue;
+		if (perf_hpp__should_skip(fmt, hists))
+			continue;
+
+		/* header_width accumulates what is printed for the merged header */
+		if (!first)
+			header_width += fprintf(fp, " / ");
+		else {
+			header_width += fprintf(fp, "%s", sep ?: "  ");
+			first = false;
+		}
+
+		fmt->header(fmt, hpp, hists_to_evsel(hists));
+		rtrim(hpp->buf);
+
+		header_width += fprintf(fp, "%s", hpp->buf);
+	}
+
+	/* preserve max indent depth for combined sort headers */
+	print_hierarchy_indent(sep, nr_sort, spaces, fp);
+
+	fprintf(fp, "\n# ");
+
+	/* preserve max indent depth for initial dots */
+	print_hierarchy_indent(sep, nr_sort, dots, fp);
+
+	first = true;
+	hists__for_each_format(hists, fmt) {
+		if (perf_hpp__is_sort_entry(fmt) || perf_hpp__is_dynamic_entry(fmt))
+			break;
+
+		if (!first)
+			fprintf(fp, "%s", sep ?: "  ");
+		else
+			first = false;
+
+		width = fmt->width(fmt, hpp, hists_to_evsel(hists));
+		fprintf(fp, "%.*s", width, dots);
+	}
+
+	/* the dotted rule is at least as wide as the widest sort column */
+	hists__for_each_format(hists, fmt) {
+		if (!perf_hpp__is_sort_entry(fmt) && !perf_hpp__is_dynamic_entry(fmt))
+			continue;
+		if (perf_hpp__should_skip(fmt, hists))
+			continue;
+
+		width = fmt->width(fmt, hpp, hists_to_evsel(hists));
+		if (width > header_width)
+			header_width = width;
+	}
+
+	fprintf(fp, "%s%-.*s", sep ?: "  ", header_width, dots);
+
+	/* preserve max indent depth for dots under sort headers */
+	print_hierarchy_indent(sep, nr_sort, dots, fp);
+
+	fprintf(fp, "\n#\n");
+
+	return 2;
+}
+
 size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows,
 		      int max_cols, float min_pcnt, FILE *fp)
 {
@@ -465,6 +642,11 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows,
 
 	fprintf(fp, "# ");
 
+	if (symbol_conf.report_hierarchy) {
+		nr_rows += print_hierarchy_header(hists, &dummy_hpp, sep, fp);
+		goto print_entries;
+	}
+
 	hists__for_each_format(hists, fmt) {
 		if (perf_hpp__should_skip(fmt, hists))
 			continue;
@@ -522,7 +704,7 @@ print_entries:
 		goto out;
 	}
 
-	for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
+	for (nd = rb_first(&hists->entries); nd; nd = __rb_hierarchy_next(nd, HMD_FORCE_CHILD)) {
 		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
 		float percent;
 

+ 9 - 0
tools/perf/util/ctype.c

@@ -31,9 +31,18 @@ unsigned char sane_ctype[256] = {
 };
 
 const char *graph_line =
+	"_____________________________________________________________________"
 	"_____________________________________________________________________"
 	"_____________________________________________________________________";
 const char *graph_dotted_line =
 	"---------------------------------------------------------------------"
 	"---------------------------------------------------------------------"
 	"---------------------------------------------------------------------";
+/*
+ * Long runs of blanks and dots.  The stdio hierarchy header prints
+ * length-limited prefixes of these ("%.*s") for indentation and rulers.
+ */
+const char *spaces =
+	"                                                                     "
+	"                                                                     "
+	"                                                                     ";
+const char *dots =
+	"....................................................................."
+	"....................................................................."
+	".....................................................................";

+ 48 - 27
tools/perf/util/debug.c

@@ -106,40 +106,61 @@ int dump_printf(const char *fmt, ...)
 	return ret;
 }
 
+/*
+ * Printer callback handed to print_binary() by trace_event().  Each call
+ * emits one piece of the dump, selected by @op.  @val carries the value for
+ * that piece: it is printed as a 4-digit hex number for BINARY_PRINT_ADDR
+ * and as a data byte for BINARY_PRINT_NUM_DATA / BINARY_PRINT_CHAR_DATA.
+ * @extra is the event being dumped; only its header size is read here.
+ */
+static void trace_event_printer(enum binary_printer_ops op,
+				unsigned int val, void *extra)
+{
+	const char *color = PERF_COLOR_BLUE;
+	union perf_event *event = (union perf_event *)extra;
+	unsigned char ch = (unsigned char)val;
+
+	switch (op) {
+	case BINARY_PRINT_DATA_BEGIN:
+		printf(".");
+		color_fprintf(stdout, color, "\n. ... raw event: size %d bytes\n",
+				event->header.size);
+		break;
+	case BINARY_PRINT_LINE_BEGIN:
+		printf(".");
+		break;
+	case BINARY_PRINT_ADDR:
+		color_fprintf(stdout, color, "  %04x: ", val);
+		break;
+	case BINARY_PRINT_NUM_DATA:
+		color_fprintf(stdout, color, " %02x", val);
+		break;
+	case BINARY_PRINT_NUM_PAD:
+		color_fprintf(stdout, color, "   ");
+		break;
+	case BINARY_PRINT_SEP:
+		color_fprintf(stdout, color, "  ");
+		break;
+	case BINARY_PRINT_CHAR_DATA:
+		/* non-printable bytes are shown as '.' */
+		color_fprintf(stdout, color, "%c",
+			      isprint(ch) ? ch : '.');
+		break;
+	case BINARY_PRINT_CHAR_PAD:
+		color_fprintf(stdout, color, " ");
+		break;
+	case BINARY_PRINT_LINE_END:
+		color_fprintf(stdout, color, "\n");
+		break;
+	case BINARY_PRINT_DATA_END:
+		printf("\n");
+		break;
+	default:
+		break;
+	}
+}
+
 void trace_event(union perf_event *event)
 {
 	unsigned char *raw_event = (void *)event;
-	const char *color = PERF_COLOR_BLUE;
-	int i, j;
 
 	if (!dump_trace)
 		return;
 
-	printf(".");
-	color_fprintf(stdout, color, "\n. ... raw event: size %d bytes\n",
-		      event->header.size);
-
-	for (i = 0; i < event->header.size; i++) {
-		if ((i & 15) == 0) {
-			printf(".");
-			color_fprintf(stdout, color, "  %04x: ", i);
-		}
-
-		color_fprintf(stdout, color, " %02x", raw_event[i]);
-
-		if (((i & 15) == 15) || i == event->header.size-1) {
-			color_fprintf(stdout, color, "  ");
-			for (j = 0; j < 15-(i & 15); j++)
-				color_fprintf(stdout, color, "   ");
-			for (j = i & ~15; j <= i; j++) {
-				color_fprintf(stdout, color, "%c",
-					      isprint(raw_event[j]) ?
-					      raw_event[j] : '.');
-			}
-			color_fprintf(stdout, color, "\n");
-		}
-	}
-	printf(".\n");
+	/*
+	 * Dump the whole event through trace_event_printer(); the event is
+	 * also passed as the callback's extra argument.  The 16 presumably is
+	 * the bytes-per-line count (the removed loop used 16) - confirm
+	 * against print_binary().
+	 */
+	print_binary(raw_event, event->header.size, 16,
+		     trace_event_printer, event);
 }
 
 static struct debug_variable {

+ 442 - 19
tools/perf/util/hist.c

@@ -248,6 +248,8 @@ static void he_stat__decay(struct he_stat *he_stat)
 	/* XXX need decay for weight too? */
 }
 
+static void hists__delete_entry(struct hists *hists, struct hist_entry *he);
+
 static bool hists__decay_entry(struct hists *hists, struct hist_entry *he)
 {
 	u64 prev_period = he->stat.period;
@@ -263,21 +265,45 @@ static bool hists__decay_entry(struct hists *hists, struct hist_entry *he)
 
 	diff = prev_period - he->stat.period;
 
-	hists->stats.total_period -= diff;
-	if (!he->filtered)
-		hists->stats.total_non_filtered_period -= diff;
+	if (!he->depth) {
+		hists->stats.total_period -= diff;
+		if (!he->filtered)
+			hists->stats.total_non_filtered_period -= diff;
+	}
+
+	if (!he->leaf) {
+		struct hist_entry *child;
+		struct rb_node *node = rb_first(&he->hroot_out);
+		while (node) {
+			child = rb_entry(node, struct hist_entry, rb_node);
+			node = rb_next(node);
+
+			if (hists__decay_entry(hists, child))
+				hists__delete_entry(hists, child);
+		}
+	}
 
 	return he->stat.period == 0;
 }
 
 static void hists__delete_entry(struct hists *hists, struct hist_entry *he)
 {
-	rb_erase(&he->rb_node, &hists->entries);
+	struct rb_root *root_in;
+	struct rb_root *root_out;
 
-	if (sort__need_collapse)
-		rb_erase(&he->rb_node_in, &hists->entries_collapsed);
-	else
-		rb_erase(&he->rb_node_in, hists->entries_in);
+	if (he->parent_he) {
+		root_in  = &he->parent_he->hroot_in;
+		root_out = &he->parent_he->hroot_out;
+	} else {
+		if (sort__need_collapse)
+			root_in = &hists->entries_collapsed;
+		else
+			root_in = hists->entries_in;
+		root_out = &hists->entries;
+	}
+
+	rb_erase(&he->rb_node_in, root_in);
+	rb_erase(&he->rb_node, root_out);
 
 	--hists->nr_entries;
 	if (!he->filtered)
@@ -396,6 +422,9 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template,
 		}
 		INIT_LIST_HEAD(&he->pairs.node);
 		thread__get(he->thread);
+
+		if (!symbol_conf.report_hierarchy)
+			he->leaf = true;
 	}
 
 	return he;
@@ -1049,6 +1078,114 @@ int hist_entry__snprintf_alignment(struct hist_entry *he, struct perf_hpp *hpp,
  * collapse the histogram
  */
 
+static void hists__apply_filters(struct hists *hists, struct hist_entry *he);
+
+/*
+ * Find or create the entry matching @he in one level of the hierarchy.
+ *
+ * @root is searched (through rb_node_in) using @fmt's collapse callback.  On
+ * a match the stats of @he are added to the existing entry, which is
+ * returned.  Otherwise a copy of @he is allocated, run through the filters,
+ * counted in hists->nr_entries, tagged with @fmt and linked into @root.
+ *
+ * Returns the entry for this level, or NULL if the allocation failed.
+ */
+static struct hist_entry *hierarchy_insert_entry(struct hists *hists,
+						 struct rb_root *root,
+						 struct hist_entry *he,
+						 struct perf_hpp_fmt *fmt)
+{
+	struct rb_node **p = &root->rb_node;
+	struct rb_node *parent = NULL;
+	struct hist_entry *iter, *new;
+	int64_t cmp;
+
+	while (*p != NULL) {
+		parent = *p;
+		iter = rb_entry(parent, struct hist_entry, rb_node_in);
+
+		cmp = fmt->collapse(fmt, iter, he);
+		if (!cmp) {
+			he_stat__add_stat(&iter->stat, &he->stat);
+			return iter;
+		}
+
+		if (cmp < 0)
+			p = &parent->rb_left;
+		else
+			p = &parent->rb_right;
+	}
+
+	new = hist_entry__new(he, true);
+	if (new == NULL)
+		return NULL;
+
+	hists__apply_filters(hists, new);
+	hists->nr_entries++;
+
+	/* save related format for output */
+	new->fmt = fmt;
+
+	/*
+	 * some fields are now passed to 'new': each of the pointers below is
+	 * kept by exactly one of the two entries - by 'new' when @fmt is the
+	 * matching kind of key, by 'he' otherwise.
+	 */
+	if (perf_hpp__is_trace_entry(fmt))
+		he->trace_output = NULL;
+	else
+		new->trace_output = NULL;
+
+	if (perf_hpp__is_srcline_entry(fmt))
+		he->srcline = NULL;
+	else
+		new->srcline = NULL;
+
+	if (perf_hpp__is_srcfile_entry(fmt))
+		he->srcfile = NULL;
+	else
+		new->srcfile = NULL;
+
+	rb_link_node(&new->rb_node_in, parent, p);
+	rb_insert_color(&new->rb_node_in, root);
+	return new;
+}
+
+/*
+ * Insert @he into the hierarchy rooted at @root, one level per sort key.
+ *
+ * For every sort/dynamic format on the sort list that is not skipped, an
+ * entry for @he is found or created at the current level, and the next key
+ * is then inserted below it (in its hroot_in).  The entry of the last key is
+ * marked as a leaf and, when callchains are in use, receives @he's callchain.
+ *
+ * @he itself is always deleted before returning.  Returns 0 on success and
+ * -1 if an entry could not be allocated or the callchain merge failed.
+ */
+static int hists__hierarchy_insert_entry(struct hists *hists,
+					 struct rb_root *root,
+					 struct hist_entry *he)
+{
+	struct perf_hpp_fmt *fmt;
+	struct hist_entry *new_he = NULL;
+	struct hist_entry *parent = NULL;
+	int depth = 0;
+	int ret = 0;
+
+	hists__for_each_sort_list(hists, fmt) {
+		if (!perf_hpp__is_sort_entry(fmt) &&
+		    !perf_hpp__is_dynamic_entry(fmt))
+			continue;
+		if (perf_hpp__should_skip(fmt, hists))
+			continue;
+
+		/* insert copy of 'he' for each fmt into the hierarchy */
+		new_he = hierarchy_insert_entry(hists, root, he, fmt);
+		if (new_he == NULL) {
+			ret = -1;
+			break;
+		}
+
+		/* the next sort key goes one level down, under this entry */
+		root = &new_he->hroot_in;
+		new_he->parent_he = parent;
+		new_he->depth = depth++;
+		parent = new_he;
+	}
+
+	if (new_he) {
+		new_he->leaf = true;
+
+		if (symbol_conf.use_callchain) {
+			callchain_cursor_reset(&callchain_cursor);
+			if (callchain_merge(&callchain_cursor,
+					    new_he->callchain,
+					    he->callchain) < 0)
+				ret = -1;
+		}
+	}
+
+	/* 'he' is no longer used */
+	hist_entry__delete(he);
+
+	/* return 0 (or -1) since it already applied filters */
+	return ret;
+}
+
 int hists__collapse_insert_entry(struct hists *hists, struct rb_root *root,
 				 struct hist_entry *he)
 {
@@ -1057,6 +1194,9 @@ int hists__collapse_insert_entry(struct hists *hists, struct rb_root *root,
 	struct hist_entry *iter;
 	int64_t cmp;
 
+	if (symbol_conf.report_hierarchy)
+		return hists__hierarchy_insert_entry(hists, root, he);
+
 	while (*p != NULL) {
 		parent = *p;
 		iter = rb_entry(parent, struct hist_entry, rb_node_in);
@@ -1204,6 +1344,86 @@ void hists__inc_stats(struct hists *hists, struct hist_entry *h)
 	hists->stats.total_period += h->stat.period;
 }
 
+/*
+ * Link @he (through its rb_node) into the output tree @root.  The position
+ * is chosen with hist_entry__sort(): @he goes to the left of every existing
+ * entry it compares greater than, and to the right otherwise.
+ */
+static void hierarchy_insert_output_entry(struct rb_root *root,
+					  struct hist_entry *he)
+{
+	struct rb_node **p = &root->rb_node;
+	struct rb_node *parent = NULL;
+	struct hist_entry *iter;
+
+	while (*p != NULL) {
+		parent = *p;
+		iter = rb_entry(parent, struct hist_entry, rb_node);
+
+		if (hist_entry__sort(he, iter) > 0)
+			p = &parent->rb_left;
+		else
+			p = &parent->rb_right;
+	}
+
+	rb_link_node(&he->rb_node, parent, p);
+	rb_insert_color(&he->rb_node, root);
+}
+
+/*
+ * Rebuild the output tree @root_out from the entries in @root_in, recursing
+ * from each non-leaf entry's hroot_in into its hroot_out.
+ *
+ * Non-leaf entries only bump the entry counters and column widths; the
+ * period statistics are accumulated from leaf entries alone.  When
+ * @use_callchain is set, the callchain of every leaf is sorted using
+ * @min_callchain_hits, which is recomputed per entry in CHAIN_GRAPH_REL mode.
+ * @prog, if not NULL, is advanced by one for each entry visited.
+ */
+static void hists__hierarchy_output_resort(struct hists *hists,
+					   struct ui_progress *prog,
+					   struct rb_root *root_in,
+					   struct rb_root *root_out,
+					   u64 min_callchain_hits,
+					   bool use_callchain)
+{
+	struct rb_node *node;
+	struct hist_entry *he;
+
+	*root_out = RB_ROOT;
+	node = rb_first(root_in);
+
+	while (node) {
+		he = rb_entry(node, struct hist_entry, rb_node_in);
+		node = rb_next(node);
+
+		hierarchy_insert_output_entry(root_out, he);
+
+		if (prog)
+			ui_progress__update(prog, 1);
+
+		if (!he->leaf) {
+			hists__hierarchy_output_resort(hists, prog,
+						       &he->hroot_in,
+						       &he->hroot_out,
+						       min_callchain_hits,
+						       use_callchain);
+			hists->nr_entries++;
+			if (!he->filtered) {
+				hists->nr_non_filtered_entries++;
+				hists__calc_col_len(hists, he);
+			}
+
+			continue;
+		}
+
+		/* only update stat for leaf entries to avoid duplication */
+		hists__inc_stats(hists, he);
+		if (!he->filtered)
+			hists__calc_col_len(hists, he);
+
+		if (!use_callchain)
+			continue;
+
+		if (callchain_param.mode == CHAIN_GRAPH_REL) {
+			u64 total = he->stat.period;
+
+			if (symbol_conf.cumulate_callchain)
+				total = he->stat_acc->period;
+
+			min_callchain_hits = total * (callchain_param.min_percent / 100);
+		}
+
+		callchain_param.sort(&he->sorted_chain, he->callchain,
+				     min_callchain_hits, &callchain_param);
+	}
+}
+
 static void __hists__insert_output_entry(struct rb_root *entries,
 					 struct hist_entry *he,
 					 u64 min_callchain_hits,
@@ -1255,6 +1475,17 @@ static void output_resort(struct hists *hists, struct ui_progress *prog,
 
 	min_callchain_hits = callchain_total * (callchain_param.min_percent / 100);
 
+	hists__reset_stats(hists);
+	hists__reset_col_len(hists);
+
+	if (symbol_conf.report_hierarchy) {
+		return hists__hierarchy_output_resort(hists, prog,
+						      &hists->entries_collapsed,
+						      &hists->entries,
+						      min_callchain_hits,
+						      use_callchain);
+	}
+
 	if (sort__need_collapse)
 		root = &hists->entries_collapsed;
 	else
@@ -1263,9 +1494,6 @@ static void output_resort(struct hists *hists, struct ui_progress *prog,
 	next = rb_first(root);
 	hists->entries = RB_ROOT;
 
-	hists__reset_stats(hists);
-	hists__reset_col_len(hists);
-
 	while (next) {
 		n = rb_entry(next, struct hist_entry, rb_node_in);
 		next = rb_next(&n->rb_node_in);
@@ -1298,10 +1526,87 @@ void hists__output_resort(struct hists *hists, struct ui_progress *prog)
 	output_resort(hists, prog, symbol_conf.use_callchain);
 }
 
+/*
+ * Decide whether a hierarchy walk may step from @he into its children.
+ * Leaf entries and HMD_FORCE_SIBLING never descend; otherwise the walk
+ * descends when @he is unfolded or when @hmd is HMD_FORCE_CHILD.
+ */
+static bool can_goto_child(struct hist_entry *he, enum hierarchy_move_dir hmd)
+{
+	if (he->leaf || hmd == HMD_FORCE_SIBLING)
+		return false;
+
+	if (he->unfolded || hmd == HMD_FORCE_CHILD)
+		return true;
+
+	return false;
+}
+
+/*
+ * Starting from @node, keep stepping to the last child for as long as the
+ * current entry is an unfolded non-leaf, and return the node reached.
+ */
+struct rb_node *rb_hierarchy_last(struct rb_node *node)
+{
+	struct hist_entry *he = rb_entry(node, struct hist_entry, rb_node);
+
+	while (can_goto_child(he, HMD_NORMAL)) {
+		node = rb_last(&he->hroot_out);
+		he = rb_entry(node, struct hist_entry, rb_node);
+	}
+	return node;
+}
+
+/*
+ * Return the node that follows @node in a walk over the hierarchy.
+ *
+ * The first child is taken when can_goto_child() allows it for @hmd,
+ * otherwise the next sibling.  If that yields nothing, the walk climbs
+ * through parent_he and takes the parent's next sibling, repeating until a
+ * node is found or the top level is exhausted (NULL is returned then).
+ */
+struct rb_node *__rb_hierarchy_next(struct rb_node *node, enum hierarchy_move_dir hmd)
+{
+	struct hist_entry *he = rb_entry(node, struct hist_entry, rb_node);
+
+	if (can_goto_child(he, hmd))
+		node = rb_first(&he->hroot_out);
+	else
+		node = rb_next(node);
+
+	while (node == NULL) {
+		he = he->parent_he;
+		if (he == NULL)
+			break;
+
+		node = rb_next(&he->rb_node);
+	}
+	return node;
+}
+
+/*
+ * Return the node that precedes @node in a walk over the hierarchy: the
+ * rb_hierarchy_last() node of the previous sibling if there is one,
+ * otherwise the parent entry, otherwise NULL.
+ */
+struct rb_node *rb_hierarchy_prev(struct rb_node *node)
+{
+	struct hist_entry *he = rb_entry(node, struct hist_entry, rb_node);
+
+	node = rb_prev(node);
+	if (node)
+		return rb_hierarchy_last(node);
+
+	he = he->parent_he;
+	if (he == NULL)
+		return NULL;
+
+	return &he->rb_node;
+}
+
 static void hists__remove_entry_filter(struct hists *hists, struct hist_entry *h,
 				       enum hist_filter filter)
 {
 	h->filtered &= ~(1 << filter);
+
+	if (symbol_conf.report_hierarchy) {
+		struct hist_entry *parent = h->parent_he;
+
+		while (parent) {
+			he_stat__add_stat(&parent->stat, &h->stat);
+
+			parent->filtered &= ~(1 << filter);
+
+			if (parent->filtered)
+				goto next;
+
+			/* force fold unfiltered entry for simplicity */
+			parent->unfolded = false;
+			parent->row_offset = 0;
+			parent->nr_rows = 0;
+next:
+			parent = parent->parent_he;
+		}
+	}
+
 	if (h->filtered)
 		return;
 
@@ -1387,28 +1692,146 @@ static void hists__filter_by_type(struct hists *hists, int type, filter_fn_t fil
 	}
 }
 
+/*
+ * Insert @he into the output tree @root at the position given by
+ * hist_entry__sort(), then, unless @he is a leaf or is filtered, re-sort
+ * its children the same way: each child is erased from hroot_out, inserted
+ * into a fresh tree recursively, and the fresh tree replaces hroot_out.
+ */
+static void resort_filtered_entry(struct rb_root *root, struct hist_entry *he)
+{
+	struct rb_node **p = &root->rb_node;
+	struct rb_node *parent = NULL;
+	struct hist_entry *iter;
+	struct rb_root new_root = RB_ROOT;
+	struct rb_node *nd;
+
+	while (*p != NULL) {
+		parent = *p;
+		iter = rb_entry(parent, struct hist_entry, rb_node);
+
+		if (hist_entry__sort(he, iter) > 0)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	rb_link_node(&he->rb_node, parent, p);
+	rb_insert_color(&he->rb_node, root);
+
+	if (he->leaf || he->filtered)
+		return;
+
+	nd = rb_first(&he->hroot_out);
+	while (nd) {
+		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
+
+		/* fetch the successor before the node is unlinked */
+		nd = rb_next(nd);
+		rb_erase(&h->rb_node, &he->hroot_out);
+
+		resort_filtered_entry(&new_root, h);
+	}
+
+	he->hroot_out = new_root;
+}
+
+/*
+ * Apply the filter of the given @type, with filter value @arg, to the
+ * entries of a hierarchy and then re-sort the output tree.
+ *
+ * The filter statistics and column widths are reset first.  The walk then
+ * calls hist_entry__filter() on each visited entry and uses its result both
+ * to update the entry's filtered bit and to choose whether to continue with
+ * the entry's children or with its next sibling (see the three cases below).
+ */
+static void hists__filter_hierarchy(struct hists *hists, int type, const void *arg)
+{
+	struct rb_node *nd;
+	struct rb_root new_root = RB_ROOT;
+
+	hists->stats.nr_non_filtered_samples = 0;
+
+	hists__reset_filter_stats(hists);
+	hists__reset_col_len(hists);
+
+	nd = rb_first(&hists->entries);
+	while (nd) {
+		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
+		int ret;
+
+		ret = hist_entry__filter(h, type, arg);
+
+		/*
+		 * case 1. non-matching type
+		 * zero out the period, set filter marker and move to child
+		 */
+		if (ret < 0) {
+			memset(&h->stat, 0, sizeof(h->stat));
+			h->filtered |= (1 << type);
+
+			nd = __rb_hierarchy_next(&h->rb_node, HMD_FORCE_CHILD);
+		}
+		/*
+		 * case 2. matched type (filter out)
+		 * set filter marker and move to next
+		 */
+		else if (ret == 1) {
+			h->filtered |= (1 << type);
+
+			nd = __rb_hierarchy_next(&h->rb_node, HMD_FORCE_SIBLING);
+		}
+		/*
+		 * case 3. ok (not filtered)
+		 * add period to hists and parents, erase the filter marker
+		 * and move to next sibling
+		 */
+		else {
+			hists__remove_entry_filter(hists, h, type);
+
+			nd = __rb_hierarchy_next(&h->rb_node, HMD_FORCE_SIBLING);
+		}
+	}
+
+	/*
+	 * resort output after applying a new filter since filter in a lower
+	 * hierarchy can change periods in a upper hierarchy.
+	 */
+	nd = rb_first(&hists->entries);
+	while (nd) {
+		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
+
+		/* fetch the successor before the node is unlinked */
+		nd = rb_next(nd);
+		rb_erase(&h->rb_node, &hists->entries);
+
+		resort_filtered_entry(&new_root, h);
+	}
+
+	hists->entries = new_root;
+}
+
 void hists__filter_by_thread(struct hists *hists)
 {
-	hists__filter_by_type(hists, HIST_FILTER__THREAD,
-			      hists__filter_entry_by_thread);
+	/* hierarchy mode takes the filter value (thread_filter) instead of a callback */
+	if (symbol_conf.report_hierarchy)
+		hists__filter_hierarchy(hists, HIST_FILTER__THREAD,
+					hists->thread_filter);
+	else
+		hists__filter_by_type(hists, HIST_FILTER__THREAD,
+				      hists__filter_entry_by_thread);
 }
 
 void hists__filter_by_dso(struct hists *hists)
 {
-	hists__filter_by_type(hists, HIST_FILTER__DSO,
-			      hists__filter_entry_by_dso);
+	/* hierarchy mode takes the filter value (dso_filter) instead of a callback */
+	if (symbol_conf.report_hierarchy)
+		hists__filter_hierarchy(hists, HIST_FILTER__DSO,
+					hists->dso_filter);
+	else
+		hists__filter_by_type(hists, HIST_FILTER__DSO,
+				      hists__filter_entry_by_dso);
 }
 
 void hists__filter_by_symbol(struct hists *hists)
 {
-	hists__filter_by_type(hists, HIST_FILTER__SYMBOL,
-			      hists__filter_entry_by_symbol);
+	/* hierarchy mode takes the filter value (symbol_filter_str) instead of a callback */
+	if (symbol_conf.report_hierarchy)
+		hists__filter_hierarchy(hists, HIST_FILTER__SYMBOL,
+					hists->symbol_filter_str);
+	else
+		hists__filter_by_type(hists, HIST_FILTER__SYMBOL,
+				      hists__filter_entry_by_symbol);
 }
 
 void hists__filter_by_socket(struct hists *hists)
 {
-	hists__filter_by_type(hists, HIST_FILTER__SOCKET,
-			      hists__filter_entry_by_socket);
+	/* hierarchy mode takes a pointer to the filter value (socket_filter) instead of a callback */
+	if (symbol_conf.report_hierarchy)
+		hists__filter_hierarchy(hists, HIST_FILTER__SOCKET,
+					&hists->socket_filter);
+	else
+		hists__filter_by_type(hists, HIST_FILTER__SOCKET,
+				      hists__filter_entry_by_socket);
 }
 
 void events_stats__inc(struct events_stats *stats, u32 type)

+ 24 - 0
tools/perf/util/hist.h

@@ -237,6 +237,7 @@ struct perf_hpp_fmt {
 struct perf_hpp_list {
 	struct list_head fields;
 	struct list_head sorts;
+	/* incremented for each sort/dynamic entry registered as a sort field */
+	int nr_sort_keys;
 };
 
 extern struct perf_hpp_list perf_hpp_list;
@@ -301,6 +302,11 @@ void perf_hpp__append_sort_keys(struct perf_hpp_list *list);
 bool perf_hpp__is_sort_entry(struct perf_hpp_fmt *format);
 bool perf_hpp__is_dynamic_entry(struct perf_hpp_fmt *format);
 bool perf_hpp__defined_dynamic_entry(struct perf_hpp_fmt *fmt, struct hists *hists);
+bool perf_hpp__is_trace_entry(struct perf_hpp_fmt *fmt);
+bool perf_hpp__is_srcline_entry(struct perf_hpp_fmt *fmt);
+bool perf_hpp__is_srcfile_entry(struct perf_hpp_fmt *fmt);
+
+int hist_entry__filter(struct hist_entry *he, int type, const void *arg);
 
 static inline bool perf_hpp__should_skip(struct perf_hpp_fmt *format,
 					 struct hists *hists)
@@ -415,4 +421,22 @@ int perf_hist_config(const char *var, const char *value);
 
 void perf_hpp_list__init(struct perf_hpp_list *list);
 
+/* How __rb_hierarchy_next() treats the children of the current entry. */
+enum hierarchy_move_dir {
+	HMD_NORMAL,		/* descend only into unfolded non-leaf entries */
+	HMD_FORCE_SIBLING,	/* never descend */
+	HMD_FORCE_CHILD,	/* descend into every non-leaf entry */
+};
+
+struct rb_node *rb_hierarchy_last(struct rb_node *node);
+struct rb_node *__rb_hierarchy_next(struct rb_node *node,
+				    enum hierarchy_move_dir hmd);
+struct rb_node *rb_hierarchy_prev(struct rb_node *node);
+
+/* Next node of the hierarchy walk, honouring the folding state of entries. */
+static inline struct rb_node *rb_hierarchy_next(struct rb_node *node)
+{
+	return __rb_hierarchy_next(node, HMD_NORMAL);
+}
+
+/* number of blank columns printed per hierarchy depth level */
+#define HIERARCHY_INDENT  3
+
+
 #endif	/* __PERF_HIST_H */

+ 207 - 3
tools/perf/util/mem-events.c

@@ -2,18 +2,29 @@
 #include <stdlib.h>
 #include <string.h>
 #include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <api/fs/fs.h>
 #include "mem-events.h"
 #include "debug.h"
+#include "symbol.h"
 
-#define E(t, n) { .tag = t, .name = n }
+/* E(tag, event string, name of the event file looked up in sysfs) */
+#define E(t, n, s) { .tag = t, .name = n, .sysfs_name = s }
 
 struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = {
-	E("ldlat-loads",	"cpu/mem-loads,ldlat=30/P"),
-	E("ldlat-stores",	"cpu/mem-stores/P"),
+	E("ldlat-loads",	"cpu/mem-loads,ldlat=30/P",	"mem-loads"),
+	E("ldlat-stores",	"cpu/mem-stores/P",		"mem-stores"),
 };
 
 #undef E
 
+/*
+ * Return the event string of entry @i in perf_mem_events[].  @i is not
+ * range checked, and the cast drops any const qualifier of the name field.
+ */
+char *perf_mem_events__name(int i)
+{
+	return (char *)perf_mem_events[i].name;
+}
+
 int perf_mem_events__parse(const char *str)
 {
 	char *tok, *saveptr = NULL;
@@ -49,3 +60,196 @@ int perf_mem_events__parse(const char *str)
 	pr_err("failed: event '%s' not found, use '-e list' to get list of available events\n", str);
 	return -1;
 }
+
+/*
+ * Mark every perf_mem_events[] slot whose
+ * "<sysfs>/devices/cpu/events/<sysfs_name>" path can be stat()ed as
+ * supported.
+ *
+ * Returns 0 when at least one slot was marked, -ENOENT when
+ * sysfs__mount() yields NULL or no path could be stat()ed.
+ */
+int perf_mem_events__init(void)
+{
+	const char *sysfs = sysfs__mount();
+	bool any_supported = false;
+	int idx;
+
+	if (sysfs == NULL)
+		return -ENOENT;
+
+	for (idx = 0; idx < PERF_MEM_EVENTS__MAX; idx++) {
+		struct perf_mem_event *ev = &perf_mem_events[idx];
+		char event_path[PATH_MAX];
+		struct stat sb;
+
+		scnprintf(event_path, PATH_MAX, "%s/devices/cpu/events/%s",
+			  sysfs, ev->sysfs_name);
+
+		if (stat(event_path, &sb) != 0)
+			continue;
+
+		ev->supported = true;
+		any_supported = true;
+	}
+
+	return any_supported ? 0 : -ENOENT;
+}
+
+/*
+ * Names for the mem_dtlb bits, indexed by bit position.  Slots 1 and 2
+ * ("HIT"/"MISS") are never printed from the table: those bits are masked
+ * off below and rendered as a " hit"/" miss" suffix instead.
+ */
+static const char * const tlb_access[] = {
+	"N/A",
+	"HIT",
+	"MISS",
+	"L1",
+	"L2",
+	"Walker",
+	"Fault",
+};
+
+/*
+ * Format data_src.mem_dtlb of @mem_info into @out (@sz bytes), e.g.
+ * "L1 or L2 hit".  A NULL @mem_info is treated as PERF_MEM_TLB_NA.
+ *
+ * Returns the number of characters stored, not counting the final NUL.
+ */
+int perf_mem__tlb_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
+{
+	size_t l = 0, i;
+	u64 m = PERF_MEM_TLB_NA;
+	u64 hit, miss;
+
+	/* No room even for the terminator: do not touch @out at all. */
+	if (sz == 0)
+		return 0;
+
+	out[0] = '\0';
+
+	if (mem_info)
+		m = mem_info->data_src.mem_dtlb;
+
+	hit = m & PERF_MEM_TLB_HIT;
+	miss = m & PERF_MEM_TLB_MISS;
+
+	/* already taken care of */
+	m &= ~(PERF_MEM_TLB_HIT|PERF_MEM_TLB_MISS);
+
+	for (i = 0; m && i < ARRAY_SIZE(tlb_access); i++, m >>= 1) {
+		if (!(m & 0x1))
+			continue;
+		/*
+		 * Bounded separator: an unchecked strcat() followed by a
+		 * blind "l += 4" could write past @out and make l claim
+		 * more than was actually stored.
+		 */
+		if (l)
+			l += scnprintf(out + l, sz - l, " or ");
+		/* Table entries are data, never a format string. */
+		l += scnprintf(out + l, sz - l, "%s", tlb_access[i]);
+	}
+	if (*out == '\0')
+		l += scnprintf(out, sz - l, "N/A");
+	if (hit)
+		l += scnprintf(out + l, sz - l, " hit");
+	if (miss)
+		l += scnprintf(out + l, sz - l, " miss");
+
+	return l;
+}
+
+/*
+ * Names for the mem_lvl bits, indexed by bit position.  Slots 1 and 2
+ * ("HIT"/"MISS") are never printed from the table: those bits are masked
+ * off below and rendered as a " hit"/" miss" suffix instead.
+ */
+static const char * const mem_lvl[] = {
+	"N/A",
+	"HIT",
+	"MISS",
+	"L1",
+	"LFB",
+	"L2",
+	"L3",
+	"Local RAM",
+	"Remote RAM (1 hop)",
+	"Remote RAM (2 hops)",
+	"Remote Cache (1 hop)",
+	"Remote Cache (2 hops)",
+	"I/O",
+	"Uncached",
+};
+
+/*
+ * Format data_src.mem_lvl of @mem_info into @out (@sz bytes), e.g.
+ * "L1 hit".  A NULL @mem_info is treated as PERF_MEM_LVL_NA.
+ *
+ * Returns the number of characters stored, not counting the final NUL.
+ */
+int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
+{
+	size_t i, l = 0;
+	u64 m =  PERF_MEM_LVL_NA;
+	u64 hit, miss;
+
+	/* No room even for the terminator: do not touch @out at all. */
+	if (sz == 0)
+		return 0;
+
+	if (mem_info)
+		m  = mem_info->data_src.mem_lvl;
+
+	out[0] = '\0';
+
+	hit = m & PERF_MEM_LVL_HIT;
+	miss = m & PERF_MEM_LVL_MISS;
+
+	/* already taken care of */
+	m &= ~(PERF_MEM_LVL_HIT|PERF_MEM_LVL_MISS);
+
+	for (i = 0; m && i < ARRAY_SIZE(mem_lvl); i++, m >>= 1) {
+		if (!(m & 0x1))
+			continue;
+		/*
+		 * Bounded separator: an unchecked strcat() followed by a
+		 * blind "l += 4" could write past @out and make l claim
+		 * more than was actually stored.
+		 */
+		if (l)
+			l += scnprintf(out + l, sz - l, " or ");
+		/* Table entries are data, never a format string. */
+		l += scnprintf(out + l, sz - l, "%s", mem_lvl[i]);
+	}
+	if (*out == '\0')
+		l += scnprintf(out, sz - l, "N/A");
+	if (hit)
+		l += scnprintf(out + l, sz - l, " hit");
+	if (miss)
+		l += scnprintf(out + l, sz - l, " miss");
+
+	return l;
+}
+
+/* Names for the mem_snoop bits, indexed by bit position. */
+static const char * const snoop_access[] = {
+	"N/A",
+	"None",
+	"Miss",
+	"Hit",
+	"HitM",
+};
+
+/*
+ * Format data_src.mem_snoop of @mem_info into @out (@sz bytes), joining
+ * the names of all set bits with " or ".  A NULL @mem_info is treated as
+ * PERF_MEM_SNOOP_NA.
+ *
+ * Returns the number of characters stored, not counting the final NUL.
+ */
+int perf_mem__snp_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
+{
+	size_t i, l = 0;
+	u64 m = PERF_MEM_SNOOP_NA;
+
+	/* No room even for the terminator: do not touch @out at all. */
+	if (sz == 0)
+		return 0;
+
+	out[0] = '\0';
+
+	if (mem_info)
+		m = mem_info->data_src.mem_snoop;
+
+	for (i = 0; m && i < ARRAY_SIZE(snoop_access); i++, m >>= 1) {
+		if (!(m & 0x1))
+			continue;
+		/*
+		 * Bounded separator: an unchecked strcat() followed by a
+		 * blind "l += 4" could write past @out and make l claim
+		 * more than was actually stored.
+		 */
+		if (l)
+			l += scnprintf(out + l, sz - l, " or ");
+		/* Table entries are data, never a format string. */
+		l += scnprintf(out + l, sz - l, "%s", snoop_access[i]);
+	}
+
+	if (*out == '\0')
+		l += scnprintf(out, sz - l, "N/A");
+
+	return l;
+}
+
+/*
+ * Write "N/A", "Yes" or "No" into @out (@sz bytes) according to
+ * data_src.mem_lock of @mem_info; a NULL @mem_info counts as
+ * PERF_MEM_LOCK_NA.  The NA bit is tested before the LOCKED bit.
+ *
+ * Returns whatever scnprintf() reports for the string written.
+ */
+int perf_mem__lck_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
+{
+	u64 lock_bits = mem_info ? mem_info->data_src.mem_lock
+				 : PERF_MEM_LOCK_NA;
+
+	if (lock_bits & PERF_MEM_LOCK_NA)
+		return scnprintf(out, sz, "N/A");
+
+	if (lock_bits & PERF_MEM_LOCK_LOCKED)
+		return scnprintf(out, sz, "Yes");
+
+	return scnprintf(out, sz, "No");
+}
+
+/*
+ * Build the one-line data_src description
+ *   "<lvl>|SNP <snoop>|TLB <tlb>|LCK <lock>"
+ * in @out (@sz bytes) by chaining the four per-field formatters.
+ *
+ * Returns the sum of the lengths reported by each step.
+ */
+int perf_script__meminfo_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
+{
+	int len;
+
+	len  = perf_mem__lvl_scnprintf(out, sz, mem_info);
+
+	len += scnprintf(out + len, sz - len, "|SNP ");
+	len += perf_mem__snp_scnprintf(out + len, sz - len, mem_info);
+
+	len += scnprintf(out + len, sz - len, "|TLB ");
+	len += perf_mem__tlb_scnprintf(out + len, sz - len, mem_info);
+
+	len += scnprintf(out + len, sz - len, "|LCK ");
+	len += perf_mem__lck_scnprintf(out + len, sz - len, mem_info);
+
+	return len;
+}

+ 13 - 0
tools/perf/util/mem-events.h

@@ -5,8 +5,10 @@
 
 struct perf_mem_event {
 	bool		record;
+	bool		supported;	/* set by perf_mem_events__init() when the sysfs event file is found */
 	const char	*tag;
 	const char	*name;
+	const char	*sysfs_name;	/* file name probed under <sysfs>/devices/cpu/events/ */
 };
 
 enum {
@@ -18,5 +20,16 @@ enum {
 extern struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX];
 
 int perf_mem_events__parse(const char *str);
+int perf_mem_events__init(void);
+
+char *perf_mem_events__name(int i);
+
+struct mem_info;
+int perf_mem__tlb_scnprintf(char *out, size_t sz, struct mem_info *mem_info);
+int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info);
+int perf_mem__snp_scnprintf(char *out, size_t sz, struct mem_info *mem_info);
+int perf_mem__lck_scnprintf(char *out, size_t sz, struct mem_info *mem_info);
+
+int perf_script__meminfo_scnprintf(char *bf, size_t size, struct mem_info *mem_info);
 
 #endif /* __PERF_MEM_EVENTS_H */

+ 152 - 133
tools/perf/util/sort.c

@@ -6,6 +6,7 @@
 #include "evsel.h"
 #include "evlist.h"
 #include <traceevent/event-parse.h>
+#include "mem-events.h"
 
 regex_t		parent_regex;
 const char	default_parent_pattern[] = "^sys_|^do_page_fault";
@@ -89,10 +90,21 @@ static int hist_entry__thread_snprintf(struct hist_entry *he, char *bf,
 			       width, width, comm ?: "");
 }
 
+/*
+ * se_filter callback for thread-based filtering.
+ *
+ * Returns -1 when @type is not HIST_FILTER__THREAD, 0 when @arg is NULL
+ * or equals he->thread, and 1 when he->thread is a different thread.
+ */
+static int hist_entry__thread_filter(struct hist_entry *he, int type, const void *arg)
+{
+	const struct thread *wanted = arg;
+
+	if (type != HIST_FILTER__THREAD)
+		return -1;
+
+	if (wanted == NULL)
+		return 0;
+
+	return he->thread != wanted;
+}
+
 struct sort_entry sort_thread = {
 	.se_header	= "  Pid:Command",
 	.se_cmp		= sort__thread_cmp,
 	.se_snprintf	= hist_entry__thread_snprintf,
+	.se_filter	= hist_entry__thread_filter,
 	.se_width_idx	= HISTC_THREAD,
 };
 
@@ -130,6 +142,7 @@ struct sort_entry sort_comm = {
 	.se_collapse	= sort__comm_collapse,
 	.se_sort	= sort__comm_sort,
 	.se_snprintf	= hist_entry__comm_snprintf,
+	.se_filter	= hist_entry__thread_filter,
 	.se_width_idx	= HISTC_COMM,
 };
 
@@ -179,10 +192,21 @@ static int hist_entry__dso_snprintf(struct hist_entry *he, char *bf,
 	return _hist_entry__dso_snprintf(he->ms.map, bf, size, width);
 }
 
+/*
+ * se_filter callback for DSO-based filtering on he->ms.map.
+ *
+ * Returns -1 when @type is not HIST_FILTER__DSO, 0 when @arg is NULL or
+ * matches the entry's map->dso, and 1 when the entry has no map or its
+ * dso differs.
+ */
+static int hist_entry__dso_filter(struct hist_entry *he, int type, const void *arg)
+{
+	const struct dso *wanted = arg;
+
+	if (type != HIST_FILTER__DSO)
+		return -1;
+
+	if (wanted == NULL)
+		return 0;
+
+	if (he->ms.map == NULL)
+		return 1;
+
+	return he->ms.map->dso != wanted;
+}
+
 struct sort_entry sort_dso = {
 	.se_header	= "Shared Object",
 	.se_cmp		= sort__dso_cmp,
 	.se_snprintf	= hist_entry__dso_snprintf,
+	.se_filter	= hist_entry__dso_filter,
 	.se_width_idx	= HISTC_DSO,
 };
 
@@ -276,11 +300,22 @@ static int hist_entry__sym_snprintf(struct hist_entry *he, char *bf,
 					 he->level, bf, size, width);
 }
 
+/*
+ * se_filter callback for symbol-name filtering on he->ms.sym.
+ *
+ * Returns -1 when @type is not HIST_FILTER__SYMBOL, 0 when @arg is NULL
+ * or is a substring of the entry's symbol name, and 1 when the entry has
+ * no symbol or the name does not contain @arg.
+ */
+static int hist_entry__sym_filter(struct hist_entry *he, int type, const void *arg)
+{
+	const char *needle = arg;
+
+	if (type != HIST_FILTER__SYMBOL)
+		return -1;
+
+	if (needle == NULL)
+		return 0;
+
+	if (he->ms.sym == NULL)
+		return 1;
+
+	return strstr(he->ms.sym->name, needle) == NULL;
+}
+
 struct sort_entry sort_sym = {
 	.se_header	= "Symbol",
 	.se_cmp		= sort__sym_cmp,
 	.se_sort	= sort__sym_sort,
 	.se_snprintf	= hist_entry__sym_snprintf,
+	.se_filter	= hist_entry__sym_filter,
 	.se_width_idx	= HISTC_SYMBOL,
 };
 
@@ -439,10 +474,21 @@ static int hist_entry__socket_snprintf(struct hist_entry *he, char *bf,
 	return repsep_snprintf(bf, size, "%*.*d", width, width-3, he->socket);
 }
 
+/*
+ * se_filter callback for socket-id filtering.
+ *
+ * @arg points at an int socket id only when @type is
+ * HIST_FILTER__SOCKET.  hist_entry__filter() forwards every filter type
+ * to this callback, and for other types @arg is a different object (the
+ * sibling filters accept a NULL @arg), so it must not be dereferenced
+ * before @type has been checked.
+ *
+ * Returns -1 for a foreign @type, 0 when the id is negative or equals
+ * he->socket, and 1 when the entry is on a different socket.
+ */
+static int hist_entry__socket_filter(struct hist_entry *he, int type, const void *arg)
+{
+	int sk;
+
+	if (type != HIST_FILTER__SOCKET)
+		return -1;
+
+	sk = *(const int *)arg;
+
+	return sk >= 0 && he->socket != sk;
+}
+
 struct sort_entry sort_socket = {
 	.se_header      = "Socket",
 	.se_cmp	        = sort__socket_cmp,
 	.se_snprintf    = hist_entry__socket_snprintf,
+	.se_filter      = hist_entry__socket_filter,
 	.se_width_idx	= HISTC_SOCKET,
 };
 
@@ -529,6 +575,18 @@ static int hist_entry__dso_from_snprintf(struct hist_entry *he, char *bf,
 		return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A");
 }
 
+/*
+ * se_filter callback for DSO filtering on the branch source
+ * (he->branch_info->from.map).
+ *
+ * Returns -1 when @type is not HIST_FILTER__DSO, 0 when @arg is NULL or
+ * matches the source map's dso, and 1 when branch info or the source map
+ * is missing or the dso differs.
+ */
+static int hist_entry__dso_from_filter(struct hist_entry *he, int type,
+				       const void *arg)
+{
+	const struct dso *wanted = arg;
+
+	if (type != HIST_FILTER__DSO)
+		return -1;
+
+	if (wanted == NULL)
+		return 0;
+
+	if (he->branch_info == NULL || he->branch_info->from.map == NULL)
+		return 1;
+
+	return he->branch_info->from.map->dso != wanted;
+}
+
 static int64_t
 sort__dso_to_cmp(struct hist_entry *left, struct hist_entry *right)
 {
@@ -549,6 +607,18 @@ static int hist_entry__dso_to_snprintf(struct hist_entry *he, char *bf,
 		return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A");
 }
 
+/*
+ * se_filter callback for DSO filtering on the branch target
+ * (he->branch_info->to.map).
+ *
+ * Returns -1 when @type is not HIST_FILTER__DSO, 0 when @arg is NULL or
+ * matches the target map's dso, and 1 when branch info or the target map
+ * is missing or the dso differs.
+ */
+static int hist_entry__dso_to_filter(struct hist_entry *he, int type,
+				     const void *arg)
+{
+	const struct dso *wanted = arg;
+
+	if (type != HIST_FILTER__DSO)
+		return -1;
+
+	if (wanted == NULL)
+		return 0;
+
+	if (he->branch_info == NULL || he->branch_info->to.map == NULL)
+		return 1;
+
+	return he->branch_info->to.map->dso != wanted;
+}
+
 static int64_t
 sort__sym_from_cmp(struct hist_entry *left, struct hist_entry *right)
 {
@@ -610,10 +680,35 @@ static int hist_entry__sym_to_snprintf(struct hist_entry *he, char *bf,
 	return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A");
 }
 
+/*
+ * se_filter callback for symbol-name filtering on the branch source
+ * (he->branch_info->from.sym).
+ *
+ * Returns -1 when @type is not HIST_FILTER__SYMBOL, 0 when @arg is NULL
+ * or is a substring of the source symbol name, and 1 when branch info or
+ * the source symbol is missing or the name does not contain @arg.
+ */
+static int hist_entry__sym_from_filter(struct hist_entry *he, int type,
+				       const void *arg)
+{
+	const char *needle = arg;
+
+	if (type != HIST_FILTER__SYMBOL)
+		return -1;
+
+	if (needle == NULL)
+		return 0;
+
+	if (he->branch_info == NULL || he->branch_info->from.sym == NULL)
+		return 1;
+
+	return strstr(he->branch_info->from.sym->name, needle) == NULL;
+}
+
+/*
+ * se_filter callback for symbol-name filtering on the branch target
+ * (he->branch_info->to.sym).
+ *
+ * Returns -1 when @type is not HIST_FILTER__SYMBOL, 0 when @arg is NULL
+ * or is a substring of the target symbol name, and 1 when branch info or
+ * the target symbol is missing or the name does not contain @arg.
+ */
+static int hist_entry__sym_to_filter(struct hist_entry *he, int type,
+				       const void *arg)
+{
+	const char *needle = arg;
+
+	if (type != HIST_FILTER__SYMBOL)
+		return -1;
+
+	if (needle == NULL)
+		return 0;
+
+	if (he->branch_info == NULL || he->branch_info->to.sym == NULL)
+		return 1;
+
+	return strstr(he->branch_info->to.sym->name, needle) == NULL;
+}
+
 struct sort_entry sort_dso_from = {
 	.se_header	= "Source Shared Object",
 	.se_cmp		= sort__dso_from_cmp,
 	.se_snprintf	= hist_entry__dso_from_snprintf,
+	.se_filter	= hist_entry__dso_from_filter,
 	.se_width_idx	= HISTC_DSO_FROM,
 };
 
@@ -621,6 +716,7 @@ struct sort_entry sort_dso_to = {
 	.se_header	= "Target Shared Object",
 	.se_cmp		= sort__dso_to_cmp,
 	.se_snprintf	= hist_entry__dso_to_snprintf,
+	.se_filter	= hist_entry__dso_to_filter,
 	.se_width_idx	= HISTC_DSO_TO,
 };
 
@@ -628,6 +724,7 @@ struct sort_entry sort_sym_from = {
 	.se_header	= "Source Symbol",
 	.se_cmp		= sort__sym_from_cmp,
 	.se_snprintf	= hist_entry__sym_from_snprintf,
+	.se_filter	= hist_entry__sym_from_filter,
 	.se_width_idx	= HISTC_SYMBOL_FROM,
 };
 
@@ -635,6 +732,7 @@ struct sort_entry sort_sym_to = {
 	.se_header	= "Target Symbol",
 	.se_cmp		= sort__sym_to_cmp,
 	.se_snprintf	= hist_entry__sym_to_snprintf,
+	.se_filter	= hist_entry__sym_to_filter,
 	.se_width_idx	= HISTC_SYMBOL_TO,
 };
 
@@ -794,19 +892,9 @@ sort__locked_cmp(struct hist_entry *left, struct hist_entry *right)
 static int hist_entry__locked_snprintf(struct hist_entry *he, char *bf,
 				    size_t size, unsigned int width)
 {
-	const char *out;
-	u64 mask = PERF_MEM_LOCK_NA;
-
-	if (he->mem_info)
-		mask = he->mem_info->data_src.mem_lock;
-
-	if (mask & PERF_MEM_LOCK_NA)
-		out = "N/A";
-	else if (mask & PERF_MEM_LOCK_LOCKED)
-		out = "Yes";
-	else
-		out = "No";
+	char out[10];
 
+	perf_mem__lck_scnprintf(out, sizeof(out), he->mem_info);
 	return repsep_snprintf(bf, size, "%.*s", width, out);
 }
 
@@ -829,53 +917,12 @@ sort__tlb_cmp(struct hist_entry *left, struct hist_entry *right)
 	return (int64_t)(data_src_r.mem_dtlb - data_src_l.mem_dtlb);
 }
 
-static const char * const tlb_access[] = {
-	"N/A",
-	"HIT",
-	"MISS",
-	"L1",
-	"L2",
-	"Walker",
-	"Fault",
-};
-
 static int hist_entry__tlb_snprintf(struct hist_entry *he, char *bf,
 				    size_t size, unsigned int width)
 {
 	char out[64];
-	size_t sz = sizeof(out) - 1; /* -1 for null termination */
-	size_t l = 0, i;
-	u64 m = PERF_MEM_TLB_NA;
-	u64 hit, miss;
-
-	out[0] = '\0';
-
-	if (he->mem_info)
-		m = he->mem_info->data_src.mem_dtlb;
-
-	hit = m & PERF_MEM_TLB_HIT;
-	miss = m & PERF_MEM_TLB_MISS;
-
-	/* already taken care of */
-	m &= ~(PERF_MEM_TLB_HIT|PERF_MEM_TLB_MISS);
-
-	for (i = 0; m && i < ARRAY_SIZE(tlb_access); i++, m >>= 1) {
-		if (!(m & 0x1))
-			continue;
-		if (l) {
-			strcat(out, " or ");
-			l += 4;
-		}
-		strncat(out, tlb_access[i], sz - l);
-		l += strlen(tlb_access[i]);
-	}
-	if (*out == '\0')
-		strcpy(out, "N/A");
-	if (hit)
-		strncat(out, " hit", sz - l);
-	if (miss)
-		strncat(out, " miss", sz - l);
 
+	perf_mem__tlb_scnprintf(out, sizeof(out), he->mem_info);
 	return repsep_snprintf(bf, size, "%-*s", width, out);
 }
 
@@ -898,60 +945,12 @@ sort__lvl_cmp(struct hist_entry *left, struct hist_entry *right)
 	return (int64_t)(data_src_r.mem_lvl - data_src_l.mem_lvl);
 }
 
-static const char * const mem_lvl[] = {
-	"N/A",
-	"HIT",
-	"MISS",
-	"L1",
-	"LFB",
-	"L2",
-	"L3",
-	"Local RAM",
-	"Remote RAM (1 hop)",
-	"Remote RAM (2 hops)",
-	"Remote Cache (1 hop)",
-	"Remote Cache (2 hops)",
-	"I/O",
-	"Uncached",
-};
-
 static int hist_entry__lvl_snprintf(struct hist_entry *he, char *bf,
 				    size_t size, unsigned int width)
 {
 	char out[64];
-	size_t sz = sizeof(out) - 1; /* -1 for null termination */
-	size_t i, l = 0;
-	u64 m =  PERF_MEM_LVL_NA;
-	u64 hit, miss;
-
-	if (he->mem_info)
-		m  = he->mem_info->data_src.mem_lvl;
-
-	out[0] = '\0';
-
-	hit = m & PERF_MEM_LVL_HIT;
-	miss = m & PERF_MEM_LVL_MISS;
-
-	/* already taken care of */
-	m &= ~(PERF_MEM_LVL_HIT|PERF_MEM_LVL_MISS);
-
-	for (i = 0; m && i < ARRAY_SIZE(mem_lvl); i++, m >>= 1) {
-		if (!(m & 0x1))
-			continue;
-		if (l) {
-			strcat(out, " or ");
-			l += 4;
-		}
-		strncat(out, mem_lvl[i], sz - l);
-		l += strlen(mem_lvl[i]);
-	}
-	if (*out == '\0')
-		strcpy(out, "N/A");
-	if (hit)
-		strncat(out, " hit", sz - l);
-	if (miss)
-		strncat(out, " miss", sz - l);
 
+	perf_mem__lvl_scnprintf(out, sizeof(out), he->mem_info);
 	return repsep_snprintf(bf, size, "%-*s", width, out);
 }
 
@@ -974,41 +973,12 @@ sort__snoop_cmp(struct hist_entry *left, struct hist_entry *right)
 	return (int64_t)(data_src_r.mem_snoop - data_src_l.mem_snoop);
 }
 
-static const char * const snoop_access[] = {
-	"N/A",
-	"None",
-	"Miss",
-	"Hit",
-	"HitM",
-};
-
 static int hist_entry__snoop_snprintf(struct hist_entry *he, char *bf,
 				    size_t size, unsigned int width)
 {
 	char out[64];
-	size_t sz = sizeof(out) - 1; /* -1 for null termination */
-	size_t i, l = 0;
-	u64 m = PERF_MEM_SNOOP_NA;
-
-	out[0] = '\0';
-
-	if (he->mem_info)
-		m = he->mem_info->data_src.mem_snoop;
-
-	for (i = 0; m && i < ARRAY_SIZE(snoop_access); i++, m >>= 1) {
-		if (!(m & 0x1))
-			continue;
-		if (l) {
-			strcat(out, " or ");
-			l += 4;
-		}
-		strncat(out, snoop_access[i], sz - l);
-		l += strlen(snoop_access[i]);
-	}
-
-	if (*out == '\0')
-		strcpy(out, "N/A");
 
+	perf_mem__snp_scnprintf(out, sizeof(out), he->mem_info);
 	return repsep_snprintf(bf, size, "%-*s", width, out);
 }
 
@@ -1518,6 +1488,39 @@ bool perf_hpp__is_sort_entry(struct perf_hpp_fmt *format)
 	return format->header == __sort__hpp_header;
 }
 
+/* True when @fmt is a sort-entry format whose sort_entry is sort_trace. */
+bool perf_hpp__is_trace_entry(struct perf_hpp_fmt *fmt)
+{
+	if (perf_hpp__is_sort_entry(fmt)) {
+		struct hpp_sort_entry *entry;
+
+		entry = container_of(fmt, struct hpp_sort_entry, hpp);
+		return entry->se == &sort_trace;
+	}
+
+	return false;
+}
+
+/* True when @fmt is a sort-entry format whose sort_entry is sort_srcline. */
+bool perf_hpp__is_srcline_entry(struct perf_hpp_fmt *fmt)
+{
+	if (perf_hpp__is_sort_entry(fmt)) {
+		struct hpp_sort_entry *entry;
+
+		entry = container_of(fmt, struct hpp_sort_entry, hpp);
+		return entry->se == &sort_srcline;
+	}
+
+	return false;
+}
+
+/* True when @fmt is a sort-entry format whose sort_entry is sort_srcfile. */
+bool perf_hpp__is_srcfile_entry(struct perf_hpp_fmt *fmt)
+{
+	if (perf_hpp__is_sort_entry(fmt)) {
+		struct hpp_sort_entry *entry;
+
+		entry = container_of(fmt, struct hpp_sort_entry, hpp);
+		return entry->se == &sort_srcfile;
+	}
+
+	return false;
+}
+
 static bool __sort__hpp_equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b)
 {
 	struct hpp_sort_entry *hse_a;
@@ -1592,6 +1595,22 @@ static struct perf_hpp_fmt *__hpp_dimension__alloc_hpp(struct hpp_dimension *hd)
 	return fmt;
 }
 
+/*
+ * Run the se_filter callback of the sort entry behind he->fmt.
+ *
+ * Returns -1 when @he has no fmt, the fmt is not a sort entry, or the
+ * sort entry provides no se_filter; otherwise the callback's result.
+ */
+int hist_entry__filter(struct hist_entry *he, int type, const void *arg)
+{
+	struct perf_hpp_fmt *fmt = he->fmt;
+	struct hpp_sort_entry *hse;
+
+	if (!fmt || !perf_hpp__is_sort_entry(fmt))
+		return -1;
+
+	hse = container_of(fmt, struct hpp_sort_entry, hpp);
+	if (!hse->se->se_filter)
+		return -1;
+
+	return hse->se->se_filter(he, type, arg);
+}
+
 static int __sort_dimension__add_hpp_sort(struct sort_dimension *sd)
 {
 	struct hpp_sort_entry *hse = __sort_dimension__alloc_hpp(sd);

+ 13 - 1
tools/perf/util/sort.h

@@ -96,9 +96,11 @@ struct hist_entry {
 	s32			socket;
 	s32			cpu;
 	u8			cpumode;
+	u8			depth;
 
 	/* We are added by hists__add_dummy_entry. */
 	bool			dummy;
+	bool			leaf;
 
 	char			level;
 	u8			filtered;
@@ -120,13 +122,22 @@ struct hist_entry {
 	char			*srcline;
 	char			*srcfile;
 	struct symbol		*parent;
-	struct rb_root		sorted_chain;
 	struct branch_info	*branch_info;
 	struct hists		*hists;
 	struct mem_info		*mem_info;
 	void			*raw_data;
 	u32			raw_size;
 	void			*trace_output;
+	struct perf_hpp_fmt	*fmt;
+	struct hist_entry	*parent_he;
+	union {
+		/* this is for hierarchical entry structure */
+		struct {
+			struct rb_root	hroot_in;
+			struct rb_root  hroot_out;
+		};				/* non-leaf entries */
+		struct rb_root	sorted_chain;	/* leaf entry has callchains */
+	};
 	struct callchain_root	callchain[0]; /* must be last member */
 };
 
@@ -234,6 +245,7 @@ struct sort_entry {
 	int64_t	(*se_sort)(struct hist_entry *, struct hist_entry *);
 	int	(*se_snprintf)(struct hist_entry *he, char *bf, size_t size,
 			       unsigned int width);
+	int	(*se_filter)(struct hist_entry *he, int type, const void *arg);
 	u8	se_width_idx;
 };
 

+ 2 - 1
tools/perf/util/symbol.h

@@ -110,7 +110,8 @@ struct symbol_conf {
 			has_filter,
 			show_ref_callgraph,
 			hide_unresolved,
-			raw_trace;
+			raw_trace,
+			report_hierarchy;
 	const char	*vmlinux_name,
 			*kallsyms_name,
 			*source_prefix,

+ 37 - 0
tools/perf/util/util.c

@@ -14,6 +14,7 @@
 #include <limits.h>
 #include <byteswap.h>
 #include <linux/kernel.h>
+#include <linux/log2.h>
 #include <unistd.h>
 #include "callchain.h"
 #include "strlist.h"
@@ -670,3 +671,39 @@ int fetch_current_timestamp(char *buf, size_t sz)
 
 	return 0;
 }
+
+/*
+ * Walk @len bytes of @data and report them to @printer as a hex-dump
+ * style event stream (see enum binary_printer_ops), @bytes_per_line
+ * bytes per line after rounding that value up to a power of two.
+ *
+ * Each line is emitted as: LINE_BEGIN, ADDR, one NUM_DATA per byte,
+ * NUM_PAD for every missing byte of a short last line, SEP, one
+ * CHAR_DATA per byte, matching CHAR_PAD events, LINE_END.  The whole
+ * dump is bracketed by DATA_BEGIN/DATA_END.  Nothing happens when
+ * @printer is NULL.
+ */
+void print_binary(unsigned char *data, size_t len,
+		  size_t bytes_per_line, print_binary_t printer,
+		  void *extra)
+{
+	size_t pos, k, mask;
+
+	if (!printer)
+		return;
+
+	bytes_per_line = roundup_pow_of_two(bytes_per_line);
+	mask = bytes_per_line - 1;
+
+	printer(BINARY_PRINT_DATA_BEGIN, 0, extra);
+	for (pos = 0; pos < len; pos++) {
+		size_t col = pos & mask;	/* offset within the line */
+		size_t pad;
+
+		if (col == 0) {
+			printer(BINARY_PRINT_LINE_BEGIN, -1, extra);
+			printer(BINARY_PRINT_ADDR, pos, extra);
+		}
+
+		printer(BINARY_PRINT_NUM_DATA, data[pos], extra);
+
+		/* Keep going until the line is full or the data ends. */
+		if (col != mask && pos != len - 1)
+			continue;
+
+		/* Number of byte slots left empty on this line. */
+		pad = mask - col;
+
+		for (k = 0; k < pad; k++)
+			printer(BINARY_PRINT_NUM_PAD, -1, extra);
+
+		printer(BINARY_PRINT_SEP, pos, extra);
+
+		/* Replay the line's bytes, starting at its first byte. */
+		for (k = pos - col; k <= pos; k++)
+			printer(BINARY_PRINT_CHAR_DATA, data[k], extra);
+
+		for (k = 0; k < pad; k++)
+			printer(BINARY_PRINT_CHAR_PAD, pos, extra);
+
+		printer(BINARY_PRINT_LINE_END, -1, extra);
+	}
+	printer(BINARY_PRINT_DATA_END, -1, extra);
+}

+ 22 - 0
tools/perf/util/util.h

@@ -82,6 +82,8 @@
 
 extern const char *graph_line;
 extern const char *graph_dotted_line;
+extern const char *spaces;
+extern const char *dots;
 extern char buildid_dir[];
 
 /* On most systems <limits.h> would have given us this, but
@@ -345,4 +347,24 @@ const char *perf_tip(const char *dirpath);
 bool is_regular_file(const char *file);
 int fetch_current_timestamp(char *buf, size_t sz);
 
+/*
+ * Events handed to a print_binary_t callback by print_binary(), listed in
+ * the order they occur: DATA_BEGIN once; then for every line LINE_BEGIN,
+ * ADDR (val = offset of the line's first byte), NUM_DATA per byte
+ * (val = the byte), NUM_PAD per missing byte of a short last line, SEP,
+ * CHAR_DATA per byte (val = the byte), CHAR_PAD per missing byte,
+ * LINE_END; finally DATA_END once.
+ */
+enum binary_printer_ops {
+	BINARY_PRINT_DATA_BEGIN,
+	BINARY_PRINT_LINE_BEGIN,
+	BINARY_PRINT_ADDR,
+	BINARY_PRINT_NUM_DATA,
+	BINARY_PRINT_NUM_PAD,
+	BINARY_PRINT_SEP,
+	BINARY_PRINT_CHAR_DATA,
+	BINARY_PRINT_CHAR_PAD,
+	BINARY_PRINT_LINE_END,
+	BINARY_PRINT_DATA_END,
+};
+
+typedef void (*print_binary_t)(enum binary_printer_ops,
+			       unsigned int val,
+			       void *extra);
+
+void print_binary(unsigned char *data, size_t len,
+		  size_t bytes_per_line, print_binary_t printer,
+		  void *extra);
 #endif /* GIT_COMPAT_UTIL_H */