Ver código fonte

Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes:

User visible changes:

  - 'perf bench mem' now prefaults unconditionally, no sense in
    providing modes where page faults are measured. (Ingo Molnar)

  - Harmonize -l/--nr_loops across 'perf bench'. (Ingo Molnar)

  - Various 'perf bench' consistency improvements. (Ingo Molnar)

  - Suppress libtraceevent warnings in non-verbose 'perf test' mode.
    (Namhyung Kim)

  - Move some tracepoint event test error messages to the verbose mode
    of 'perf test'. (Namhyung Kim)

  - Make 'perf help' usage message consistent with other tools. (Yunlong Song)

Build fixes:

  - Fix 'perf bench' build with gcc 4.4.7. (Arnaldo Carvalho de Melo)

Infrastructure changes:

  - 'perf stat' prep work for the 'perf stat scripting' patchkit. (Jiri Olsa)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Ingo Molnar 9 anos atrás
pai
commit
40d4f23f42

+ 19 - 35
tools/perf/Documentation/perf-bench.txt

@@ -82,7 +82,7 @@ Be multi thread instead of multi process
 Specify number of groups
 
 -l::
---loop=::
+--nr_loops=::
 Specify number of loops
 
 Example of *messaging*
@@ -139,64 +139,48 @@ Suite for evaluating performance of simple memory copy in various ways.
 Options of *memcpy*
 ^^^^^^^^^^^^^^^^^^^
 -l::
---length::
-Specify length of memory to copy (default: 1MB).
+--size::
+Specify size of memory to copy (default: 1MB).
 Available units are B, KB, MB, GB and TB (case insensitive).
 
--r::
---routine::
-Specify routine to copy (default: default).
-Available routines are depend on the architecture.
+-f::
+--function::
+Specify function to copy (default: all).
+Available functions depend on the architecture.
 On x86-64, x86-64-unrolled, x86-64-movsq and x86-64-movsb are supported.
 
--i::
---iterations::
+-l::
+--nr_loops::
 Repeat memcpy invocation this number of times.
 
 -c::
---cycle::
+--cycles::
 Use perf's cpu-cycles event instead of gettimeofday syscall.
 
--o::
---only-prefault::
-Show only the result with page faults before memcpy.
-
--n::
---no-prefault::
-Show only the result without page faults before memcpy.
-
 *memset*::
 Suite for evaluating performance of simple memory set in various ways.
 
 Options of *memset*
 ^^^^^^^^^^^^^^^^^^^
 -l::
---length::
-Specify length of memory to set (default: 1MB).
+--size::
+Specify size of memory to set (default: 1MB).
 Available units are B, KB, MB, GB and TB (case insensitive).
 
--r::
---routine::
-Specify routine to set (default: default).
-Available routines are depend on the architecture.
+-f::
+--function::
+Specify function to set (default: all).
+Available functions depend on the architecture.
 On x86-64, x86-64-unrolled, x86-64-stosq and x86-64-stosb are supported.
 
--i::
---iterations::
+-l::
+--nr_loops::
 Repeat memset invocation this number of times.
 
 -c::
---cycle::
+--cycles::
 Use perf's cpu-cycles event instead of gettimeofday syscall.
 
--o::
---only-prefault::
-Show only the result with page faults before memset.
-
--n::
---no-prefault::
-Show only the result without page faults before memset.
-
 SUITES FOR 'numa'
 ~~~~~~~~~~~~~~~~~
 *mem*::

+ 1 - 1
tools/perf/bench/Build

@@ -1,6 +1,6 @@
 perf-y += sched-messaging.o
 perf-y += sched-pipe.o
-perf-y += mem-memcpy.o
+perf-y += mem-functions.o
 perf-y += futex-hash.o
 perf-y += futex-wake.o
 perf-y += futex-wake-parallel.o

+ 379 - 0
tools/perf/bench/mem-functions.c

@@ -0,0 +1,379 @@
+/*
+ * mem-functions.c
+ *
+ * Simple memcpy() and memset() benchmarks
+ *
+ * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
+ */
+
+#include "../perf.h"
+#include "../util/util.h"
+#include "../util/parse-options.h"
+#include "../util/header.h"
+#include "../util/cloexec.h"
+#include "bench.h"
+#include "mem-memcpy-arch.h"
+#include "mem-memset-arch.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#include <errno.h>
+
+#define K 1024
+
+static const char	*size_str	= "1MB";
+static const char	*function_str	= "all";
+static int		nr_loops	= 1;
+static bool		use_cycles;
+static int		cycles_fd;
+
+static const struct option options[] = {
+	OPT_STRING('s', "size", &size_str, "1MB",
+		    "Specify the size of the memory buffers. "
+		    "Available units: B, KB, MB, GB and TB (case insensitive)"),
+
+	OPT_STRING('f', "function", &function_str, "all",
+		    "Specify the function to run, \"all\" runs all available functions, \"help\" lists them"),
+
+	OPT_INTEGER('l', "nr_loops", &nr_loops,
+		    "Specify the number of loops to run. (default: 1)"),
+
+	OPT_BOOLEAN('c', "cycles", &use_cycles,
+		    "Use a cycles event instead of gettimeofday() to measure performance"),
+
+	OPT_END()
+};
+
+typedef void *(*memcpy_t)(void *, const void *, size_t);
+typedef void *(*memset_t)(void *, int, size_t);
+
+struct function {
+	const char *name;
+	const char *desc;
+	union {
+		memcpy_t memcpy;
+		memset_t memset;
+	} fn;
+};
+
+static struct perf_event_attr cycle_attr = {
+	.type		= PERF_TYPE_HARDWARE,
+	.config		= PERF_COUNT_HW_CPU_CYCLES
+};
+
+static void init_cycles(void)
+{
+	cycles_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, perf_event_open_cloexec_flag());
+
+	if (cycles_fd < 0 && errno == ENOSYS)
+		die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
+	else
+		BUG_ON(cycles_fd < 0);
+}
+
+static u64 get_cycles(void)
+{
+	int ret;
+	u64 clk;
+
+	ret = read(cycles_fd, &clk, sizeof(u64));
+	BUG_ON(ret != sizeof(u64));
+
+	return clk;
+}
+
+static double timeval2double(struct timeval *ts)
+{
+	return (double)ts->tv_sec + (double)ts->tv_usec / (double)1000000;
+}
+
+#define print_bps(x) do {	/* print throughput x (bytes/sec, double) scaled to B/KB/MB/GB */ \
+		if ((x) < K)						\
+			printf(" %14lf bytes/sec\n", (x));		\
+		else if ((x) < K * K)	/* was "%14lfd": stray 'd' leaked into output */ \
+			printf(" %14lf KB/sec\n", (x) / K);		\
+		else if ((x) < K * K * K)				\
+			printf(" %14lf MB/sec\n", (x) / K / K);		\
+		else							\
+			printf(" %14lf GB/sec\n", (x) / K / K / K);	\
+	} while (0)
+
+struct bench_mem_info {
+	const struct function *functions;
+	u64 (*do_cycles)(const struct function *r, size_t size);
+	double (*do_gettimeofday)(const struct function *r, size_t size);
+	const char *const *usage;
+};
+
+static void __bench_mem_function(struct bench_mem_info *info, int r_idx, size_t size, double size_total)
+{
+	const struct function *r = &info->functions[r_idx];
+	double result_bps = 0.0;
+	u64 result_cycles = 0;
+
+	printf("# function '%s' (%s)\n", r->name, r->desc);
+
+	if (bench_format == BENCH_FORMAT_DEFAULT)
+		printf("# Copying %s bytes ...\n\n", size_str);
+
+	if (use_cycles) {
+		result_cycles = info->do_cycles(r, size);
+	} else {
+		result_bps = info->do_gettimeofday(r, size);
+	}
+
+	switch (bench_format) {
+	case BENCH_FORMAT_DEFAULT:
+		if (use_cycles) {
+			printf(" %14lf cycles/byte\n", (double)result_cycles/size_total);
+		} else {
+			print_bps(result_bps);
+		}
+		break;
+
+	case BENCH_FORMAT_SIMPLE:
+		if (use_cycles) {
+			printf("%lf\n", (double)result_cycles/size_total);
+		} else {
+			printf("%lf\n", result_bps);
+		}
+		break;
+
+	default:
+		BUG_ON(1);
+		break;
+	}
+}
+
+static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *info)
+{
+	int i;
+	size_t size;
+	double size_total;
+
+	argc = parse_options(argc, argv, options, info->usage, 0);
+
+	if (use_cycles)
+		init_cycles();
+
+	size = (size_t)perf_atoll((char *)size_str);
+	size_total = (double)size * nr_loops;
+
+	if ((s64)size <= 0) {
+		fprintf(stderr, "Invalid size:%s\n", size_str);
+		return 1;
+	}
+
+	if (!strncmp(function_str, "all", 3)) {
+		for (i = 0; info->functions[i].name; i++)
+			__bench_mem_function(info, i, size, size_total);
+		return 0;
+	}
+
+	for (i = 0; info->functions[i].name; i++) {
+		if (!strcmp(info->functions[i].name, function_str))
+			break;
+	}
+	if (!info->functions[i].name) {
+		if (strcmp(function_str, "help") && strcmp(function_str, "h"))
+			printf("Unknown function: %s\n", function_str);
+		printf("Available functions:\n");
+		for (i = 0; info->functions[i].name; i++) {
+			printf("\t%s ... %s\n",
+			       info->functions[i].name, info->functions[i].desc);
+		}
+		return 1;
+	}
+
+	__bench_mem_function(info, i, size, size_total);
+
+	return 0;
+}
+
+static void memcpy_alloc_mem(void **dst, void **src, size_t size)
+{
+	*dst = zalloc(size);
+	if (!*dst)
+		die("memory allocation failed - maybe size is too large?\n");
+
+	*src = zalloc(size);
+	if (!*src)
+		die("memory allocation failed - maybe size is too large?\n");
+
+	/* Make sure to always prefault zero pages even if MMAP_THRESH is crossed: */
+	memset(*src, 0, size);
+}
+
+static u64 do_memcpy_cycles(const struct function *r, size_t size)
+{
+	u64 cycle_start = 0ULL, cycle_end = 0ULL;
+	void *src = NULL, *dst = NULL;
+	memcpy_t fn = r->fn.memcpy;
+	int i;
+
+	memcpy_alloc_mem(&dst, &src, size);
+
+	/*
+	 * We prefault the freshly allocated memory range here,
+	 * to not measure page fault overhead:
+	 */
+	fn(dst, src, size);
+
+	cycle_start = get_cycles();
+	for (i = 0; i < nr_loops; ++i)
+		fn(dst, src, size);
+	cycle_end = get_cycles();
+
+	free(src);
+	free(dst);
+	return cycle_end - cycle_start;
+}
+
+static double do_memcpy_gettimeofday(const struct function *r, size_t size)
+{
+	struct timeval tv_start, tv_end, tv_diff;
+	memcpy_t fn = r->fn.memcpy;
+	void *src = NULL, *dst = NULL;
+	int i;
+
+	memcpy_alloc_mem(&dst, &src, size);
+
+	/*
+	 * We prefault the freshly allocated memory range here,
+	 * to not measure page fault overhead:
+	 */
+	fn(dst, src, size);
+
+	BUG_ON(gettimeofday(&tv_start, NULL));
+	for (i = 0; i < nr_loops; ++i)
+		fn(dst, src, size);
+	BUG_ON(gettimeofday(&tv_end, NULL));
+
+	timersub(&tv_end, &tv_start, &tv_diff);
+
+	free(src);
+	free(dst);
+
+	return (double)(((double)size * nr_loops) / timeval2double(&tv_diff));
+}
+
+struct function memcpy_functions[] = {
+	{ .name		= "default",
+	  .desc		= "Default memcpy() provided by glibc",
+	  .fn.memcpy	= memcpy },
+
+#ifdef HAVE_ARCH_X86_64_SUPPORT
+# define MEMCPY_FN(_fn, _name, _desc) {.name = _name, .desc = _desc, .fn.memcpy = _fn},
+# include "mem-memcpy-x86-64-asm-def.h"
+# undef MEMCPY_FN
+#endif
+
+	{ .name = NULL, }
+};
+
+static const char * const bench_mem_memcpy_usage[] = {
+	"perf bench mem memcpy <options>",
+	NULL
+};
+
+int bench_mem_memcpy(int argc, const char **argv, const char *prefix __maybe_unused)
+{
+	struct bench_mem_info info = {
+		.functions		= memcpy_functions,
+		.do_cycles		= do_memcpy_cycles,
+		.do_gettimeofday	= do_memcpy_gettimeofday,
+		.usage			= bench_mem_memcpy_usage,
+	};
+
+	return bench_mem_common(argc, argv, &info);
+}
+
+static void memset_alloc_mem(void **dst, size_t size)
+{
+	*dst = zalloc(size);
+	if (!*dst)
+		die("memory allocation failed - maybe size is too large?\n");
+}
+
+static u64 do_memset_cycles(const struct function *r, size_t size)
+{
+	u64 cycle_start = 0ULL, cycle_end = 0ULL;
+	memset_t fn = r->fn.memset;
+	void *dst = NULL;
+	int i;
+
+	memset_alloc_mem(&dst, size);
+
+	/*
+	 * We prefault the freshly allocated memory range here,
+	 * to not measure page fault overhead:
+	 */
+	fn(dst, -1, size);
+
+	cycle_start = get_cycles();
+	for (i = 0; i < nr_loops; ++i)
+		fn(dst, i, size);
+	cycle_end = get_cycles();
+
+	free(dst);
+	return cycle_end - cycle_start;
+}
+
+static double do_memset_gettimeofday(const struct function *r, size_t size)
+{
+	struct timeval tv_start, tv_end, tv_diff;
+	memset_t fn = r->fn.memset;
+	void *dst = NULL;
+	int i;
+
+	memset_alloc_mem(&dst, size);
+
+	/*
+	 * We prefault the freshly allocated memory range here,
+	 * to not measure page fault overhead:
+	 */
+	fn(dst, -1, size);
+
+	BUG_ON(gettimeofday(&tv_start, NULL));
+	for (i = 0; i < nr_loops; ++i)
+		fn(dst, i, size);
+	BUG_ON(gettimeofday(&tv_end, NULL));
+
+	timersub(&tv_end, &tv_start, &tv_diff);
+
+	free(dst);
+	return (double)(((double)size * nr_loops) / timeval2double(&tv_diff));
+}
+
+static const char * const bench_mem_memset_usage[] = {
+	"perf bench mem memset <options>",
+	NULL
+};
+
+static const struct function memset_functions[] = {
+	{ .name		= "default",
+	  .desc		= "Default memset() provided by glibc",
+	  .fn.memset	= memset },
+
+#ifdef HAVE_ARCH_X86_64_SUPPORT
+# define MEMSET_FN(_fn, _name, _desc) { .name = _name, .desc = _desc, .fn.memset = _fn },
+# include "mem-memset-x86-64-asm-def.h"
+# undef MEMSET_FN
+#endif
+
+	{ .name = NULL, }
+};
+
+int bench_mem_memset(int argc, const char **argv, const char *prefix __maybe_unused)
+{
+	struct bench_mem_info info = {
+		.functions		= memset_functions,
+		.do_cycles		= do_memset_cycles,
+		.do_gettimeofday	= do_memset_gettimeofday,
+		.usage			= bench_mem_memset_usage,
+	};
+
+	return bench_mem_common(argc, argv, &info);
+}

+ 0 - 434
tools/perf/bench/mem-memcpy.c

@@ -1,434 +0,0 @@
-/*
- * mem-memcpy.c
- *
- * memcpy: Simple memory copy in various ways
- *
- * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
- */
-
-#include "../perf.h"
-#include "../util/util.h"
-#include "../util/parse-options.h"
-#include "../util/header.h"
-#include "../util/cloexec.h"
-#include "bench.h"
-#include "mem-memcpy-arch.h"
-#include "mem-memset-arch.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/time.h>
-#include <errno.h>
-
-#define K 1024
-
-static const char	*length_str	= "1MB";
-static const char	*routine	= "default";
-static int		iterations	= 1;
-static bool		use_cycle;
-static int		cycle_fd;
-static bool		only_prefault;
-static bool		no_prefault;
-
-static const struct option options[] = {
-	OPT_STRING('l', "length", &length_str, "1MB",
-		    "Specify length of memory to copy. "
-		    "Available units: B, KB, MB, GB and TB (upper and lower)"),
-	OPT_STRING('r', "routine", &routine, "default",
-		    "Specify routine to copy, \"all\" runs all available routines"),
-	OPT_INTEGER('i', "iterations", &iterations,
-		    "repeat memcpy() invocation this number of times"),
-	OPT_BOOLEAN('c', "cycle", &use_cycle,
-		    "Use cycles event instead of gettimeofday() for measuring"),
-	OPT_BOOLEAN('o', "only-prefault", &only_prefault,
-		    "Show only the result with page faults before memcpy()"),
-	OPT_BOOLEAN('n', "no-prefault", &no_prefault,
-		    "Show only the result without page faults before memcpy()"),
-	OPT_END()
-};
-
-typedef void *(*memcpy_t)(void *, const void *, size_t);
-typedef void *(*memset_t)(void *, int, size_t);
-
-struct routine {
-	const char *name;
-	const char *desc;
-	union {
-		memcpy_t memcpy;
-		memset_t memset;
-	} fn;
-};
-
-struct routine memcpy_routines[] = {
-	{ .name = "default",
-	  .desc = "Default memcpy() provided by glibc",
-	  .fn.memcpy = memcpy },
-#ifdef HAVE_ARCH_X86_64_SUPPORT
-
-#define MEMCPY_FN(_fn, _name, _desc) {.name = _name, .desc = _desc, .fn.memcpy = _fn},
-#include "mem-memcpy-x86-64-asm-def.h"
-#undef MEMCPY_FN
-
-#endif
-
-	{ NULL,
-	  NULL,
-	  {NULL}   }
-};
-
-static const char * const bench_mem_memcpy_usage[] = {
-	"perf bench mem memcpy <options>",
-	NULL
-};
-
-static struct perf_event_attr cycle_attr = {
-	.type		= PERF_TYPE_HARDWARE,
-	.config		= PERF_COUNT_HW_CPU_CYCLES
-};
-
-static void init_cycle(void)
-{
-	cycle_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1,
-				       perf_event_open_cloexec_flag());
-
-	if (cycle_fd < 0 && errno == ENOSYS)
-		die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
-	else
-		BUG_ON(cycle_fd < 0);
-}
-
-static u64 get_cycle(void)
-{
-	int ret;
-	u64 clk;
-
-	ret = read(cycle_fd, &clk, sizeof(u64));
-	BUG_ON(ret != sizeof(u64));
-
-	return clk;
-}
-
-static double timeval2double(struct timeval *ts)
-{
-	return (double)ts->tv_sec +
-		(double)ts->tv_usec / (double)1000000;
-}
-
-#define pf (no_prefault ? 0 : 1)
-
-#define print_bps(x) do {					\
-		if (x < K)					\
-			printf(" %14lf B/Sec", x);		\
-		else if (x < K * K)				\
-			printf(" %14lfd KB/Sec", x / K);	\
-		else if (x < K * K * K)				\
-			printf(" %14lf MB/Sec", x / K / K);	\
-		else						\
-			printf(" %14lf GB/Sec", x / K / K / K); \
-	} while (0)
-
-struct bench_mem_info {
-	const struct routine *routines;
-	u64 (*do_cycle)(const struct routine *r, size_t len, bool prefault);
-	double (*do_gettimeofday)(const struct routine *r, size_t len, bool prefault);
-	const char *const *usage;
-};
-
-static void __bench_mem_routine(struct bench_mem_info *info, int r_idx, size_t len, double totallen)
-{
-	const struct routine *r = &info->routines[r_idx];
-	double result_bps[2];
-	u64 result_cycle[2];
-
-	result_cycle[0] = result_cycle[1] = 0ULL;
-	result_bps[0] = result_bps[1] = 0.0;
-
-	printf("Routine %s (%s)\n", r->name, r->desc);
-
-	if (bench_format == BENCH_FORMAT_DEFAULT)
-		printf("# Copying %s Bytes ...\n\n", length_str);
-
-	if (!only_prefault && !no_prefault) {
-		/* show both of results */
-		if (use_cycle) {
-			result_cycle[0] = info->do_cycle(r, len, false);
-			result_cycle[1] = info->do_cycle(r, len, true);
-		} else {
-			result_bps[0]   = info->do_gettimeofday(r, len, false);
-			result_bps[1]   = info->do_gettimeofday(r, len, true);
-		}
-	} else {
-		if (use_cycle)
-			result_cycle[pf] = info->do_cycle(r, len, only_prefault);
-		else
-			result_bps[pf] = info->do_gettimeofday(r, len, only_prefault);
-	}
-
-	switch (bench_format) {
-	case BENCH_FORMAT_DEFAULT:
-		if (!only_prefault && !no_prefault) {
-			if (use_cycle) {
-				printf(" %14lf Cycle/Byte\n",
-					(double)result_cycle[0]
-					/ totallen);
-				printf(" %14lf Cycle/Byte (with prefault)\n",
-					(double)result_cycle[1]
-					/ totallen);
-			} else {
-				print_bps(result_bps[0]);
-				printf("\n");
-				print_bps(result_bps[1]);
-				printf(" (with prefault)\n");
-			}
-		} else {
-			if (use_cycle) {
-				printf(" %14lf Cycle/Byte",
-					(double)result_cycle[pf]
-					/ totallen);
-			} else
-				print_bps(result_bps[pf]);
-
-			printf("%s\n", only_prefault ? " (with prefault)" : "");
-		}
-		break;
-	case BENCH_FORMAT_SIMPLE:
-		if (!only_prefault && !no_prefault) {
-			if (use_cycle) {
-				printf("%lf %lf\n",
-					(double)result_cycle[0] / totallen,
-					(double)result_cycle[1] / totallen);
-			} else {
-				printf("%lf %lf\n",
-					result_bps[0], result_bps[1]);
-			}
-		} else {
-			if (use_cycle) {
-				printf("%lf\n", (double)result_cycle[pf]
-					/ totallen);
-			} else
-				printf("%lf\n", result_bps[pf]);
-		}
-		break;
-	default:
-		/* reaching this means there's some disaster: */
-		die("unknown format: %d\n", bench_format);
-		break;
-	}
-}
-
-static int bench_mem_common(int argc, const char **argv,
-		     const char *prefix __maybe_unused,
-		     struct bench_mem_info *info)
-{
-	int i;
-	size_t len;
-	double totallen;
-
-	argc = parse_options(argc, argv, options,
-			     info->usage, 0);
-
-	if (no_prefault && only_prefault) {
-		fprintf(stderr, "Invalid options: -o and -n are mutually exclusive\n");
-		return 1;
-	}
-
-	if (use_cycle)
-		init_cycle();
-
-	len = (size_t)perf_atoll((char *)length_str);
-	totallen = (double)len * iterations;
-
-	if ((s64)len <= 0) {
-		fprintf(stderr, "Invalid length:%s\n", length_str);
-		return 1;
-	}
-
-	/* same to without specifying either of prefault and no-prefault */
-	if (only_prefault && no_prefault)
-		only_prefault = no_prefault = false;
-
-	if (!strncmp(routine, "all", 3)) {
-		for (i = 0; info->routines[i].name; i++)
-			__bench_mem_routine(info, i, len, totallen);
-		return 0;
-	}
-
-	for (i = 0; info->routines[i].name; i++) {
-		if (!strcmp(info->routines[i].name, routine))
-			break;
-	}
-	if (!info->routines[i].name) {
-		printf("Unknown routine:%s\n", routine);
-		printf("Available routines...\n");
-		for (i = 0; info->routines[i].name; i++) {
-			printf("\t%s ... %s\n",
-			       info->routines[i].name, info->routines[i].desc);
-		}
-		return 1;
-	}
-
-	__bench_mem_routine(info, i, len, totallen);
-
-	return 0;
-}
-
-static void memcpy_alloc_mem(void **dst, void **src, size_t length)
-{
-	*dst = zalloc(length);
-	if (!*dst)
-		die("memory allocation failed - maybe length is too large?\n");
-
-	*src = zalloc(length);
-	if (!*src)
-		die("memory allocation failed - maybe length is too large?\n");
-	/* Make sure to always replace the zero pages even if MMAP_THRESH is crossed */
-	memset(*src, 0, length);
-}
-
-static u64 do_memcpy_cycle(const struct routine *r, size_t len, bool prefault)
-{
-	u64 cycle_start = 0ULL, cycle_end = 0ULL;
-	void *src = NULL, *dst = NULL;
-	memcpy_t fn = r->fn.memcpy;
-	int i;
-
-	memcpy_alloc_mem(&dst, &src, len);
-
-	if (prefault)
-		fn(dst, src, len);
-
-	cycle_start = get_cycle();
-	for (i = 0; i < iterations; ++i)
-		fn(dst, src, len);
-	cycle_end = get_cycle();
-
-	free(src);
-	free(dst);
-	return cycle_end - cycle_start;
-}
-
-static double do_memcpy_gettimeofday(const struct routine *r, size_t len,
-				     bool prefault)
-{
-	struct timeval tv_start, tv_end, tv_diff;
-	memcpy_t fn = r->fn.memcpy;
-	void *src = NULL, *dst = NULL;
-	int i;
-
-	memcpy_alloc_mem(&dst, &src, len);
-
-	if (prefault)
-		fn(dst, src, len);
-
-	BUG_ON(gettimeofday(&tv_start, NULL));
-	for (i = 0; i < iterations; ++i)
-		fn(dst, src, len);
-	BUG_ON(gettimeofday(&tv_end, NULL));
-
-	timersub(&tv_end, &tv_start, &tv_diff);
-
-	free(src);
-	free(dst);
-	return (double)(((double)len * iterations) / timeval2double(&tv_diff));
-}
-
-int bench_mem_memcpy(int argc, const char **argv,
-		     const char *prefix __maybe_unused)
-{
-	struct bench_mem_info info = {
-		.routines = memcpy_routines,
-		.do_cycle = do_memcpy_cycle,
-		.do_gettimeofday = do_memcpy_gettimeofday,
-		.usage = bench_mem_memcpy_usage,
-	};
-
-	return bench_mem_common(argc, argv, prefix, &info);
-}
-
-static void memset_alloc_mem(void **dst, size_t length)
-{
-	*dst = zalloc(length);
-	if (!*dst)
-		die("memory allocation failed - maybe length is too large?\n");
-}
-
-static u64 do_memset_cycle(const struct routine *r, size_t len, bool prefault)
-{
-	u64 cycle_start = 0ULL, cycle_end = 0ULL;
-	memset_t fn = r->fn.memset;
-	void *dst = NULL;
-	int i;
-
-	memset_alloc_mem(&dst, len);
-
-	if (prefault)
-		fn(dst, -1, len);
-
-	cycle_start = get_cycle();
-	for (i = 0; i < iterations; ++i)
-		fn(dst, i, len);
-	cycle_end = get_cycle();
-
-	free(dst);
-	return cycle_end - cycle_start;
-}
-
-static double do_memset_gettimeofday(const struct routine *r, size_t len,
-				     bool prefault)
-{
-	struct timeval tv_start, tv_end, tv_diff;
-	memset_t fn = r->fn.memset;
-	void *dst = NULL;
-	int i;
-
-	memset_alloc_mem(&dst, len);
-
-	if (prefault)
-		fn(dst, -1, len);
-
-	BUG_ON(gettimeofday(&tv_start, NULL));
-	for (i = 0; i < iterations; ++i)
-		fn(dst, i, len);
-	BUG_ON(gettimeofday(&tv_end, NULL));
-
-	timersub(&tv_end, &tv_start, &tv_diff);
-
-	free(dst);
-	return (double)(((double)len * iterations) / timeval2double(&tv_diff));
-}
-
-static const char * const bench_mem_memset_usage[] = {
-	"perf bench mem memset <options>",
-	NULL
-};
-
-static const struct routine memset_routines[] = {
-	{ .name ="default",
-	  .desc = "Default memset() provided by glibc",
-	  .fn.memset = memset },
-#ifdef HAVE_ARCH_X86_64_SUPPORT
-
-#define MEMSET_FN(_fn, _name, _desc) { .name = _name, .desc = _desc, .fn.memset = _fn },
-#include "mem-memset-x86-64-asm-def.h"
-#undef MEMSET_FN
-
-#endif
-
-	{ .name = NULL,
-	  .desc = NULL,
-	  .fn.memset = NULL   }
-};
-
-int bench_mem_memset(int argc, const char **argv,
-		     const char *prefix __maybe_unused)
-{
-	struct bench_mem_info info = {
-		.routines = memset_routines,
-		.do_cycle = do_memset_cycle,
-		.do_gettimeofday = do_memset_gettimeofday,
-		.usage = bench_mem_memset_usage,
-	};
-
-	return bench_mem_common(argc, argv, prefix, &info);
-}

+ 2 - 2
tools/perf/bench/numa.c

@@ -164,8 +164,8 @@ static const struct option options[] = {
 	OPT_STRING('L', "mb_proc_locked", &p0.mb_proc_locked_str,"MB", "process serialized/locked memory access (MBs), <= process_memory"),
 	OPT_STRING('T', "mb_thread"	, &p0.mb_thread_str,	"MB", "thread  memory (MBs)"),
 
-	OPT_UINTEGER('l', "nr_loops"	, &p0.nr_loops,		"max number of loops to run"),
-	OPT_UINTEGER('s', "nr_secs"	, &p0.nr_secs,		"max number of seconds to run"),
+	OPT_UINTEGER('l', "nr_loops"	, &p0.nr_loops,		"max number of loops to run (default: unlimited)"),
+	OPT_UINTEGER('s', "nr_secs"	, &p0.nr_secs,		"max number of seconds to run (default: 5 secs)"),
 	OPT_UINTEGER('u', "usleep"	, &p0.sleep_usecs,	"usecs to sleep per loop iteration"),
 
 	OPT_BOOLEAN('R', "data_reads"	, &p0.data_reads,	"access the data via writes (can be mixed with -W)"),

+ 5 - 5
tools/perf/bench/sched-messaging.c

@@ -33,7 +33,7 @@
 #define DATASIZE 100
 
 static bool use_pipes = false;
-static unsigned int loops = 100;
+static unsigned int nr_loops = 100;
 static bool thread_mode = false;
 static unsigned int num_groups = 10;
 
@@ -79,7 +79,7 @@ static void ready(int ready_out, int wakefd)
 		err(EXIT_FAILURE, "poll");
 }
 
-/* Sender sprays loops messages down each file descriptor */
+/* Sender sprays nr_loops messages down each file descriptor */
 static void *sender(struct sender_context *ctx)
 {
 	char data[DATASIZE];
@@ -88,7 +88,7 @@ static void *sender(struct sender_context *ctx)
 	ready(ctx->ready_out, ctx->wakefd);
 
 	/* Now pump to every receiver. */
-	for (i = 0; i < loops; i++) {
+	for (i = 0; i < nr_loops; i++) {
 		for (j = 0; j < ctx->num_fds; j++) {
 			int ret, done = 0;
 
@@ -213,7 +213,7 @@ static unsigned int group(pthread_t *pth,
 		/* Create the pipe between client and server */
 		fdpair(fds);
 
-		ctx->num_packets = num_fds * loops;
+		ctx->num_packets = num_fds * nr_loops;
 		ctx->in_fds[0] = fds[0];
 		ctx->in_fds[1] = fds[1];
 		ctx->ready_out = ready_out;
@@ -250,7 +250,7 @@ static const struct option options[] = {
 	OPT_BOOLEAN('t', "thread", &thread_mode,
 		    "Be multi thread instead of multi process"),
 	OPT_UINTEGER('g', "group", &num_groups, "Specify number of groups"),
-	OPT_UINTEGER('l', "loop", &loops, "Specify number of loops"),
+	OPT_UINTEGER('l', "nr_loops", &nr_loops, "Specify the number of loops to run (default: 100)"),
 	OPT_END()
 };
 

+ 7 - 7
tools/perf/builtin-bench.c

@@ -36,7 +36,7 @@ struct bench {
 #ifdef HAVE_LIBNUMA_SUPPORT
 static struct bench numa_benchmarks[] = {
 	{ "mem",	"Benchmark for NUMA workloads",			bench_numa		},
-	{ "all",	"Test all NUMA benchmarks",			NULL			},
+	{ "all",	"Run all NUMA benchmarks",			NULL			},
 	{ NULL,		NULL,						NULL			}
 };
 #endif
@@ -44,14 +44,14 @@ static struct bench numa_benchmarks[] = {
 static struct bench sched_benchmarks[] = {
 	{ "messaging",	"Benchmark for scheduling and IPC",		bench_sched_messaging	},
 	{ "pipe",	"Benchmark for pipe() between two processes",	bench_sched_pipe	},
-	{ "all",	"Test all scheduler benchmarks",		NULL			},
+	{ "all",	"Run all scheduler benchmarks",		NULL			},
 	{ NULL,		NULL,						NULL			}
 };
 
 static struct bench mem_benchmarks[] = {
-	{ "memcpy",	"Benchmark for memcpy()",			bench_mem_memcpy	},
-	{ "memset",	"Benchmark for memset() tests",			bench_mem_memset	},
-	{ "all",	"Test all memory benchmarks",			NULL			},
+	{ "memcpy",	"Benchmark for memcpy() functions",		bench_mem_memcpy	},
+	{ "memset",	"Benchmark for memset() functions",		bench_mem_memset	},
+	{ "all",	"Run all memory access benchmarks",		NULL			},
 	{ NULL,		NULL,						NULL			}
 };
 
@@ -62,7 +62,7 @@ static struct bench futex_benchmarks[] = {
 	{ "requeue",	"Benchmark for futex requeue calls",            bench_futex_requeue	},
 	/* pi-futexes */
 	{ "lock-pi",	"Benchmark for futex lock_pi calls",            bench_futex_lock_pi	},
-	{ "all",	"Test all futex benchmarks",			NULL			},
+	{ "all",	"Run all futex benchmarks",			NULL			},
 	{ NULL,		NULL,						NULL			}
 };
 
@@ -110,7 +110,7 @@ int bench_format = BENCH_FORMAT_DEFAULT;
 unsigned int bench_repeat = 10; /* default number of times to repeat the run */
 
 static const struct option bench_options[] = {
-	OPT_STRING('f', "format", &bench_format_str, "default", "Specify format style"),
+	OPT_STRING('f', "format", &bench_format_str, "default|simple", "Specify the output formatting style"),
 	OPT_UINTEGER('r', "repeat",  &bench_repeat,   "Specify amount of times to repeat the run"),
 	OPT_END()
 };

+ 1 - 1
tools/perf/builtin-help.c

@@ -463,7 +463,7 @@ int cmd_help(int argc, const char **argv, const char *prefix __maybe_unused)
 			builtin_help_subcommands, builtin_help_usage, 0);
 
 	if (show_all) {
-		printf("\n usage: %s\n\n", perf_usage_string);
+		printf("\n Usage: %s\n\n", perf_usage_string);
 		list_commands("perf commands", &main_cmds, &other_cmds);
 		printf(" %s\n\n", perf_more_info_string);
 		return 0;

+ 4 - 1
tools/perf/builtin-script.c

@@ -686,7 +686,10 @@ static int process_attr(struct perf_tool *tool, union perf_event *event,
 
 	set_print_ip_opts(&evsel->attr);
 
-	return perf_evsel__check_attr(evsel, scr->session);
+	if (evsel->attr.sample_type)
+		err = perf_evsel__check_attr(evsel, scr->session);
+
+	return err;
 }
 
 static int process_comm_event(struct perf_tool *tool,

+ 19 - 4
tools/perf/builtin-stat.c

@@ -434,7 +434,7 @@ static void print_noise_pct(double total, double avg)
 
 static void print_noise(struct perf_evsel *evsel, double avg)
 {
-	struct perf_stat *ps;
+	struct perf_stat_evsel *ps;
 
 	if (run_count == 1)
 		return;
@@ -479,6 +479,7 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
 			csv_sep);
 		break;
 	case AGGR_GLOBAL:
+	case AGGR_UNSET:
 	default:
 		break;
 	}
@@ -671,7 +672,7 @@ static void print_aggr_thread(struct perf_evsel *counter, char *prefix)
 static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
 {
 	FILE *output = stat_config.output;
-	struct perf_stat *ps = counter->priv;
+	struct perf_stat_evsel *ps = counter->priv;
 	double avg = avg_stats(&ps->res_stats[0]);
 	int scaled = counter->counts->scaled;
 	double uval;
@@ -799,6 +800,8 @@ static void print_interval(char *prefix, struct timespec *ts)
 		case AGGR_GLOBAL:
 		default:
 			fprintf(output, "#           time             counts %*s events\n", unit_width, "unit");
+		case AGGR_UNSET:
+			break;
 		}
 	}
 
@@ -880,6 +883,7 @@ static void print_counters(struct timespec *ts, int argc, const char **argv)
 		evlist__for_each(evsel_list, counter)
 			print_counter(counter, prefix);
 		break;
+	case AGGR_UNSET:
 	default:
 		break;
 	}
@@ -940,6 +944,16 @@ static int stat__set_big_num(const struct option *opt __maybe_unused,
 	return 0;
 }
 
+static int perf_stat__get_socket(struct cpu_map *map, int cpu)
+{
+	return cpu_map__get_socket(map, cpu, NULL);
+}
+
+static int perf_stat__get_core(struct cpu_map *map, int cpu)
+{
+	return cpu_map__get_core(map, cpu, NULL);
+}
+
 static int perf_stat_init_aggr_mode(void)
 {
 	switch (stat_config.aggr_mode) {
@@ -948,18 +962,19 @@ static int perf_stat_init_aggr_mode(void)
 			perror("cannot build socket map");
 			return -1;
 		}
-		aggr_get_id = cpu_map__get_socket;
+		aggr_get_id = perf_stat__get_socket;
 		break;
 	case AGGR_CORE:
 		if (cpu_map__build_core_map(evsel_list->cpus, &aggr_map)) {
 			perror("cannot build core map");
 			return -1;
 		}
-		aggr_get_id = cpu_map__get_core;
+		aggr_get_id = perf_stat__get_core;
 		break;
 	case AGGR_NONE:
 	case AGGR_GLOBAL:
 	case AGGR_THREAD:
+	case AGGR_UNSET:
 	default:
 		break;
 	}

+ 1 - 1
tools/perf/tests/openat-syscall-all-cpus.c

@@ -34,7 +34,7 @@ int test__openat_syscall_event_on_all_cpus(void)
 	evsel = perf_evsel__newtp("syscalls", "sys_enter_openat");
 	if (IS_ERR(evsel)) {
 		tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "syscalls", "sys_enter_openat");
-		pr_err("%s\n", errbuf);
+		pr_debug("%s\n", errbuf);
 		goto out_thread_map_delete;
 	}
 

+ 1 - 1
tools/perf/tests/openat-syscall-tp-fields.c

@@ -89,7 +89,7 @@ int test__syscall_openat_tp_fields(void)
 
 				err = perf_evsel__parse_sample(evsel, event, &sample);
 				if (err) {
-					pr_err("Can't parse sample, err = %d\n", err);
+					pr_debug("Can't parse sample, err = %d\n", err);
 					goto out_delete_evlist;
 				}
 

+ 1 - 1
tools/perf/tests/openat-syscall.c

@@ -22,7 +22,7 @@ int test__openat_syscall_event(void)
 	evsel = perf_evsel__newtp("syscalls", "sys_enter_openat");
 	if (IS_ERR(evsel)) {
 		tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "syscalls", "sys_enter_openat");
-		pr_err("%s\n", errbuf);
+		pr_debug("%s\n", errbuf);
 		goto out_thread_map_delete;
 	}
 

+ 14 - 0
tools/perf/tests/parse-events.c

@@ -5,6 +5,7 @@
 #include <api/fs/fs.h>
 #include "tests.h"
 #include "debug.h"
+#include "util.h"
 #include <linux/hw_breakpoint.h>
 #include <api/fs/fs.h>
 
@@ -1753,6 +1754,17 @@ static int test_pmu_events(void)
 	return ret;
 }
 
+static void debug_warn(const char *warn, va_list params) /* warning callback installed by test__parse_events() via set_warning_routine() */
+{
+	char msg[1024]; /* holds the formatted warning; vsnprintf() is bounded by sizeof(msg) */
+
+	if (!verbose)
+		return; /* print nothing unless verbose is non-zero */
+
+	vsnprintf(msg, sizeof(msg), warn, params); /* warn is the format string, params its arguments */
+	fprintf(stderr, " Warning: %s\n", msg);
+}
+
 int test__parse_events(void)
 {
 	int ret1, ret2 = 0;
@@ -1764,6 +1776,8 @@ do {							\
 		ret2 = ret1;				\
 } while (0)
 
+	set_warning_routine(debug_warn);
+
 	TEST_EVENTS(test__events);
 
 	if (test_pmu())

+ 2 - 2
tools/perf/tests/topology.c

@@ -73,10 +73,10 @@ static int check_cpu_topology(char *path, struct cpu_map *map)
 
 	for (i = 0; i < map->nr; i++) {
 		TEST_ASSERT_VAL("Core ID doesn't match",
-			(session->header.env.cpu[map->map[i]].core_id == (cpu_map__get_core(map, i) & 0xffff)));
+			(session->header.env.cpu[map->map[i]].core_id == (cpu_map__get_core(map, i, NULL) & 0xffff)));
 
 		TEST_ASSERT_VAL("Socket ID doesn't match",
-			(session->header.env.cpu[map->map[i]].socket_id == cpu_map__get_socket(map, i)));
+			(session->header.env.cpu[map->map[i]].socket_id == cpu_map__get_socket(map, i, NULL)));
 	}
 
 	perf_session__delete(session);

+ 9 - 8
tools/perf/util/cpumap.c

@@ -241,7 +241,7 @@ int cpu_map__get_socket_id(int cpu)
 	return ret ?: value;
 }
 
-int cpu_map__get_socket(struct cpu_map *map, int idx)
+int cpu_map__get_socket(struct cpu_map *map, int idx, void *data __maybe_unused)
 {
 	int cpu;
 
@@ -258,8 +258,9 @@ static int cmp_ids(const void *a, const void *b)
 	return *(int *)a - *(int *)b;
 }
 
-static int cpu_map__build_map(struct cpu_map *cpus, struct cpu_map **res,
-			      int (*f)(struct cpu_map *map, int cpu))
+int cpu_map__build_map(struct cpu_map *cpus, struct cpu_map **res,
+		       int (*f)(struct cpu_map *map, int cpu, void *data),
+		       void *data)
 {
 	struct cpu_map *c;
 	int nr = cpus->nr;
@@ -271,7 +272,7 @@ static int cpu_map__build_map(struct cpu_map *cpus, struct cpu_map **res,
 		return -1;
 
 	for (cpu = 0; cpu < nr; cpu++) {
-		s1 = f(cpus, cpu);
+		s1 = f(cpus, cpu, data);
 		for (s2 = 0; s2 < c->nr; s2++) {
 			if (s1 == c->map[s2])
 				break;
@@ -295,7 +296,7 @@ int cpu_map__get_core_id(int cpu)
 	return ret ?: value;
 }
 
-int cpu_map__get_core(struct cpu_map *map, int idx)
+int cpu_map__get_core(struct cpu_map *map, int idx, void *data)
 {
 	int cpu, s;
 
@@ -306,7 +307,7 @@ int cpu_map__get_core(struct cpu_map *map, int idx)
 
 	cpu = cpu_map__get_core_id(cpu);
 
-	s = cpu_map__get_socket(map, idx);
+	s = cpu_map__get_socket(map, idx, data);
 	if (s == -1)
 		return -1;
 
@@ -321,12 +322,12 @@ int cpu_map__get_core(struct cpu_map *map, int idx)
 
 int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp)
 {
-	return cpu_map__build_map(cpus, sockp, cpu_map__get_socket);
+	return cpu_map__build_map(cpus, sockp, cpu_map__get_socket, NULL);
 }
 
 int cpu_map__build_core_map(struct cpu_map *cpus, struct cpu_map **corep)
 {
-	return cpu_map__build_map(cpus, corep, cpu_map__get_core);
+	return cpu_map__build_map(cpus, corep, cpu_map__get_core, NULL);
 }
 
 /* setup simple routines to easily access node numbers given a cpu number */

+ 5 - 2
tools/perf/util/cpumap.h

@@ -19,9 +19,9 @@ struct cpu_map *cpu_map__dummy_new(void);
 struct cpu_map *cpu_map__read(FILE *file);
 size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp);
 int cpu_map__get_socket_id(int cpu);
-int cpu_map__get_socket(struct cpu_map *map, int idx);
+int cpu_map__get_socket(struct cpu_map *map, int idx, void *data);
 int cpu_map__get_core_id(int cpu);
-int cpu_map__get_core(struct cpu_map *map, int idx);
+int cpu_map__get_core(struct cpu_map *map, int idx, void *data);
 int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp);
 int cpu_map__build_core_map(struct cpu_map *cpus, struct cpu_map **corep);
 
@@ -87,4 +87,7 @@ static inline int cpu__get_node(int cpu)
 	return cpunode_map[cpu];
 }
 
+int cpu_map__build_map(struct cpu_map *cpus, struct cpu_map **res,
+		       int (*f)(struct cpu_map *map, int cpu, void *data),
+		       void *data);
 #endif /* __PERF_CPUMAP_H */

+ 2 - 2
tools/perf/util/parse-options.c

@@ -648,7 +648,7 @@ int usage_with_options_internal(const char * const *usagestr,
 	if (!usagestr)
 		return PARSE_OPT_HELP;
 
-	fprintf(stderr, "\n usage: %s\n", *usagestr++);
+	fprintf(stderr, "\n Usage: %s\n", *usagestr++);
 	while (*usagestr && **usagestr)
 		fprintf(stderr, "    or: %s\n", *usagestr++);
 	while (*usagestr) {
@@ -684,7 +684,7 @@ int parse_options_usage(const char * const *usagestr,
 	if (!usagestr)
 		goto opt;
 
-	fprintf(stderr, "\n usage: %s\n", *usagestr++);
+	fprintf(stderr, "\n Usage: %s\n", *usagestr++);
 	while (*usagestr && **usagestr)
 		fprintf(stderr, "    or: %s\n", *usagestr++);
 	while (*usagestr) {

+ 7 - 6
tools/perf/util/stat.c

@@ -67,7 +67,7 @@ double rel_stddev_stats(double stddev, double avg)
 bool __perf_evsel_stat__is(struct perf_evsel *evsel,
 			   enum perf_stat_evsel_id id)
 {
-	struct perf_stat *ps = evsel->priv;
+	struct perf_stat_evsel *ps = evsel->priv;
 
 	return ps->id == id;
 }
@@ -84,7 +84,7 @@ static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = {
 
 void perf_stat_evsel_id_init(struct perf_evsel *evsel)
 {
-	struct perf_stat *ps = evsel->priv;
+	struct perf_stat_evsel *ps = evsel->priv;
 	int i;
 
 	/* ps->id is 0 hence PERF_STAT_EVSEL_ID__NONE by default */
@@ -100,7 +100,7 @@ void perf_stat_evsel_id_init(struct perf_evsel *evsel)
 void perf_evsel__reset_stat_priv(struct perf_evsel *evsel)
 {
 	int i;
-	struct perf_stat *ps = evsel->priv;
+	struct perf_stat_evsel *ps = evsel->priv;
 
 	for (i = 0; i < 3; i++)
 		init_stats(&ps->res_stats[i]);
@@ -110,7 +110,7 @@ void perf_evsel__reset_stat_priv(struct perf_evsel *evsel)
 
 int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
 {
-	evsel->priv = zalloc(sizeof(struct perf_stat));
+	evsel->priv = zalloc(sizeof(struct perf_stat_evsel));
 	if (evsel->priv == NULL)
 		return -ENOMEM;
 	perf_evsel__reset_stat_priv(evsel);
@@ -230,7 +230,7 @@ static int check_per_pkg(struct perf_evsel *counter,
 	if (!(vals->run && vals->ena))
 		return 0;
 
-	s = cpu_map__get_socket(cpus, cpu);
+	s = cpu_map__get_socket(cpus, cpu, NULL);
 	if (s < 0)
 		return -1;
 
@@ -272,6 +272,7 @@ process_counter_values(struct perf_stat_config *config, struct perf_evsel *evsel
 			aggr->ena += count->ena;
 			aggr->run += count->run;
 		}
+	case AGGR_UNSET:
 	default:
 		break;
 	}
@@ -304,7 +305,7 @@ int perf_stat_process_counter(struct perf_stat_config *config,
 			      struct perf_evsel *counter)
 {
 	struct perf_counts_values *aggr = &counter->counts->aggr;
-	struct perf_stat *ps = counter->priv;
+	struct perf_stat_evsel *ps = counter->priv;
 	u64 *count = counter->counts->aggr.values;
 	int i, ret;
 

+ 2 - 1
tools/perf/util/stat.h

@@ -20,7 +20,7 @@ enum perf_stat_evsel_id {
 	PERF_STAT_EVSEL_ID__MAX,
 };
 
-struct perf_stat {
+struct perf_stat_evsel {
 	struct stats		res_stats[3];
 	enum perf_stat_evsel_id	id;
 };
@@ -31,6 +31,7 @@ enum aggr_mode {
 	AGGR_SOCKET,
 	AGGR_CORE,
 	AGGR_THREAD,
+	AGGR_UNSET,
 };
 
 struct perf_stat_config {

+ 5 - 0
tools/perf/util/usage.c

@@ -50,6 +50,11 @@ void set_die_routine(void (*routine)(const char *err, va_list params) NORETURN)
 	die_routine = routine;
 }
 
+void set_warning_routine(void (*routine)(const char *err, va_list params)) /* setter for warn_routine, declared next to set_die_routine() in util.h */
+{
+	warn_routine = routine; /* stored as-is, no NULL check; presumably invoked by warning() - confirm in usage.c */
+}
+
 void usage(const char *err)
 {
 	usage_routine(err);

+ 1 - 0
tools/perf/util/util.h

@@ -145,6 +145,7 @@ extern void warning(const char *err, ...) __attribute__((format (printf, 1, 2)))
 
 
 extern void set_die_routine(void (*routine)(const char *err, va_list params) NORETURN);
+extern void set_warning_routine(void (*routine)(const char *err, va_list params));
 
 extern int prefixcmp(const char *str, const char *prefix);
 extern void set_buildid_dir(const char *dir);