Эх сурвалжийг харах

Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

 User visible changes:

  - Show precise number of samples at the end of a 'record' session, if
    processing build ids, since we will then traverse the whole perf.data file
    and see all the PERF_RECORD_SAMPLE records, otherwise stop showing the
    previous off-base heuristically counted number of "samples"  (Namhyung Kim).

  - Support to read compressed module from build-id cache (Namhyung Kim)

 Infrastructure changes:

  - Cache eh/debug frame offset for dwarf unwind (Namhyung Kim)

  - Set header version correctly in all cases (Namhyung Kim)

  - Set attr.task bit for a tracking event, to be consistent (Namhyung Kim)

  - Use perf_data_file__fd() consistently in perf tools

  - Convert lseek + read to pread in perf symbols

  - Don't rely on malloc working for sz 0, fixing another problem when
    using uClibc (Vineet Gupta)

  - Provide stub for missing pthread_attr_setaffinity_np for libcs where this
    is not available, such as uClibc (Vineet Gupta)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Ingo Molnar 10 жил өмнө
parent
commit
1ed39bac21

+ 13 - 0
tools/perf/bench/futex.h

@@ -68,4 +68,17 @@ futex_cmp_requeue(u_int32_t *uaddr, u_int32_t val, u_int32_t *uaddr2, int nr_wak
 		 val, opflags);
 }
 
+#ifndef HAVE_PTHREAD_ATTR_SETAFFINITY_NP	/* libc lacks the API: no-op stub */
+#include <pthread.h>
+static inline int pthread_attr_setaffinity_np(pthread_attr_t *attr,
+					      size_t cpusetsize,
+					      cpu_set_t *cpuset)
+{
+	(void)attr;		/* mark the arguments as used without the */
+	(void)cpusetsize;	/* self-assignments that clang flags via */
+	(void)cpuset;		/* -Wself-assign */
+	return 0;		/* the affinity request is ignored */
+}
+#endif
+
 #endif /* _FUTEX_H */

+ 3 - 2
tools/perf/builtin-inject.c

@@ -343,6 +343,7 @@ static int __cmd_inject(struct perf_inject *inject)
 	int ret = -EINVAL;
 	struct perf_session *session = inject->session;
 	struct perf_data_file *file_out = &inject->output;
+	int fd = perf_data_file__fd(file_out);
 
 	signal(SIGINT, sig_handler);
 
@@ -376,7 +377,7 @@ static int __cmd_inject(struct perf_inject *inject)
 	}
 
 	if (!file_out->is_pipe)
-		lseek(file_out->fd, session->header.data_offset, SEEK_SET);
+		lseek(fd, session->header.data_offset, SEEK_SET);
 
 	ret = perf_session__process_events(session, &inject->tool);
 
@@ -385,7 +386,7 @@ static int __cmd_inject(struct perf_inject *inject)
 			perf_header__set_feat(&session->header,
 					      HEADER_BUILD_ID);
 		session->header.data_size = inject->bytes_written;
-		perf_session__write_header(session, session->evlist, file_out->fd, true);
+		perf_session__write_header(session, session->evlist, fd, true);
 	}
 
 	return ret;

+ 48 - 22
tools/perf/builtin-record.c

@@ -190,16 +190,30 @@ out:
 	return rc;
 }
 
+static int process_sample_event(struct perf_tool *tool,
+				union perf_event *event,
+				struct perf_sample *sample,
+				struct perf_evsel *evsel,
+				struct machine *machine)
+{
+	struct record *rec = container_of(tool, struct record, tool);
+
+	rec->samples++;		/* exact count printed when 'record' ends */
+
+	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
+}
+
 static int process_buildids(struct record *rec)
 {
 	struct perf_data_file *file  = &rec->file;
 	struct perf_session *session = rec->session;
-	u64 start = session->header.data_offset;
 
-	u64 size = lseek(file->fd, 0, SEEK_CUR);
+	u64 size = lseek(perf_data_file__fd(file), 0, SEEK_CUR);
 	if (size == 0)
 		return 0;
 
+	file->size = size;
+
 	/*
 	 * During this process, it'll load kernel map and replace the
 	 * dso->long_name to a real pathname it found.  In this case
@@ -211,9 +225,7 @@ static int process_buildids(struct record *rec)
 	 */
 	symbol_conf.ignore_vmlinux_buildid = true;
 
-	return __perf_session__process_events(session, start,
-					      size - start,
-					      size, &build_id__mark_dso_hit_ops);
+	return perf_session__process_events(session, &rec->tool);
 }
 
 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
@@ -322,6 +334,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 	struct perf_data_file *file = &rec->file;
 	struct perf_session *session;
 	bool disabled = false, draining = false;
+	int fd;
 
 	rec->progname = argv[0];
 
@@ -336,6 +349,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 		return -1;
 	}
 
+	fd = perf_data_file__fd(file);
 	rec->session = session;
 
 	record__init_features(rec);
@@ -360,12 +374,11 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
 
 	if (file->is_pipe) {
-		err = perf_header__write_pipe(file->fd);
+		err = perf_header__write_pipe(fd);
 		if (err < 0)
 			goto out_child;
 	} else {
-		err = perf_session__write_header(session, rec->evlist,
-						 file->fd, false);
+		err = perf_session__write_header(session, rec->evlist, fd, false);
 		if (err < 0)
 			goto out_child;
 	}
@@ -397,7 +410,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 			 * return this more properly and also
 			 * propagate errors that now are calling die()
 			 */
-			err = perf_event__synthesize_tracing_data(tool, file->fd, rec->evlist,
+			err = perf_event__synthesize_tracing_data(tool,	fd, rec->evlist,
 								  process_synthesized_event);
 			if (err <= 0) {
 				pr_err("Couldn't record tracing data.\n");
@@ -504,19 +517,9 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 		goto out_child;
 	}
 
-	if (!quiet) {
+	if (!quiet)
 		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
 
-		/*
-		 * Approximate RIP event size: 24 bytes.
-		 */
-		fprintf(stderr,
-			"[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
-			(double)rec->bytes_written / 1024.0 / 1024.0,
-			file->path,
-			rec->bytes_written / 24);
-	}
-
 out_child:
 	if (forks) {
 		int exit_status;
@@ -535,13 +538,29 @@ out_child:
 	} else
 		status = err;
 
+	/* this will be recalculated during process_buildids() */
+	rec->samples = 0;
+
 	if (!err && !file->is_pipe) {
 		rec->session->header.data_size += rec->bytes_written;
 
 		if (!rec->no_buildid)
 			process_buildids(rec);
-		perf_session__write_header(rec->session, rec->evlist,
-					   file->fd, true);
+		perf_session__write_header(rec->session, rec->evlist, fd, true);
+	}
+
+	if (!err && !quiet) {
+		char samples[128];
+
+		if (rec->samples)
+			scnprintf(samples, sizeof(samples),
+				  " (%" PRIu64 " samples)", rec->samples);
+		else
+			samples[0] = '\0';
+
+		fprintf(stderr,	"[ perf record: Captured and wrote %.3f MB %s%s ]\n",
+			perf_data_file__size(file) / 1024.0 / 1024.0,
+			file->path, samples);
 	}
 
 out_delete_session:
@@ -720,6 +739,13 @@ static struct record record = {
 			.default_per_cpu = true,
 		},
 	},
+	.tool = {
+		.sample		= process_sample_event,
+		.fork		= perf_event__process_fork,
+		.comm		= perf_event__process_comm,
+		.mmap		= perf_event__process_mmap,
+		.mmap2		= perf_event__process_mmap2,
+	},
 };
 
 #define CALLCHAIN_HELP "setup and enables call-graph (stack chain/backtrace) recording: "

+ 6 - 0
tools/perf/config/Makefile

@@ -198,6 +198,7 @@ CORE_FEATURE_TESTS =			\
 	libpython-version		\
 	libslang			\
 	libunwind			\
+	pthread-attr-setaffinity-np	\
 	stackprotector-all		\
 	timerfd				\
 	libdw-dwarf-unwind		\
@@ -226,6 +227,7 @@ VF_FEATURE_TESTS =			\
 	libelf-getphdrnum		\
 	libelf-mmap			\
 	libpython-version		\
+	pthread-attr-setaffinity-np	\
 	stackprotector-all		\
 	timerfd				\
 	libunwind-debug-frame		\
@@ -301,6 +303,10 @@ ifeq ($(feature-sync-compare-and-swap), 1)
   CFLAGS += -DHAVE_SYNC_COMPARE_AND_SWAP_SUPPORT
 endif
 
+ifeq ($(feature-pthread-attr-setaffinity-np), 1)
+  CFLAGS += -DHAVE_PTHREAD_ATTR_SETAFFINITY_NP
+endif
+
 ifndef NO_BIONIC
   $(call feature_check,bionic)
   ifeq ($(feature-bionic), 1)

+ 4 - 0
tools/perf/config/feature-checks/Makefile

@@ -25,6 +25,7 @@ FILES=					\
 	test-libslang.bin		\
 	test-libunwind.bin		\
 	test-libunwind-debug-frame.bin	\
+	test-pthread-attr-setaffinity-np.bin	\
 	test-stackprotector-all.bin	\
 	test-timerfd.bin		\
 	test-libdw-dwarf-unwind.bin	\
@@ -47,6 +48,9 @@ test-all.bin:
 test-hello.bin:
 	$(BUILD)
 
+test-pthread-attr-setaffinity-np.bin:
+	$(BUILD) -Werror -lpthread
+
 test-stackprotector-all.bin:
 	$(BUILD) -Werror -fstack-protector-all
 

+ 5 - 0
tools/perf/config/feature-checks/test-all.c

@@ -97,6 +97,10 @@
 # include "test-zlib.c"
 #undef main
 
+#define main main_test_pthread_attr_setaffinity_np
+# include "test-pthread-attr-setaffinity-np.c"
+#undef main
+
 int main(int argc, char *argv[])
 {
 	main_test_libpython();
@@ -121,6 +125,7 @@ int main(int argc, char *argv[])
 	main_test_libdw_dwarf_unwind();
 	main_test_sync_compare_and_swap(argc, argv);
 	main_test_zlib();
+	main_test_pthread_attr_setaffinity_np();
 
 	return 0;
 }

+ 14 - 0
tools/perf/config/feature-checks/test-pthread-attr-setaffinity-np.c

@@ -0,0 +1,14 @@
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE	/* glibc only declares the _np API under this macro */
+#endif
+#include <pthread.h>
+
+int main(void)
+{
+	pthread_attr_t thread_attr;
+	cpu_set_t cpuset;
+
+	CPU_ZERO(&cpuset);	/* real, non-NULL set: keeps -Werror builds happy */
+	pthread_attr_init(&thread_attr);
+	return pthread_attr_setaffinity_np(&thread_attr, sizeof(cpuset), &cpuset);
+}

+ 1 - 5
tools/perf/util/dso.c

@@ -532,12 +532,8 @@ dso_cache__read(struct dso *dso, u64 offset, u8 *data, ssize_t size)
 			break;
 
 		cache_offset = offset & DSO__DATA_CACHE_MASK;
-		ret = -EINVAL;
 
-		if (-1 == lseek(dso->data.fd, cache_offset, SEEK_SET))
-			break;
-
-		ret = read(dso->data.fd, cache->data, DSO__DATA_CACHE_SIZE);
+		ret = pread(dso->data.fd, cache->data, DSO__DATA_CACHE_SIZE, cache_offset);
 		if (ret <= 0)
 			break;
 

+ 1 - 0
tools/perf/util/dso.h

@@ -139,6 +139,7 @@ struct dso {
 		u32		 status_seen;
 		size_t		 file_size;
 		struct list_head open_entry;
+		u64		 frame_offset;
 	} data;
 
 	union { /* Tool specific area */

+ 4 - 0
tools/perf/util/evsel.c

@@ -709,6 +709,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts)
 	if (opts->sample_weight)
 		perf_evsel__set_sample_bit(evsel, WEIGHT);
 
+	attr->task  = track;
 	attr->mmap  = track;
 	attr->mmap2 = track && !perf_missing_features.mmap2;
 	attr->comm  = track;
@@ -797,6 +798,9 @@ int perf_evsel__enable(struct perf_evsel *evsel, int ncpus, int nthreads)
 
 int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads)
 {
+	if (ncpus == 0 || nthreads == 0)
+		return 0;
+
 	if (evsel->system_wide)
 		nthreads = 1;
 

+ 1 - 1
tools/perf/util/header.c

@@ -2237,6 +2237,7 @@ static int check_magic_endian(u64 magic, uint64_t hdr_sz,
 	 * - unique number to identify actual perf.data files
 	 * - encode endianness of file
 	 */
+	ph->version = PERF_HEADER_VERSION_2;
 
 	/* check magic number with one endianness */
 	if (magic == __perf_magic2)
@@ -2247,7 +2248,6 @@ static int check_magic_endian(u64 magic, uint64_t hdr_sz,
 		return -1;
 
 	ph->needs_swap = true;
-	ph->version = PERF_HEADER_VERSION_2;
 
 	return 0;
 }

+ 3 - 3
tools/perf/util/session.c

@@ -1251,9 +1251,9 @@ fetch_mmaped_event(struct perf_session *session,
 #define NUM_MMAPS 128
 #endif
 
-int __perf_session__process_events(struct perf_session *session,
-				   u64 data_offset, u64 data_size,
-				   u64 file_size, struct perf_tool *tool)
+static int __perf_session__process_events(struct perf_session *session,
+					  u64 data_offset, u64 data_size,
+					  u64 file_size, struct perf_tool *tool)
 {
 	int fd = perf_data_file__fd(session->file);
 	u64 head, page_offset, file_offset, file_pos, size;

+ 0 - 3
tools/perf/util/session.h

@@ -49,9 +49,6 @@ int perf_session__peek_event(struct perf_session *session, off_t file_offset,
 			     union perf_event **event_ptr,
 			     struct perf_sample *sample);
 
-int __perf_session__process_events(struct perf_session *session,
-				   u64 data_offset, u64 data_size, u64 size,
-				   struct perf_tool *tool);
 int perf_session__process_events(struct perf_session *session,
 				 struct perf_tool *tool);
 

+ 8 - 5
tools/perf/util/symbol-elf.c

@@ -574,13 +574,16 @@ static int decompress_kmodule(struct dso *dso, const char *name,
 	const char *ext = strrchr(name, '.');
 	char tmpbuf[] = "/tmp/perf-kmod-XXXXXX";
 
-	if ((type != DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP &&
-	     type != DSO_BINARY_TYPE__GUEST_KMODULE_COMP) ||
-	    type != dso->symtab_type)
+	if (type != DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP &&
+	    type != DSO_BINARY_TYPE__GUEST_KMODULE_COMP &&
+	    type != DSO_BINARY_TYPE__BUILD_ID_CACHE)
 		return -1;
 
-	if (!ext || !is_supported_compression(ext + 1))
-		return -1;
+	if (!ext || !is_supported_compression(ext + 1)) {
+		ext = strrchr(dso->name, '.');
+		if (!ext || !is_supported_compression(ext + 1))
+			return -1;
+	}
 
 	fd = mkstemp(tmpbuf);
 	if (fd < 0)

+ 20 - 11
tools/perf/util/unwind-libunwind.c

@@ -266,14 +266,17 @@ static int read_unwind_spec_eh_frame(struct dso *dso, struct machine *machine,
 				     u64 *fde_count)
 {
 	int ret = -EINVAL, fd;
-	u64 offset;
+	u64 offset = dso->data.frame_offset;
 
-	fd = dso__data_fd(dso, machine);
-	if (fd < 0)
-		return -EINVAL;
+	if (offset == 0) {
+		fd = dso__data_fd(dso, machine);
+		if (fd < 0)
+			return -EINVAL;
 
-	/* Check the .eh_frame section for unwinding info */
-	offset = elf_section_offset(fd, ".eh_frame_hdr");
+		/* Check the .eh_frame section for unwinding info */
+		offset = elf_section_offset(fd, ".eh_frame_hdr");
+		dso->data.frame_offset = offset;
+	}
 
 	if (offset)
 		ret = unwind_spec_ehframe(dso, machine, offset,
@@ -287,14 +290,20 @@ static int read_unwind_spec_eh_frame(struct dso *dso, struct machine *machine,
 static int read_unwind_spec_debug_frame(struct dso *dso,
 					struct machine *machine, u64 *offset)
 {
-	int fd = dso__data_fd(dso, machine);
+	int fd;
+	u64 ofs = dso->data.frame_offset;
 
-	if (fd < 0)
-		return -EINVAL;
+	if (ofs == 0) {
+		fd = dso__data_fd(dso, machine);
+		if (fd < 0)
+			return -EINVAL;
 
-	/* Check the .debug_frame section for unwinding info */
-	*offset = elf_section_offset(fd, ".debug_frame");
+		/* Check the .debug_frame section for unwinding info */
+		ofs = elf_section_offset(fd, ".debug_frame");
+		dso->data.frame_offset = ofs;
+	}
 
+	*offset = ofs;
 	if (*offset)
 		return 0;