
Merge tag 'perf-core-for-mingo-4.15-20171003' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

- Multithread the synthesizing of PERF_RECORD_ events for pre-existing
  threads in 'perf top', speeding up that phase, greatly improving the
  user experience in systems such as Intel's Knights Mill (Kan Liang)

- 'perf test' fixes for the perf_event_attr test case (Jiri Olsa, Thomas Richter)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Ingo Molnar, 7 years ago
commit 4b50239a76
38 changed files with 439 additions and 125 deletions
  1. tools/arch/s390/include/uapi/asm/kvm.h (+6, -0)
  2. tools/arch/x86/include/asm/cpufeatures.h (+2, -0)
  3. tools/arch/x86/include/asm/disabled-features.h (+3, -1)
  4. tools/include/asm-generic/hugetlb_encode.h (+34, -0)
  5. tools/include/uapi/drm/drm.h (+22, -0)
  6. tools/include/uapi/drm/i915_drm.h (+49, -2)
  7. tools/include/uapi/linux/bpf.h (+20, -12)
  8. tools/include/uapi/linux/kvm.h (+2, -1)
  9. tools/include/uapi/linux/mman.h (+23, -1)
  10. tools/perf/Documentation/perf-top.txt (+3, -0)
  11. tools/perf/arch/s390/util/Build (+0, -1)
  12. tools/perf/arch/s390/util/sym-handling.c (+0, -29)
  13. tools/perf/builtin-kvm.c (+2, -1)
  14. tools/perf/builtin-record.c (+1, -1)
  15. tools/perf/builtin-top.c (+12, -1)
  16. tools/perf/builtin-trace.c (+1, -1)
  17. tools/perf/tests/attr.c (+1, -1)
  18. tools/perf/tests/attr.py (+5, -1)
  19. tools/perf/tests/attr/base-record (+1, -1)
  20. tools/perf/tests/attr/test-record-group (+1, -0)
  21. tools/perf/tests/attr/test-record-group-sampling (+1, -1)
  22. tools/perf/tests/attr/test-record-group1 (+1, -0)
  23. tools/perf/tests/attr/test-stat-group (+2, -0)
  24. tools/perf/tests/attr/test-stat-group1 (+2, -0)
  25. tools/perf/tests/mmap-thread-lookup.c (+1, -1)
  26. tools/perf/util/callchain.c (+21, -14)
  27. tools/perf/util/comm.c (+17, -1)
  28. tools/perf/util/event.c (+132, -31)
  29. tools/perf/util/event.h (+2, -1)
  30. tools/perf/util/evsel.c (+6, -1)
  31. tools/perf/util/machine.c (+6, -2)
  32. tools/perf/util/machine.h (+6, -3)
  33. tools/perf/util/symbol-elf.c (+1, -7)
  34. tools/perf/util/symbol.h (+0, -3)
  35. tools/perf/util/syscalltbl.c (+1, -1)
  36. tools/perf/util/thread.c (+48, -5)
  37. tools/perf/util/thread.h (+3, -0)
  38. tools/perf/util/top.h (+1, -0)

+ 6 - 0
tools/arch/s390/include/uapi/asm/kvm.h

@@ -88,6 +88,12 @@ struct kvm_s390_io_adapter_req {
 /* kvm attributes for KVM_S390_VM_TOD */
 #define KVM_S390_VM_TOD_LOW		0
 #define KVM_S390_VM_TOD_HIGH		1
+#define KVM_S390_VM_TOD_EXT		2
+
+struct kvm_s390_vm_tod_clock {
+	__u8  epoch_idx;
+	__u64 tod;
+};
 
 /* kvm attributes for KVM_S390_VM_CPU_MODEL */
 /* processor related attributes are r/w */

+ 2 - 0
tools/arch/x86/include/asm/cpufeatures.h

@@ -196,6 +196,7 @@
 
 #define X86_FEATURE_HW_PSTATE	( 7*32+ 8) /* AMD HW-PState */
 #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
+#define X86_FEATURE_SME		( 7*32+10) /* AMD Secure Memory Encryption */
 
 #define X86_FEATURE_INTEL_PPIN	( 7*32+14) /* Intel Processor Inventory Number */
 #define X86_FEATURE_INTEL_PT	( 7*32+15) /* Intel Processor Trace */
@@ -287,6 +288,7 @@
 #define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
 #define X86_FEATURE_AVIC	(15*32+13) /* Virtual Interrupt Controller */
 #define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */
+#define X86_FEATURE_VGIF	(15*32+16) /* Virtual GIF */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */
 #define X86_FEATURE_AVX512VBMI  (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/

+ 3 - 1
tools/arch/x86/include/asm/disabled-features.h

@@ -21,11 +21,13 @@
 # define DISABLE_K6_MTRR	(1<<(X86_FEATURE_K6_MTRR & 31))
 # define DISABLE_CYRIX_ARR	(1<<(X86_FEATURE_CYRIX_ARR & 31))
 # define DISABLE_CENTAUR_MCR	(1<<(X86_FEATURE_CENTAUR_MCR & 31))
+# define DISABLE_PCID		0
 #else
 # define DISABLE_VME		0
 # define DISABLE_K6_MTRR	0
 # define DISABLE_CYRIX_ARR	0
 # define DISABLE_CENTAUR_MCR	0
+# define DISABLE_PCID		(1<<(X86_FEATURE_PCID & 31))
 #endif /* CONFIG_X86_64 */
 
 #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
@@ -49,7 +51,7 @@
 #define DISABLED_MASK1	0
 #define DISABLED_MASK2	0
 #define DISABLED_MASK3	(DISABLE_CYRIX_ARR|DISABLE_CENTAUR_MCR|DISABLE_K6_MTRR)
-#define DISABLED_MASK4	0
+#define DISABLED_MASK4	(DISABLE_PCID)
 #define DISABLED_MASK5	0
 #define DISABLED_MASK6	0
 #define DISABLED_MASK7	0

+ 34 - 0
tools/include/asm-generic/hugetlb_encode.h

@@ -0,0 +1,34 @@
+#ifndef _ASM_GENERIC_HUGETLB_ENCODE_H_
+#define _ASM_GENERIC_HUGETLB_ENCODE_H_
+
+/*
+ * Several system calls take a flag to request "hugetlb" huge pages.
+ * Without further specification, these system calls will use the
+ * system's default huge page size.  If a system supports multiple
+ * huge page sizes, the desired huge page size can be specified in
+ * bits [26:31] of the flag arguments.  The value in these 6 bits
+ * will encode the log2 of the huge page size.
+ *
+ * The following definitions are associated with this huge page size
+ * encoding in flag arguments.  System call specific header files
+ * that use this encoding should include this file.  They can then
+ * provide definitions based on these with their own specific prefix.
+ * For example:
+ * #define MAP_HUGE_SHIFT HUGETLB_FLAG_ENCODE_SHIFT
+ */
+
+#define HUGETLB_FLAG_ENCODE_SHIFT	26
+#define HUGETLB_FLAG_ENCODE_MASK	0x3f
+
+#define HUGETLB_FLAG_ENCODE_64KB	(16 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_512KB	(19 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_1MB		(20 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_2MB		(21 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_8MB		(23 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_16MB	(24 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_256MB	(28 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_1GB		(30 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_2GB		(31 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_16GB	(34 << HUGETLB_FLAG_ENCODE_SHIFT)
+
+#endif /* _ASM_GENERIC_HUGETLB_ENCODE_H_ */
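
The constants above are pure arithmetic: the value placed in bits [26:31] is the base-2 log of the page size. A minimal standalone sketch (not from this patch; the helper name is hypothetical) that reproduces the 2MB encoding:

/* Sketch: the 2 MB encoding is log2(2 MB) = 21 shifted into bits [26:31],
 * i.e. the HUGETLB_FLAG_ENCODE_2MB value defined above. */
#include <assert.h>

#define ENCODE_SHIFT 26

static unsigned long hugetlb_encode_log2(unsigned int log2_size)
{
	return (unsigned long)log2_size << ENCODE_SHIFT;
}

int main(void)
{
	/* 2 MB = 2^21 bytes */
	assert(hugetlb_encode_log2(21) == (21UL << 26));
	return 0;
}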

+ 22 - 0
tools/include/uapi/drm/drm.h

@@ -700,6 +700,7 @@ struct drm_prime_handle {
 
 struct drm_syncobj_create {
 	__u32 handle;
+#define DRM_SYNCOBJ_CREATE_SIGNALED (1 << 0)
 	__u32 flags;
 };
 
@@ -718,6 +719,24 @@ struct drm_syncobj_handle {
 	__u32 pad;
 };
 
+#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
+#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1)
+struct drm_syncobj_wait {
+	__u64 handles;
+	/* absolute timeout */
+	__s64 timeout_nsec;
+	__u32 count_handles;
+	__u32 flags;
+	__u32 first_signaled; /* only valid when not waiting all */
+	__u32 pad;
+};
+
+struct drm_syncobj_array {
+	__u64 handles;
+	__u32 count_handles;
+	__u32 pad;
+};
+
 #if defined(__cplusplus)
 }
 #endif
@@ -840,6 +859,9 @@ extern "C" {
 #define DRM_IOCTL_SYNCOBJ_DESTROY	DRM_IOWR(0xC0, struct drm_syncobj_destroy)
 #define DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD	DRM_IOWR(0xC1, struct drm_syncobj_handle)
 #define DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE	DRM_IOWR(0xC2, struct drm_syncobj_handle)
+#define DRM_IOCTL_SYNCOBJ_WAIT		DRM_IOWR(0xC3, struct drm_syncobj_wait)
+#define DRM_IOCTL_SYNCOBJ_RESET		DRM_IOWR(0xC4, struct drm_syncobj_array)
+#define DRM_IOCTL_SYNCOBJ_SIGNAL	DRM_IOWR(0xC5, struct drm_syncobj_array)
 
 /**
  * Device specific ioctls should only be in their respective headers
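
A hedged usage sketch for the new wait ioctl (not part of this patch): it assumes an already-open DRM device fd, two valid syncobj handles, and uapi headers new enough to carry the syncobj wait additions above; error handling is elided.

/* Wait until both sync objects signal, via DRM_IOCTL_SYNCOBJ_WAIT. */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/drm.h>	/* kernel uapi path; libdrm may install it elsewhere */

int wait_both(int drm_fd, uint32_t h0, uint32_t h1, int64_t abs_timeout_ns)
{
	uint32_t handles[2] = { h0, h1 };
	struct drm_syncobj_wait wait;

	memset(&wait, 0, sizeof(wait));
	wait.handles = (uintptr_t)handles;	/* userspace array passed as __u64 */
	wait.count_handles = 2;
	wait.timeout_nsec = abs_timeout_ns;	/* absolute timeout, per the struct comment */
	wait.flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL;

	return ioctl(drm_fd, DRM_IOCTL_SYNCOBJ_WAIT, &wait);
}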

+ 49 - 2
tools/include/uapi/drm/i915_drm.h

@@ -260,6 +260,8 @@ typedef struct _drm_i915_sarea {
 #define DRM_I915_GEM_CONTEXT_GETPARAM	0x34
 #define DRM_I915_GEM_CONTEXT_SETPARAM	0x35
 #define DRM_I915_PERF_OPEN		0x36
+#define DRM_I915_PERF_ADD_CONFIG	0x37
+#define DRM_I915_PERF_REMOVE_CONFIG	0x38
 
 #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
 #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
@@ -315,6 +317,8 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM	DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_GETPARAM, struct drm_i915_gem_context_param)
 #define DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM	DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_SETPARAM, struct drm_i915_gem_context_param)
 #define DRM_IOCTL_I915_PERF_OPEN	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_OPEN, struct drm_i915_perf_open_param)
+#define DRM_IOCTL_I915_PERF_ADD_CONFIG	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_ADD_CONFIG, struct drm_i915_perf_oa_config)
+#define DRM_IOCTL_I915_PERF_REMOVE_CONFIG	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_REMOVE_CONFIG, __u64)
 
 /* Allow drivers to submit batchbuffers directly to hardware, relying
  * on the security mechanisms provided by hardware.
@@ -431,6 +435,11 @@ typedef struct drm_i915_irq_wait {
  */
 #define I915_PARAM_HAS_EXEC_BATCH_FIRST	 48
 
+/* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying an array of
+ * drm_i915_gem_exec_fence structures.  See I915_EXEC_FENCE_ARRAY.
+ */
+#define I915_PARAM_HAS_EXEC_FENCE_ARRAY  49
+
 typedef struct drm_i915_getparam {
 	__s32 param;
 	/*
@@ -812,6 +821,17 @@ struct drm_i915_gem_exec_object2 {
 	__u64 rsvd2;
 };
 
+struct drm_i915_gem_exec_fence {
+	/**
+	 * User's handle for a drm_syncobj to wait on or signal.
+	 */
+	__u32 handle;
+
+#define I915_EXEC_FENCE_WAIT            (1<<0)
+#define I915_EXEC_FENCE_SIGNAL          (1<<1)
+	__u32 flags;
+};
+
 struct drm_i915_gem_execbuffer2 {
 	/**
 	 * List of gem_exec_object2 structs
@@ -826,7 +846,11 @@ struct drm_i915_gem_execbuffer2 {
 	__u32 DR1;
 	__u32 DR4;
 	__u32 num_cliprects;
-	/** This is a struct drm_clip_rect *cliprects */
+	/**
+	 * This is a struct drm_clip_rect *cliprects if I915_EXEC_FENCE_ARRAY
+	 * is not set.  If I915_EXEC_FENCE_ARRAY is set, then this is a
+	 * struct drm_i915_gem_exec_fence *fences.
+	 */
 	__u64 cliprects_ptr;
 #define I915_EXEC_RING_MASK              (7<<0)
 #define I915_EXEC_DEFAULT                (0<<0)
@@ -927,7 +951,14 @@ struct drm_i915_gem_execbuffer2 {
  * element).
  */
 #define I915_EXEC_BATCH_FIRST		(1<<18)
-#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_BATCH_FIRST<<1))
+
+/* Setting I915_EXEC_FENCE_ARRAY implies that num_cliprects and cliprects_ptr
+ * define an array of i915_gem_exec_fence structures which specify a set of
+ * dma fences to wait upon or signal.
+ */
+#define I915_EXEC_FENCE_ARRAY   (1<<19)
+
+#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_ARRAY<<1))
 
 #define I915_EXEC_CONTEXT_ID_MASK	(0xffffffff)
 #define i915_execbuffer2_set_context_id(eb2, context) \
@@ -1467,6 +1498,22 @@ enum drm_i915_perf_record_type {
 	DRM_I915_PERF_RECORD_MAX /* non-ABI */
 };
 
+/**
+ * Structure to upload perf dynamic configuration into the kernel.
+ */
+struct drm_i915_perf_oa_config {
+	/** String formatted like "%08x-%04x-%04x-%04x-%012x" */
+	char uuid[36];
+
+	__u32 n_mux_regs;
+	__u32 n_boolean_regs;
+	__u32 n_flex_regs;
+
+	__u64 __user mux_regs_ptr;
+	__u64 __user boolean_regs_ptr;
+	__u64 __user flex_regs_ptr;
+};
+
 #if defined(__cplusplus)
 }
 #endif

+ 20 - 12
tools/include/uapi/linux/bpf.h

@@ -143,12 +143,6 @@ enum bpf_attach_type {
 
 #define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE
 
-enum bpf_sockmap_flags {
-	BPF_SOCKMAP_UNSPEC,
-	BPF_SOCKMAP_STRPARSER,
-	__MAX_BPF_SOCKMAP_FLAG
-};
-
 /* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command
  * to the given target_fd cgroup the descendent cgroup will be able to
  * override effective bpf program that was inherited from this cgroup
@@ -368,9 +362,20 @@ union bpf_attr {
  * int bpf_redirect(ifindex, flags)
  *     redirect to another netdev
  *     @ifindex: ifindex of the net device
- *     @flags: bit 0 - if set, redirect to ingress instead of egress
- *             other bits - reserved
- *     Return: TC_ACT_REDIRECT
+ *     @flags:
+ *	  cls_bpf:
+ *          bit 0 - if set, redirect to ingress instead of egress
+ *          other bits - reserved
+ *	  xdp_bpf:
+ *	    all bits - reserved
+ *     Return: cls_bpf: TC_ACT_REDIRECT on success or TC_ACT_SHOT on error
+ *	       xdp_bpf: XDP_REDIRECT on success or XDP_ABORTED on error
+ * int bpf_redirect_map(map, key, flags)
+ *     redirect to endpoint in map
+ *     @map: pointer to dev map
+ *     @key: index in map to lookup
+ *     @flags: --
+ *     Return: XDP_REDIRECT on success or XDP_ABORTED on error
  *
  * u32 bpf_get_route_realm(skb)
  *     retrieve a dst's tclassid
@@ -632,7 +637,7 @@ union bpf_attr {
 	FN(skb_adjust_room),		\
 	FN(redirect_map),		\
 	FN(sk_redirect_map),		\
-	FN(sock_map_update),
+	FN(sock_map_update),		\
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -753,20 +758,23 @@ struct bpf_sock {
 	__u32 family;
 	__u32 type;
 	__u32 protocol;
+	__u32 mark;
+	__u32 priority;
 };
 
 #define XDP_PACKET_HEADROOM 256
 
 /* User return codes for XDP prog type.
  * A valid XDP program must return one of these defined values. All other
- * return codes are reserved for future use. Unknown return codes will result
- * in packet drop.
+ * return codes are reserved for future use. Unknown return codes will
+ * result in packet drops and a warning via bpf_warn_invalid_xdp_action().
  */
 enum xdp_action {
 	XDP_ABORTED = 0,
 	XDP_DROP,
 	XDP_PASS,
 	XDP_TX,
+	XDP_REDIRECT,
 };
 
 /* user accessible metadata for XDP packet hook
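
To make the bpf_redirect_map() contract above concrete, here is a minimal XDP sketch. It uses BTF-style map syntax from modern libbpf, which postdates this commit, and all names (tx_ports, xdp_redirect_prog) are hypothetical:

// Redirect every packet to the net device whose ifindex is stored at
// tx_ports[0]. bpf_redirect_map() returns XDP_REDIRECT on success and
// XDP_ABORTED on error, per the helper documentation above.
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_DEVMAP);
	__uint(max_entries, 64);
	__type(key, __u32);
	__type(value, __u32);
} tx_ports SEC(".maps");

SEC("xdp")
int xdp_redirect_prog(struct xdp_md *ctx)
{
	(void)ctx;	/* packet data not inspected in this sketch */
	return bpf_redirect_map(&tx_ports, 0, 0);
}

char _license[] SEC("license") = "GPL";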

+ 2 - 1
tools/include/uapi/linux/kvm.h

@@ -711,7 +711,8 @@ struct kvm_ppc_one_seg_page_size {
 struct kvm_ppc_smmu_info {
 	__u64 flags;
 	__u32 slb_size;
-	__u32 pad;
+	__u16 data_keys;	/* # storage keys supported for data */
+	__u16 instr_keys;	/* # storage keys supported for instructions */
 	struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ];
 };
 

+ 23 - 1
tools/include/uapi/linux/mman.h

@@ -1,7 +1,8 @@
 #ifndef _UAPI_LINUX_MMAN_H
 #define _UAPI_LINUX_MMAN_H
 
-#include <uapi/asm/mman.h>
+#include <asm/mman.h>
+#include <asm-generic/hugetlb_encode.h>
 
 #define MREMAP_MAYMOVE	1
 #define MREMAP_FIXED	2
@@ -10,4 +11,25 @@
 #define OVERCOMMIT_ALWAYS		1
 #define OVERCOMMIT_NEVER		2
 
+/*
+ * Huge page size encoding when MAP_HUGETLB is specified, and a huge page
+ * size other than the default is desired.  See hugetlb_encode.h.
+ * All known huge page size encodings are provided here.  It is the
+ * responsibility of the application to know which sizes are supported on
+ * the running system.  See mmap(2) man page for details.
+ */
+#define MAP_HUGE_SHIFT	HUGETLB_FLAG_ENCODE_SHIFT
+#define MAP_HUGE_MASK	HUGETLB_FLAG_ENCODE_MASK
+
+#define MAP_HUGE_64KB	HUGETLB_FLAG_ENCODE_64KB
+#define MAP_HUGE_512KB	HUGETLB_FLAG_ENCODE_512KB
+#define MAP_HUGE_1MB	HUGETLB_FLAG_ENCODE_1MB
+#define MAP_HUGE_2MB	HUGETLB_FLAG_ENCODE_2MB
+#define MAP_HUGE_8MB	HUGETLB_FLAG_ENCODE_8MB
+#define MAP_HUGE_16MB	HUGETLB_FLAG_ENCODE_16MB
+#define MAP_HUGE_256MB	HUGETLB_FLAG_ENCODE_256MB
+#define MAP_HUGE_1GB	HUGETLB_FLAG_ENCODE_1GB
+#define MAP_HUGE_2GB	HUGETLB_FLAG_ENCODE_2GB
+#define MAP_HUGE_16GB	HUGETLB_FLAG_ENCODE_16GB
+
 #endif /* _UAPI_LINUX_MMAN_H */
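
A small usage sketch for the new MAP_HUGE_* flags (not from this patch): it assumes 2MB hugepages are reserved on the running system, e.g. via /proc/sys/vm/nr_hugepages.

/* Map one explicit 2 MB huge page; mmap() fails with ENOMEM if no
 * hugepages are reserved. */
#include <stdio.h>
#include <sys/mman.h>

#ifndef MAP_HUGE_2MB
#define MAP_HUGE_2MB (21 << 26)	/* log2(2 MB) encoded per hugetlb_encode.h */
#endif

int main(void)
{
	size_t len = 2 * 1024 * 1024;
	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_HUGE_2MB,
		       -1, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	munmap(p, len);
	return 0;
}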

+ 3 - 0
tools/perf/Documentation/perf-top.txt

@@ -240,6 +240,9 @@ Default is to monitor all CPUS.
 --force::
 	Don't do ownership validation.
 
+--num-thread-synthesize::
+	The number of threads to run when synthesizing events for existing processes.
+	By default, the number of threads equals the number of online CPUs.
 
 INTERACTIVE PROMPTING KEYS
 --------------------------
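
Usage note for the new option (thread count here is arbitrary): 'perf top --num-thread-synthesize 4' caps synthesis at four worker threads, while leaving the option unset keeps the default of one thread per online CPU.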

+ 0 - 1
tools/perf/arch/s390/util/Build

@@ -1,5 +1,4 @@
 libperf-y += header.o
-libperf-y += sym-handling.o
 libperf-y += kvm-stat.o
 
 libperf-$(CONFIG_DWARF) += dwarf-regs.o

+ 0 - 29
tools/perf/arch/s390/util/sym-handling.c

@@ -1,29 +0,0 @@
-/*
- * Architecture specific ELF symbol handling and relocation mapping.
- *
- * Copyright 2017 IBM Corp.
- * Author(s): Thomas Richter <tmricht@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- */
-
-#include "symbol.h"
-
-#ifdef HAVE_LIBELF_SUPPORT
-bool elf__needs_adjust_symbols(GElf_Ehdr ehdr)
-{
-	if (ehdr.e_type == ET_EXEC)
-		return false;
-	return ehdr.e_type == ET_REL || ehdr.e_type == ET_DYN;
-}
-
-void arch__adjust_sym_map_offset(GElf_Sym *sym,
-				 GElf_Shdr *shdr __maybe_unused,
-				 struct map *map)
-{
-	if (map->type == MAP__FUNCTION)
-		sym->st_value += map->start;
-}
-#endif

+ 2 - 1
tools/perf/builtin-kvm.c

@@ -1441,7 +1441,8 @@ static int kvm_events_live(struct perf_kvm_stat *kvm,
 	perf_session__set_id_hdr_size(kvm->session);
 	ordered_events__set_copy_on_queue(&kvm->session->ordered_events, true);
 	machine__synthesize_threads(&kvm->session->machines.host, &kvm->opts.target,
-				    kvm->evlist->threads, false, kvm->opts.proc_map_timeout);
+				    kvm->evlist->threads, false,
+				    kvm->opts.proc_map_timeout, 1);
 	err = kvm_live_open_events(kvm);
 	if (err)
 		goto out;

+ 1 - 1
tools/perf/builtin-record.c

@@ -863,7 +863,7 @@ static int record__synthesize(struct record *rec, bool tail)
 
 	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
 					    process_synthesized_event, opts->sample_address,
-					    opts->proc_map_timeout);
+					    opts->proc_map_timeout, 1);
 out:
 	return err;
 }

+ 12 - 1
tools/perf/builtin-top.c

@@ -958,8 +958,16 @@ static int __cmd_top(struct perf_top *top)
 	if (perf_session__register_idle_thread(top->session) < 0)
 		goto out_delete;
 
+	if (top->nr_threads_synthesize > 1)
+		perf_set_multithreaded();
+
 	machine__synthesize_threads(&top->session->machines.host, &opts->target,
-				    top->evlist->threads, false, opts->proc_map_timeout);
+				    top->evlist->threads, false,
+				    opts->proc_map_timeout,
+				    top->nr_threads_synthesize);
+
+	if (top->nr_threads_synthesize > 1)
+		perf_set_singlethreaded();
 
 	if (perf_hpp_list.socket) {
 		ret = perf_env__read_cpu_topology_map(&perf_env);
@@ -1112,6 +1120,7 @@ int cmd_top(int argc, const char **argv)
 		},
 		.max_stack	     = sysctl_perf_event_max_stack,
 		.sym_pcnt_filter     = 5,
+		.nr_threads_synthesize = UINT_MAX,
 	};
 	struct record_opts *opts = &top.record_opts;
 	struct target *target = &opts->target;
@@ -1221,6 +1230,8 @@ int cmd_top(int argc, const char **argv)
 	OPT_BOOLEAN(0, "hierarchy", &symbol_conf.report_hierarchy,
 		    "Show entries in a hierarchy"),
 	OPT_BOOLEAN(0, "force", &symbol_conf.force, "don't complain, do it"),
+	OPT_UINTEGER(0, "num-thread-synthesize", &top.nr_threads_synthesize,
+			"number of threads to use for event synthesis"),
 	OPT_END()
 	};
 	const char * const top_usage[] = {

+ 1 - 1
tools/perf/builtin-trace.c

@@ -1131,7 +1131,7 @@ static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
 
 	err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
 					    evlist->threads, trace__tool_process, false,
-					    trace->opts.proc_map_timeout);
+					    trace->opts.proc_map_timeout, 1);
 	if (err)
 		symbol__exit();
 

+ 1 - 1
tools/perf/tests/attr.c

@@ -166,7 +166,7 @@ static int run_dir(const char *d, const char *perf)
 	snprintf(cmd, 3*PATH_MAX, PYTHON " %s/attr.py -d %s/attr/ -p %s %.*s",
 		 d, d, perf, vcnt, v);
 
-	return system(cmd);
+	return system(cmd) ? TEST_FAIL : TEST_OK;
 }
 
 int test__attr(struct test *test __maybe_unused, int subtest __maybe_unused)

+ 5 - 1
tools/perf/tests/attr.py

@@ -237,6 +237,7 @@ class Test(object):
         # events in result. Fail if there's not any.
         for exp_name, exp_event in expect.items():
             exp_list = []
+            res_event = {}
             log.debug("    matching [%s]" % exp_name)
             for res_name, res_event in result.items():
                 log.debug("      to [%s]" % res_name)
@@ -253,7 +254,10 @@ class Test(object):
                 if exp_event.optional():
                     log.debug("    %s does not match, but is optional" % exp_name)
                 else:
-                    exp_event.diff(res_event)
+                    if not res_event:
+                        log.debug("    res_event is empty");
+                    else:
+                        exp_event.diff(res_event)
                     raise Fail(self, 'match failure');
 
             match[exp_name] = exp_list

+ 1 - 1
tools/perf/tests/attr/base-record

@@ -23,7 +23,7 @@ comm=1
 freq=1
 inherit_stat=0
 enable_on_exec=1
-task=0
+task=1
 watermark=0
 precise_ip=0|1|2|3
 mmap_data=0

+ 1 - 0
tools/perf/tests/attr/test-record-group

@@ -17,5 +17,6 @@ sample_type=327
 read_format=4
 mmap=0
 comm=0
+task=0
 enable_on_exec=0
 disabled=0

+ 1 - 1
tools/perf/tests/attr/test-record-group-sampling

@@ -23,7 +23,7 @@ sample_type=343
 
 # PERF_FORMAT_ID | PERF_FORMAT_GROUP
 read_format=12
-
+task=0
 mmap=0
 comm=0
 enable_on_exec=0

+ 1 - 0
tools/perf/tests/attr/test-record-group1

@@ -18,5 +18,6 @@ sample_type=327
 read_format=4
 mmap=0
 comm=0
+task=0
 enable_on_exec=0
 disabled=0

+ 2 - 0
tools/perf/tests/attr/test-stat-group

@@ -6,6 +6,7 @@ ret     = 1
 [event-1:base-stat]
 fd=1
 group_fd=-1
+read_format=3|15
 
 [event-2:base-stat]
 fd=2
@@ -13,3 +14,4 @@ group_fd=1
 config=1
 disabled=0
 enable_on_exec=0
+read_format=3|15

+ 2 - 0
tools/perf/tests/attr/test-stat-group1

@@ -6,6 +6,7 @@ ret     = 1
 [event-1:base-stat]
 fd=1
 group_fd=-1
+read_format=3|15
 
 [event-2:base-stat]
 fd=2
@@ -13,3 +14,4 @@ group_fd=1
 config=1
 disabled=0
 enable_on_exec=0
+read_format=3|15

+ 1 - 1
tools/perf/tests/mmap-thread-lookup.c

@@ -131,7 +131,7 @@ static int synth_all(struct machine *machine)
 {
 	return perf_event__synthesize_threads(NULL,
 					      perf_event__process,
-					      machine, 0, 500);
+					      machine, 0, 500, 1);
 }
 
 static int synth_process(struct machine *machine)

+ 21 - 14
tools/perf/util/callchain.c

@@ -65,8 +65,6 @@ static int parse_callchain_mode(const char *value)
 		callchain_param.mode = CHAIN_FOLDED;
 		return 0;
 	}
-
-	pr_err("Invalid callchain mode: %s\n", value);
 	return -1;
 }
 
@@ -82,8 +80,6 @@ static int parse_callchain_order(const char *value)
 		callchain_param.order_set = true;
 		return 0;
 	}
-
-	pr_err("Invalid callchain order: %s\n", value);
 	return -1;
 }
 
@@ -105,8 +101,6 @@ static int parse_callchain_sort_key(const char *value)
 		callchain_param.branch_callstack = 1;
 		return 0;
 	}
-
-	pr_err("Invalid callchain sort key: %s\n", value);
 	return -1;
 }
 
@@ -124,8 +118,6 @@ static int parse_callchain_value(const char *value)
 		callchain_param.value = CCVAL_COUNT;
 		return 0;
 	}
-
-	pr_err("Invalid callchain config key: %s\n", value);
 	return -1;
 }
 
@@ -319,12 +311,27 @@ int perf_callchain_config(const char *var, const char *value)
 
 		return ret;
 	}
-	if (!strcmp(var, "print-type"))
-		return parse_callchain_mode(value);
-	if (!strcmp(var, "order"))
-		return parse_callchain_order(value);
-	if (!strcmp(var, "sort-key"))
-		return parse_callchain_sort_key(value);
+	if (!strcmp(var, "print-type")) {
+		int ret;
+		ret = parse_callchain_mode(value);
+		if (ret == -1)
+			pr_err("Invalid callchain mode: %s\n", value);
+		return ret;
+	}
+	if (!strcmp(var, "order")) {
+		int ret;
+		ret = parse_callchain_order(value);
+		if (ret == -1)
+			pr_err("Invalid callchain order: %s\n", value);
+		return ret;
+	}
+	if (!strcmp(var, "sort-key")) {
+		int ret;
+		ret = parse_callchain_sort_key(value);
+		if (ret == -1)
+			pr_err("Invalid callchain sort key: %s\n", value);
+		return ret;
+	}
 	if (!strcmp(var, "threshold")) {
 		callchain_param.min_percent = strtod(value, &endptr);
 		if (value == endptr) {

+ 17 - 1
tools/perf/util/comm.c

@@ -5,6 +5,7 @@
 #include <stdio.h>
 #include <string.h>
 #include <linux/refcount.h>
+#include "rwsem.h"
 
 struct comm_str {
 	char *str;
@@ -14,6 +15,7 @@ struct comm_str {
 
 /* Should perhaps be moved to struct machine */
 static struct rb_root comm_str_root;
+static struct rw_semaphore comm_str_lock = {.lock = PTHREAD_RWLOCK_INITIALIZER,};
 
 static struct comm_str *comm_str__get(struct comm_str *cs)
 {
@@ -25,7 +27,9 @@ static struct comm_str *comm_str__get(struct comm_str *cs)
 static void comm_str__put(struct comm_str *cs)
 {
 	if (cs && refcount_dec_and_test(&cs->refcnt)) {
+		down_write(&comm_str_lock);
 		rb_erase(&cs->rb_node, &comm_str_root);
+		up_write(&comm_str_lock);
 		zfree(&cs->str);
 		free(cs);
 	}
@@ -50,7 +54,8 @@ static struct comm_str *comm_str__alloc(const char *str)
 	return cs;
 }
 
-static struct comm_str *comm_str__findnew(const char *str, struct rb_root *root)
+static
+struct comm_str *__comm_str__findnew(const char *str, struct rb_root *root)
 {
 	struct rb_node **p = &root->rb_node;
 	struct rb_node *parent = NULL;
@@ -81,6 +86,17 @@ static struct comm_str *comm_str__findnew(const char *str, struct rb_root *root)
 	return new;
 }
 
+static struct comm_str *comm_str__findnew(const char *str, struct rb_root *root)
+{
+	struct comm_str *cs;
+
+	down_write(&comm_str_lock);
+	cs = __comm_str__findnew(str, root);
+	up_write(&comm_str_lock);
+
+	return cs;
+}
+
 struct comm *comm__new(const char *str, u64 timestamp, bool exec)
 {
 	struct comm *comm = zalloc(sizeof(*comm));

+ 132 - 31
tools/perf/util/event.c

@@ -678,23 +678,21 @@ out:
 	return err;
 }
 
-int perf_event__synthesize_threads(struct perf_tool *tool,
-				   perf_event__handler_t process,
-				   struct machine *machine,
-				   bool mmap_data,
-				   unsigned int proc_map_timeout)
+static int __perf_event__synthesize_threads(struct perf_tool *tool,
+					    perf_event__handler_t process,
+					    struct machine *machine,
+					    bool mmap_data,
+					    unsigned int proc_map_timeout,
+					    struct dirent **dirent,
+					    int start,
+					    int num)
 {
 	union perf_event *comm_event, *mmap_event, *fork_event;
 	union perf_event *namespaces_event;
-	char proc_path[PATH_MAX];
-	struct dirent **dirent;
 	int err = -1;
 	char *end;
 	pid_t pid;
-	int n, i;
-
-	if (machine__is_default_guest(machine))
-		return 0;
+	int i;
 
 	comm_event = malloc(sizeof(comm_event->comm) + machine->id_hdr_size);
 	if (comm_event == NULL)
@@ -714,34 +712,25 @@ int perf_event__synthesize_threads(struct perf_tool *tool,
 	if (namespaces_event == NULL)
 		goto out_free_fork;
 
-	snprintf(proc_path, sizeof(proc_path), "%s/proc", machine->root_dir);
-	n = scandir(proc_path, &dirent, 0, alphasort);
-
-	if (n < 0)
-		goto out_free_namespaces;
-
-	for (i = 0; i < n; i++) {
+	for (i = start; i < start + num; i++) {
 		if (!isdigit(dirent[i]->d_name[0]))
 			continue;
 
 		pid = (pid_t)strtol(dirent[i]->d_name, &end, 10);
 		/* only interested in proper numerical dirents */
-		if (!*end) {
-			/*
-			 * We may race with exiting thread, so don't stop just because
-			 * one thread couldn't be synthesized.
-			 */
-			__event__synthesize_thread(comm_event, mmap_event, fork_event,
-						   namespaces_event, pid, 1, process,
-						   tool, machine, mmap_data,
-						   proc_map_timeout);
-		}
-		free(dirent[i]);
+		if (*end)
+			continue;
+		/*
+		 * We may race with exiting thread, so don't stop just because
+		 * one thread couldn't be synthesized.
+		 */
+		__event__synthesize_thread(comm_event, mmap_event, fork_event,
+					   namespaces_event, pid, 1, process,
+					   tool, machine, mmap_data,
+					   proc_map_timeout);
 	}
-	free(dirent);
 	err = 0;
 
-out_free_namespaces:
 	free(namespaces_event);
 out_free_fork:
 	free(fork_event);
@@ -753,6 +742,118 @@ out:
 	return err;
 }
 
+struct synthesize_threads_arg {
+	struct perf_tool *tool;
+	perf_event__handler_t process;
+	struct machine *machine;
+	bool mmap_data;
+	unsigned int proc_map_timeout;
+	struct dirent **dirent;
+	int num;
+	int start;
+};
+
+static void *synthesize_threads_worker(void *arg)
+{
+	struct synthesize_threads_arg *args = arg;
+
+	__perf_event__synthesize_threads(args->tool, args->process,
+					 args->machine, args->mmap_data,
+					 args->proc_map_timeout, args->dirent,
+					 args->start, args->num);
+	return NULL;
+}
+
+int perf_event__synthesize_threads(struct perf_tool *tool,
+				   perf_event__handler_t process,
+				   struct machine *machine,
+				   bool mmap_data,
+				   unsigned int proc_map_timeout,
+				   unsigned int nr_threads_synthesize)
+{
+	struct synthesize_threads_arg *args = NULL;
+	pthread_t *synthesize_threads = NULL;
+	char proc_path[PATH_MAX];
+	struct dirent **dirent;
+	int num_per_thread;
+	int m, n, i, j;
+	int thread_nr;
+	int base = 0;
+	int err = -1;
+
+
+	if (machine__is_default_guest(machine))
+		return 0;
+
+	snprintf(proc_path, sizeof(proc_path), "%s/proc", machine->root_dir);
+	n = scandir(proc_path, &dirent, 0, alphasort);
+	if (n < 0)
+		return err;
+
+	if (nr_threads_synthesize == UINT_MAX)
+		thread_nr = sysconf(_SC_NPROCESSORS_ONLN);
+	else
+		thread_nr = nr_threads_synthesize;
+
+	if (thread_nr <= 1) {
+		err = __perf_event__synthesize_threads(tool, process,
+						       machine, mmap_data,
+						       proc_map_timeout,
+						       dirent, base, n);
+		goto free_dirent;
+	}
+	if (thread_nr > n)
+		thread_nr = n;
+
+	synthesize_threads = calloc(sizeof(pthread_t), thread_nr);
+	if (synthesize_threads == NULL)
+		goto free_dirent;
+
+	args = calloc(sizeof(*args), thread_nr);
+	if (args == NULL)
+		goto free_threads;
+
+	num_per_thread = n / thread_nr;
+	m = n % thread_nr;
+	for (i = 0; i < thread_nr; i++) {
+		args[i].tool = tool;
+		args[i].process = process;
+		args[i].machine = machine;
+		args[i].mmap_data = mmap_data;
+		args[i].proc_map_timeout = proc_map_timeout;
+		args[i].dirent = dirent;
+	}
+	for (i = 0; i < m; i++) {
+		args[i].num = num_per_thread + 1;
+		args[i].start = i * args[i].num;
+	}
+	if (i != 0)
+		base = args[i-1].start + args[i-1].num;
+	for (j = i; j < thread_nr; j++) {
+		args[j].num = num_per_thread;
+		args[j].start = base + (j - i) * args[i].num;
+	}
+
+	for (i = 0; i < thread_nr; i++) {
+		if (pthread_create(&synthesize_threads[i], NULL,
+				   synthesize_threads_worker, &args[i]))
+			goto out_join;
+	}
+	err = 0;
+out_join:
+	for (i = 0; i < thread_nr; i++)
+		pthread_join(synthesize_threads[i], NULL);
+	free(args);
+free_threads:
+	free(synthesize_threads);
+free_dirent:
+	for (i = 0; i < n; i++)
+		free(dirent[i]);
+	free(dirent);
+
+	return err;
+}
+
 struct process_symbol_args {
 	const char *name;
 	u64	   start;

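The splitting logic above hands the first n % thread_nr workers one extra dirent each. A standalone sketch (plain C, illustrative names, not part of the patch) that reproduces the index math and checks that the (start, num) ranges tile [0, n) exactly:

/* Reproduce the worker split used by perf_event__synthesize_threads():
 * n dirents over thread_nr workers, remainder spread over the first
 * workers; verify every index is covered exactly once. */
#include <assert.h>
#include <stdio.h>

int main(void)
{
	int n = 10, thread_nr = 4;
	int num_per_thread = n / thread_nr;	/* 2 */
	int m = n % thread_nr;			/* first 2 workers get one extra */
	int next_start = 0;

	for (int i = 0; i < thread_nr; i++) {
		int num = num_per_thread + (i < m ? 1 : 0);

		printf("worker %d: start=%d num=%d\n", i, next_start, num);
		next_start += num;
	}
	assert(next_start == n);	/* every dirent handled exactly once */
	return 0;
}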
+ 2 - 1
tools/perf/util/event.h

@@ -680,7 +680,8 @@ int perf_event__synthesize_cpu_map(struct perf_tool *tool,
 int perf_event__synthesize_threads(struct perf_tool *tool,
 				   perf_event__handler_t process,
 				   struct machine *machine, bool mmap_data,
-				   unsigned int proc_map_timeout);
+				   unsigned int proc_map_timeout,
+				   unsigned int nr_threads_synthesize);
 int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
 				       perf_event__handler_t process,
 				       struct machine *machine);

+ 6 - 1
tools/perf/util/evsel.c

@@ -271,12 +271,17 @@ struct perf_evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx)
 	return evsel;
 }
 
+static bool perf_event_can_profile_kernel(void)
+{
+	return geteuid() == 0 || perf_event_paranoid() == -1;
+}
+
 struct perf_evsel *perf_evsel__new_cycles(bool precise)
 {
 	struct perf_event_attr attr = {
 		.type	= PERF_TYPE_HARDWARE,
 		.config	= PERF_COUNT_HW_CPU_CYCLES,
-		.exclude_kernel	= geteuid() != 0,
+		.exclude_kernel	= !perf_event_can_profile_kernel(),
 	};
 	struct perf_evsel *evsel;
 

+ 6 - 2
tools/perf/util/machine.c

@@ -2218,12 +2218,16 @@ int machines__for_each_thread(struct machines *machines,
 int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool,
 				  struct target *target, struct thread_map *threads,
 				  perf_event__handler_t process, bool data_mmap,
-				  unsigned int proc_map_timeout)
+				  unsigned int proc_map_timeout,
+				  unsigned int nr_threads_synthesize)
 {
 	if (target__has_task(target))
 		return perf_event__synthesize_thread_map(tool, threads, process, machine, data_mmap, proc_map_timeout);
 	else if (target__has_cpu(target))
-		return perf_event__synthesize_threads(tool, process, machine, data_mmap, proc_map_timeout);
+		return perf_event__synthesize_threads(tool, process,
+						      machine, data_mmap,
+						      proc_map_timeout,
+						      nr_threads_synthesize);
 	/* command specified */
 	return 0;
 }

+ 6 - 3
tools/perf/util/machine.h

@@ -257,15 +257,18 @@ int machines__for_each_thread(struct machines *machines,
 int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool,
 				  struct target *target, struct thread_map *threads,
 				  perf_event__handler_t process, bool data_mmap,
-				  unsigned int proc_map_timeout);
+				  unsigned int proc_map_timeout,
+				  unsigned int nr_threads_synthesize);
 static inline
 int machine__synthesize_threads(struct machine *machine, struct target *target,
 				struct thread_map *threads, bool data_mmap,
-				unsigned int proc_map_timeout)
+				unsigned int proc_map_timeout,
+				unsigned int nr_threads_synthesize)
 {
 	return __machine__synthesize_threads(machine, NULL, target, threads,
 					     perf_event__process, data_mmap,
-					     proc_map_timeout);
+					     proc_map_timeout,
+					     nr_threads_synthesize);
 }
 
 pid_t machine__get_current_tid(struct machine *machine, int cpu);

+ 1 - 7
tools/perf/util/symbol-elf.c

@@ -810,12 +810,6 @@ static u64 ref_reloc(struct kmap *kmap)
 void __weak arch__sym_update(struct symbol *s __maybe_unused,
 		GElf_Sym *sym __maybe_unused) { }
 
-void __weak arch__adjust_sym_map_offset(GElf_Sym *sym, GElf_Shdr *shdr,
-				       struct map *map __maybe_unused)
-{
-	sym->st_value -= shdr->sh_addr - shdr->sh_offset;
-}
-
 int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
 		  struct symsrc *runtime_ss, int kmodule)
 {
@@ -996,7 +990,7 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
 
 			/* Adjust symbol to map to file offset */
 			if (adjust_kernel_syms)
-				arch__adjust_sym_map_offset(&sym, &shdr, map);
+				sym.st_value -= shdr.sh_addr - shdr.sh_offset;
 
 			if (strcmp(section_name,
 				   (curr_dso->short_name +

+ 0 - 3
tools/perf/util/symbol.h

@@ -344,9 +344,6 @@ int setup_intlist(struct intlist **list, const char *list_str,
 #ifdef HAVE_LIBELF_SUPPORT
 bool elf__needs_adjust_symbols(GElf_Ehdr ehdr);
 void arch__sym_update(struct symbol *s, GElf_Sym *sym);
-void arch__adjust_sym_map_offset(GElf_Sym *sym,
-				 GElf_Shdr *shdr __maybe_unused,
-				 struct map *map __maybe_unused);
 #endif
 
 #define SYMBOL_A 0

+ 1 - 1
tools/perf/util/syscalltbl.c

@@ -15,9 +15,9 @@
 
 #include "syscalltbl.h"
 #include <stdlib.h>
+#include <linux/compiler.h>
 
 #ifdef HAVE_SYSCALL_TABLE
-#include <linux/compiler.h>
 #include <string.h>
 #include "string2.h"
 #include "util.h"

+ 48 - 5
tools/perf/util/thread.c

@@ -45,6 +45,8 @@ struct thread *thread__new(pid_t pid, pid_t tid)
 		thread->cpu = -1;
 		INIT_LIST_HEAD(&thread->namespaces_list);
 		INIT_LIST_HEAD(&thread->comm_list);
+		init_rwsem(&thread->namespaces_lock);
+		init_rwsem(&thread->comm_lock);
 
 		comm_str = malloc(32);
 		if (!comm_str)
@@ -83,18 +85,26 @@ void thread__delete(struct thread *thread)
 		map_groups__put(thread->mg);
 		thread->mg = NULL;
 	}
+	down_write(&thread->namespaces_lock);
 	list_for_each_entry_safe(namespaces, tmp_namespaces,
 				 &thread->namespaces_list, list) {
 		list_del(&namespaces->list);
 		namespaces__free(namespaces);
 	}
+	up_write(&thread->namespaces_lock);
+
+	down_write(&thread->comm_lock);
 	list_for_each_entry_safe(comm, tmp_comm, &thread->comm_list, list) {
 		list_del(&comm->list);
 		comm__free(comm);
 	}
+	up_write(&thread->comm_lock);
+
 	unwind__finish_access(thread);
 	nsinfo__zput(thread->nsinfo);
 
+	exit_rwsem(&thread->namespaces_lock);
+	exit_rwsem(&thread->comm_lock);
 	free(thread);
 }
 
@@ -125,8 +135,8 @@ struct namespaces *thread__namespaces(const struct thread *thread)
 	return list_first_entry(&thread->namespaces_list, struct namespaces, list);
 }
 
-int thread__set_namespaces(struct thread *thread, u64 timestamp,
-			   struct namespaces_event *event)
+static int __thread__set_namespaces(struct thread *thread, u64 timestamp,
+				    struct namespaces_event *event)
 {
 	struct namespaces *new, *curr = thread__namespaces(thread);
 
@@ -149,6 +159,17 @@ int thread__set_namespaces(struct thread *thread, u64 timestamp,
 	return 0;
 }
 
+int thread__set_namespaces(struct thread *thread, u64 timestamp,
+			   struct namespaces_event *event)
+{
+	int ret;
+
+	down_write(&thread->namespaces_lock);
+	ret = __thread__set_namespaces(thread, timestamp, event);
+	up_write(&thread->namespaces_lock);
+	return ret;
+}
+
 struct comm *thread__comm(const struct thread *thread)
 {
 	if (list_empty(&thread->comm_list))
@@ -170,8 +191,8 @@ struct comm *thread__exec_comm(const struct thread *thread)
 	return last;
 }
 
-int __thread__set_comm(struct thread *thread, const char *str, u64 timestamp,
-		       bool exec)
+static int ____thread__set_comm(struct thread *thread, const char *str,
+				u64 timestamp, bool exec)
 {
 	struct comm *new, *curr = thread__comm(thread);
 
@@ -195,6 +216,17 @@ int __thread__set_comm(struct thread *thread, const char *str, u64 timestamp,
 	return 0;
 }
 
+int __thread__set_comm(struct thread *thread, const char *str, u64 timestamp,
+		       bool exec)
+{
+	int ret;
+
+	down_write(&thread->comm_lock);
+	ret = ____thread__set_comm(thread, str, timestamp, exec);
+	up_write(&thread->comm_lock);
+	return ret;
+}
+
 int thread__set_comm_from_proc(struct thread *thread)
 {
 	char path[64];
@@ -212,7 +244,7 @@ int thread__set_comm_from_proc(struct thread *thread)
 	return err;
 }
 
-const char *thread__comm_str(const struct thread *thread)
+static const char *__thread__comm_str(const struct thread *thread)
 {
 	const struct comm *comm = thread__comm(thread);
 
@@ -222,6 +254,17 @@ const char *thread__comm_str(const struct thread *thread)
 	return comm__str(comm);
 }
 
+const char *thread__comm_str(const struct thread *thread)
+{
+	const char *str;
+
+	down_read((struct rw_semaphore *)&thread->comm_lock);
+	str = __thread__comm_str(thread);
+	up_read((struct rw_semaphore *)&thread->comm_lock);
+
+	return str;
+}
+
 /* CHECKME: it should probably better return the max comm len from its comm list */
 int thread__comm_len(struct thread *thread)
 {

+ 3 - 0
tools/perf/util/thread.h

@@ -9,6 +9,7 @@
 #include "symbol.h"
 #include <strlist.h>
 #include <intlist.h>
+#include "rwsem.h"
 
 struct thread_stack;
 struct unwind_libunwind_ops;
@@ -29,7 +30,9 @@ struct thread {
 	int			comm_len;
 	bool			dead; /* if set thread has exited */
 	struct list_head	namespaces_list;
+	struct rw_semaphore	namespaces_lock;
 	struct list_head	comm_list;
+	struct rw_semaphore	comm_lock;
 	u64			db_id;
 
 	void			*priv;

+ 1 - 0
tools/perf/util/top.h

@@ -37,6 +37,7 @@ struct perf_top {
 	int		   sym_pcnt_filter;
 	const char	   *sym_filter;
 	float		   min_percent;
+	unsigned int	   nr_threads_synthesize;
 };
 
 #define CONSOLE_CLEAR ""