11 years ago · 6480c56130
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -411,7 +411,7 @@ LIB_OBJS += $(OUTPUT)tests/code-reading.o
 
															 LIB_OBJS += $(OUTPUT)tests/sample-parsing.o
														
 
															 LIB_OBJS += $(OUTPUT)tests/parse-no-sample-id-all.o
														
 
															 ifndef NO_DWARF_UNWIND
														
 
															-ifeq ($(ARCH),x86)
														
 
															+ifeq ($(ARCH),$(filter $(ARCH),x86 arm))
														
 
															 LIB_OBJS += $(OUTPUT)tests/dwarf-unwind.o
														
 
															 endif
														
 
															 endif
														
--- a/tools/perf/arch/arm/Makefile
+++ b/tools/perf/arch/arm/Makefile
@@ -5,3 +5,10 @@ endif
 
															 ifndef NO_LIBUNWIND
														
 
															 LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libunwind.o
														
 
															 endif
														
 
															+ifndef NO_LIBDW_DWARF_UNWIND
														
 
															+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libdw.o
														
 
															+endif
														
 
															+ifndef NO_DWARF_UNWIND
														
 
															+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/regs_load.o
														
 
															+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/dwarf-unwind.o
														
 
															+endif
														
--- a/tools/perf/arch/arm/include/perf_regs.h
+++ b/tools/perf/arch/arm/include/perf_regs.h
@@ -2,10 +2,15 @@
 
															 #define ARCH_PERF_REGS_H
														
 
															 #include <stdlib.h>
														
 
															-#include "../../util/types.h"
														
 
															+#include <linux/types.h>
														
 
															 #include <asm/perf_regs.h>
														
 
															+void perf_regs_load(u64 *regs);
														
 
															+
														
 
															 #define PERF_REGS_MASK	((1ULL << PERF_REG_ARM_MAX) - 1)
														
 
															+#define PERF_REGS_MAX	PERF_REG_ARM_MAX
														
 
															+#define PERF_SAMPLE_REGS_ABI	PERF_SAMPLE_REGS_ABI_32
														
 
															+
														
 
															 #define PERF_REG_IP	PERF_REG_ARM_PC
														
 
															 #define PERF_REG_SP	PERF_REG_ARM_SP
														
--- a/tools/perf/arch/arm/tests/dwarf-unwind.c
+++ b/tools/perf/arch/arm/tests/dwarf-unwind.c
@@ -0,0 +1,60 @@
 
															+#include <string.h>
														
 
															+#include "perf_regs.h"
														
 
															+#include "thread.h"
														
 
															+#include "map.h"
														
 
															+#include "event.h"
														
 
															+#include "tests/tests.h"
														
 
															+
														
 
															+#define STACK_SIZE 8192	/* upper bound, in bytes, on the stack copy taken below */
														
 
															+
														
 
															+/*
															+ * Copy the live user stack into sample->user_stack.
															+ *
															+ * The stack pointer is taken from regs[PERF_REG_ARM_SP]. The copy starts
															+ * at that address and runs to the end of the map that contains it
															+ * (map->end), capped at STACK_SIZE bytes.
															+ *
															+ * Returns 0 on success, or -1 when the buffer cannot be allocated or no
															+ * map covers the stack pointer. On success the malloc'ed buffer is left
															+ * in sample->user_stack.data and is not freed here (presumably the caller
															+ * releases it - confirm against the test driver).
															+ */
															+static int sample_ustack(struct perf_sample *sample,
														
 
															+			 struct thread *thread, u64 *regs)
														
 
															+{
														
 
															+	struct stack_dump *stack = &sample->user_stack;
														
 
															+	struct map *map;
														
 
															+	unsigned long sp;
														
 
															+	u64 stack_size, *buf;
														
 
															+
														
 
															+	buf = malloc(STACK_SIZE);
														
 
															+	if (!buf) {
														
 
															+		/* This buffer holds the stack copy, not the registers. */
															+		pr_debug("failed to allocate sample ustack data\n");
														
 
															+		return -1;
														
 
															+	}
														
 
															+
														
 
															+	sp = (unsigned long) regs[PERF_REG_ARM_SP];
														
 
															+
														
 
															+	map = map_groups__find(thread->mg, MAP__VARIABLE, (u64) sp);
														
 
															+	if (!map) {
														
 
															+		pr_debug("failed to get stack map\n");
														
 
															+		free(buf);
														
 
															+		return -1;
														
 
															+	}
														
 
															+
														
 
															+	/* Never read past the end of the stack mapping or past the buffer. */
															+	stack_size = map->end - sp;
														
 
															+	stack_size = stack_size > STACK_SIZE ? STACK_SIZE : stack_size;
														
 
															+
														
 
															+	memcpy(buf, (void *) sp, stack_size);
														
 
															+	stack->data = (char *) buf;
														
 
															+	stack->size = stack_size;
														
 
															+	return 0;
														
 
															+}
														
 
															+
														
 
															+/*
															+ * Fill 'sample' with the current register state and a copy of the
															+ * current user stack, for use by the dwarf unwind test.
															+ *
															+ * A zeroed array of PERF_REGS_MAX u64 slots is allocated and filled by
															+ * perf_regs_load(), then published through sample->user_regs together
															+ * with the ABI and register mask. The stack copy is delegated to
															+ * sample_ustack().
															+ *
															+ * Returns -1 if the register buffer cannot be allocated, otherwise the
															+ * result of sample_ustack(). The register buffer stays attached to
															+ * sample->user_regs.regs even when sample_ustack() fails; it is not
															+ * freed here.
															+ */
															+int test__arch_unwind_sample(struct perf_sample *sample,
														
 
															+			     struct thread *thread)
														
 
															+{
														
 
															+	struct regs_dump *regs = &sample->user_regs;
														
 
															+	u64 *buf;
														
 
															+
														
 
															+	/* perf_regs_load() expects the buffer to be zeroed beforehand. */
															+	buf = calloc(PERF_REGS_MAX, sizeof(*buf));
														
 
															+	if (!buf) {
														
 
															+		pr_debug("failed to allocate sample uregs data\n");
														
 
															+		return -1;
														
 
															+	}
														
 
															+
														
 
															+	perf_regs_load(buf);
														
 
															+	regs->abi  = PERF_SAMPLE_REGS_ABI;
														
 
															+	regs->regs = buf;
														
 
															+	regs->mask = PERF_REGS_MASK;
														
 
															+
														
 
															+	return sample_ustack(sample, thread, buf);
														
 
															+}
														
--- a/tools/perf/arch/arm/tests/regs_load.S
+++ b/tools/perf/arch/arm/tests/regs_load.S
@@ -0,0 +1,58 @@
 
															+#include <linux/linkage.h>
														
 
															+/* Byte offset of each register's slot in the u64 regs[] array (8 bytes per slot). */
														
 
															+#define R0 0x00
														
 
															+#define R1 0x08
														
 
															+#define R2 0x10
														
 
															+#define R3 0x18
														
 
															+#define R4 0x20
														
 
															+#define R5 0x28
														
 
															+#define R6 0x30
														
 
															+#define R7 0x38
														
 
															+#define R8 0x40
														
 
															+#define R9 0x48
														
 
															+#define SL 0x50
														
 
															+#define FP 0x58
														
 
															+#define IP 0x60
														
 
															+#define SP 0x68
														
 
															+#define LR 0x70
														
 
															+#define PC 0x78
														
 
															+
														
 
															+/*
														
 
															+ * Implementation of void perf_regs_load(u64 *regs);
														
 
															+ *
														
 
															+ * This function fills in the 'regs' buffer from the actual register values,
														
 
															+ * in the way the perf built-in unwinding test expects them:
														
 
															+ * - the PC at the time of the call to this function. Since this function
														
 
															+ *   is called using a bl instruction, the PC value is taken from LR.
														
 
															+ * The built-in unwinding test then unwinds the call stack from the dwarf
														
 
															+ * information in unwind__get_entries.
														
 
															+ *
														
 
															+ * Notes:
														
 
															+ * - the 8-byte stride in the register offsets comes from the fact
														
 
															+ * that the registers are stored in a u64 array (u64 *regs),
														
 
															+ * - the regs buffer needs to be zeroed before the call to this function,
														
 
															+ * in this case using a calloc in dwarf-unwind.c.
														
 
															+ */
														
 
															+
														
 
															+.text
														
 
															+.type perf_regs_load,%function
														
 
															+ENTRY(perf_regs_load)
														
 
															+	str r0, [r0, #R0]	// r0 is the base of regs[]; its own slot records that pointer
														
 
															+	str r1, [r0, #R1]
														
 
															+	str r2, [r0, #R2]
														
 
															+	str r3, [r0, #R3]
														
 
															+	str r4, [r0, #R4]
														
 
															+	str r5, [r0, #R5]
														
 
															+	str r6, [r0, #R6]
														
 
															+	str r7, [r0, #R7]
														
 
															+	str r8, [r0, #R8]
														
 
															+	str r9, [r0, #R9]
														
 
															+	str sl, [r0, #SL]
														
 
															+	str fp, [r0, #FP]
														
 
															+	str ip, [r0, #IP]
														
 
															+	str sp, [r0, #SP]
														
 
															+	str lr, [r0, #LR]
														
 
															+	str lr, [r0, #PC]	// store pc as lr in order to skip the call
														
 
															+	                        //  to this function
														
 
															+	mov pc, lr		// return to the caller
														
 
															+ENDPROC(perf_regs_load)
														
--- a/tools/perf/arch/arm/util/unwind-libdw.c
+++ b/tools/perf/arch/arm/util/unwind-libdw.c
@@ -0,0 +1,36 @@
 
															+#include <elfutils/libdwfl.h>
														
 
															+#include "../../util/unwind-libdw.h"
														
 
															+#include "../../util/perf_regs.h"
														
 
															+/*
															+ * Hand the register values captured in a perf sample to libdw as the
															+ * initial register state of 'thread'.
															+ *
															+ * 'arg' is used as a struct unwind_info; the registers are read from
															+ * ui->sample->user_regs. R0-R10, FP, IP, SP, LR and PC are placed at
															+ * indices 0-15 of dwarf_regs, which is then passed to
															+ * dwfl_thread_state_registers() as PERF_REG_ARM_MAX registers starting
															+ * at register number 0 (PERF_REG_ARM_MAX is presumably 16 - confirm
															+ * against asm/perf_regs.h).
															+ *
															+ * Returns whatever dwfl_thread_state_registers() returns.
															+ */
														
 
															+bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
														
 
															+{
														
 
															+	struct unwind_info *ui = arg;
														
 
															+	struct regs_dump *user_regs = &ui->sample->user_regs;
														
 
															+	Dwarf_Word dwarf_regs[PERF_REG_ARM_MAX];
														
 
															+	/* REG(r): value of PERF_REG_ARM_<r>; starts from 0, perf_reg_value()'s return is ignored. */
														
 
															+#define REG(r) ({						\
														
 
															+	Dwarf_Word val = 0;					\
														
 
															+	perf_reg_value(&val, user_regs, PERF_REG_ARM_##r);	\
														
 
															+	val;							\
														
 
															+})
														
 
															+	/* Index N of dwarf_regs is handed to libdw as register number N below. */
														
 
															+	dwarf_regs[0]  = REG(R0);
														
 
															+	dwarf_regs[1]  = REG(R1);
														
 
															+	dwarf_regs[2]  = REG(R2);
														
 
															+	dwarf_regs[3]  = REG(R3);
														
 
															+	dwarf_regs[4]  = REG(R4);
														
 
															+	dwarf_regs[5]  = REG(R5);
														
 
															+	dwarf_regs[6]  = REG(R6);
														
 
															+	dwarf_regs[7]  = REG(R7);
														
 
															+	dwarf_regs[8]  = REG(R8);
														
 
															+	dwarf_regs[9]  = REG(R9);
														
 
															+	dwarf_regs[10] = REG(R10);
														
 
															+	dwarf_regs[11] = REG(FP);
														
 
															+	dwarf_regs[12] = REG(IP);
														
 
															+	dwarf_regs[13] = REG(SP);
														
 
															+	dwarf_regs[14] = REG(LR);
														
 
															+	dwarf_regs[15] = REG(PC);
														
 
															+
														
 
															+	return dwfl_thread_state_registers(thread, 0, PERF_REG_ARM_MAX,
														
 
															+					   dwarf_regs);
														
 
															+}
														
--- a/tools/perf/arch/arm64/include/perf_regs.h
+++ b/tools/perf/arch/arm64/include/perf_regs.h
@@ -2,7 +2,7 @@
 
															 #define ARCH_PERF_REGS_H
														
 
															 #include <stdlib.h>
														
 
															-#include "../../util/types.h"
														
 
															+#include <linux/types.h>
														
 
															 #include <asm/perf_regs.h>
														
 
															 #define PERF_REGS_MASK	((1ULL << PERF_REG_ARM64_MAX) - 1)
														
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -149,7 +149,6 @@ struct perf_sched {
 
															 	unsigned long	 nr_runs;
														
 
															 	unsigned long	 nr_timestamps;
														
 
															 	unsigned long	 nr_unordered_timestamps;
														
 
															-	unsigned long	 nr_state_machine_bugs;
														
 
															 	unsigned long	 nr_context_switch_bugs;
														
 
															 	unsigned long	 nr_events;
														
 
															 	unsigned long	 nr_lost_chunks;
														
@@ -1007,17 +1006,12 @@ static int latency_wakeup_event(struct perf_sched *sched,
 
															 				struct perf_sample *sample,
														
 
															 				struct machine *machine)
														
 
															 {
														
 
															-	const u32 pid	  = perf_evsel__intval(evsel, sample, "pid"),
														
 
															-		  success = perf_evsel__intval(evsel, sample, "success");
														
 
															+	const u32 pid	  = perf_evsel__intval(evsel, sample, "pid");
														
 
															 	struct work_atoms *atoms;
														
 
															 	struct work_atom *atom;
														
 
															 	struct thread *wakee;
														
 
															 	u64 timestamp = sample->time;
														
 
															-	/* Note for later, it may be interesting to observe the failing cases */
														
 
															-	if (!success)
														
 
															-		return 0;
														
 
															-
														
 
															 	wakee = machine__findnew_thread(machine, 0, pid);
														
 
															 	atoms = thread_atoms_search(&sched->atom_root, wakee, &sched->cmp_pid);
														
 
															 	if (!atoms) {
														
@@ -1037,12 +1031,18 @@ static int latency_wakeup_event(struct perf_sched *sched,
 
															 	atom = list_entry(atoms->work_list.prev, struct work_atom, list);
														
 
															 	/*
														
 
															+	 * A wakeup event is not guaranteed to arrive while the task
														
 
															+	 * is off the run queue; it may also arrive while the task is
														
 
															+	 * still on the run queue, where the wakeup only changes
														
 
															+	 * ->state to TASK_RUNNING. So do not set ->wake_up_time when
														
 
															+	 * waking up a task which is already on the run queue.
														
 
															+	 *
														
 
															 	 * You WILL be missing events if you've recorded only
														
 
															 	 * one CPU, or are only looking at only one, so don't
														
 
															-	 * make useless noise.
														
 
															+	 * skip in this case.
														
 
															 	 */
														
 
															 	if (sched->profile_cpu == -1 && atom->state != THREAD_SLEEPING)
														
 
															-		sched->nr_state_machine_bugs++;
														
 
															+		return 0;
														
 
															 	sched->nr_timestamps++;
														
 
															 	if (atom->sched_out_time > timestamp) {
														
@@ -1266,9 +1266,8 @@ static int process_sched_wakeup_event(struct perf_tool *tool,
 
															 static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
														
 
															 			    struct perf_sample *sample, struct machine *machine)
														
 
															 {
														
 
															-	const u32 prev_pid = perf_evsel__intval(evsel, sample, "prev_pid"),
														
 
															-		  next_pid = perf_evsel__intval(evsel, sample, "next_pid");
														
 
															-	struct thread *sched_out __maybe_unused, *sched_in;
														
 
															+	const u32 next_pid = perf_evsel__intval(evsel, sample, "next_pid");
														
 
															+	struct thread *sched_in;
														
 
															 	int new_shortname;
														
 
															 	u64 timestamp0, timestamp = sample->time;
														
 
															 	s64 delta;
														
@@ -1291,7 +1290,6 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
 
															 		return -1;
														
 
															 	}
														
 
															-	sched_out = machine__findnew_thread(machine, 0, prev_pid);
														
 
															 	sched_in = machine__findnew_thread(machine, 0, next_pid);
														
 
															 	sched->curr_thread[this_cpu] = sched_in;
														
@@ -1501,14 +1499,6 @@ static void print_bad_events(struct perf_sched *sched)
 
															 			(double)sched->nr_lost_events/(double)sched->nr_events * 100.0,
														
 
															 			sched->nr_lost_events, sched->nr_events, sched->nr_lost_chunks);
														
 
															 	}
														
 
															-	if (sched->nr_state_machine_bugs && sched->nr_timestamps) {
														
 
															-		printf("  INFO: %.3f%% state machine bugs (%ld out of %ld)",
														
 
															-			(double)sched->nr_state_machine_bugs/(double)sched->nr_timestamps*100.0,
														
 
															-			sched->nr_state_machine_bugs, sched->nr_timestamps);
														
 
															-		if (sched->nr_lost_events)
														
 
															-			printf(" (due to lost events?)");
														
 
															-		printf("\n");
														
 
															-	}
														
 
															 	if (sched->nr_context_switch_bugs && sched->nr_timestamps) {
														
 
															 		printf("  INFO: %.3f%% context switch bugs (%ld out of %ld)",
														
 
															 			(double)sched->nr_context_switch_bugs/(double)sched->nr_timestamps*100.0,
														
--- a/tools/perf/config/Makefile
+++ b/tools/perf/config/Makefile
@@ -40,11 +40,11 @@ ifeq ($(ARCH),arm64)
 
															   LIBUNWIND_LIBS = -lunwind -lunwind-aarch64
														
 
															 endif
														
 
															-# So far there's only x86 libdw unwind support merged in perf.
														
 
															+# So far there's only x86 and arm libdw unwind support merged in perf.
														
 
															 # Disable it on all other architectures in case libdw unwind
														
 
															 # support is detected in system. Add supported architectures
														
 
															 # to the check.
														
 
															-ifneq ($(ARCH),x86)
														
 
															+ifneq ($(ARCH),$(filter $(ARCH),x86 arm))
														
 
															   NO_LIBDW_DWARF_UNWIND := 1
														
 
															 endif
														
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -115,7 +115,7 @@ static struct test {
 
															 		.desc = "Test parsing with no sample_id_all bit set",
														
 
															 		.func = test__parse_no_sample_id_all,
														
 
															 	},
														
 
															-#if defined(__x86_64__) || defined(__i386__)
														
 
															+#if defined(__x86_64__) || defined(__i386__) || defined(__arm__)
														
 
															 #ifdef HAVE_DWARF_UNWIND_SUPPORT
														
 
															 	{
														
 
															 		.desc = "Test dwarf unwind",
														
--- a/tools/perf/tests/evsel-tp-sched.c
+++ b/tools/perf/tests/evsel-tp-sched.c
@@ -74,9 +74,6 @@ int test__perf_evsel__tp_sched_test(void)
 
															 	if (perf_evsel__test_field(evsel, "prio", 4, true))
														
 
															 		ret = -1;
														
 
															-	if (perf_evsel__test_field(evsel, "success", 4, true))
														
 
															-		ret = -1;
														
 
															-
														
 
															 	if (perf_evsel__test_field(evsel, "target_cpu", 4, true))
														
 
															 		ret = -1;
														
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -45,7 +45,7 @@ int test__hists_filter(void);
 
															 int test__mmap_thread_lookup(void);
														
 
															 int test__thread_mg_share(void);
														
 
															-#if defined(__x86_64__) || defined(__i386__)
														
 
															+#if defined(__x86_64__) || defined(__i386__) || defined(__arm__)
														
 
															 #ifdef HAVE_DWARF_UNWIND_SUPPORT
														
 
															 struct thread;
														
 
															 struct perf_sample;
														
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1625,13 +1625,14 @@ out_delete_map:
 
															 void perf_session__fprintf_info(struct perf_session *session, FILE *fp,
														
 
															 				bool full)
														
 
															 {
														
 
															-	int fd = perf_data_file__fd(session->file);
														
 
															 	struct stat st;
														
 
															-	int ret;
														
 
															+	int fd, ret;
														
 
															 	if (session == NULL || fp == NULL)
														
 
															 		return;
														
 
															+	fd = perf_data_file__fd(session->file);
														
 
															+
														
 
															 	ret = fstat(fd, &st);
														
 
															 	if (ret == -1)
														
 
															 		return;