@@ -25,7 +25,7 @@
 #include <linux/irq.h>
 #include <linux/kernel.h>
 #include <linux/export.h>
-#include <linux/of.h>
+#include <linux/of_device.h>
 #include <linux/perf_event.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
@@ -36,7 +36,6 @@
 #include <asm/irq.h>
 #include <asm/irq_regs.h>
 #include <asm/pmu.h>
-#include <asm/stacktrace.h>
 
 /*
  * ARMv8 supports a maximum of 32 events.
@@ -78,6 +77,16 @@ EXPORT_SYMBOL_GPL(perf_num_counters);
 
 #define CACHE_OP_UNSUPPORTED		0xFFFF
 
+#define PERF_MAP_ALL_UNSUPPORTED					\
+	[0 ... PERF_COUNT_HW_MAX - 1] = HW_OP_UNSUPPORTED
+
+#define PERF_CACHE_MAP_ALL_UNSUPPORTED					\
+[0 ... C(MAX) - 1] = {							\
+	[0 ... C(OP_MAX) - 1] = {					\
+		[0 ... C(RESULT_MAX) - 1] = CACHE_OP_UNSUPPORTED,	\
+	},								\
+}
+
 static int
 armpmu_map_cache_event(const unsigned (*cache_map)
				      [PERF_COUNT_HW_CACHE_MAX]
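The two macros above lean on the GCC/Clang designated range initializer
extension: [first ... last] = value assigns every element in the range, and a
later designator for a specific index overrides the default. A minimal
standalone sketch of the idiom (names and values are illustrative only, not
from this patch):

	/* Standalone illustration; build with: cc demo.c -o demo */
	#include <stdio.h>

	#define NUM_EVENTS	8
	#define UNSUPPORTED	0xFFFF

	/* Default every slot, then override the supported indices. */
	static const unsigned event_map[NUM_EVENTS] = {
		[0 ... NUM_EVENTS - 1] = UNSUPPORTED,
		[2] = 0x11,
		[5] = 0x08,
	};

	int main(void)
	{
		for (int i = 0; i < NUM_EVENTS; i++)
			printf("event %d -> 0x%x\n", i, event_map[i]);
		return 0;
	}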
@@ -435,10 +444,8 @@ armpmu_reserve_hardware(struct arm_pmu *armpmu)
	unsigned int i, irqs;
	struct platform_device *pmu_device = armpmu->plat_device;
 
-	if (!pmu_device) {
-		pr_err("no PMU device registered\n");
+	if (!pmu_device)
		return -ENODEV;
-	}
 
	irqs = min(pmu_device->num_resources, num_possible_cpus());
	if (!irqs) {
@@ -703,118 +710,28 @@ enum armv8_pmuv3_perf_types {
 
 /* PMUv3 HW events mapping. */
 static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = {
+	PERF_MAP_ALL_UNSUPPORTED,
	[PERF_COUNT_HW_CPU_CYCLES]		= ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES,
	[PERF_COUNT_HW_INSTRUCTIONS]		= ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED,
	[PERF_COUNT_HW_CACHE_REFERENCES]	= ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
	[PERF_COUNT_HW_CACHE_MISSES]		= ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
-	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= HW_OP_UNSUPPORTED,
	[PERF_COUNT_HW_BRANCH_MISSES]		= ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
-	[PERF_COUNT_HW_BUS_CYCLES]		= HW_OP_UNSUPPORTED,
-	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= HW_OP_UNSUPPORTED,
-	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND]	= HW_OP_UNSUPPORTED,
 };
 
 static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
						[PERF_COUNT_HW_CACHE_OP_MAX]
						[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
-	[C(L1D)] = {
-		[C(OP_READ)] = {
-			[C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
-			[C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
-		},
-		[C(OP_WRITE)] = {
-			[C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
-			[C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
-		},
-		[C(OP_PREFETCH)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
-		},
-	},
-	[C(L1I)] = {
-		[C(OP_READ)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
-		},
-		[C(OP_WRITE)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
-		},
-		[C(OP_PREFETCH)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
-		},
-	},
-	[C(LL)] = {
-		[C(OP_READ)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
-		},
-		[C(OP_WRITE)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
-		},
-		[C(OP_PREFETCH)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
-		},
-	},
-	[C(DTLB)] = {
-		[C(OP_READ)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
-		},
-		[C(OP_WRITE)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
-		},
-		[C(OP_PREFETCH)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
-		},
-	},
-	[C(ITLB)] = {
-		[C(OP_READ)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
-		},
-		[C(OP_WRITE)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
-		},
-		[C(OP_PREFETCH)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
-		},
-	},
-	[C(BPU)] = {
-		[C(OP_READ)] = {
-			[C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED,
-			[C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
-		},
-		[C(OP_WRITE)] = {
-			[C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED,
-			[C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
-		},
-		[C(OP_PREFETCH)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
-		},
-	},
-	[C(NODE)] = {
-		[C(OP_READ)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
-		},
-		[C(OP_WRITE)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
-		},
-		[C(OP_PREFETCH)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
-			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
-		},
-	},
+	PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+	[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
+	[C(L1D)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
+	[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
+	[C(L1D)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
+
+	[C(BPU)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED,
+	[C(BPU)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
+	[C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED,
+	[C(BPU)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
 };
 
 /*
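The rewritten cache map combines the range-initialized default with flattened
three-dimensional designators: [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = x
addresses a single cell of the 3-D array without nested braces, so only the
supported events need to be spelled out. A standalone sketch under assumed
dimensions (enum names and counter values are illustrative only):

	#include <stdio.h>

	#define UNSUPPORTED	0xFFFF

	enum { L1D, L1I, CACHE_MAX };
	enum { OP_READ, OP_WRITE, OP_MAX };
	enum { RESULT_ACCESS, RESULT_MISS, RESULT_MAX };

	static const unsigned cache_map[CACHE_MAX][OP_MAX][RESULT_MAX] = {
		/* Range designators default the whole 3-D array... */
		[0 ... CACHE_MAX - 1] = {
			[0 ... OP_MAX - 1] = {
				[0 ... RESULT_MAX - 1] = UNSUPPORTED,
			},
		},
		/* ...then flattened designators override supported cells. */
		[L1D][OP_READ][RESULT_ACCESS]	= 0x04,
		[L1D][OP_READ][RESULT_MISS]	= 0x03,
	};

	int main(void)
	{
		printf("L1D read access: 0x%x\n", cache_map[L1D][OP_READ][RESULT_ACCESS]);
		printf("L1I read access: 0x%x\n", cache_map[L1I][OP_READ][RESULT_ACCESS]);
		return 0;
	}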
@@ -1337,7 +1254,7 @@ static int armpmu_device_probe(struct platform_device *pdev)
	}
 
	for_each_possible_cpu(cpu)
-		if (arch_find_n_match_cpu_physical_id(dn, cpu, NULL))
+		if (dn == of_cpu_device_node_get(cpu))
			break;
 
	if (cpu >= nr_cpu_ids) {
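This probe change matches the device-tree node against each CPU by pointer
identity rather than by physical ID. of_cpu_device_node_get() is declared in
linux/of_device.h (hence the include change at the top of this patch) and
returns the given CPU's device-tree node with a reference held. A hedged
in-kernel sketch of the pattern (not standalone; "dn" is assumed to come from
the surrounding driver, and the reference handling here is illustrative):

	#include <linux/cpumask.h>
	#include <linux/of.h>
	#include <linux/of_device.h>

	/* Find the logical CPU whose device-tree node matches "dn". */
	static int cpu_for_node(struct device_node *dn)
	{
		int cpu;

		for_each_possible_cpu(cpu) {
			struct device_node *cpu_dn = of_cpu_device_node_get(cpu);
			bool match = (dn == cpu_dn);

			of_node_put(cpu_dn);	/* drop the reference taken above */
			if (match)
				return cpu;
		}

		return -1;	/* no possible CPU matches */
	}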
@@ -1415,180 +1332,3 @@ static int __init init_hw_perf_events(void)
 }
 early_initcall(init_hw_perf_events);
 
-/*
- * Callchain handling code.
- */
-struct frame_tail {
-	struct frame_tail __user *fp;
-	unsigned long lr;
-} __attribute__((packed));
-
-/*
- * Get the return address for a single stackframe and return a pointer to the
- * next frame tail.
- */
-static struct frame_tail __user *
-user_backtrace(struct frame_tail __user *tail,
-	       struct perf_callchain_entry *entry)
-{
-	struct frame_tail buftail;
-	unsigned long err;
-
-	/* Also check accessibility of one struct frame_tail beyond */
-	if (!access_ok(VERIFY_READ, tail, sizeof(buftail)))
-		return NULL;
-
-	pagefault_disable();
-	err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail));
-	pagefault_enable();
-
-	if (err)
-		return NULL;
-
-	perf_callchain_store(entry, buftail.lr);
-
-	/*
-	 * Frame pointers should strictly progress back up the stack
-	 * (towards higher addresses).
-	 */
-	if (tail >= buftail.fp)
-		return NULL;
-
-	return buftail.fp;
-}
-
-#ifdef CONFIG_COMPAT
-/*
- * The registers we're interested in are at the end of the variable
- * length saved register structure. The fp points at the end of this
- * structure so the address of this struct is:
- * (struct compat_frame_tail *)(xxx->fp)-1
- *
- * This code has been adapted from the ARM OProfile support.
- */
-struct compat_frame_tail {
-	compat_uptr_t	fp; /* a (struct compat_frame_tail *) in compat mode */
-	u32		sp;
-	u32		lr;
-} __attribute__((packed));
-
-static struct compat_frame_tail __user *
-compat_user_backtrace(struct compat_frame_tail __user *tail,
-		      struct perf_callchain_entry *entry)
-{
-	struct compat_frame_tail buftail;
-	unsigned long err;
-
-	/* Also check accessibility of one struct frame_tail beyond */
-	if (!access_ok(VERIFY_READ, tail, sizeof(buftail)))
-		return NULL;
-
-	pagefault_disable();
-	err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail));
-	pagefault_enable();
-
-	if (err)
-		return NULL;
-
-	perf_callchain_store(entry, buftail.lr);
-
-	/*
-	 * Frame pointers should strictly progress back up the stack
-	 * (towards higher addresses).
-	 */
-	if (tail + 1 >= (struct compat_frame_tail __user *)
-			compat_ptr(buftail.fp))
-		return NULL;
-
-	return (struct compat_frame_tail __user *)compat_ptr(buftail.fp) - 1;
-}
-#endif /* CONFIG_COMPAT */
-
-void perf_callchain_user(struct perf_callchain_entry *entry,
-			 struct pt_regs *regs)
-{
-	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
-		/* We don't support guest os callchain now */
-		return;
-	}
-
-	perf_callchain_store(entry, regs->pc);
-
-	if (!compat_user_mode(regs)) {
-		/* AARCH64 mode */
-		struct frame_tail __user *tail;
-
-		tail = (struct frame_tail __user *)regs->regs[29];
-
-		while (entry->nr < PERF_MAX_STACK_DEPTH &&
-		       tail && !((unsigned long)tail & 0xf))
-			tail = user_backtrace(tail, entry);
-	} else {
-#ifdef CONFIG_COMPAT
-		/* AARCH32 compat mode */
-		struct compat_frame_tail __user *tail;
-
-		tail = (struct compat_frame_tail __user *)regs->compat_fp - 1;
-
-		while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
-			tail && !((unsigned long)tail & 0x3))
-			tail = compat_user_backtrace(tail, entry);
-#endif
-	}
-}
-
-/*
- * Gets called by walk_stackframe() for every stackframe. This will be called
- * whist unwinding the stackframe and is like a subroutine return so we use
- * the PC.
- */
-static int callchain_trace(struct stackframe *frame, void *data)
-{
-	struct perf_callchain_entry *entry = data;
-	perf_callchain_store(entry, frame->pc);
-	return 0;
-}
-
-void perf_callchain_kernel(struct perf_callchain_entry *entry,
-			   struct pt_regs *regs)
-{
-	struct stackframe frame;
-
-	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
-		/* We don't support guest os callchain now */
-		return;
-	}
-
-	frame.fp = regs->regs[29];
-	frame.sp = regs->sp;
-	frame.pc = regs->pc;
-
-	walk_stackframe(&frame, callchain_trace, entry);
-}
-
-unsigned long perf_instruction_pointer(struct pt_regs *regs)
-{
-	if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
-		return perf_guest_cbs->get_guest_ip();
-
-	return instruction_pointer(regs);
-}
-
-unsigned long perf_misc_flags(struct pt_regs *regs)
-{
-	int misc = 0;
-
-	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
-		if (perf_guest_cbs->is_user_mode())
-			misc |= PERF_RECORD_MISC_GUEST_USER;
-		else
-			misc |= PERF_RECORD_MISC_GUEST_KERNEL;
-	} else {
-		if (user_mode(regs))
-			misc |= PERF_RECORD_MISC_USER;
-		else
-			misc |= PERF_RECORD_MISC_KERNEL;
-	}
-
-	return misc;
-}