|
@@ -177,6 +177,14 @@ static struct event_constraint intel_slm_event_constraints[] __read_mostly =
|
|
EVENT_CONSTRAINT_END
|
|
EVENT_CONSTRAINT_END
|
|
};
|
|
};
|
|
|
|
|
|
|
|
+/*
+ * Skylake event constraint table; intel_pmu_init() installs it as
+ * x86_pmu.event_constraints for the Skylake models.
+ *
+ * The first three entries are the fixed-function events (second macro
+ * argument presumably selects the fixed counter index -- confirm against
+ * FIXED_EVENT_CONSTRAINT()).  The last entry restricts
+ * INST_RETIRED.PREC_DIST with mask 0x2 (presumably a counter bitmask --
+ * confirm against INTEL_UEVENT_CONSTRAINT()).
+ *
+ * Entry order is kept as-is; EVENT_CONSTRAINT_END terminates the table.
+ */
+struct event_constraint intel_skl_event_constraints[] = {
+	FIXED_EVENT_CONSTRAINT(0x00c0, 0),	/* INST_RETIRED.ANY */
+	FIXED_EVENT_CONSTRAINT(0x003c, 1),	/* CPU_CLK_UNHALTED.CORE */
+	FIXED_EVENT_CONSTRAINT(0x0300, 2),	/* CPU_CLK_UNHALTED.REF */
+	INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2),	/* INST_RETIRED.PREC_DIST */
+	EVENT_CONSTRAINT_END
+};
|
|
|
|
+
|
|
static struct extra_reg intel_snb_extra_regs[] __read_mostly = {
|
|
static struct extra_reg intel_snb_extra_regs[] __read_mostly = {
|
|
/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
|
|
/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
|
|
INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3f807f8fffull, RSP_0),
|
|
INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3f807f8fffull, RSP_0),
|
|
@@ -193,6 +201,13 @@ static struct extra_reg intel_snbep_extra_regs[] __read_mostly = {
|
|
EVENT_EXTRA_END
|
|
EVENT_EXTRA_END
|
|
};
|
|
};
|
|
|
|
|
|
|
|
+/*
+ * Skylake extra-register table; intel_pmu_init() installs it as
+ * x86_pmu.extra_regs for the Skylake models.
+ *
+ * The OFFCORE_RSP_X entries stay first, in line with the ordering note on
+ * intel_snb_extra_regs ("must define OFFCORE_RSP_X first, see
+ * intel_fixup_er()").  The third argument (0x3fffff8fffull) is presumably
+ * the mask of bits accepted for the MSR value -- confirm against
+ * INTEL_UEVENT_EXTRA_REG().
+ */
+static struct extra_reg intel_skl_extra_regs[] __read_mostly = {
+	/* event 0x01b7 is paired with MSR_OFFCORE_RSP_0 */
+	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0),
+	/* event 0x01bb is paired with MSR_OFFCORE_RSP_1 */
+	INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1),
+	/* PEBS load-latency event */
+	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
+	EVENT_EXTRA_END
+};
|
|
|
|
+
|
|
EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3");
|
|
EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3");
|
|
EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3");
|
|
EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3");
|
|
EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2");
|
|
EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2");
|
|
@@ -244,6 +259,200 @@ static u64 intel_pmu_event_map(int hw_event)
|
|
return intel_perfmon_event_map[hw_event];
|
|
return intel_perfmon_event_map[hw_event];
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+/*
|
|
|
|
+ * Notes on the events:
|
|
|
|
+ * - data reads do not include code reads (comparable to earlier tables)
|
|
|
|
+ * - data counts include speculative execution (except L1 write, dtlb, bpu)
|
|
|
|
+ * - remote node access includes remote memory, remote cache, remote mmio.
|
|
|
|
+ * - prefetches are not included in the counts.
|
|
|
|
+ * - icache miss does not include decoded icache
|
|
|
|
+ */
|
|
|
|
+
|
|
|
|
+/*
+ * Bit definitions OR-ed together to build the skl_hw_cache_extra_regs
+ * entries (the values paired with the OFFCORE_RESPONSE event).
+ */
+
+/* Request types, plus the generic read/write aliases used by the tables. */
+#define SKL_DEMAND_DATA_RD		BIT_ULL(0)
+#define SKL_DEMAND_RFO			BIT_ULL(1)
+#define SKL_DEMAND_READ			SKL_DEMAND_DATA_RD
+#define SKL_DEMAND_WRITE		SKL_DEMAND_RFO
+
+/* Response / supplier selection. */
+#define SKL_ANY_RESPONSE		BIT_ULL(16)
+#define SKL_SUPPLIER_NONE		BIT_ULL(17)
+#define SKL_LLC_ACCESS			SKL_ANY_RESPONSE
+
+/* L3 miss: local DRAM (bit 26) and the remote-hop DRAM cases (bits 27-29). */
+#define SKL_L3_MISS_LOCAL_DRAM		BIT_ULL(26)
+#define SKL_L3_MISS_REMOTE_HOP0_DRAM	BIT_ULL(27)
+#define SKL_L3_MISS_REMOTE_HOP1_DRAM	BIT_ULL(28)
+#define SKL_L3_MISS_REMOTE_HOP2P_DRAM	BIT_ULL(29)
+#define SKL_L3_MISS_REMOTE \
+	(SKL_L3_MISS_REMOTE_HOP0_DRAM | \
+	 SKL_L3_MISS_REMOTE_HOP1_DRAM | \
+	 SKL_L3_MISS_REMOTE_HOP2P_DRAM)
+/* Any L3 miss: local plus every remote case (bits 26-29). */
+#define SKL_L3_MISS \
+	(SKL_L3_MISS_LOCAL_DRAM | SKL_L3_MISS_REMOTE)
+
+/* Snoop response bits (30-37). */
+#define SKL_SPL_HIT			BIT_ULL(30)
+#define SKL_SNOOP_NONE			BIT_ULL(31)
+#define SKL_SNOOP_NOT_NEEDED		BIT_ULL(32)
+#define SKL_SNOOP_MISS			BIT_ULL(33)
+#define SKL_SNOOP_HIT_NO_FWD		BIT_ULL(34)
+#define SKL_SNOOP_HIT_WITH_FWD		BIT_ULL(35)
+#define SKL_SNOOP_HITM			BIT_ULL(36)
+#define SKL_SNOOP_NON_DRAM		BIT_ULL(37)
+/* Every snoop bit except SKL_SNOOP_NON_DRAM (bits 30-36). */
+#define SKL_SNOOP_DRAM \
+	(SKL_SPL_HIT | SKL_SNOOP_NONE | \
+	 SKL_SNOOP_NOT_NEEDED | SKL_SNOOP_MISS | \
+	 SKL_SNOOP_HIT_NO_FWD | SKL_SNOOP_HIT_WITH_FWD | \
+	 SKL_SNOOP_HITM)
+/* All snoop bits (30-37). */
+#define SKL_ANY_SNOOP \
+	(SKL_SNOOP_DRAM | SKL_SNOOP_NON_DRAM)
|
|
|
|
+
|
|
|
|
+/*
+ * Skylake generic cache-event table, indexed [cache][operation][result].
+ * intel_pmu_init() copies it wholesale into hw_cache_event_ids.
+ *
+ * Entries hold the raw event encoding; the named event is given in the
+ * trailing comment.  Entries using 0x1b7 (OFFCORE_RESPONSE) are qualified
+ * by the matching slot of skl_hw_cache_extra_regs.
+ * NOTE(review): 0x0 and -1 presumably mark "no event available" and
+ * "combination not meaningful" respectively -- confirm against the
+ * consumer of hw_cache_event_ids.
+ *
+ * The DTLB miss entries use umask 0x0e for WALK_COMPLETED, the same umask
+ * the ITLB WALK_COMPLETED entry uses, so that completed walks for all page
+ * sizes are counted (umask 0x06 leaves out the 1G page walks).
+ */
+static __initconst const u64 skl_hw_cache_event_ids
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x81d0,	/* MEM_INST_RETIRED.ALL_LOADS */
+		[ C(RESULT_MISS)   ] = 0x151,	/* L1D.REPLACEMENT */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x82d0,	/* MEM_INST_RETIRED.ALL_STORES */
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+ },
+ [ C(L1I ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x283,	/* ICACHE_64B.MISS */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+ },
+ [ C(LL  ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x1b7,	/* OFFCORE_RESPONSE */
+		[ C(RESULT_MISS)   ] = 0x1b7,	/* OFFCORE_RESPONSE */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x1b7,	/* OFFCORE_RESPONSE */
+		[ C(RESULT_MISS)   ] = 0x1b7,	/* OFFCORE_RESPONSE */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+ },
+ [ C(DTLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x81d0,	/* MEM_INST_RETIRED.ALL_LOADS */
+		[ C(RESULT_MISS)   ] = 0xe08,	/* DTLB_LOAD_MISSES.WALK_COMPLETED */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x82d0,	/* MEM_INST_RETIRED.ALL_STORES */
+		[ C(RESULT_MISS)   ] = 0xe49,	/* DTLB_STORE_MISSES.WALK_COMPLETED */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+ },
+ [ C(ITLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x2085,	/* ITLB_MISSES.STLB_HIT */
+		[ C(RESULT_MISS)   ] = 0xe85,	/* ITLB_MISSES.WALK_COMPLETED */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+ [ C(BPU ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0xc4,	/* BR_INST_RETIRED.ALL_BRANCHES */
+		[ C(RESULT_MISS)   ] = 0xc5,	/* BR_MISP_RETIRED.ALL_BRANCHES */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+ [ C(NODE) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x1b7,	/* OFFCORE_RESPONSE */
+		[ C(RESULT_MISS)   ] = 0x1b7,	/* OFFCORE_RESPONSE */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x1b7,	/* OFFCORE_RESPONSE */
+		[ C(RESULT_MISS)   ] = 0x1b7,	/* OFFCORE_RESPONSE */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+ },
+};
|
|
|
|
+
|
|
|
|
+/*
+ * Companion to skl_hw_cache_event_ids, indexed the same way
+ * ([cache][operation][result]); intel_pmu_init() copies it into
+ * hw_cache_extra_regs.
+ *
+ * Only the LL and NODE rows are populated -- the rows whose event is
+ * OFFCORE_RESPONSE (0x1b7).  Each value is an OR of SKL_* request,
+ * response/L3-miss and snoop bits.  Slots not listed are left
+ * zero-initialized.
+ */
+static __initconst const u64 skl_hw_cache_extra_regs
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ /* Last-level cache: access = any response, miss = any L3 miss. */
+ [ C(LL  ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = SKL_DEMAND_READ |
+				       SKL_LLC_ACCESS | SKL_ANY_SNOOP,
+		[ C(RESULT_MISS)   ] = SKL_DEMAND_READ |
+				       SKL_L3_MISS | SKL_ANY_SNOOP |
+				       SKL_SUPPLIER_NONE,
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = SKL_DEMAND_WRITE |
+				       SKL_LLC_ACCESS | SKL_ANY_SNOOP,
+		[ C(RESULT_MISS)   ] = SKL_DEMAND_WRITE |
+				       SKL_L3_MISS | SKL_ANY_SNOOP |
+				       SKL_SUPPLIER_NONE,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+ },
+ /* Node: access = local DRAM L3 miss, miss = remote DRAM L3 miss. */
+ [ C(NODE) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = SKL_DEMAND_READ |
+				       SKL_L3_MISS_LOCAL_DRAM | SKL_SNOOP_DRAM,
+		[ C(RESULT_MISS)   ] = SKL_DEMAND_READ |
+				       SKL_L3_MISS_REMOTE | SKL_SNOOP_DRAM,
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = SKL_DEMAND_WRITE |
+				       SKL_L3_MISS_LOCAL_DRAM | SKL_SNOOP_DRAM,
+		[ C(RESULT_MISS)   ] = SKL_DEMAND_WRITE |
+				       SKL_L3_MISS_REMOTE | SKL_SNOOP_DRAM,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+ },
+};
|
|
|
|
+
|
|
#define SNB_DMND_DATA_RD (1ULL << 0)
|
|
#define SNB_DMND_DATA_RD (1ULL << 0)
|
|
#define SNB_DMND_RFO (1ULL << 1)
|
|
#define SNB_DMND_RFO (1ULL << 1)
|
|
#define SNB_DMND_IFETCH (1ULL << 2)
|
|
#define SNB_DMND_IFETCH (1ULL << 2)
|
|
@@ -3278,6 +3487,29 @@ __init int intel_pmu_init(void)
|
|
pr_cont("Broadwell events, ");
|
|
pr_cont("Broadwell events, ");
|
|
break;
|
|
break;
|
|
|
|
|
|
|
|
+	/*
+	 * Skylake: load the SKL cache-event tables, initialize the SKL LBR
+	 * support and install the SKL constraint / extra-register tables.
+	 */
+	case 78: /* 14nm Skylake Mobile */
+	case 94: /* 14nm Skylake Desktop */
+		x86_pmu.late_ack = true;
+		memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
+		memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
+		intel_pmu_lbr_init_skl();
+
+		x86_pmu.event_constraints = intel_skl_event_constraints;
+		x86_pmu.pebs_constraints = intel_skl_pebs_event_constraints;
+		x86_pmu.extra_regs = intel_skl_extra_regs;
+		x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
+		/* all extra regs are per-cpu when HT is on */
+		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+		x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
+
+		/* The Haswell (hsw_*) hooks and event attributes are reused here. */
+		x86_pmu.hw_config = hsw_hw_config;
+		x86_pmu.get_event_constraints = hsw_get_event_constraints;
+		/* cpu_events is assigned once; a second identical store was dropped. */
+		x86_pmu.cpu_events = hsw_events_attrs;
+		WARN_ON(!x86_pmu.format_attrs);
+		pr_cont("Skylake events, ");
+		break;
|
|
|
|
+
|
|
default:
|
|
default:
|
|
switch (x86_pmu.version) {
|
|
switch (x86_pmu.version) {
|
|
case 1:
|
|
case 1:
|