|
@@ -415,6 +415,196 @@ static __initconst const u64 snb_hw_cache_event_ids
|
|
|
|
|
|
};
|
|
|
|
|
|
+/*
|
|
|
+ * Notes on the events:
|
|
|
+ * - data reads do not include code reads (comparable to earlier tables)
|
|
|
+ * - data counts include speculative execution (except L1 write, DTLB, BPU)
|
|
|
+ * - remote node access includes remote memory, remote cache, remote MMIO.
|
|
|
+ * - prefetches are not included in the counts because they are not
|
|
|
+ * reliably counted.
|
|
|
+ */
|
|
|
+
|
|
|
+#define HSW_DEMAND_DATA_RD BIT_ULL(0) /* bit 0; aliased below as HSW_DEMAND_READ */
|
|
|
+#define HSW_DEMAND_RFO BIT_ULL(1) /* bit 1; aliased below as HSW_DEMAND_WRITE */
|
|
|
+#define HSW_ANY_RESPONSE BIT_ULL(16) /* bit 16; aliased below as HSW_LLC_ACCESS */
|
|
|
+#define HSW_SUPPLIER_NONE BIT_ULL(17) /* bit 17; not used by the hsw_hw_cache_extra_regs table below */
|
|
|
+#define HSW_L3_MISS_LOCAL_DRAM BIT_ULL(22)
|
|
|
+#define HSW_L3_MISS_REMOTE_HOP0 BIT_ULL(27)
|
|
|
+#define HSW_L3_MISS_REMOTE_HOP1 BIT_ULL(28)
|
|
|
+#define HSW_L3_MISS_REMOTE_HOP2P BIT_ULL(29)
|
|
|
+#define HSW_L3_MISS (HSW_L3_MISS_LOCAL_DRAM| \
|
|
|
+ HSW_L3_MISS_REMOTE_HOP0|HSW_L3_MISS_REMOTE_HOP1| \
|
|
|
+ HSW_L3_MISS_REMOTE_HOP2P) /* local DRAM bit plus all three remote-hop bits */
|
|
|
+#define HSW_SNOOP_NONE BIT_ULL(31)
|
|
|
+#define HSW_SNOOP_NOT_NEEDED BIT_ULL(32)
|
|
|
+#define HSW_SNOOP_MISS BIT_ULL(33)
|
|
|
+#define HSW_SNOOP_HIT_NO_FWD BIT_ULL(34)
|
|
|
+#define HSW_SNOOP_HIT_WITH_FWD BIT_ULL(35)
|
|
|
+#define HSW_SNOOP_HITM BIT_ULL(36)
|
|
|
+#define HSW_SNOOP_NON_DRAM BIT_ULL(37)
|
|
|
+#define HSW_ANY_SNOOP (HSW_SNOOP_NONE| \
|
|
|
+ HSW_SNOOP_NOT_NEEDED|HSW_SNOOP_MISS| \
|
|
|
+ HSW_SNOOP_HIT_NO_FWD|HSW_SNOOP_HIT_WITH_FWD| \
|
|
|
+ HSW_SNOOP_HITM|HSW_SNOOP_NON_DRAM) /* all seven snoop bits, 31 through 37 */
|
|
|
+#define HSW_SNOOP_DRAM (HSW_ANY_SNOOP & ~HSW_SNOOP_NON_DRAM) /* bits 31 through 36: every snoop bit except NON_DRAM */
|
|
|
+#define HSW_DEMAND_READ HSW_DEMAND_DATA_RD /* OP_READ rows of hsw_hw_cache_extra_regs use this name */
|
|
|
+#define HSW_DEMAND_WRITE HSW_DEMAND_RFO /* OP_WRITE rows of hsw_hw_cache_extra_regs use this name */
|
|
|
+#define HSW_L3_MISS_REMOTE (HSW_L3_MISS_REMOTE_HOP0|\
|
|
|
+ HSW_L3_MISS_REMOTE_HOP1|HSW_L3_MISS_REMOTE_HOP2P) /* HSW_L3_MISS without the local DRAM bit */
|
|
|
+#define HSW_LLC_ACCESS HSW_ANY_RESPONSE /* LL RESULT_ACCESS rows select "any response" */
|
|
|
+
|
|
|
+static __initconst const u64 hsw_hw_cache_event_ids /* cache event codes copied into hw_cache_event_ids for the Haswell models */
|
|
|
+ [PERF_COUNT_HW_CACHE_MAX]
|
|
|
+ [PERF_COUNT_HW_CACHE_OP_MAX]
|
|
|
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = /* indexed [cache][op][result] */
|
|
|
+{
|
|
|
+ [ C(L1D ) ] = {
|
|
|
+ [ C(OP_READ) ] = {
|
|
|
+ [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */
|
|
|
+ [ C(RESULT_MISS) ] = 0x151, /* L1D.REPLACEMENT */
|
|
|
+ },
|
|
|
+ [ C(OP_WRITE) ] = {
|
|
|
+ [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */
|
|
|
+ [ C(RESULT_MISS) ] = 0x0, /* NOTE(review): 0x0 presumably means "no event for this slot" - confirm against the consumer of hw_cache_event_ids */
|
|
|
+ },
|
|
|
+ [ C(OP_PREFETCH) ] = {
|
|
|
+ [ C(RESULT_ACCESS) ] = 0x0,
|
|
|
+ [ C(RESULT_MISS) ] = 0x0,
|
|
|
+ },
|
|
|
+ },
|
|
|
+ [ C(L1I ) ] = {
|
|
|
+ [ C(OP_READ) ] = {
|
|
|
+ [ C(RESULT_ACCESS) ] = 0x0,
|
|
|
+ [ C(RESULT_MISS) ] = 0x280, /* ICACHE.MISSES */
|
|
|
+ },
|
|
|
+ [ C(OP_WRITE) ] = {
|
|
|
+ [ C(RESULT_ACCESS) ] = -1, /* NOTE(review): -1 presumably marks a combination that is not meaningful (distinct from 0x0) - confirm */
|
|
|
+ [ C(RESULT_MISS) ] = -1,
|
|
|
+ },
|
|
|
+ [ C(OP_PREFETCH) ] = {
|
|
|
+ [ C(RESULT_ACCESS) ] = 0x0,
|
|
|
+ [ C(RESULT_MISS) ] = 0x0,
|
|
|
+ },
|
|
|
+ },
|
|
|
+ [ C(LL ) ] = {
|
|
|
+ [ C(OP_READ) ] = {
|
|
|
+ [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE; mask bits are in hsw_hw_cache_extra_regs */
|
|
|
+ [ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE; mask bits are in hsw_hw_cache_extra_regs */
|
|
|
+ },
|
|
|
+ [ C(OP_WRITE) ] = {
|
|
|
+ [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE; mask bits are in hsw_hw_cache_extra_regs */
|
|
|
+ [ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE; mask bits are in hsw_hw_cache_extra_regs */
|
|
|
+ },
|
|
|
+ [ C(OP_PREFETCH) ] = {
|
|
|
+ [ C(RESULT_ACCESS) ] = 0x0,
|
|
|
+ [ C(RESULT_MISS) ] = 0x0,
|
|
|
+ },
|
|
|
+ },
|
|
|
+ [ C(DTLB) ] = {
|
|
|
+ [ C(OP_READ) ] = {
|
|
|
+ [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS (same code as the L1D read-access slot) */
|
|
|
+ [ C(RESULT_MISS) ] = 0x108, /* DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK */
|
|
|
+ },
|
|
|
+ [ C(OP_WRITE) ] = {
|
|
|
+ [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES (same code as the L1D write-access slot) */
|
|
|
+ [ C(RESULT_MISS) ] = 0x149, /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */
|
|
|
+ },
|
|
|
+ [ C(OP_PREFETCH) ] = {
|
|
|
+ [ C(RESULT_ACCESS) ] = 0x0,
|
|
|
+ [ C(RESULT_MISS) ] = 0x0,
|
|
|
+ },
|
|
|
+ },
|
|
|
+ [ C(ITLB) ] = {
|
|
|
+ [ C(OP_READ) ] = {
|
|
|
+ [ C(RESULT_ACCESS) ] = 0x6085, /* ITLB_MISSES.STLB_HIT */
|
|
|
+ [ C(RESULT_MISS) ] = 0x185, /* ITLB_MISSES.MISS_CAUSES_A_WALK */
|
|
|
+ },
|
|
|
+ [ C(OP_WRITE) ] = {
|
|
|
+ [ C(RESULT_ACCESS) ] = -1,
|
|
|
+ [ C(RESULT_MISS) ] = -1,
|
|
|
+ },
|
|
|
+ [ C(OP_PREFETCH) ] = {
|
|
|
+ [ C(RESULT_ACCESS) ] = -1,
|
|
|
+ [ C(RESULT_MISS) ] = -1,
|
|
|
+ },
|
|
|
+ },
|
|
|
+ [ C(BPU ) ] = {
|
|
|
+ [ C(OP_READ) ] = {
|
|
|
+ [ C(RESULT_ACCESS) ] = 0xc4, /* BR_INST_RETIRED.ALL_BRANCHES */
|
|
|
+ [ C(RESULT_MISS) ] = 0xc5, /* BR_MISP_RETIRED.ALL_BRANCHES */
|
|
|
+ },
|
|
|
+ [ C(OP_WRITE) ] = {
|
|
|
+ [ C(RESULT_ACCESS) ] = -1,
|
|
|
+ [ C(RESULT_MISS) ] = -1,
|
|
|
+ },
|
|
|
+ [ C(OP_PREFETCH) ] = {
|
|
|
+ [ C(RESULT_ACCESS) ] = -1,
|
|
|
+ [ C(RESULT_MISS) ] = -1,
|
|
|
+ },
|
|
|
+ },
|
|
|
+ [ C(NODE) ] = {
|
|
|
+ [ C(OP_READ) ] = {
|
|
|
+ [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE; mask bits are in hsw_hw_cache_extra_regs */
|
|
|
+ [ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE; mask bits are in hsw_hw_cache_extra_regs */
|
|
|
+ },
|
|
|
+ [ C(OP_WRITE) ] = {
|
|
|
+ [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE; mask bits are in hsw_hw_cache_extra_regs */
|
|
|
+ [ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE; mask bits are in hsw_hw_cache_extra_regs */
|
|
|
+ },
|
|
|
+ [ C(OP_PREFETCH) ] = {
|
|
|
+ [ C(RESULT_ACCESS) ] = 0x0,
|
|
|
+ [ C(RESULT_MISS) ] = 0x0,
|
|
|
+ },
|
|
|
+ },
|
|
|
+};
|
|
|
+
|
|
|
+static __initconst const u64 hsw_hw_cache_extra_regs /* HSW_* mask values copied into hw_cache_extra_regs for the Haswell models */
|
|
|
+ [PERF_COUNT_HW_CACHE_MAX]
|
|
|
+ [PERF_COUNT_HW_CACHE_OP_MAX]
|
|
|
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = /* same [cache][op][result] indexing as hsw_hw_cache_event_ids */
|
|
|
+{ /* only LL and NODE are listed; every other slot is zero by C aggregate initialization */
|
|
|
+ [ C(LL ) ] = {
|
|
|
+ [ C(OP_READ) ] = {
|
|
|
+ [ C(RESULT_ACCESS) ] = HSW_DEMAND_READ|
|
|
|
+ HSW_LLC_ACCESS, /* demand data read, any response */
|
|
|
+ [ C(RESULT_MISS) ] = HSW_DEMAND_READ|
|
|
|
+ HSW_L3_MISS|HSW_ANY_SNOOP, /* demand data read, local or remote L3-miss bits, all snoop bits */
|
|
|
+ },
|
|
|
+ [ C(OP_WRITE) ] = {
|
|
|
+ [ C(RESULT_ACCESS) ] = HSW_DEMAND_WRITE|
|
|
|
+ HSW_LLC_ACCESS, /* demand RFO, any response */
|
|
|
+ [ C(RESULT_MISS) ] = HSW_DEMAND_WRITE|
|
|
|
+ HSW_L3_MISS|HSW_ANY_SNOOP, /* demand RFO, local or remote L3-miss bits, all snoop bits */
|
|
|
+ },
|
|
|
+ [ C(OP_PREFETCH) ] = {
|
|
|
+ [ C(RESULT_ACCESS) ] = 0x0,
|
|
|
+ [ C(RESULT_MISS) ] = 0x0,
|
|
|
+ },
|
|
|
+ },
|
|
|
+ [ C(NODE) ] = {
|
|
|
+ [ C(OP_READ) ] = {
|
|
|
+ [ C(RESULT_ACCESS) ] = HSW_DEMAND_READ|
|
|
|
+ HSW_L3_MISS_LOCAL_DRAM|
|
|
|
+ HSW_SNOOP_DRAM, /* NODE "access" selects the local-DRAM L3-miss bit; NON_DRAM snoop bit excluded */
|
|
|
+ [ C(RESULT_MISS) ] = HSW_DEMAND_READ|
|
|
|
+ HSW_L3_MISS_REMOTE|
|
|
|
+ HSW_SNOOP_DRAM, /* NODE "miss" selects the three remote-hop bits; NON_DRAM snoop bit excluded */
|
|
|
+ },
|
|
|
+ [ C(OP_WRITE) ] = {
|
|
|
+ [ C(RESULT_ACCESS) ] = HSW_DEMAND_WRITE|
|
|
|
+ HSW_L3_MISS_LOCAL_DRAM|
|
|
|
+ HSW_SNOOP_DRAM, /* same mask as the read row, with demand RFO as the request bit */
|
|
|
+ [ C(RESULT_MISS) ] = HSW_DEMAND_WRITE|
|
|
|
+ HSW_L3_MISS_REMOTE|
|
|
|
+ HSW_SNOOP_DRAM, /* same mask as the read row, with demand RFO as the request bit */
|
|
|
+ },
|
|
|
+ [ C(OP_PREFETCH) ] = {
|
|
|
+ [ C(RESULT_ACCESS) ] = 0x0,
|
|
|
+ [ C(RESULT_MISS) ] = 0x0,
|
|
|
+ },
|
|
|
+ },
|
|
|
+};
|
|
|
+
|
|
|
static __initconst const u64 westmere_hw_cache_event_ids
|
|
|
[PERF_COUNT_HW_CACHE_MAX]
|
|
|
[PERF_COUNT_HW_CACHE_OP_MAX]
|
|
@@ -2520,8 +2710,8 @@ __init int intel_pmu_init(void)
|
|
|
case 69: /* 22nm Haswell ULT */
|
|
|
case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
|
|
|
x86_pmu.late_ack = true;
|
|
|
- memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids));
|
|
|
- memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
|
|
|
+ memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
|
|
|
+ memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
|
|
|
|
|
|
intel_pmu_lbr_init_hsw();
|
|
|
|