
Merge branch 'perf/updates' into aarch64/for-next/core

Merge in arm64 perf updates:

  * xgene system PMUv3 support
  * 16-bit events for ARMv8.1
Will Deacon · 8 years ago · commit 9ad95c46c1
4 changed files with 635 additions and 62 deletions
  1. arch/arm/kernel/perf_event_v6.c (+1 -1)
  2. arch/arm64/kernel/perf_event.c (+1 -1)
  3. drivers/perf/Kconfig (+5 -4)
  4. drivers/perf/xgene_pmu.c (+628 -56)

+ 1 - 1
arch/arm/kernel/perf_event_v6.c

@@ -552,7 +552,7 @@ static int armv6mpcore_pmu_init(struct arm_pmu *cpu_pmu)
 	return 0;
 }
 
-static struct of_device_id armv6_pmu_of_device_ids[] = {
+static const struct of_device_id armv6_pmu_of_device_ids[] = {
 	{.compatible = "arm,arm11mpcore-pmu",	.data = armv6mpcore_pmu_init},
 	{.compatible = "arm,arm1176-pmu",	.data = armv6_1176_pmu_init},
 	{.compatible = "arm,arm1136-pmu",	.data = armv6_1136_pmu_init},

+ 1 - 1
arch/arm64/kernel/perf_event.c

@@ -529,7 +529,7 @@ static struct attribute_group armv8_pmuv3_events_attr_group = {
 	.is_visible = armv8pmu_event_attr_is_visible,
 };
 
-PMU_FORMAT_ATTR(event, "config:0-9");
+PMU_FORMAT_ATTR(event, "config:0-15");
 
 static struct attribute *armv8_pmuv3_format_attrs[] = {
 	&format_attr_event.attr,
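
Note: ARMv8.1 widens the PMU event number space to 16 bits, which is what the change from "config:0-9" to "config:0-15" above exposes to userspace. A minimal sketch of opening such an event with perf_event_open(2); the event number 0x1234 and the helper open_pmu_event() are placeholders, only the config[15:0] decoding comes from this patch:

    #include <linux/perf_event.h>
    #include <string.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    /* Placeholder 16-bit event number; real IDs come from the CPU TRM. */
    #define EXAMPLE_EVENT 0x1234

    static int open_pmu_event(int pmu_type)
    {
            struct perf_event_attr attr;

            memset(&attr, 0, sizeof(attr));
            attr.size = sizeof(attr);
            attr.type = pmu_type;        /* /sys/bus/event_source/devices/<pmu>/type */
            attr.config = EXAMPLE_EVENT; /* decoded from config bits 0-15 after this patch */

            /* Count for the calling thread on any CPU. */
            return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
    }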

+ 5 - 4
drivers/perf/Kconfig

@@ -3,9 +3,10 @@
 #
 
 menu "Performance monitor support"
+	depends on PERF_EVENTS
 
 config ARM_PMU
-	depends on PERF_EVENTS && (ARM || ARM64)
+	depends on ARM || ARM64
 	bool "ARM PMU framework"
 	default y
 	help
@@ -18,7 +19,7 @@ config ARM_PMU_ACPI
 
 config QCOM_L2_PMU
 	bool "Qualcomm Technologies L2-cache PMU"
-	depends on ARCH_QCOM && ARM64 && PERF_EVENTS && ACPI
+	depends on ARCH_QCOM && ARM64 && ACPI
 	  help
 	  Provides support for the L2 cache performance monitor unit (PMU)
 	  in Qualcomm Technologies processors.
@@ -27,7 +28,7 @@ config QCOM_L2_PMU
 
 config QCOM_L3_PMU
 	bool "Qualcomm Technologies L3-cache PMU"
-	depends on ARCH_QCOM && ARM64 && PERF_EVENTS && ACPI
+	depends on ARCH_QCOM && ARM64 && ACPI
 	select QCOM_IRQ_COMBINER
 	help
 	   Provides support for the L3 cache performance monitor unit (PMU)
@@ -36,7 +37,7 @@ config QCOM_L3_PMU
 	   monitoring L3 cache events.
 
 config XGENE_PMU
-        depends on PERF_EVENTS && ARCH_XGENE
+        depends on ARCH_XGENE
         bool "APM X-Gene SoC PMU"
         default n
         help

+ 628 - 56
drivers/perf/xgene_pmu.c

@@ -37,6 +37,8 @@
 
 #define CSW_CSWCR                       0x0000
 #define  CSW_CSWCR_DUALMCB_MASK         BIT(0)
+#define  CSW_CSWCR_MCB0_ROUTING(x)	(((x) & 0x0C) >> 2)
+#define  CSW_CSWCR_MCB1_ROUTING(x)	(((x) & 0x30) >> 4)
 #define MCBADDRMR                       0x0000
 #define  MCBADDRMR_DUALMCU_MODE_MASK    BIT(2)
 
@@ -50,8 +52,17 @@
 #define  PCPPMU_INT_L3C		BIT(2)
 #define  PCPPMU_INT_IOB		BIT(3)
 
+#define  PCPPMU_V3_INTMASK	0x00FF33FF
+#define  PCPPMU_V3_INTENMASK	0xFFFFFFFF
+#define  PCPPMU_V3_INTCLRMASK	0xFF00CC00
+#define  PCPPMU_V3_INT_MCU	0x000000FF
+#define  PCPPMU_V3_INT_MCB	0x00000300
+#define  PCPPMU_V3_INT_L3C	0x00FF0000
+#define  PCPPMU_V3_INT_IOB	0x00003000
+
 #define PMU_MAX_COUNTERS	4
-#define PMU_CNT_MAX_PERIOD	0x100000000ULL
+#define PMU_CNT_MAX_PERIOD	0xFFFFFFFFULL
+#define PMU_V3_CNT_MAX_PERIOD	0xFFFFFFFFFFFFFFFFULL
 #define PMU_OVERFLOW_MASK	0xF
 #define PMU_PMCR_E		BIT(0)
 #define PMU_PMCR_P		BIT(1)
@@ -73,6 +84,10 @@
 #define PMU_PMOVSR		0xC80
 #define PMU_PMCR		0xE04
 
+/* PMU registers for V3 */
+#define PMU_PMOVSCLR		0xC80
+#define PMU_PMOVSSET		0xCC0
+
 #define to_pmu_dev(p)     container_of(p, struct xgene_pmu_dev, pmu)
 #define GET_CNTR(ev)      (ev->hw.idx)
 #define GET_EVENTID(ev)   (ev->hw.config & 0xFFULL)
@@ -96,14 +111,33 @@ struct xgene_pmu_dev {
 	struct perf_event *pmu_counter_event[PMU_MAX_COUNTERS];
 };
 
+struct xgene_pmu_ops {
+	void (*mask_int)(struct xgene_pmu *pmu);
+	void (*unmask_int)(struct xgene_pmu *pmu);
+	u64 (*read_counter)(struct xgene_pmu_dev *pmu, int idx);
+	void (*write_counter)(struct xgene_pmu_dev *pmu, int idx, u64 val);
+	void (*write_evttype)(struct xgene_pmu_dev *pmu_dev, int idx, u32 val);
+	void (*write_agentmsk)(struct xgene_pmu_dev *pmu_dev, u32 val);
+	void (*write_agent1msk)(struct xgene_pmu_dev *pmu_dev, u32 val);
+	void (*enable_counter)(struct xgene_pmu_dev *pmu_dev, int idx);
+	void (*disable_counter)(struct xgene_pmu_dev *pmu_dev, int idx);
+	void (*enable_counter_int)(struct xgene_pmu_dev *pmu_dev, int idx);
+	void (*disable_counter_int)(struct xgene_pmu_dev *pmu_dev, int idx);
+	void (*reset_counters)(struct xgene_pmu_dev *pmu_dev);
+	void (*start_counters)(struct xgene_pmu_dev *pmu_dev);
+	void (*stop_counters)(struct xgene_pmu_dev *pmu_dev);
+};
+
 struct xgene_pmu {
 	struct device *dev;
 	int version;
 	void __iomem *pcppmu_csr;
 	u32 mcb_active_mask;
 	u32 mc_active_mask;
+	u32 l3c_active_mask;
 	cpumask_t cpu;
 	raw_spinlock_t lock;
+	const struct xgene_pmu_ops *ops;
 	struct list_head l3cpmus;
 	struct list_head iobpmus;
 	struct list_head mcbpmus;
@@ -125,11 +159,13 @@ struct xgene_pmu_data {
 enum xgene_pmu_version {
 	PCP_PMU_V1 = 1,
 	PCP_PMU_V2,
+	PCP_PMU_V3,
 };
 
 enum xgene_pmu_dev_type {
 	PMU_TYPE_L3C = 0,
 	PMU_TYPE_IOB,
+	PMU_TYPE_IOB_SLOW,
 	PMU_TYPE_MCB,
 	PMU_TYPE_MC,
 };
@@ -195,6 +231,56 @@ static const struct attribute_group mc_pmu_format_attr_group = {
 	.attrs = mc_pmu_format_attrs,
 };
 
+static struct attribute *l3c_pmu_v3_format_attrs[] = {
+	XGENE_PMU_FORMAT_ATTR(l3c_eventid, "config:0-39"),
+	NULL,
+};
+
+static struct attribute *iob_pmu_v3_format_attrs[] = {
+	XGENE_PMU_FORMAT_ATTR(iob_eventid, "config:0-47"),
+	NULL,
+};
+
+static struct attribute *iob_slow_pmu_v3_format_attrs[] = {
+	XGENE_PMU_FORMAT_ATTR(iob_slow_eventid, "config:0-16"),
+	NULL,
+};
+
+static struct attribute *mcb_pmu_v3_format_attrs[] = {
+	XGENE_PMU_FORMAT_ATTR(mcb_eventid, "config:0-35"),
+	NULL,
+};
+
+static struct attribute *mc_pmu_v3_format_attrs[] = {
+	XGENE_PMU_FORMAT_ATTR(mc_eventid, "config:0-44"),
+	NULL,
+};
+
+static const struct attribute_group l3c_pmu_v3_format_attr_group = {
+	.name = "format",
+	.attrs = l3c_pmu_v3_format_attrs,
+};
+
+static const struct attribute_group iob_pmu_v3_format_attr_group = {
+	.name = "format",
+	.attrs = iob_pmu_v3_format_attrs,
+};
+
+static const struct attribute_group iob_slow_pmu_v3_format_attr_group = {
+	.name = "format",
+	.attrs = iob_slow_pmu_v3_format_attrs,
+};
+
+static const struct attribute_group mcb_pmu_v3_format_attr_group = {
+	.name = "format",
+	.attrs = mcb_pmu_v3_format_attrs,
+};
+
+static const struct attribute_group mc_pmu_v3_format_attr_group = {
+	.name = "format",
+	.attrs = mc_pmu_v3_format_attrs,
+};
+
 /*
  * sysfs event attributes
  */
@@ -311,6 +397,219 @@ static const struct attribute_group mc_pmu_events_attr_group = {
 	.attrs = mc_pmu_events_attrs,
 };
 
+static struct attribute *l3c_pmu_v3_events_attrs[] = {
+	XGENE_PMU_EVENT_ATTR(cycle-count,			0x00),
+	XGENE_PMU_EVENT_ATTR(read-hit,				0x01),
+	XGENE_PMU_EVENT_ATTR(read-miss,				0x02),
+	XGENE_PMU_EVENT_ATTR(index-flush-eviction,		0x03),
+	XGENE_PMU_EVENT_ATTR(write-caused-replacement,		0x04),
+	XGENE_PMU_EVENT_ATTR(write-not-caused-replacement,	0x05),
+	XGENE_PMU_EVENT_ATTR(clean-eviction,			0x06),
+	XGENE_PMU_EVENT_ATTR(dirty-eviction,			0x07),
+	XGENE_PMU_EVENT_ATTR(read,				0x08),
+	XGENE_PMU_EVENT_ATTR(write,				0x09),
+	XGENE_PMU_EVENT_ATTR(request,				0x0a),
+	XGENE_PMU_EVENT_ATTR(tq-bank-conflict-issue-stall,	0x0b),
+	XGENE_PMU_EVENT_ATTR(tq-full,				0x0c),
+	XGENE_PMU_EVENT_ATTR(ackq-full,				0x0d),
+	XGENE_PMU_EVENT_ATTR(wdb-full,				0x0e),
+	XGENE_PMU_EVENT_ATTR(odb-full,				0x10),
+	XGENE_PMU_EVENT_ATTR(wbq-full,				0x11),
+	XGENE_PMU_EVENT_ATTR(input-req-async-fifo-stall,	0x12),
+	XGENE_PMU_EVENT_ATTR(output-req-async-fifo-stall,	0x13),
+	XGENE_PMU_EVENT_ATTR(output-data-async-fifo-stall,	0x14),
+	XGENE_PMU_EVENT_ATTR(total-insertion,			0x15),
+	XGENE_PMU_EVENT_ATTR(sip-insertions-r-set,		0x16),
+	XGENE_PMU_EVENT_ATTR(sip-insertions-r-clear,		0x17),
+	XGENE_PMU_EVENT_ATTR(dip-insertions-r-set,		0x18),
+	XGENE_PMU_EVENT_ATTR(dip-insertions-r-clear,		0x19),
+	XGENE_PMU_EVENT_ATTR(dip-insertions-force-r-set,	0x1a),
+	XGENE_PMU_EVENT_ATTR(egression,				0x1b),
+	XGENE_PMU_EVENT_ATTR(replacement,			0x1c),
+	XGENE_PMU_EVENT_ATTR(old-replacement,			0x1d),
+	XGENE_PMU_EVENT_ATTR(young-replacement,			0x1e),
+	XGENE_PMU_EVENT_ATTR(r-set-replacement,			0x1f),
+	XGENE_PMU_EVENT_ATTR(r-clear-replacement,		0x20),
+	XGENE_PMU_EVENT_ATTR(old-r-replacement,			0x21),
+	XGENE_PMU_EVENT_ATTR(old-nr-replacement,		0x22),
+	XGENE_PMU_EVENT_ATTR(young-r-replacement,		0x23),
+	XGENE_PMU_EVENT_ATTR(young-nr-replacement,		0x24),
+	XGENE_PMU_EVENT_ATTR(bloomfilter-clearing,		0x25),
+	XGENE_PMU_EVENT_ATTR(generation-flip,			0x26),
+	XGENE_PMU_EVENT_ATTR(vcc-droop-detected,		0x27),
+	NULL,
+};
+
+static struct attribute *iob_fast_pmu_v3_events_attrs[] = {
+	XGENE_PMU_EVENT_ATTR(cycle-count,			0x00),
+	XGENE_PMU_EVENT_ATTR(pa-req-buf-alloc-all,		0x01),
+	XGENE_PMU_EVENT_ATTR(pa-req-buf-alloc-rd,		0x02),
+	XGENE_PMU_EVENT_ATTR(pa-req-buf-alloc-wr,		0x03),
+	XGENE_PMU_EVENT_ATTR(pa-all-cp-req,			0x04),
+	XGENE_PMU_EVENT_ATTR(pa-cp-blk-req,			0x05),
+	XGENE_PMU_EVENT_ATTR(pa-cp-ptl-req,			0x06),
+	XGENE_PMU_EVENT_ATTR(pa-cp-rd-req,			0x07),
+	XGENE_PMU_EVENT_ATTR(pa-cp-wr-req,			0x08),
+	XGENE_PMU_EVENT_ATTR(ba-all-req,			0x09),
+	XGENE_PMU_EVENT_ATTR(ba-rd-req,				0x0a),
+	XGENE_PMU_EVENT_ATTR(ba-wr-req,				0x0b),
+	XGENE_PMU_EVENT_ATTR(pa-rd-shared-req-issued,		0x10),
+	XGENE_PMU_EVENT_ATTR(pa-rd-exclusive-req-issued,	0x11),
+	XGENE_PMU_EVENT_ATTR(pa-wr-invalidate-req-issued-stashable, 0x12),
+	XGENE_PMU_EVENT_ATTR(pa-wr-invalidate-req-issued-nonstashable, 0x13),
+	XGENE_PMU_EVENT_ATTR(pa-wr-back-req-issued-stashable,	0x14),
+	XGENE_PMU_EVENT_ATTR(pa-wr-back-req-issued-nonstashable, 0x15),
+	XGENE_PMU_EVENT_ATTR(pa-ptl-wr-req,			0x16),
+	XGENE_PMU_EVENT_ATTR(pa-ptl-rd-req,			0x17),
+	XGENE_PMU_EVENT_ATTR(pa-wr-back-clean-data,		0x18),
+	XGENE_PMU_EVENT_ATTR(pa-wr-back-cancelled-on-SS,	0x1b),
+	XGENE_PMU_EVENT_ATTR(pa-barrier-occurrence,		0x1c),
+	XGENE_PMU_EVENT_ATTR(pa-barrier-cycles,			0x1d),
+	XGENE_PMU_EVENT_ATTR(pa-total-cp-snoops,		0x20),
+	XGENE_PMU_EVENT_ATTR(pa-rd-shared-snoop,		0x21),
+	XGENE_PMU_EVENT_ATTR(pa-rd-shared-snoop-hit,		0x22),
+	XGENE_PMU_EVENT_ATTR(pa-rd-exclusive-snoop,		0x23),
+	XGENE_PMU_EVENT_ATTR(pa-rd-exclusive-snoop-hit,		0x24),
+	XGENE_PMU_EVENT_ATTR(pa-rd-wr-invalid-snoop,		0x25),
+	XGENE_PMU_EVENT_ATTR(pa-rd-wr-invalid-snoop-hit,	0x26),
+	XGENE_PMU_EVENT_ATTR(pa-req-buffer-full,		0x28),
+	XGENE_PMU_EVENT_ATTR(cswlf-outbound-req-fifo-full,	0x29),
+	XGENE_PMU_EVENT_ATTR(cswlf-inbound-snoop-fifo-backpressure, 0x2a),
+	XGENE_PMU_EVENT_ATTR(cswlf-outbound-lack-fifo-full,	0x2b),
+	XGENE_PMU_EVENT_ATTR(cswlf-inbound-gack-fifo-backpressure, 0x2c),
+	XGENE_PMU_EVENT_ATTR(cswlf-outbound-data-fifo-full,	0x2d),
+	XGENE_PMU_EVENT_ATTR(cswlf-inbound-data-fifo-backpressure, 0x2e),
+	XGENE_PMU_EVENT_ATTR(cswlf-inbound-req-backpressure,	0x2f),
+	NULL,
+};
+
+static struct attribute *iob_slow_pmu_v3_events_attrs[] = {
+	XGENE_PMU_EVENT_ATTR(cycle-count,			0x00),
+	XGENE_PMU_EVENT_ATTR(pa-axi0-rd-req,			0x01),
+	XGENE_PMU_EVENT_ATTR(pa-axi0-wr-req,			0x02),
+	XGENE_PMU_EVENT_ATTR(pa-axi1-rd-req,			0x03),
+	XGENE_PMU_EVENT_ATTR(pa-axi1-wr-req,			0x04),
+	XGENE_PMU_EVENT_ATTR(ba-all-axi-req,			0x07),
+	XGENE_PMU_EVENT_ATTR(ba-axi-rd-req,			0x08),
+	XGENE_PMU_EVENT_ATTR(ba-axi-wr-req,			0x09),
+	XGENE_PMU_EVENT_ATTR(ba-free-list-empty,		0x10),
+	NULL,
+};
+
+static struct attribute *mcb_pmu_v3_events_attrs[] = {
+	XGENE_PMU_EVENT_ATTR(cycle-count,			0x00),
+	XGENE_PMU_EVENT_ATTR(req-receive,			0x01),
+	XGENE_PMU_EVENT_ATTR(rd-req-recv,			0x02),
+	XGENE_PMU_EVENT_ATTR(rd-req-recv-2,			0x03),
+	XGENE_PMU_EVENT_ATTR(wr-req-recv,			0x04),
+	XGENE_PMU_EVENT_ATTR(wr-req-recv-2,			0x05),
+	XGENE_PMU_EVENT_ATTR(rd-req-sent-to-mcu,		0x06),
+	XGENE_PMU_EVENT_ATTR(rd-req-sent-to-mcu-2,		0x07),
+	XGENE_PMU_EVENT_ATTR(rd-req-sent-to-spec-mcu,		0x08),
+	XGENE_PMU_EVENT_ATTR(rd-req-sent-to-spec-mcu-2,		0x09),
+	XGENE_PMU_EVENT_ATTR(glbl-ack-recv-for-rd-sent-to-spec-mcu, 0x0a),
+	XGENE_PMU_EVENT_ATTR(glbl-ack-go-recv-for-rd-sent-to-spec-mcu, 0x0b),
+	XGENE_PMU_EVENT_ATTR(glbl-ack-nogo-recv-for-rd-sent-to-spec-mcu, 0x0c),
+	XGENE_PMU_EVENT_ATTR(glbl-ack-go-recv-any-rd-req,	0x0d),
+	XGENE_PMU_EVENT_ATTR(glbl-ack-go-recv-any-rd-req-2,	0x0e),
+	XGENE_PMU_EVENT_ATTR(wr-req-sent-to-mcu,		0x0f),
+	XGENE_PMU_EVENT_ATTR(gack-recv,				0x10),
+	XGENE_PMU_EVENT_ATTR(rd-gack-recv,			0x11),
+	XGENE_PMU_EVENT_ATTR(wr-gack-recv,			0x12),
+	XGENE_PMU_EVENT_ATTR(cancel-rd-gack,			0x13),
+	XGENE_PMU_EVENT_ATTR(cancel-wr-gack,			0x14),
+	XGENE_PMU_EVENT_ATTR(mcb-csw-req-stall,			0x15),
+	XGENE_PMU_EVENT_ATTR(mcu-req-intf-blocked,		0x16),
+	XGENE_PMU_EVENT_ATTR(mcb-mcu-rd-intf-stall,		0x17),
+	XGENE_PMU_EVENT_ATTR(csw-rd-intf-blocked,		0x18),
+	XGENE_PMU_EVENT_ATTR(csw-local-ack-intf-blocked,	0x19),
+	XGENE_PMU_EVENT_ATTR(mcu-req-table-full,		0x1a),
+	XGENE_PMU_EVENT_ATTR(mcu-stat-table-full,		0x1b),
+	XGENE_PMU_EVENT_ATTR(mcu-wr-table-full,			0x1c),
+	XGENE_PMU_EVENT_ATTR(mcu-rdreceipt-resp,		0x1d),
+	XGENE_PMU_EVENT_ATTR(mcu-wrcomplete-resp,		0x1e),
+	XGENE_PMU_EVENT_ATTR(mcu-retryack-resp,			0x1f),
+	XGENE_PMU_EVENT_ATTR(mcu-pcrdgrant-resp,		0x20),
+	XGENE_PMU_EVENT_ATTR(mcu-req-from-lastload,		0x21),
+	XGENE_PMU_EVENT_ATTR(mcu-req-from-bypass,		0x22),
+	XGENE_PMU_EVENT_ATTR(volt-droop-detect,			0x23),
+	NULL,
+};
+
+static struct attribute *mc_pmu_v3_events_attrs[] = {
+	XGENE_PMU_EVENT_ATTR(cycle-count,			0x00),
+	XGENE_PMU_EVENT_ATTR(act-sent,				0x01),
+	XGENE_PMU_EVENT_ATTR(pre-sent,				0x02),
+	XGENE_PMU_EVENT_ATTR(rd-sent,				0x03),
+	XGENE_PMU_EVENT_ATTR(rda-sent,				0x04),
+	XGENE_PMU_EVENT_ATTR(wr-sent,				0x05),
+	XGENE_PMU_EVENT_ATTR(wra-sent,				0x06),
+	XGENE_PMU_EVENT_ATTR(pd-entry-vld,			0x07),
+	XGENE_PMU_EVENT_ATTR(sref-entry-vld,			0x08),
+	XGENE_PMU_EVENT_ATTR(prea-sent,				0x09),
+	XGENE_PMU_EVENT_ATTR(ref-sent,				0x0a),
+	XGENE_PMU_EVENT_ATTR(rd-rda-sent,			0x0b),
+	XGENE_PMU_EVENT_ATTR(wr-wra-sent,			0x0c),
+	XGENE_PMU_EVENT_ATTR(raw-hazard,			0x0d),
+	XGENE_PMU_EVENT_ATTR(war-hazard,			0x0e),
+	XGENE_PMU_EVENT_ATTR(waw-hazard,			0x0f),
+	XGENE_PMU_EVENT_ATTR(rar-hazard,			0x10),
+	XGENE_PMU_EVENT_ATTR(raw-war-waw-hazard,		0x11),
+	XGENE_PMU_EVENT_ATTR(hprd-lprd-wr-req-vld,		0x12),
+	XGENE_PMU_EVENT_ATTR(lprd-req-vld,			0x13),
+	XGENE_PMU_EVENT_ATTR(hprd-req-vld,			0x14),
+	XGENE_PMU_EVENT_ATTR(hprd-lprd-req-vld,			0x15),
+	XGENE_PMU_EVENT_ATTR(wr-req-vld,			0x16),
+	XGENE_PMU_EVENT_ATTR(partial-wr-req-vld,		0x17),
+	XGENE_PMU_EVENT_ATTR(rd-retry,				0x18),
+	XGENE_PMU_EVENT_ATTR(wr-retry,				0x19),
+	XGENE_PMU_EVENT_ATTR(retry-gnt,				0x1a),
+	XGENE_PMU_EVENT_ATTR(rank-change,			0x1b),
+	XGENE_PMU_EVENT_ATTR(dir-change,			0x1c),
+	XGENE_PMU_EVENT_ATTR(rank-dir-change,			0x1d),
+	XGENE_PMU_EVENT_ATTR(rank-active,			0x1e),
+	XGENE_PMU_EVENT_ATTR(rank-idle,				0x1f),
+	XGENE_PMU_EVENT_ATTR(rank-pd,				0x20),
+	XGENE_PMU_EVENT_ATTR(rank-sref,				0x21),
+	XGENE_PMU_EVENT_ATTR(queue-fill-gt-thresh,		0x22),
+	XGENE_PMU_EVENT_ATTR(queue-rds-gt-thresh,		0x23),
+	XGENE_PMU_EVENT_ATTR(queue-wrs-gt-thresh,		0x24),
+	XGENE_PMU_EVENT_ATTR(phy-updt-complt,			0x25),
+	XGENE_PMU_EVENT_ATTR(tz-fail,				0x26),
+	XGENE_PMU_EVENT_ATTR(dram-errc,				0x27),
+	XGENE_PMU_EVENT_ATTR(dram-errd,				0x28),
+	XGENE_PMU_EVENT_ATTR(rd-enq,				0x29),
+	XGENE_PMU_EVENT_ATTR(wr-enq,				0x2a),
+	XGENE_PMU_EVENT_ATTR(tmac-limit-reached,		0x2b),
+	XGENE_PMU_EVENT_ATTR(tmaw-tracker-full,			0x2c),
+	NULL,
+};
+
+static const struct attribute_group l3c_pmu_v3_events_attr_group = {
+	.name = "events",
+	.attrs = l3c_pmu_v3_events_attrs,
+};
+
+static const struct attribute_group iob_fast_pmu_v3_events_attr_group = {
+	.name = "events",
+	.attrs = iob_fast_pmu_v3_events_attrs,
+};
+
+static const struct attribute_group iob_slow_pmu_v3_events_attr_group = {
+	.name = "events",
+	.attrs = iob_slow_pmu_v3_events_attrs,
+};
+
+static const struct attribute_group mcb_pmu_v3_events_attr_group = {
+	.name = "events",
+	.attrs = mcb_pmu_v3_events_attrs,
+};
+
+static const struct attribute_group mc_pmu_v3_events_attr_group = {
+	.name = "events",
+	.attrs = mc_pmu_v3_events_attrs,
+};
+
 /*
  * sysfs cpumask attributes
  */
@@ -334,7 +633,7 @@ static const struct attribute_group pmu_cpumask_attr_group = {
 };
 
 /*
- * Per PMU device attribute groups
+ * Per PMU device attribute groups of PMU v1 and v2
  */
 static const struct attribute_group *l3c_pmu_attr_groups[] = {
 	&l3c_pmu_format_attr_group,
@@ -364,6 +663,44 @@ static const struct attribute_group *mc_pmu_attr_groups[] = {
 	NULL
 };
 
+/*
+ * Per PMU device attribute groups of PMU v3
+ */
+static const struct attribute_group *l3c_pmu_v3_attr_groups[] = {
+	&l3c_pmu_v3_format_attr_group,
+	&pmu_cpumask_attr_group,
+	&l3c_pmu_v3_events_attr_group,
+	NULL
+};
+
+static const struct attribute_group *iob_fast_pmu_v3_attr_groups[] = {
+	&iob_pmu_v3_format_attr_group,
+	&pmu_cpumask_attr_group,
+	&iob_fast_pmu_v3_events_attr_group,
+	NULL
+};
+
+static const struct attribute_group *iob_slow_pmu_v3_attr_groups[] = {
+	&iob_slow_pmu_v3_format_attr_group,
+	&pmu_cpumask_attr_group,
+	&iob_slow_pmu_v3_events_attr_group,
+	NULL
+};
+
+static const struct attribute_group *mcb_pmu_v3_attr_groups[] = {
+	&mcb_pmu_v3_format_attr_group,
+	&pmu_cpumask_attr_group,
+	&mcb_pmu_v3_events_attr_group,
+	NULL
+};
+
+static const struct attribute_group *mc_pmu_v3_attr_groups[] = {
+	&mc_pmu_v3_format_attr_group,
+	&pmu_cpumask_attr_group,
+	&mc_pmu_v3_events_attr_group,
+	NULL
+};
+
 static int get_next_avail_cntr(struct xgene_pmu_dev *pmu_dev)
 {
 	int cntr;
@@ -387,22 +724,66 @@ static inline void xgene_pmu_mask_int(struct xgene_pmu *xgene_pmu)
 	writel(PCPPMU_INTENMASK, xgene_pmu->pcppmu_csr + PCPPMU_INTMASK_REG);
 }
 
+static inline void xgene_pmu_v3_mask_int(struct xgene_pmu *xgene_pmu)
+{
+	writel(PCPPMU_V3_INTENMASK, xgene_pmu->pcppmu_csr + PCPPMU_INTMASK_REG);
+}
+
 static inline void xgene_pmu_unmask_int(struct xgene_pmu *xgene_pmu)
 {
 	writel(PCPPMU_INTCLRMASK, xgene_pmu->pcppmu_csr + PCPPMU_INTMASK_REG);
 }
 
-static inline u32 xgene_pmu_read_counter(struct xgene_pmu_dev *pmu_dev, int idx)
+static inline void xgene_pmu_v3_unmask_int(struct xgene_pmu *xgene_pmu)
+{
+	writel(PCPPMU_V3_INTCLRMASK,
+	       xgene_pmu->pcppmu_csr + PCPPMU_INTMASK_REG);
+}
+
+static inline u64 xgene_pmu_read_counter32(struct xgene_pmu_dev *pmu_dev,
+					   int idx)
 {
 	return readl(pmu_dev->inf->csr + PMU_PMEVCNTR0 + (4 * idx));
 }
 
+static inline u64 xgene_pmu_read_counter64(struct xgene_pmu_dev *pmu_dev,
+					   int idx)
+{
+	u32 lo, hi;
+
+	/*
+	 * v3 has 64-bit counter registers composed of two 32-bit registers.
+	 * The counter may carry out of bit [31] between the two reads, so
+	 * re-read the high word until it is stable.
+	 */
+	do {
+		hi = xgene_pmu_read_counter32(pmu_dev, 2 * idx + 1);
+		lo = xgene_pmu_read_counter32(pmu_dev, 2 * idx);
+	} while (hi != xgene_pmu_read_counter32(pmu_dev, 2 * idx + 1));
+
+	return (((u64)hi << 32) | lo);
+}
+
 static inline void
-xgene_pmu_write_counter(struct xgene_pmu_dev *pmu_dev, int idx, u32 val)
+xgene_pmu_write_counter32(struct xgene_pmu_dev *pmu_dev, int idx, u64 val)
 {
 	writel(val, pmu_dev->inf->csr + PMU_PMEVCNTR0 + (4 * idx));
 }
 
+static inline void
+xgene_pmu_write_counter64(struct xgene_pmu_dev *pmu_dev, int idx, u64 val)
+{
+	u32 cnt_lo, cnt_hi;
+
+	cnt_hi = upper_32_bits(val);
+	cnt_lo = lower_32_bits(val);
+
+	/* v3 has 64-bit counter registers composed of two 32-bit registers */
+	xgene_pmu_write_counter32(pmu_dev, 2 * idx, cnt_lo);
+	xgene_pmu_write_counter32(pmu_dev, 2 * idx + 1, cnt_hi);
+}
+
 static inline void
 xgene_pmu_write_evttype(struct xgene_pmu_dev *pmu_dev, int idx, u32 val)
 {
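
Note: the 64-bit read above re-reads the high word to catch a carry between the two 32-bit halves, while the 64-bit write has no such guard. Presumably that is safe because counters are only written while stopped (the overflow handler stops counters before reprogramming, and perf stops events before reload), but that is an inference from this diff, not something it states.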
@@ -415,12 +796,18 @@ xgene_pmu_write_agentmsk(struct xgene_pmu_dev *pmu_dev, u32 val)
 	writel(val, pmu_dev->inf->csr + PMU_PMAMR0);
 }
 
+static inline void
+xgene_pmu_v3_write_agentmsk(struct xgene_pmu_dev *pmu_dev, u32 val) { }
+
 static inline void
 xgene_pmu_write_agent1msk(struct xgene_pmu_dev *pmu_dev, u32 val)
 {
 	writel(val, pmu_dev->inf->csr + PMU_PMAMR1);
 }
 
+static inline void
+xgene_pmu_v3_write_agent1msk(struct xgene_pmu_dev *pmu_dev, u32 val) { }
+
 static inline void
 xgene_pmu_enable_counter(struct xgene_pmu_dev *pmu_dev, int idx)
 {
@@ -491,20 +878,22 @@ static inline void xgene_pmu_stop_counters(struct xgene_pmu_dev *pmu_dev)
 static void xgene_perf_pmu_enable(struct pmu *pmu)
 {
 	struct xgene_pmu_dev *pmu_dev = to_pmu_dev(pmu);
+	struct xgene_pmu *xgene_pmu = pmu_dev->parent;
 	int enabled = bitmap_weight(pmu_dev->cntr_assign_mask,
 			pmu_dev->max_counters);
 
 	if (!enabled)
 		return;
 
-	xgene_pmu_start_counters(pmu_dev);
+	xgene_pmu->ops->start_counters(pmu_dev);
 }
 
 static void xgene_perf_pmu_disable(struct pmu *pmu)
 {
 	struct xgene_pmu_dev *pmu_dev = to_pmu_dev(pmu);
+	struct xgene_pmu *xgene_pmu = pmu_dev->parent;
 
-	xgene_pmu_stop_counters(pmu_dev);
+	xgene_pmu->ops->stop_counters(pmu_dev);
 }
 
 static int xgene_perf_event_init(struct perf_event *event)
@@ -572,49 +961,56 @@ static int xgene_perf_event_init(struct perf_event *event)
 static void xgene_perf_enable_event(struct perf_event *event)
 {
 	struct xgene_pmu_dev *pmu_dev = to_pmu_dev(event->pmu);
+	struct xgene_pmu *xgene_pmu = pmu_dev->parent;
 
-	xgene_pmu_write_evttype(pmu_dev, GET_CNTR(event), GET_EVENTID(event));
-	xgene_pmu_write_agentmsk(pmu_dev, ~((u32)GET_AGENTID(event)));
+	xgene_pmu->ops->write_evttype(pmu_dev, GET_CNTR(event),
+				      GET_EVENTID(event));
+	xgene_pmu->ops->write_agentmsk(pmu_dev, ~((u32)GET_AGENTID(event)));
 	if (pmu_dev->inf->type == PMU_TYPE_IOB)
-		xgene_pmu_write_agent1msk(pmu_dev, ~((u32)GET_AGENT1ID(event)));
+		xgene_pmu->ops->write_agent1msk(pmu_dev,
+						~((u32)GET_AGENT1ID(event)));
 
-	xgene_pmu_enable_counter(pmu_dev, GET_CNTR(event));
-	xgene_pmu_enable_counter_int(pmu_dev, GET_CNTR(event));
+	xgene_pmu->ops->enable_counter(pmu_dev, GET_CNTR(event));
+	xgene_pmu->ops->enable_counter_int(pmu_dev, GET_CNTR(event));
 }
 
 static void xgene_perf_disable_event(struct perf_event *event)
 {
 	struct xgene_pmu_dev *pmu_dev = to_pmu_dev(event->pmu);
+	struct xgene_pmu *xgene_pmu = pmu_dev->parent;
 
-	xgene_pmu_disable_counter(pmu_dev, GET_CNTR(event));
-	xgene_pmu_disable_counter_int(pmu_dev, GET_CNTR(event));
+	xgene_pmu->ops->disable_counter(pmu_dev, GET_CNTR(event));
+	xgene_pmu->ops->disable_counter_int(pmu_dev, GET_CNTR(event));
 }
 
 static void xgene_perf_event_set_period(struct perf_event *event)
 {
 	struct xgene_pmu_dev *pmu_dev = to_pmu_dev(event->pmu);
+	struct xgene_pmu *xgene_pmu = pmu_dev->parent;
 	struct hw_perf_event *hw = &event->hw;
 	/*
-	 * The X-Gene PMU counters have a period of 2^32. To account for the
-	 * possiblity of extreme interrupt latency we program for a period of
-	 * half that. Hopefully we can handle the interrupt before another 2^31
+	 * A 32-bit counter has a period of 2^32. To account for the
+	 * possibility of extreme interrupt latency we program for a period of
+	 * half that. Hopefully, we can handle the interrupt before another 2^31
 	 * events occur and the counter overtakes its previous value.
+	 * A 64-bit counter is not expected to overflow.
 	 */
 	u64 val = 1ULL << 31;
 
 	local64_set(&hw->prev_count, val);
-	xgene_pmu_write_counter(pmu_dev, hw->idx, (u32) val);
+	xgene_pmu->ops->write_counter(pmu_dev, hw->idx, val);
 }
 
 static void xgene_perf_event_update(struct perf_event *event)
 {
 	struct xgene_pmu_dev *pmu_dev = to_pmu_dev(event->pmu);
+	struct xgene_pmu *xgene_pmu = pmu_dev->parent;
 	struct hw_perf_event *hw = &event->hw;
 	u64 delta, prev_raw_count, new_raw_count;
 
 again:
 	prev_raw_count = local64_read(&hw->prev_count);
-	new_raw_count = xgene_pmu_read_counter(pmu_dev, GET_CNTR(event));
+	new_raw_count = xgene_pmu->ops->read_counter(pmu_dev, GET_CNTR(event));
 
 	if (local64_cmpxchg(&hw->prev_count, prev_raw_count,
 			    new_raw_count) != prev_raw_count)
@@ -633,6 +1029,7 @@ static void xgene_perf_read(struct perf_event *event)
 static void xgene_perf_start(struct perf_event *event, int flags)
 {
 	struct xgene_pmu_dev *pmu_dev = to_pmu_dev(event->pmu);
+	struct xgene_pmu *xgene_pmu = pmu_dev->parent;
 	struct hw_perf_event *hw = &event->hw;
 
 	if (WARN_ON_ONCE(!(hw->state & PERF_HES_STOPPED)))
@@ -646,8 +1043,8 @@ static void xgene_perf_start(struct perf_event *event, int flags)
 	if (flags & PERF_EF_RELOAD) {
 		u64 prev_raw_count =  local64_read(&hw->prev_count);
 
-		xgene_pmu_write_counter(pmu_dev, GET_CNTR(event),
-					(u32) prev_raw_count);
+		xgene_pmu->ops->write_counter(pmu_dev, GET_CNTR(event),
+					      prev_raw_count);
 	}
 
 	xgene_perf_enable_event(event);
@@ -713,7 +1110,10 @@ static int xgene_init_perf(struct xgene_pmu_dev *pmu_dev, char *name)
 {
 	struct xgene_pmu *xgene_pmu;
 
-	pmu_dev->max_period = PMU_CNT_MAX_PERIOD - 1;
+	if (pmu_dev->parent->version == PCP_PMU_V3)
+		pmu_dev->max_period = PMU_V3_CNT_MAX_PERIOD;
+	else
+		pmu_dev->max_period = PMU_CNT_MAX_PERIOD;
 	/* First version PMU supports only single event counter */
 	xgene_pmu = pmu_dev->parent;
 	if (xgene_pmu->version == PCP_PMU_V1)
@@ -736,8 +1136,8 @@ static int xgene_init_perf(struct xgene_pmu_dev *pmu_dev, char *name)
 	};
 
 	/* Hardware counter init */
-	xgene_pmu_stop_counters(pmu_dev);
-	xgene_pmu_reset_counters(pmu_dev);
+	xgene_pmu->ops->stop_counters(pmu_dev);
+	xgene_pmu->ops->reset_counters(pmu_dev);
 
 	return perf_pmu_register(&pmu_dev->pmu, name, -1);
 }
@@ -758,20 +1158,38 @@ xgene_pmu_dev_add(struct xgene_pmu *xgene_pmu, struct xgene_pmu_dev_ctx *ctx)
 
 	switch (pmu->inf->type) {
 	case PMU_TYPE_L3C:
-		pmu->attr_groups = l3c_pmu_attr_groups;
+		if (!(xgene_pmu->l3c_active_mask & pmu->inf->enable_mask))
+			goto dev_err;
+		if (xgene_pmu->version == PCP_PMU_V3)
+			pmu->attr_groups = l3c_pmu_v3_attr_groups;
+		else
+			pmu->attr_groups = l3c_pmu_attr_groups;
 		break;
 	case PMU_TYPE_IOB:
-		pmu->attr_groups = iob_pmu_attr_groups;
+		if (xgene_pmu->version == PCP_PMU_V3)
+			pmu->attr_groups = iob_fast_pmu_v3_attr_groups;
+		else
+			pmu->attr_groups = iob_pmu_attr_groups;
+		break;
+	case PMU_TYPE_IOB_SLOW:
+		if (xgene_pmu->version == PCP_PMU_V3)
+			pmu->attr_groups = iob_slow_pmu_v3_attr_groups;
 		break;
 	case PMU_TYPE_MCB:
 		if (!(xgene_pmu->mcb_active_mask & pmu->inf->enable_mask))
 			goto dev_err;
-		pmu->attr_groups = mcb_pmu_attr_groups;
+		if (xgene_pmu->version == PCP_PMU_V3)
+			pmu->attr_groups = mcb_pmu_v3_attr_groups;
+		else
+			pmu->attr_groups = mcb_pmu_attr_groups;
 		break;
 	case PMU_TYPE_MC:
 		if (!(xgene_pmu->mc_active_mask & pmu->inf->enable_mask))
 			goto dev_err;
-		pmu->attr_groups = mc_pmu_attr_groups;
+		if (xgene_pmu->version == PCP_PMU_V3)
+			pmu->attr_groups = mc_pmu_v3_attr_groups;
+		else
+			pmu->attr_groups = mc_pmu_attr_groups;
 		break;
 	default:
 		return -EINVAL;
@@ -795,18 +1213,27 @@ dev_err:
 static void _xgene_pmu_isr(int irq, struct xgene_pmu_dev *pmu_dev)
 {
 	struct xgene_pmu *xgene_pmu = pmu_dev->parent;
+	void __iomem *csr = pmu_dev->inf->csr;
 	u32 pmovsr;
 	int idx;
 
-	pmovsr = readl(pmu_dev->inf->csr + PMU_PMOVSR) & PMU_OVERFLOW_MASK;
+	xgene_pmu->ops->stop_counters(pmu_dev);
+
+	if (xgene_pmu->version == PCP_PMU_V3)
+		pmovsr = readl(csr + PMU_PMOVSSET) & PMU_OVERFLOW_MASK;
+	else
+		pmovsr = readl(csr + PMU_PMOVSR) & PMU_OVERFLOW_MASK;
+
 	if (!pmovsr)
-		return;
+		goto out;
 
 	/* Clear interrupt flag */
 	if (xgene_pmu->version == PCP_PMU_V1)
-		writel(0x0, pmu_dev->inf->csr + PMU_PMOVSR);
+		writel(0x0, csr + PMU_PMOVSR);
+	else if (xgene_pmu->version == PCP_PMU_V2)
+		writel(pmovsr, csr + PMU_PMOVSR);
 	else
-		writel(pmovsr, pmu_dev->inf->csr + PMU_PMOVSR);
+		writel(pmovsr, csr + PMU_PMOVSCLR);
 
 	for (idx = 0; idx < PMU_MAX_COUNTERS; idx++) {
 		struct perf_event *event = pmu_dev->pmu_counter_event[idx];
@@ -818,10 +1245,14 @@ static void _xgene_pmu_isr(int irq, struct xgene_pmu_dev *pmu_dev)
 		xgene_perf_event_update(event);
 		xgene_perf_event_set_period(event);
 	}
+
+out:
+	xgene_pmu->ops->start_counters(pmu_dev);
 }
 
 static irqreturn_t xgene_pmu_isr(int irq, void *dev_id)
 {
+	u32 intr_mcu, intr_mcb, intr_l3c, intr_iob;
 	struct xgene_pmu_dev_ctx *ctx;
 	struct xgene_pmu *xgene_pmu = dev_id;
 	unsigned long flags;
@@ -831,22 +1262,33 @@ static irqreturn_t xgene_pmu_isr(int irq, void *dev_id)
 
 	/* Get Interrupt PMU source */
 	val = readl(xgene_pmu->pcppmu_csr + PCPPMU_INTSTATUS_REG);
-	if (val & PCPPMU_INT_MCU) {
+	if (xgene_pmu->version == PCP_PMU_V3) {
+		intr_mcu = PCPPMU_V3_INT_MCU;
+		intr_mcb = PCPPMU_V3_INT_MCB;
+		intr_l3c = PCPPMU_V3_INT_L3C;
+		intr_iob = PCPPMU_V3_INT_IOB;
+	} else {
+		intr_mcu = PCPPMU_INT_MCU;
+		intr_mcb = PCPPMU_INT_MCB;
+		intr_l3c = PCPPMU_INT_L3C;
+		intr_iob = PCPPMU_INT_IOB;
+	}
+	if (val & intr_mcu) {
 		list_for_each_entry(ctx, &xgene_pmu->mcpmus, next) {
 			_xgene_pmu_isr(irq, ctx->pmu_dev);
 		}
 	}
-	if (val & PCPPMU_INT_MCB) {
+	if (val & intr_mcb) {
 		list_for_each_entry(ctx, &xgene_pmu->mcbpmus, next) {
 			_xgene_pmu_isr(irq, ctx->pmu_dev);
 		}
 	}
-	if (val & PCPPMU_INT_L3C) {
+	if (val & intr_l3c) {
 		list_for_each_entry(ctx, &xgene_pmu->l3cpmus, next) {
 			_xgene_pmu_isr(irq, ctx->pmu_dev);
 		}
 	}
-	if (val & PCPPMU_INT_IOB) {
+	if (val & intr_iob) {
 		list_for_each_entry(ctx, &xgene_pmu->iobpmus, next) {
 			_xgene_pmu_isr(irq, ctx->pmu_dev);
 		}
@@ -857,8 +1299,8 @@ static irqreturn_t xgene_pmu_isr(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
-static int acpi_pmu_probe_active_mcb_mcu(struct xgene_pmu *xgene_pmu,
-					 struct platform_device *pdev)
+static int acpi_pmu_probe_active_mcb_mcu_l3c(struct xgene_pmu *xgene_pmu,
+					     struct platform_device *pdev)
 {
 	void __iomem *csw_csr, *mcba_csr, *mcbb_csr;
 	struct resource *res;
@@ -885,6 +1327,8 @@ static int acpi_pmu_probe_active_mcb_mcu(struct xgene_pmu *xgene_pmu,
 		return PTR_ERR(mcbb_csr);
 	}
 
+	xgene_pmu->l3c_active_mask = 0x1;
+
 	reg = readl(csw_csr + CSW_CSWCR);
 	if (reg & CSW_CSWCR_DUALMCB_MASK) {
 		/* Dual MCB active */
@@ -905,8 +1349,56 @@ static int acpi_pmu_probe_active_mcb_mcu(struct xgene_pmu *xgene_pmu,
 	return 0;
 }
 
-static int fdt_pmu_probe_active_mcb_mcu(struct xgene_pmu *xgene_pmu,
-					struct platform_device *pdev)
+static int acpi_pmu_v3_probe_active_mcb_mcu_l3c(struct xgene_pmu *xgene_pmu,
+						struct platform_device *pdev)
+{
+	void __iomem *csw_csr;
+	struct resource *res;
+	unsigned int reg;
+	u32 mcb0routing;
+	u32 mcb1routing;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	csw_csr = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(csw_csr)) {
+		dev_err(&pdev->dev, "ioremap failed for CSW CSR resource\n");
+		return PTR_ERR(csw_csr);
+	}
+
+	reg = readl(csw_csr + CSW_CSWCR);
+	mcb0routing = CSW_CSWCR_MCB0_ROUTING(reg);
+	mcb1routing = CSW_CSWCR_MCB1_ROUTING(reg);
+	if (reg & CSW_CSWCR_DUALMCB_MASK) {
+		/* Dual MCB active */
+		xgene_pmu->mcb_active_mask = 0x3;
+		/* Probe all active L3C(s), maximum is 8 */
+		xgene_pmu->l3c_active_mask = 0xFF;
+		/* Probe all active MC(s), maximum is 8 */
+		if ((mcb0routing == 0x2) && (mcb1routing == 0x2))
+			xgene_pmu->mc_active_mask = 0xFF;
+		else if ((mcb0routing == 0x1) && (mcb1routing == 0x1))
+			xgene_pmu->mc_active_mask =  0x33;
+		else
+			xgene_pmu->mc_active_mask =  0x11;
+	} else {
+		/* Single MCB active */
+		xgene_pmu->mcb_active_mask = 0x1;
+		/* Probe all active L3C(s), maximum is 4 */
+		xgene_pmu->l3c_active_mask = 0x0F;
+		/* Probe all active MC(s), maximum is 4 */
+		if (mcb0routing == 0x2)
+			xgene_pmu->mc_active_mask = 0x0F;
+		else if (mcb0routing == 0x1)
+			xgene_pmu->mc_active_mask =  0x03;
+		else
+			xgene_pmu->mc_active_mask =  0x01;
+	}
+
+	return 0;
+}
+
+static int fdt_pmu_probe_active_mcb_mcu_l3c(struct xgene_pmu *xgene_pmu,
+					    struct platform_device *pdev)
 {
 	struct regmap *csw_map, *mcba_map, *mcbb_map;
 	struct device_node *np = pdev->dev.of_node;
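
Note on the hunk above: acpi_pmu_v3_probe_active_mcb_mcu_l3c() derives the MC active mask from the CSWCR MCB routing fields. A standalone C sketch of that decode, handy for checking a given register value by hand; the helper decode_mc_active_mask() and the example value 0x15 are illustrative only, the masks and macros come from the patch:

    #include <stdint.h>
    #include <stdio.h>

    #define CSW_CSWCR_DUALMCB_MASK     (1U << 0)
    #define CSW_CSWCR_MCB0_ROUTING(x)  (((x) & 0x0C) >> 2)
    #define CSW_CSWCR_MCB1_ROUTING(x)  (((x) & 0x30) >> 4)

    /* Mirror of the driver's MC active-mask decode (hypothetical helper). */
    static uint32_t decode_mc_active_mask(uint32_t reg)
    {
            uint32_t mcb0 = CSW_CSWCR_MCB0_ROUTING(reg);
            uint32_t mcb1 = CSW_CSWCR_MCB1_ROUTING(reg);

            if (reg & CSW_CSWCR_DUALMCB_MASK) {
                    if (mcb0 == 0x2 && mcb1 == 0x2)
                            return 0xFF;    /* all 8 MCs */
                    if (mcb0 == 0x1 && mcb1 == 0x1)
                            return 0x33;    /* MCs 0,1 and 4,5 */
                    return 0x11;            /* MCs 0 and 4 */
            }
            if (mcb0 == 0x2)
                    return 0x0F;            /* MCs 0-3 */
            if (mcb0 == 0x1)
                    return 0x03;            /* MCs 0,1 */
            return 0x01;                    /* MC 0 only */
    }

    int main(void)
    {
            /* 0x15: dual-MCB bit set, both routing fields equal to 0x1. */
            printf("mc_active_mask = 0x%02x\n", decode_mc_active_mask(0x15));
            return 0;
    }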
@@ -930,6 +1422,7 @@ static int fdt_pmu_probe_active_mcb_mcu(struct xgene_pmu *xgene_pmu,
 		return PTR_ERR(mcbb_map);
 	}
 
+	xgene_pmu->l3c_active_mask = 0x1;
 	if (regmap_read(csw_map, CSW_CSWCR, &reg))
 		return -EINVAL;
 
@@ -954,12 +1447,18 @@ static int fdt_pmu_probe_active_mcb_mcu(struct xgene_pmu *xgene_pmu,
 	return 0;
 }
 
-static int xgene_pmu_probe_active_mcb_mcu(struct xgene_pmu *xgene_pmu,
-					  struct platform_device *pdev)
+static int xgene_pmu_probe_active_mcb_mcu_l3c(struct xgene_pmu *xgene_pmu,
+					      struct platform_device *pdev)
 {
-	if (has_acpi_companion(&pdev->dev))
-		return acpi_pmu_probe_active_mcb_mcu(xgene_pmu, pdev);
-	return fdt_pmu_probe_active_mcb_mcu(xgene_pmu, pdev);
+	if (has_acpi_companion(&pdev->dev)) {
+		if (xgene_pmu->version == PCP_PMU_V3)
+			return acpi_pmu_v3_probe_active_mcb_mcu_l3c(xgene_pmu,
+								    pdev);
+		else
+			return acpi_pmu_probe_active_mcb_mcu_l3c(xgene_pmu,
+								 pdev);
+	}
+	return fdt_pmu_probe_active_mcb_mcu_l3c(xgene_pmu, pdev);
 }
 
 static char *xgene_pmu_dev_name(struct device *dev, u32 type, int id)
@@ -969,6 +1468,8 @@ static char *xgene_pmu_dev_name(struct device *dev, u32 type, int id)
 		return devm_kasprintf(dev, GFP_KERNEL, "l3c%d", id);
 	case PMU_TYPE_IOB:
 		return devm_kasprintf(dev, GFP_KERNEL, "iob%d", id);
+	case PMU_TYPE_IOB_SLOW:
+		return devm_kasprintf(dev, GFP_KERNEL, "iob-slow%d", id);
 	case PMU_TYPE_MCB:
 		return devm_kasprintf(dev, GFP_KERNEL, "mcb%d", id);
 	case PMU_TYPE_MC:
@@ -1047,9 +1548,40 @@ err:
 	return NULL;
 }
 
+static const struct acpi_device_id xgene_pmu_acpi_type_match[] = {
+	{"APMC0D5D", PMU_TYPE_L3C},
+	{"APMC0D5E", PMU_TYPE_IOB},
+	{"APMC0D5F", PMU_TYPE_MCB},
+	{"APMC0D60", PMU_TYPE_MC},
+	{"APMC0D84", PMU_TYPE_L3C},
+	{"APMC0D85", PMU_TYPE_IOB},
+	{"APMC0D86", PMU_TYPE_IOB_SLOW},
+	{"APMC0D87", PMU_TYPE_MCB},
+	{"APMC0D88", PMU_TYPE_MC},
+	{},
+};
+
+static const struct acpi_device_id *xgene_pmu_acpi_match_type(
+					const struct acpi_device_id *ids,
+					struct acpi_device *adev)
+{
+	const struct acpi_device_id *match_id = NULL;
+	const struct acpi_device_id *id;
+
+	for (id = ids; id->id[0] || id->cls; id++) {
+		if (!acpi_match_device_ids(adev, id))
+			match_id = id;
+		else if (match_id)
+			break;
+	}
+
+	return match_id;
+}
+
 static acpi_status acpi_pmu_dev_add(acpi_handle handle, u32 level,
 				    void *data, void **return_value)
 {
+	const struct acpi_device_id *acpi_id;
 	struct xgene_pmu *xgene_pmu = data;
 	struct xgene_pmu_dev_ctx *ctx;
 	struct acpi_device *adev;
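
Note on xgene_pmu_acpi_match_type() above: acpi_match_device_ids() returns 0 on a match, so the loop remembers the most recent table entry that matched and stops at the first miss after a match. For a device whose _HID/_CID list matches several consecutive table entries, the last entry of that run supplies the driver_data, i.e. the PMU type.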
@@ -1059,17 +1591,11 @@ static acpi_status acpi_pmu_dev_add(acpi_handle handle, u32 level,
 	if (acpi_bus_get_status(adev) || !adev->status.present)
 		return AE_OK;
 
-	if (!strcmp(acpi_device_hid(adev), "APMC0D5D"))
-		ctx = acpi_get_pmu_hw_inf(xgene_pmu, adev, PMU_TYPE_L3C);
-	else if (!strcmp(acpi_device_hid(adev), "APMC0D5E"))
-		ctx = acpi_get_pmu_hw_inf(xgene_pmu, adev, PMU_TYPE_IOB);
-	else if (!strcmp(acpi_device_hid(adev), "APMC0D5F"))
-		ctx = acpi_get_pmu_hw_inf(xgene_pmu, adev, PMU_TYPE_MCB);
-	else if (!strcmp(acpi_device_hid(adev), "APMC0D60"))
-		ctx = acpi_get_pmu_hw_inf(xgene_pmu, adev, PMU_TYPE_MC);
-	else
-		ctx = NULL;
+	acpi_id = xgene_pmu_acpi_match_type(xgene_pmu_acpi_type_match, adev);
+	if (!acpi_id)
+		return AE_OK;
 
+	ctx = acpi_get_pmu_hw_inf(xgene_pmu, adev, (u32)acpi_id->driver_data);
 	if (!ctx)
 		return AE_OK;
 
@@ -1086,6 +1612,9 @@ static acpi_status acpi_pmu_dev_add(acpi_handle handle, u32 level,
 	case PMU_TYPE_IOB:
 		list_add(&ctx->next, &xgene_pmu->iobpmus);
 		break;
+	case PMU_TYPE_IOB_SLOW:
+		list_add(&ctx->next, &xgene_pmu->iobpmus);
+		break;
 	case PMU_TYPE_MCB:
 		list_add(&ctx->next, &xgene_pmu->mcbpmus);
 		break;
@@ -1207,6 +1736,9 @@ static int fdt_pmu_probe_pmu_dev(struct xgene_pmu *xgene_pmu,
 		case PMU_TYPE_IOB:
 			list_add(&ctx->next, &xgene_pmu->iobpmus);
 			break;
+		case PMU_TYPE_IOB_SLOW:
+			list_add(&ctx->next, &xgene_pmu->iobpmus);
+			break;
 		case PMU_TYPE_MCB:
 			list_add(&ctx->next, &xgene_pmu->mcbpmus);
 			break;
@@ -1235,6 +1767,40 @@ static const struct xgene_pmu_data xgene_pmu_v2_data = {
 	.id   = PCP_PMU_V2,
 };
 
+static const struct xgene_pmu_ops xgene_pmu_ops = {
+	.mask_int = xgene_pmu_mask_int,
+	.unmask_int = xgene_pmu_unmask_int,
+	.read_counter = xgene_pmu_read_counter32,
+	.write_counter = xgene_pmu_write_counter32,
+	.write_evttype = xgene_pmu_write_evttype,
+	.write_agentmsk = xgene_pmu_write_agentmsk,
+	.write_agent1msk = xgene_pmu_write_agent1msk,
+	.enable_counter = xgene_pmu_enable_counter,
+	.disable_counter = xgene_pmu_disable_counter,
+	.enable_counter_int = xgene_pmu_enable_counter_int,
+	.disable_counter_int = xgene_pmu_disable_counter_int,
+	.reset_counters = xgene_pmu_reset_counters,
+	.start_counters = xgene_pmu_start_counters,
+	.stop_counters = xgene_pmu_stop_counters,
+};
+
+static const struct xgene_pmu_ops xgene_pmu_v3_ops = {
+	.mask_int = xgene_pmu_v3_mask_int,
+	.unmask_int = xgene_pmu_v3_unmask_int,
+	.read_counter = xgene_pmu_read_counter64,
+	.write_counter = xgene_pmu_write_counter64,
+	.write_evttype = xgene_pmu_write_evttype,
+	.write_agentmsk = xgene_pmu_v3_write_agentmsk,
+	.write_agent1msk = xgene_pmu_v3_write_agent1msk,
+	.enable_counter = xgene_pmu_enable_counter,
+	.disable_counter = xgene_pmu_disable_counter,
+	.enable_counter_int = xgene_pmu_enable_counter_int,
+	.disable_counter_int = xgene_pmu_disable_counter_int,
+	.reset_counters = xgene_pmu_reset_counters,
+	.start_counters = xgene_pmu_start_counters,
+	.stop_counters = xgene_pmu_stop_counters,
+};
+
 static const struct of_device_id xgene_pmu_of_match[] = {
 	{ .compatible	= "apm,xgene-pmu",	.data = &xgene_pmu_data },
 	{ .compatible	= "apm,xgene-pmu-v2",	.data = &xgene_pmu_v2_data },
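
Note: the two ops tables above differ only in interrupt masking, counter width, and agent masking (the v3 agent-mask hooks are deliberate no-ops); the remaining hooks are shared. This lets the hot paths call xgene_pmu->ops->... unconditionally instead of branching on the PMU version at every counter access.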
@@ -1245,6 +1811,7 @@ MODULE_DEVICE_TABLE(of, xgene_pmu_of_match);
 static const struct acpi_device_id xgene_pmu_acpi_match[] = {
 	{"APMC0D5B", PCP_PMU_V1},
 	{"APMC0D5C", PCP_PMU_V2},
+	{"APMC0D83", PCP_PMU_V3},
 	{},
 };
 MODULE_DEVICE_TABLE(acpi, xgene_pmu_acpi_match);
@@ -1284,6 +1851,11 @@ static int xgene_pmu_probe(struct platform_device *pdev)
 	if (version < 0)
 		return -ENODEV;
 
+	if (version == PCP_PMU_V3)
+		xgene_pmu->ops = &xgene_pmu_v3_ops;
+	else
+		xgene_pmu->ops = &xgene_pmu_ops;
+
 	INIT_LIST_HEAD(&xgene_pmu->l3cpmus);
 	INIT_LIST_HEAD(&xgene_pmu->iobpmus);
 	INIT_LIST_HEAD(&xgene_pmu->mcbpmus);
@@ -1317,7 +1889,7 @@ static int xgene_pmu_probe(struct platform_device *pdev)
 	raw_spin_lock_init(&xgene_pmu->lock);
 
 	/* Check for active MCBs and MCUs */
-	rc = xgene_pmu_probe_active_mcb_mcu(xgene_pmu, pdev);
+	rc = xgene_pmu_probe_active_mcb_mcu_l3c(xgene_pmu, pdev);
 	if (rc) {
 		dev_warn(&pdev->dev, "Unknown MCB/MCU active status\n");
 		xgene_pmu->mcb_active_mask = 0x1;
@@ -1342,7 +1914,7 @@ static int xgene_pmu_probe(struct platform_device *pdev)
 	}
 
 	/* Enable interrupt */
-	xgene_pmu_unmask_int(xgene_pmu);
+	xgene_pmu->ops->unmask_int(xgene_pmu);
 
 	return 0;