Просмотр исходного кода

Merge tag 'edac_for_4.7' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp

Pull EDAC updates from Borislav Petkov:
 "It was pretty busy in EDAC land this time:

   - Altera Arria10 L2 cache and On-Chip RAM ECC handling (Thor Thayer)

   - Remove ad-hoc buffering of MCE records in sb_edac and i7core_edac
     (Tony Luck)

   - Do not register sb_edac with pci_register_driver() (Tony Luck)

   - Add support for Skylake to ie31200_edac (Jason Baron)

   - Do not register amd64_edac with pci_register_driver() (Borislav
     Petkov)

  ... plus the usual round of cleanups and fixes all over the place"

* tag 'edac_for_4.7' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp: (25 commits)
  EDAC, amd64_edac: Drop pci_register_driver() use
  EDAC, ie31200_edac: Add Skylake support
  EDAC, sb_edac: Use cpu family/model in driver detection
  EDAC, i7core: Remove double buffering of error records
  EDAC, amd64_edac: Issue driver banner only on success
  ARM: socfpga: Initialize Arria10 OCRAM ECC on startup
  EDAC: Increment correct counter in edac_inc_ue_error()
  EDAC, sb_edac: Remove double buffering of error records
  EDAC: Fix used after kfree() error in edac_unregister_sysfs()
  EDAC, altera: Avoid unused function warnings
  EDAC, altera: Remove useless casts
  ARM: socfpga: Enable Arria10 OCRAM ECC on startup
  EDAC, altera: Add Arria10 OCRAM ECC support
  Documentation: dt: socfpga: Add Altera Arria10 OCRAM binding
  EDAC, altera: Make OCRAM ECC dependency check generic
  EDAC, altera: Add register offset for ECC Enable
  EDAC, altera: Extract error inject operations to a struct fops
  ARM: socfpga: Enable Arria10 L2 cache ECC on startup
  EDAC, altera: Add Arria10 L2 Cache ECC handling
  Documentation, dt, socfpga: Add Altera Arria10 L2 cache binding
  ...
Linus Torvalds 9 лет назад
Родитель
Сommit
1cc3880a3c

+ 50 - 0
Documentation/devicetree/bindings/arm/altera/socfpga-eccmgr.txt

@@ -3,6 +3,7 @@ This driver uses the EDAC framework to implement the SOCFPGA ECC Manager.
 The ECC Manager counts and corrects single bit errors and counts/handles
 double bit errors which are uncorrectable.
 
+Cyclone5 and Arria5 ECC Manager
 Required Properties:
 - compatible : Should be "altr,socfpga-ecc-manager"
 - #address-cells: must be 1
@@ -47,3 +48,52 @@ Example:
 			interrupts = <0 178 1>, <0 179 1>;
 		};
 	};
+
+Arria10 SoCFPGA ECC Manager
+The Arria10 SoC ECC Manager handles the IRQs for each peripheral
+in a shared register instead of individual IRQs like the Cyclone5
+and Arria5. Therefore the device tree is different as well.
+
+Required Properties:
+- compatible : Should be "altr,socfpga-a10-ecc-manager"
+- altr,sysgr-syscon : phandle to Arria10 System Manager Block
+	containing the ECC manager registers.
+- #address-cells: must be 1
+- #size-cells: must be 1
+- interrupts : Should be single bit error interrupt, then double bit error
+	interrupt. Note the rising edge type.
+- ranges : standard definition, should translate from local addresses
+
+Subcomponents:
+
+L2 Cache ECC
+Required Properties:
+- compatible : Should be "altr,socfpga-a10-l2-ecc"
+- reg : Address and size for ECC error interrupt clear registers.
+
+On-Chip RAM ECC
+Required Properties:
+- compatible : Should be "altr,socfpga-a10-ocram-ecc"
+- reg        : Address and size for ECC block registers.
+
+Example:
+
+	eccmgr: eccmgr@ffd06000 {
+		compatible = "altr,socfpga-a10-ecc-manager";
+		altr,sysmgr-syscon = <&sysmgr>;
+		#address-cells = <1>;
+		#size-cells = <1>;
+		interrupts = <0 2 IRQ_TYPE_LEVEL_HIGH>,
+			     <0 0 IRQ_TYPE_LEVEL_HIGH>;
+		ranges;
+
+		l2-ecc@ffd06010 {
+			compatible = "altr,socfpga-a10-l2-ecc";
+			reg = <0xffd06010 0x4>;
+		};
+
+		ocram-ecc@ff8c3000 {
+			compatible = "altr,socfpga-a10-ocram-ecc";
+			reg = <0xff8c3000 0x90>;
+		};
+	};

+ 2 - 0
arch/arm/mach-socfpga/core.h

@@ -38,6 +38,8 @@ extern void socfpga_init_clocks(void);
 extern void socfpga_sysmgr_init(void);
 void socfpga_init_l2_ecc(void);
 void socfpga_init_ocram_ecc(void);
+void socfpga_init_arria10_l2_ecc(void);
+void socfpga_init_arria10_ocram_ecc(void);
 
 extern void __iomem *sys_manager_base_addr;
 extern void __iomem *rst_manager_base_addr;

+ 49 - 0
arch/arm/mach-socfpga/l2_cache.c

@@ -17,6 +17,20 @@
 #include <linux/of_platform.h>
 #include <linux/of_address.h>
 
+#include "core.h"
+
+/* A10 System Manager L2 ECC Control register */
+#define A10_MPU_CTRL_L2_ECC_OFST          0x0
+#define A10_MPU_CTRL_L2_ECC_EN            BIT(0)
+
+/* A10 System Manager Global IRQ Mask register */
+#define A10_SYSMGR_ECC_INTMASK_CLR_OFST   0x98
+#define A10_SYSMGR_ECC_INTMASK_CLR_L2     BIT(0)
+
+/* A10 System Manager L2 ECC IRQ Clear register */
+#define A10_SYSMGR_MPU_CLEAR_L2_ECC_OFST  0xA8
+#define A10_SYSMGR_MPU_CLEAR_L2_ECC       (BIT(31) | BIT(15))
+
 void socfpga_init_l2_ecc(void)
 {
 	struct device_node *np;
@@ -39,3 +53,38 @@ void socfpga_init_l2_ecc(void)
 	writel(0x01, mapped_l2_edac_addr);
 	iounmap(mapped_l2_edac_addr);
 }
+
+void socfpga_init_arria10_l2_ecc(void)
+{
+	struct device_node *np;
+	void __iomem *mapped_l2_edac_addr;
+
+	/* Find the L2 EDAC device tree node */
+	np = of_find_compatible_node(NULL, NULL, "altr,socfpga-a10-l2-ecc");
+	if (!np) {
+		pr_err("Unable to find socfpga-a10-l2-ecc in dtb\n");
+		return;
+	}
+
+	mapped_l2_edac_addr = of_iomap(np, 0);
+	of_node_put(np);
+	if (!mapped_l2_edac_addr) {
+		pr_err("Unable to find L2 ECC mapping in dtb\n");
+		return;
+	}
+
+	if (!sys_manager_base_addr) {
+		pr_err("System Mananger not mapped for L2 ECC\n");
+		goto exit;
+	}
+	/* Clear any pending IRQs */
+	writel(A10_SYSMGR_MPU_CLEAR_L2_ECC, (sys_manager_base_addr +
+	       A10_SYSMGR_MPU_CLEAR_L2_ECC_OFST));
+	/* Enable ECC */
+	writel(A10_SYSMGR_ECC_INTMASK_CLR_L2, sys_manager_base_addr +
+	       A10_SYSMGR_ECC_INTMASK_CLR_OFST);
+	writel(A10_MPU_CTRL_L2_ECC_EN, mapped_l2_edac_addr +
+	       A10_MPU_CTRL_L2_ECC_OFST);
+exit:
+	iounmap(mapped_l2_edac_addr);
+}

+ 133 - 0
arch/arm/mach-socfpga/ocram.c

@@ -13,12 +13,15 @@
  * You should have received a copy of the GNU General Public License along with
  * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
+#include <linux/delay.h>
 #include <linux/io.h>
 #include <linux/genalloc.h>
 #include <linux/module.h>
 #include <linux/of_address.h>
 #include <linux/of_platform.h>
 
+#include "core.h"
+
 #define ALTR_OCRAM_CLEAR_ECC          0x00000018
 #define ALTR_OCRAM_ECC_EN             0x00000019
 
@@ -47,3 +50,133 @@ void socfpga_init_ocram_ecc(void)
 
 	iounmap(mapped_ocr_edac_addr);
 }
+
+/* Arria10 OCRAM Section */
+#define ALTR_A10_ECC_CTRL_OFST          0x08
+#define ALTR_A10_OCRAM_ECC_EN_CTL       (BIT(1) | BIT(0))
+#define ALTR_A10_ECC_INITA              BIT(16)
+
+#define ALTR_A10_ECC_INITSTAT_OFST      0x0C
+#define ALTR_A10_ECC_INITCOMPLETEA      BIT(0)
+#define ALTR_A10_ECC_INITCOMPLETEB      BIT(8)
+
+#define ALTR_A10_ECC_ERRINTEN_OFST      0x10
+#define ALTR_A10_ECC_SERRINTEN          BIT(0)
+
+#define ALTR_A10_ECC_INTSTAT_OFST       0x20
+#define ALTR_A10_ECC_SERRPENA           BIT(0)
+#define ALTR_A10_ECC_DERRPENA           BIT(8)
+#define ALTR_A10_ECC_ERRPENA_MASK       (ALTR_A10_ECC_SERRPENA | \
+					 ALTR_A10_ECC_DERRPENA)
+/* ECC Manager Defines */
+#define A10_SYSMGR_ECC_INTMASK_SET_OFST   0x94
+#define A10_SYSMGR_ECC_INTMASK_CLR_OFST   0x98
+#define A10_SYSMGR_ECC_INTMASK_OCRAM      BIT(1)
+
+#define ALTR_A10_ECC_INIT_WATCHDOG_10US   10000
+
+static inline void ecc_set_bits(u32 bit_mask, void __iomem *ioaddr)
+{
+	u32 value = readl(ioaddr);
+
+	value |= bit_mask;
+	writel(value, ioaddr);
+}
+
+static inline void ecc_clear_bits(u32 bit_mask, void __iomem *ioaddr)
+{
+	u32 value = readl(ioaddr);
+
+	value &= ~bit_mask;
+	writel(value, ioaddr);
+}
+
+static inline int ecc_test_bits(u32 bit_mask, void __iomem *ioaddr)
+{
+	u32 value = readl(ioaddr);
+
+	return (value & bit_mask) ? 1 : 0;
+}
+
+/*
+ * This function uses the memory initialization block in the Arria10 ECC
+ * controller to initialize/clear the entire memory data and ECC data.
+ */
+static int altr_init_memory_port(void __iomem *ioaddr)
+{
+	int limit = ALTR_A10_ECC_INIT_WATCHDOG_10US;
+
+	ecc_set_bits(ALTR_A10_ECC_INITA, (ioaddr + ALTR_A10_ECC_CTRL_OFST));
+	while (limit--) {
+		if (ecc_test_bits(ALTR_A10_ECC_INITCOMPLETEA,
+				  (ioaddr + ALTR_A10_ECC_INITSTAT_OFST)))
+			break;
+		udelay(1);
+	}
+	if (limit < 0)
+		return -EBUSY;
+
+	/* Clear any pending ECC interrupts */
+	writel(ALTR_A10_ECC_ERRPENA_MASK,
+	       (ioaddr + ALTR_A10_ECC_INTSTAT_OFST));
+
+	return 0;
+}
+
+void socfpga_init_arria10_ocram_ecc(void)
+{
+	struct device_node *np;
+	int ret = 0;
+	void __iomem *ecc_block_base;
+
+	if (!sys_manager_base_addr) {
+		pr_err("SOCFPGA: sys-mgr is not initialized\n");
+		return;
+	}
+
+	/* Find the OCRAM EDAC device tree node */
+	np = of_find_compatible_node(NULL, NULL, "altr,socfpga-a10-ocram-ecc");
+	if (!np) {
+		pr_err("Unable to find socfpga-a10-ocram-ecc\n");
+		return;
+	}
+
+	/* Map the ECC Block */
+	ecc_block_base = of_iomap(np, 0);
+	of_node_put(np);
+	if (!ecc_block_base) {
+		pr_err("Unable to map OCRAM ECC block\n");
+		return;
+	}
+
+	/* Disable ECC */
+	writel(ALTR_A10_OCRAM_ECC_EN_CTL,
+	       sys_manager_base_addr + A10_SYSMGR_ECC_INTMASK_SET_OFST);
+	ecc_clear_bits(ALTR_A10_ECC_SERRINTEN,
+		       (ecc_block_base + ALTR_A10_ECC_ERRINTEN_OFST));
+	ecc_clear_bits(ALTR_A10_OCRAM_ECC_EN_CTL,
+		       (ecc_block_base + ALTR_A10_ECC_CTRL_OFST));
+
+	/* Ensure all writes complete */
+	wmb();
+
+	/* Use HW initialization block to initialize memory for ECC */
+	ret = altr_init_memory_port(ecc_block_base);
+	if (ret) {
+		pr_err("ECC: cannot init OCRAM PORTA memory\n");
+		goto exit;
+	}
+
+	/* Enable ECC */
+	ecc_set_bits(ALTR_A10_OCRAM_ECC_EN_CTL,
+		     (ecc_block_base + ALTR_A10_ECC_CTRL_OFST));
+	ecc_set_bits(ALTR_A10_ECC_SERRINTEN,
+		     (ecc_block_base + ALTR_A10_ECC_ERRINTEN_OFST));
+	writel(ALTR_A10_OCRAM_ECC_EN_CTL,
+	       sys_manager_base_addr + A10_SYSMGR_ECC_INTMASK_CLR_OFST);
+
+	/* Ensure all writes complete */
+	wmb();
+exit:
+	iounmap(ecc_block_base);
+}

+ 11 - 1
arch/arm/mach-socfpga/socfpga.c

@@ -66,6 +66,16 @@ static void __init socfpga_init_irq(void)
 		socfpga_init_ocram_ecc();
 }
 
+static void __init socfpga_arria10_init_irq(void)
+{
+	irqchip_init();
+	socfpga_sysmgr_init();
+	if (IS_ENABLED(CONFIG_EDAC_ALTERA_L2C))
+		socfpga_init_arria10_l2_ecc();
+	if (IS_ENABLED(CONFIG_EDAC_ALTERA_OCRAM))
+		socfpga_init_arria10_ocram_ecc();
+}
+
 static void socfpga_cyclone5_restart(enum reboot_mode mode, const char *cmd)
 {
 	u32 temp;
@@ -113,7 +123,7 @@ static const char *altera_a10_dt_match[] = {
 DT_MACHINE_START(SOCFPGA_A10, "Altera SOCFPGA Arria10")
 	.l2c_aux_val	= 0,
 	.l2c_aux_mask	= ~0,
-	.init_irq	= socfpga_init_irq,
+	.init_irq	= socfpga_arria10_init_irq,
 	.restart	= socfpga_arria10_restart,
 	.dt_compat	= altera_a10_dt_match,
 MACHINE_END

+ 2 - 3
drivers/edac/Kconfig

@@ -378,12 +378,11 @@ config EDAC_ALTERA
 
 config EDAC_ALTERA_L2C
 	bool "Altera L2 Cache ECC"
-	depends on EDAC_ALTERA=y
-	select CACHE_L2X0
+	depends on EDAC_ALTERA=y && CACHE_L2X0
 	help
 	  Support for error detection and correction on the
 	  Altera L2 cache Memory for Altera SoCs. This option
-	  requires L2 cache so it will force that selection.
+	  requires L2 cache.
 
 config EDAC_ALTERA_OCRAM
 	bool "Altera On-Chip RAM ECC"

+ 341 - 71
drivers/edac/altera_edac.c

@@ -24,6 +24,7 @@
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <linux/mfd/syscon.h>
+#include <linux/of_address.h>
 #include <linux/of_platform.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
@@ -78,27 +79,6 @@ static const struct altr_sdram_prv_data a10_data = {
 	.ue_set_mask        = A10_DIAGINT_TDERRA_MASK,
 };
 
-/************************** EDAC Device Defines **************************/
-
-/* OCRAM ECC Management Group Defines */
-#define ALTR_MAN_GRP_OCRAM_ECC_OFFSET   0x04
-#define ALTR_OCR_ECC_EN                 BIT(0)
-#define ALTR_OCR_ECC_INJS               BIT(1)
-#define ALTR_OCR_ECC_INJD               BIT(2)
-#define ALTR_OCR_ECC_SERR               BIT(3)
-#define ALTR_OCR_ECC_DERR               BIT(4)
-
-/* L2 ECC Management Group Defines */
-#define ALTR_MAN_GRP_L2_ECC_OFFSET      0x00
-#define ALTR_L2_ECC_EN                  BIT(0)
-#define ALTR_L2_ECC_INJS                BIT(1)
-#define ALTR_L2_ECC_INJD                BIT(2)
-
-#define ALTR_UE_TRIGGER_CHAR            'U'   /* Trigger for UE */
-#define ALTR_TRIGGER_READ_WRD_CNT       32    /* Line size x 4 */
-#define ALTR_TRIG_OCRAM_BYTE_SIZE       128   /* Line size x 4 */
-#define ALTR_TRIG_L2C_BYTE_SIZE         4096  /* Full Page */
-
 /*********************** EDAC Memory Controller Functions ****************/
 
 /* The SDRAM controller uses the EDAC Memory Controller framework.       */
@@ -252,8 +232,8 @@ static unsigned long get_total_mem(void)
 }
 
 static const struct of_device_id altr_sdram_ctrl_of_match[] = {
-	{ .compatible = "altr,sdram-edac", .data = (void *)&c5_data},
-	{ .compatible = "altr,sdram-edac-a10", .data = (void *)&a10_data},
+	{ .compatible = "altr,sdram-edac", .data = &c5_data},
+	{ .compatible = "altr,sdram-edac-a10", .data = &a10_data},
 	{},
 };
 MODULE_DEVICE_TABLE(of, altr_sdram_ctrl_of_match);
@@ -570,28 +550,8 @@ module_platform_driver(altr_edac_driver);
 
 const struct edac_device_prv_data ocramecc_data;
 const struct edac_device_prv_data l2ecc_data;
-
-struct edac_device_prv_data {
-	int (*setup)(struct platform_device *pdev, void __iomem *base);
-	int ce_clear_mask;
-	int ue_clear_mask;
-	char dbgfs_name[20];
-	void * (*alloc_mem)(size_t size, void **other);
-	void (*free_mem)(void *p, size_t size, void *other);
-	int ecc_enable_mask;
-	int ce_set_mask;
-	int ue_set_mask;
-	int trig_alloc_sz;
-};
-
-struct altr_edac_device_dev {
-	void __iomem *base;
-	int sb_irq;
-	int db_irq;
-	const struct edac_device_prv_data *data;
-	struct dentry *debugfs_dir;
-	char *edac_dev_name;
-};
+const struct edac_device_prv_data a10_ocramecc_data;
+const struct edac_device_prv_data a10_l2ecc_data;
 
 static irqreturn_t altr_edac_device_handler(int irq, void *dev_id)
 {
@@ -665,8 +625,9 @@ static ssize_t altr_edac_device_trig(struct file *file,
 		if (ACCESS_ONCE(ptemp[i]))
 			result = -1;
 		/* Toggle Error bit (it is latched), leave ECC enabled */
-		writel(error_mask, drvdata->base);
-		writel(priv->ecc_enable_mask, drvdata->base);
+		writel(error_mask, (drvdata->base + priv->set_err_ofst));
+		writel(priv->ecc_enable_mask, (drvdata->base +
+					       priv->set_err_ofst));
 		ptemp[i] = i;
 	}
 	/* Ensure it has been written out */
@@ -694,6 +655,16 @@ static const struct file_operations altr_edac_device_inject_fops = {
 	.llseek = generic_file_llseek,
 };
 
+static ssize_t altr_edac_a10_device_trig(struct file *file,
+					 const char __user *user_buf,
+					 size_t count, loff_t *ppos);
+
+static const struct file_operations altr_edac_a10_device_inject_fops = {
+	.open = simple_open,
+	.write = altr_edac_a10_device_trig,
+	.llseek = generic_file_llseek,
+};
+
 static void altr_create_edacdev_dbgfs(struct edac_device_ctl_info *edac_dci,
 				      const struct edac_device_prv_data *priv)
 {
@@ -708,17 +679,18 @@ static void altr_create_edacdev_dbgfs(struct edac_device_ctl_info *edac_dci,
 
 	if (!edac_debugfs_create_file(priv->dbgfs_name, S_IWUSR,
 				      drvdata->debugfs_dir, edac_dci,
-				      &altr_edac_device_inject_fops))
+				      priv->inject_fops))
 		debugfs_remove_recursive(drvdata->debugfs_dir);
 }
 
 static const struct of_device_id altr_edac_device_of_match[] = {
 #ifdef CONFIG_EDAC_ALTERA_L2C
-	{ .compatible = "altr,socfpga-l2-ecc", .data = (void *)&l2ecc_data },
+	{ .compatible = "altr,socfpga-l2-ecc", .data = &l2ecc_data },
+	{ .compatible = "altr,socfpga-a10-l2-ecc", .data = &a10_l2ecc_data },
 #endif
 #ifdef CONFIG_EDAC_ALTERA_OCRAM
-	{ .compatible = "altr,socfpga-ocram-ecc",
-	  .data = (void *)&ocramecc_data },
+	{ .compatible = "altr,socfpga-ocram-ecc", .data = &ocramecc_data },
+	{ .compatible = "altr,socfpga-a10-ocram-ecc", .data = &a10_ocramecc_data },
 #endif
 	{},
 };
@@ -789,7 +761,7 @@ static int altr_edac_device_probe(struct platform_device *pdev)
 
 	/* Check specific dependencies for the module */
 	if (drvdata->data->setup) {
-		res = drvdata->data->setup(pdev, drvdata->base);
+		res = drvdata->data->setup(drvdata);
 		if (res)
 			goto fail1;
 	}
@@ -856,6 +828,25 @@ module_platform_driver(altr_edac_device_driver);
 /*********************** OCRAM EDAC Device Functions *********************/
 
 #ifdef CONFIG_EDAC_ALTERA_OCRAM
+/*
+ *  Test for memory's ECC dependencies upon entry because platform specific
+ *  startup should have initialized the memory and enabled the ECC.
+ *  Can't turn on ECC here because accessing un-initialized memory will
+ *  cause CE/UE errors possibly causing an ABORT.
+ */
+static int altr_check_ecc_deps(struct altr_edac_device_dev *device)
+{
+	void __iomem  *base = device->base;
+	const struct edac_device_prv_data *prv = device->data;
+
+	if (readl(base + prv->ecc_en_ofst) & prv->ecc_enable_mask)
+		return 0;
+
+	edac_printk(KERN_ERR, EDAC_DEVICE,
+		    "%s: No ECC present or ECC disabled.\n",
+		    device->edac_dev_name);
+	return -ENODEV;
+}
 
 static void *ocram_alloc_mem(size_t size, void **other)
 {
@@ -891,36 +882,53 @@ static void ocram_free_mem(void *p, size_t size, void *other)
 	gen_pool_free((struct gen_pool *)other, (u32)p, size);
 }
 
-/*
- * altr_ocram_check_deps()
- *	Test for OCRAM cache ECC dependencies upon entry because
- *	platform specific startup should have initialized the
- *	On-Chip RAM memory and enabled the ECC.
- *	Can't turn on ECC here because accessing un-initialized
- *	memory will cause CE/UE errors possibly causing an ABORT.
- */
-static int altr_ocram_check_deps(struct platform_device *pdev,
-				 void __iomem *base)
+static irqreturn_t altr_edac_a10_ecc_irq(struct altr_edac_device_dev *dci,
+					 bool sberr)
 {
-	if (readl(base) & ALTR_OCR_ECC_EN)
-		return 0;
+	void __iomem  *base = dci->base;
 
-	edac_printk(KERN_ERR, EDAC_DEVICE,
-		    "OCRAM: No ECC present or ECC disabled.\n");
-	return -ENODEV;
+	if (sberr) {
+		writel(ALTR_A10_ECC_SERRPENA,
+		       base + ALTR_A10_ECC_INTSTAT_OFST);
+		edac_device_handle_ce(dci->edac_dev, 0, 0, dci->edac_dev_name);
+	} else {
+		writel(ALTR_A10_ECC_DERRPENA,
+		       base + ALTR_A10_ECC_INTSTAT_OFST);
+		edac_device_handle_ue(dci->edac_dev, 0, 0, dci->edac_dev_name);
+		panic("\nEDAC:ECC_DEVICE[Uncorrectable errors]\n");
+	}
+	return IRQ_HANDLED;
 }
 
 const struct edac_device_prv_data ocramecc_data = {
-	.setup = altr_ocram_check_deps,
+	.setup = altr_check_ecc_deps,
 	.ce_clear_mask = (ALTR_OCR_ECC_EN | ALTR_OCR_ECC_SERR),
 	.ue_clear_mask = (ALTR_OCR_ECC_EN | ALTR_OCR_ECC_DERR),
 	.dbgfs_name = "altr_ocram_trigger",
 	.alloc_mem = ocram_alloc_mem,
 	.free_mem = ocram_free_mem,
 	.ecc_enable_mask = ALTR_OCR_ECC_EN,
+	.ecc_en_ofst = ALTR_OCR_ECC_REG_OFFSET,
 	.ce_set_mask = (ALTR_OCR_ECC_EN | ALTR_OCR_ECC_INJS),
 	.ue_set_mask = (ALTR_OCR_ECC_EN | ALTR_OCR_ECC_INJD),
+	.set_err_ofst = ALTR_OCR_ECC_REG_OFFSET,
 	.trig_alloc_sz = ALTR_TRIG_OCRAM_BYTE_SIZE,
+	.inject_fops = &altr_edac_device_inject_fops,
+};
+
+const struct edac_device_prv_data a10_ocramecc_data = {
+	.setup = altr_check_ecc_deps,
+	.ce_clear_mask = ALTR_A10_ECC_SERRPENA,
+	.ue_clear_mask = ALTR_A10_ECC_DERRPENA,
+	.irq_status_mask = A10_SYSMGR_ECC_INTSTAT_OCRAM,
+	.dbgfs_name = "altr_ocram_trigger",
+	.ecc_enable_mask = ALTR_A10_OCRAM_ECC_EN_CTL,
+	.ecc_en_ofst = ALTR_A10_ECC_CTRL_OFST,
+	.ce_set_mask = ALTR_A10_ECC_TSERRA,
+	.ue_set_mask = ALTR_A10_ECC_TDERRA,
+	.set_err_ofst = ALTR_A10_ECC_INTTEST_OFST,
+	.ecc_irq_handler = altr_edac_a10_ecc_irq,
+	.inject_fops = &altr_edac_a10_device_inject_fops,
 };
 
 #endif	/* CONFIG_EDAC_ALTERA_OCRAM */
@@ -966,10 +974,13 @@ static void l2_free_mem(void *p, size_t size, void *other)
  *	Bail if ECC is not enabled.
  *	Note that L2 Cache Enable is forced at build time.
  */
-static int altr_l2_check_deps(struct platform_device *pdev,
-			      void __iomem *base)
+static int altr_l2_check_deps(struct altr_edac_device_dev *device)
 {
-	if (readl(base) & ALTR_L2_ECC_EN)
+	void __iomem *base = device->base;
+	const struct edac_device_prv_data *prv = device->data;
+
+	if ((readl(base) & prv->ecc_enable_mask) ==
+	     prv->ecc_enable_mask)
 		return 0;
 
 	edac_printk(KERN_ERR, EDAC_DEVICE,
@@ -977,6 +988,24 @@ static int altr_l2_check_deps(struct platform_device *pdev,
 	return -ENODEV;
 }
 
+static irqreturn_t altr_edac_a10_l2_irq(struct altr_edac_device_dev *dci,
+					bool sberr)
+{
+	if (sberr) {
+		regmap_write(dci->edac->ecc_mgr_map,
+			     A10_SYSGMR_MPU_CLEAR_L2_ECC_OFST,
+			     A10_SYSGMR_MPU_CLEAR_L2_ECC_SB);
+		edac_device_handle_ce(dci->edac_dev, 0, 0, dci->edac_dev_name);
+	} else {
+		regmap_write(dci->edac->ecc_mgr_map,
+			     A10_SYSGMR_MPU_CLEAR_L2_ECC_OFST,
+			     A10_SYSGMR_MPU_CLEAR_L2_ECC_MB);
+		edac_device_handle_ue(dci->edac_dev, 0, 0, dci->edac_dev_name);
+		panic("\nEDAC:ECC_DEVICE[Uncorrectable errors]\n");
+	}
+	return IRQ_HANDLED;
+}
+
 const struct edac_device_prv_data l2ecc_data = {
 	.setup = altr_l2_check_deps,
 	.ce_clear_mask = 0,
@@ -987,11 +1016,252 @@ const struct edac_device_prv_data l2ecc_data = {
 	.ecc_enable_mask = ALTR_L2_ECC_EN,
 	.ce_set_mask = (ALTR_L2_ECC_EN | ALTR_L2_ECC_INJS),
 	.ue_set_mask = (ALTR_L2_ECC_EN | ALTR_L2_ECC_INJD),
+	.set_err_ofst = ALTR_L2_ECC_REG_OFFSET,
+	.trig_alloc_sz = ALTR_TRIG_L2C_BYTE_SIZE,
+	.inject_fops = &altr_edac_device_inject_fops,
+};
+
+const struct edac_device_prv_data a10_l2ecc_data = {
+	.setup = altr_l2_check_deps,
+	.ce_clear_mask = ALTR_A10_L2_ECC_SERR_CLR,
+	.ue_clear_mask = ALTR_A10_L2_ECC_MERR_CLR,
+	.irq_status_mask = A10_SYSMGR_ECC_INTSTAT_L2,
+	.dbgfs_name = "altr_l2_trigger",
+	.alloc_mem = l2_alloc_mem,
+	.free_mem = l2_free_mem,
+	.ecc_enable_mask = ALTR_A10_L2_ECC_EN_CTL,
+	.ce_set_mask = ALTR_A10_L2_ECC_CE_INJ_MASK,
+	.ue_set_mask = ALTR_A10_L2_ECC_UE_INJ_MASK,
+	.set_err_ofst = ALTR_A10_L2_ECC_INJ_OFST,
+	.ecc_irq_handler = altr_edac_a10_l2_irq,
 	.trig_alloc_sz = ALTR_TRIG_L2C_BYTE_SIZE,
+	.inject_fops = &altr_edac_device_inject_fops,
 };
 
 #endif	/* CONFIG_EDAC_ALTERA_L2C */
 
+/********************* Arria10 EDAC Device Functions *************************/
+
+/*
+ * The Arria10 EDAC Device Functions differ from the Cyclone5/Arria5
+ * because 2 IRQs are shared among the all ECC peripherals. The ECC
+ * manager manages the IRQs and the children.
+ * Based on xgene_edac.c peripheral code.
+ */
+
+static ssize_t altr_edac_a10_device_trig(struct file *file,
+					 const char __user *user_buf,
+					 size_t count, loff_t *ppos)
+{
+	struct edac_device_ctl_info *edac_dci = file->private_data;
+	struct altr_edac_device_dev *drvdata = edac_dci->pvt_info;
+	const struct edac_device_prv_data *priv = drvdata->data;
+	void __iomem *set_addr = (drvdata->base + priv->set_err_ofst);
+	unsigned long flags;
+	u8 trig_type;
+
+	if (!user_buf || get_user(trig_type, user_buf))
+		return -EFAULT;
+
+	local_irq_save(flags);
+	if (trig_type == ALTR_UE_TRIGGER_CHAR)
+		writel(priv->ue_set_mask, set_addr);
+	else
+		writel(priv->ce_set_mask, set_addr);
+	/* Ensure the interrupt test bits are set */
+	wmb();
+	local_irq_restore(flags);
+
+	return count;
+}
+
+static irqreturn_t altr_edac_a10_irq_handler(int irq, void *dev_id)
+{
+	irqreturn_t rc = IRQ_NONE;
+	struct altr_arria10_edac *edac = dev_id;
+	struct altr_edac_device_dev *dci;
+	int irq_status;
+	bool sberr = (irq == edac->sb_irq) ? 1 : 0;
+	int sm_offset = sberr ? A10_SYSMGR_ECC_INTSTAT_SERR_OFST :
+				A10_SYSMGR_ECC_INTSTAT_DERR_OFST;
+
+	regmap_read(edac->ecc_mgr_map, sm_offset, &irq_status);
+
+	if ((irq != edac->sb_irq) && (irq != edac->db_irq)) {
+		WARN_ON(1);
+	} else {
+		list_for_each_entry(dci, &edac->a10_ecc_devices, next) {
+			if (irq_status & dci->data->irq_status_mask)
+				rc = dci->data->ecc_irq_handler(dci, sberr);
+		}
+	}
+
+	return rc;
+}
+
+static int altr_edac_a10_device_add(struct altr_arria10_edac *edac,
+				    struct device_node *np)
+{
+	struct edac_device_ctl_info *dci;
+	struct altr_edac_device_dev *altdev;
+	char *ecc_name = (char *)np->name;
+	struct resource res;
+	int edac_idx;
+	int rc = 0;
+	const struct edac_device_prv_data *prv;
+	/* Get matching node and check for valid result */
+	const struct of_device_id *pdev_id =
+		of_match_node(altr_edac_device_of_match, np);
+	if (IS_ERR_OR_NULL(pdev_id))
+		return -ENODEV;
+
+	/* Get driver specific data for this EDAC device */
+	prv = pdev_id->data;
+	if (IS_ERR_OR_NULL(prv))
+		return -ENODEV;
+
+	if (!devres_open_group(edac->dev, altr_edac_a10_device_add, GFP_KERNEL))
+		return -ENOMEM;
+
+	rc = of_address_to_resource(np, 0, &res);
+	if (rc < 0) {
+		edac_printk(KERN_ERR, EDAC_DEVICE,
+			    "%s: no resource address\n", ecc_name);
+		goto err_release_group;
+	}
+
+	edac_idx = edac_device_alloc_index();
+	dci = edac_device_alloc_ctl_info(sizeof(*altdev), ecc_name,
+					 1, ecc_name, 1, 0, NULL, 0,
+					 edac_idx);
+
+	if (!dci) {
+		edac_printk(KERN_ERR, EDAC_DEVICE,
+			    "%s: Unable to allocate EDAC device\n", ecc_name);
+		rc = -ENOMEM;
+		goto err_release_group;
+	}
+
+	altdev = dci->pvt_info;
+	dci->dev = edac->dev;
+	altdev->edac_dev_name = ecc_name;
+	altdev->edac_idx = edac_idx;
+	altdev->edac = edac;
+	altdev->edac_dev = dci;
+	altdev->data = prv;
+	altdev->ddev = *edac->dev;
+	dci->dev = &altdev->ddev;
+	dci->ctl_name = "Altera ECC Manager";
+	dci->mod_name = ecc_name;
+	dci->dev_name = ecc_name;
+
+	altdev->base = devm_ioremap_resource(edac->dev, &res);
+	if (IS_ERR(altdev->base)) {
+		rc = PTR_ERR(altdev->base);
+		goto err_release_group1;
+	}
+
+	/* Check specific dependencies for the module */
+	if (altdev->data->setup) {
+		rc = altdev->data->setup(altdev);
+		if (rc)
+			goto err_release_group1;
+	}
+
+	rc = edac_device_add_device(dci);
+	if (rc) {
+		dev_err(edac->dev, "edac_device_add_device failed\n");
+		rc = -ENOMEM;
+		goto err_release_group1;
+	}
+
+	altr_create_edacdev_dbgfs(dci, prv);
+
+	list_add(&altdev->next, &edac->a10_ecc_devices);
+
+	devres_remove_group(edac->dev, altr_edac_a10_device_add);
+
+	return 0;
+
+err_release_group1:
+	edac_device_free_ctl_info(dci);
+err_release_group:
+	edac_printk(KERN_ALERT, EDAC_DEVICE, "%s: %d\n", __func__, __LINE__);
+	devres_release_group(edac->dev, NULL);
+	edac_printk(KERN_ERR, EDAC_DEVICE,
+		    "%s:Error setting up EDAC device: %d\n", ecc_name, rc);
+
+	return rc;
+}
+
+static int altr_edac_a10_probe(struct platform_device *pdev)
+{
+	struct altr_arria10_edac *edac;
+	struct device_node *child;
+	int rc;
+
+	edac = devm_kzalloc(&pdev->dev, sizeof(*edac), GFP_KERNEL);
+	if (!edac)
+		return -ENOMEM;
+
+	edac->dev = &pdev->dev;
+	platform_set_drvdata(pdev, edac);
+	INIT_LIST_HEAD(&edac->a10_ecc_devices);
+
+	edac->ecc_mgr_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
+							"altr,sysmgr-syscon");
+	if (IS_ERR(edac->ecc_mgr_map)) {
+		edac_printk(KERN_ERR, EDAC_DEVICE,
+			    "Unable to get syscon altr,sysmgr-syscon\n");
+		return PTR_ERR(edac->ecc_mgr_map);
+	}
+
+	edac->sb_irq = platform_get_irq(pdev, 0);
+	rc = devm_request_irq(&pdev->dev, edac->sb_irq,
+			      altr_edac_a10_irq_handler,
+			      IRQF_SHARED, dev_name(&pdev->dev), edac);
+	if (rc) {
+		edac_printk(KERN_ERR, EDAC_DEVICE, "No SBERR IRQ resource\n");
+		return rc;
+	}
+
+	edac->db_irq = platform_get_irq(pdev, 1);
+	rc = devm_request_irq(&pdev->dev, edac->db_irq,
+			      altr_edac_a10_irq_handler,
+			      IRQF_SHARED, dev_name(&pdev->dev), edac);
+	if (rc) {
+		edac_printk(KERN_ERR, EDAC_DEVICE, "No DBERR IRQ resource\n");
+		return rc;
+	}
+
+	for_each_child_of_node(pdev->dev.of_node, child) {
+		if (!of_device_is_available(child))
+			continue;
+		if (of_device_is_compatible(child, "altr,socfpga-a10-l2-ecc"))
+			altr_edac_a10_device_add(edac, child);
+		else if (of_device_is_compatible(child,
+						 "altr,socfpga-a10-ocram-ecc"))
+			altr_edac_a10_device_add(edac, child);
+	}
+
+	return 0;
+}
+
+static const struct of_device_id altr_edac_a10_of_match[] = {
+	{ .compatible = "altr,socfpga-a10-ecc-manager" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, altr_edac_a10_of_match);
+
+static struct platform_driver altr_edac_a10_driver = {
+	.probe =  altr_edac_a10_probe,
+	.driver = {
+		.name = "socfpga_a10_ecc_manager",
+		.of_match_table = altr_edac_a10_of_match,
+	},
+};
+module_platform_driver(altr_edac_a10_driver);
+
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Thor Thayer");
 MODULE_DESCRIPTION("EDAC Driver for Altera Memories");

+ 128 - 0
drivers/edac/altera_edac.h

@@ -195,4 +195,132 @@ struct altr_sdram_mc_data {
 	const struct altr_sdram_prv_data *data;
 };
 
+/************************** EDAC Device Defines **************************/
+/***** General Device Trigger Defines *****/
+#define ALTR_UE_TRIGGER_CHAR            'U'   /* Trigger for UE */
+#define ALTR_TRIGGER_READ_WRD_CNT       32    /* Line size x 4 */
+#define ALTR_TRIG_OCRAM_BYTE_SIZE       128   /* Line size x 4 */
+#define ALTR_TRIG_L2C_BYTE_SIZE         4096  /* Full Page */
+
+/******* Cyclone5 and Arria5 Defines *******/
+/* OCRAM ECC Management Group Defines */
+#define ALTR_MAN_GRP_OCRAM_ECC_OFFSET   0x04
+#define ALTR_OCR_ECC_REG_OFFSET         0x00
+#define ALTR_OCR_ECC_EN                 BIT(0)
+#define ALTR_OCR_ECC_INJS               BIT(1)
+#define ALTR_OCR_ECC_INJD               BIT(2)
+#define ALTR_OCR_ECC_SERR               BIT(3)
+#define ALTR_OCR_ECC_DERR               BIT(4)
+
+/* L2 ECC Management Group Defines */
+#define ALTR_MAN_GRP_L2_ECC_OFFSET      0x00
+#define ALTR_L2_ECC_REG_OFFSET          0x00
+#define ALTR_L2_ECC_EN                  BIT(0)
+#define ALTR_L2_ECC_INJS                BIT(1)
+#define ALTR_L2_ECC_INJD                BIT(2)
+
+/* Arria10 General ECC Block Module Defines */
+#define ALTR_A10_ECC_CTRL_OFST          0x08
+#define ALTR_A10_ECC_EN                 BIT(0)
+#define ALTR_A10_ECC_INITA              BIT(16)
+#define ALTR_A10_ECC_INITB              BIT(24)
+
+#define ALTR_A10_ECC_INITSTAT_OFST      0x0C
+#define ALTR_A10_ECC_INITCOMPLETEA      BIT(0)
+#define ALTR_A10_ECC_INITCOMPLETEB      BIT(8)
+
+#define ALTR_A10_ECC_ERRINTEN_OFST      0x10
+#define ALTR_A10_ECC_SERRINTEN          BIT(0)
+
+#define ALTR_A10_ECC_INTSTAT_OFST       0x20
+#define ALTR_A10_ECC_SERRPENA           BIT(0)
+#define ALTR_A10_ECC_DERRPENA           BIT(8)
+#define ALTR_A10_ECC_ERRPENA_MASK       (ALTR_A10_ECC_SERRPENA | \
+					 ALTR_A10_ECC_DERRPENA)
+#define ALTR_A10_ECC_SERRPENB           BIT(16)
+#define ALTR_A10_ECC_DERRPENB           BIT(24)
+#define ALTR_A10_ECC_ERRPENB_MASK       (ALTR_A10_ECC_SERRPENB | \
+					 ALTR_A10_ECC_DERRPENB)
+
+#define ALTR_A10_ECC_INTTEST_OFST       0x24
+#define ALTR_A10_ECC_TSERRA             BIT(0)
+#define ALTR_A10_ECC_TDERRA             BIT(8)
+
+/* ECC Manager Defines */
+#define A10_SYSMGR_ECC_INTMASK_SET_OFST   0x94
+#define A10_SYSMGR_ECC_INTMASK_CLR_OFST   0x98
+#define A10_SYSMGR_ECC_INTMASK_OCRAM      BIT(1)
+
+#define A10_SYSMGR_ECC_INTSTAT_SERR_OFST  0x9C
+#define A10_SYSMGR_ECC_INTSTAT_DERR_OFST  0xA0
+#define A10_SYSMGR_ECC_INTSTAT_L2         BIT(0)
+#define A10_SYSMGR_ECC_INTSTAT_OCRAM      BIT(1)
+
+#define A10_SYSGMR_MPU_CLEAR_L2_ECC_OFST  0xA8
+#define A10_SYSGMR_MPU_CLEAR_L2_ECC_SB    BIT(15)
+#define A10_SYSGMR_MPU_CLEAR_L2_ECC_MB    BIT(31)
+
+/* Arria 10 L2 ECC Management Group Defines */
+#define ALTR_A10_L2_ECC_CTL_OFST        0x0
+#define ALTR_A10_L2_ECC_EN_CTL          BIT(0)
+
+#define ALTR_A10_L2_ECC_STATUS          0xFFD060A4
+#define ALTR_A10_L2_ECC_STAT_OFST       0xA4
+#define ALTR_A10_L2_ECC_SERR_PEND       BIT(0)
+#define ALTR_A10_L2_ECC_MERR_PEND       BIT(0)
+
+#define ALTR_A10_L2_ECC_CLR_OFST        0x4
+#define ALTR_A10_L2_ECC_SERR_CLR        BIT(15)
+#define ALTR_A10_L2_ECC_MERR_CLR        BIT(31)
+
+#define ALTR_A10_L2_ECC_INJ_OFST        ALTR_A10_L2_ECC_CTL_OFST
+#define ALTR_A10_L2_ECC_CE_INJ_MASK     0x00000101
+#define ALTR_A10_L2_ECC_UE_INJ_MASK     0x00010101
+
+/* Arria 10 OCRAM ECC Management Group Defines */
+#define ALTR_A10_OCRAM_ECC_EN_CTL       (BIT(1) | BIT(0))
+
+struct altr_edac_device_dev;
+
+struct edac_device_prv_data {
+	int (*setup)(struct altr_edac_device_dev *device);
+	int ce_clear_mask;
+	int ue_clear_mask;
+	int irq_status_mask;
+	char dbgfs_name[20];
+	void * (*alloc_mem)(size_t size, void **other);
+	void (*free_mem)(void *p, size_t size, void *other);
+	int ecc_enable_mask;
+	int ecc_en_ofst;
+	int ce_set_mask;
+	int ue_set_mask;
+	int set_err_ofst;
+	irqreturn_t (*ecc_irq_handler)(struct altr_edac_device_dev *dci,
+				       bool sb);
+	int trig_alloc_sz;
+	const struct file_operations *inject_fops;
+};
+
+struct altr_edac_device_dev {
+	struct list_head next;
+	void __iomem *base;
+	int sb_irq;
+	int db_irq;
+	const struct edac_device_prv_data *data;
+	struct dentry *debugfs_dir;
+	char *edac_dev_name;
+	struct altr_arria10_edac *edac;
+	struct edac_device_ctl_info *edac_dev;
+	struct device ddev;
+	int edac_idx;
+};
+
+struct altr_arria10_edac {
+	struct device		*dev;
+	struct regmap		*ecc_mgr_map;
+	int sb_irq;
+	int db_irq;
+	struct list_head	a10_ecc_devices;
+};
+
 #endif	/* #ifndef _ALTERA_EDAC_H */

+ 45 - 84
drivers/edac/amd64_edac.c

@@ -15,11 +15,6 @@ module_param(ecc_enable_override, int, 0644);
 
 static struct msr __percpu *msrs;
 
-/*
- * count successfully initialized driver instances for setup_pci_device()
- */
-static atomic_t drv_instances = ATOMIC_INIT(0);
-
 /* Per-node stuff */
 static struct ecc_settings **ecc_stngs;
 
@@ -1918,7 +1913,7 @@ static struct amd64_family_type family_types[] = {
 	[K8_CPUS] = {
 		.ctl_name = "K8",
 		.f1_id = PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP,
-		.f3_id = PCI_DEVICE_ID_AMD_K8_NB_MISC,
+		.f2_id = PCI_DEVICE_ID_AMD_K8_NB_MEMCTL,
 		.ops = {
 			.early_channel_count	= k8_early_channel_count,
 			.map_sysaddr_to_csrow	= k8_map_sysaddr_to_csrow,
@@ -1928,7 +1923,7 @@ static struct amd64_family_type family_types[] = {
 	[F10_CPUS] = {
 		.ctl_name = "F10h",
 		.f1_id = PCI_DEVICE_ID_AMD_10H_NB_MAP,
-		.f3_id = PCI_DEVICE_ID_AMD_10H_NB_MISC,
+		.f2_id = PCI_DEVICE_ID_AMD_10H_NB_DRAM,
 		.ops = {
 			.early_channel_count	= f1x_early_channel_count,
 			.map_sysaddr_to_csrow	= f1x_map_sysaddr_to_csrow,
@@ -1938,7 +1933,7 @@ static struct amd64_family_type family_types[] = {
 	[F15_CPUS] = {
 		.ctl_name = "F15h",
 		.f1_id = PCI_DEVICE_ID_AMD_15H_NB_F1,
-		.f3_id = PCI_DEVICE_ID_AMD_15H_NB_F3,
+		.f2_id = PCI_DEVICE_ID_AMD_15H_NB_F2,
 		.ops = {
 			.early_channel_count	= f1x_early_channel_count,
 			.map_sysaddr_to_csrow	= f1x_map_sysaddr_to_csrow,
@@ -1948,7 +1943,7 @@ static struct amd64_family_type family_types[] = {
 	[F15_M30H_CPUS] = {
 		.ctl_name = "F15h_M30h",
 		.f1_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F1,
-		.f3_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F3,
+		.f2_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F2,
 		.ops = {
 			.early_channel_count	= f1x_early_channel_count,
 			.map_sysaddr_to_csrow	= f1x_map_sysaddr_to_csrow,
@@ -1958,7 +1953,7 @@ static struct amd64_family_type family_types[] = {
 	[F15_M60H_CPUS] = {
 		.ctl_name = "F15h_M60h",
 		.f1_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F1,
-		.f3_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F3,
+		.f2_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F2,
 		.ops = {
 			.early_channel_count	= f1x_early_channel_count,
 			.map_sysaddr_to_csrow	= f1x_map_sysaddr_to_csrow,
@@ -1968,7 +1963,7 @@ static struct amd64_family_type family_types[] = {
 	[F16_CPUS] = {
 		.ctl_name = "F16h",
 		.f1_id = PCI_DEVICE_ID_AMD_16H_NB_F1,
-		.f3_id = PCI_DEVICE_ID_AMD_16H_NB_F3,
+		.f2_id = PCI_DEVICE_ID_AMD_16H_NB_F2,
 		.ops = {
 			.early_channel_count	= f1x_early_channel_count,
 			.map_sysaddr_to_csrow	= f1x_map_sysaddr_to_csrow,
@@ -1978,7 +1973,7 @@ static struct amd64_family_type family_types[] = {
 	[F16_M30H_CPUS] = {
 		.ctl_name = "F16h_M30h",
 		.f1_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F1,
-		.f3_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F3,
+		.f2_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F2,
 		.ops = {
 			.early_channel_count	= f1x_early_channel_count,
 			.map_sysaddr_to_csrow	= f1x_map_sysaddr_to_csrow,
@@ -2227,13 +2222,13 @@ static inline void decode_bus_error(int node_id, struct mce *m)
 }
 
 /*
- * Use pvt->F2 which contains the F2 CPU PCI device to get the related
- * F1 (AddrMap) and F3 (Misc) devices. Return negative value on error.
+ * Use pvt->F3 which contains the F3 CPU PCI device to get the related
+ * F1 (AddrMap) and F2 (Dct) devices. Return negative value on error.
  */
-static int reserve_mc_sibling_devs(struct amd64_pvt *pvt, u16 f1_id, u16 f3_id)
+static int reserve_mc_sibling_devs(struct amd64_pvt *pvt, u16 f1_id, u16 f2_id)
 {
 	/* Reserve the ADDRESS MAP Device */
-	pvt->F1 = pci_get_related_function(pvt->F2->vendor, f1_id, pvt->F2);
+	pvt->F1 = pci_get_related_function(pvt->F3->vendor, f1_id, pvt->F3);
 	if (!pvt->F1) {
 		amd64_err("error address map device not found: "
 			  "vendor %x device 0x%x (broken BIOS?)\n",
@@ -2241,15 +2236,15 @@ static int reserve_mc_sibling_devs(struct amd64_pvt *pvt, u16 f1_id, u16 f3_id)
 		return -ENODEV;
 	}
 
-	/* Reserve the MISC Device */
-	pvt->F3 = pci_get_related_function(pvt->F2->vendor, f3_id, pvt->F2);
-	if (!pvt->F3) {
+	/* Reserve the DCT Device */
+	pvt->F2 = pci_get_related_function(pvt->F3->vendor, f2_id, pvt->F3);
+	if (!pvt->F2) {
 		pci_dev_put(pvt->F1);
 		pvt->F1 = NULL;
 
-		amd64_err("error F3 device not found: "
+		amd64_err("error F2 device not found: "
 			  "vendor %x device 0x%x (broken BIOS?)\n",
-			  PCI_VENDOR_ID_AMD, f3_id);
+			  PCI_VENDOR_ID_AMD, f2_id);
 
 		return -ENODEV;
 	}
@@ -2263,7 +2258,7 @@ static int reserve_mc_sibling_devs(struct amd64_pvt *pvt, u16 f1_id, u16 f3_id)
 static void free_mc_sibling_devs(struct amd64_pvt *pvt)
 {
 	pci_dev_put(pvt->F1);
-	pci_dev_put(pvt->F3);
+	pci_dev_put(pvt->F2);
 }
 
 /*
@@ -2778,14 +2773,14 @@ static const struct attribute_group *amd64_edac_attr_groups[] = {
 	NULL
 };
 
-static int init_one_instance(struct pci_dev *F2)
+static int init_one_instance(unsigned int nid)
 {
-	struct amd64_pvt *pvt = NULL;
+	struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
 	struct amd64_family_type *fam_type = NULL;
 	struct mem_ctl_info *mci = NULL;
 	struct edac_mc_layer layers[2];
+	struct amd64_pvt *pvt = NULL;
 	int err = 0, ret;
-	u16 nid = amd_pci_dev_to_node_id(F2);
 
 	ret = -ENOMEM;
 	pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL);
@@ -2793,7 +2788,7 @@ static int init_one_instance(struct pci_dev *F2)
 		goto err_ret;
 
 	pvt->mc_node_id	= nid;
-	pvt->F2 = F2;
+	pvt->F3 = F3;
 
 	ret = -EINVAL;
 	fam_type = per_family_init(pvt);
@@ -2801,7 +2796,7 @@ static int init_one_instance(struct pci_dev *F2)
 		goto err_free;
 
 	ret = -ENODEV;
-	err = reserve_mc_sibling_devs(pvt, fam_type->f1_id, fam_type->f3_id);
+	err = reserve_mc_sibling_devs(pvt, fam_type->f1_id, fam_type->f2_id);
 	if (err)
 		goto err_free;
 
@@ -2836,7 +2831,7 @@ static int init_one_instance(struct pci_dev *F2)
 		goto err_siblings;
 
 	mci->pvt_info = pvt;
-	mci->pdev = &pvt->F2->dev;
+	mci->pdev = &pvt->F3->dev;
 
 	setup_mci_misc_attrs(mci, fam_type);
 
@@ -2855,8 +2850,6 @@ static int init_one_instance(struct pci_dev *F2)
 
 	amd_register_ecc_decoder(decode_bus_error);
 
-	atomic_inc(&drv_instances);
-
 	return 0;
 
 err_add_mc:
@@ -2872,19 +2865,11 @@ err_ret:
 	return ret;
 }
 
-static int probe_one_instance(struct pci_dev *pdev,
-			      const struct pci_device_id *mc_type)
+static int probe_one_instance(unsigned int nid)
 {
-	u16 nid = amd_pci_dev_to_node_id(pdev);
 	struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
 	struct ecc_settings *s;
-	int ret = 0;
-
-	ret = pci_enable_device(pdev);
-	if (ret < 0) {
-		edac_dbg(0, "ret=%d\n", ret);
-		return -EIO;
-	}
+	int ret;
 
 	ret = -ENOMEM;
 	s = kzalloc(sizeof(struct ecc_settings), GFP_KERNEL);
@@ -2905,7 +2890,7 @@ static int probe_one_instance(struct pci_dev *pdev,
 			goto err_enable;
 	}
 
-	ret = init_one_instance(pdev);
+	ret = init_one_instance(nid);
 	if (ret < 0) {
 		amd64_err("Error probing instance: %d\n", nid);
 		restore_ecc_error_reporting(s, nid, F3);
@@ -2921,19 +2906,18 @@ err_out:
 	return ret;
 }
 
-static void remove_one_instance(struct pci_dev *pdev)
+static void remove_one_instance(unsigned int nid)
 {
-	struct mem_ctl_info *mci;
-	struct amd64_pvt *pvt;
-	u16 nid = amd_pci_dev_to_node_id(pdev);
 	struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
 	struct ecc_settings *s = ecc_stngs[nid];
+	struct mem_ctl_info *mci;
+	struct amd64_pvt *pvt;
 
-	mci = find_mci_by_dev(&pdev->dev);
+	mci = find_mci_by_dev(&F3->dev);
 	WARN_ON(!mci);
 
 	/* Remove from EDAC CORE tracking list */
-	mci = edac_mc_del_mc(&pdev->dev);
+	mci = edac_mc_del_mc(&F3->dev);
 	if (!mci)
 		return;
 
@@ -2957,31 +2941,6 @@ static void remove_one_instance(struct pci_dev *pdev)
 	edac_mc_free(mci);
 }
 
-/*
- * This table is part of the interface for loading drivers for PCI devices. The
- * PCI core identifies what devices are on a system during boot, and then
- * inquiry this table to see if this driver is for a given device found.
- */
-static const struct pci_device_id amd64_pci_table[] = {
-	{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_K8_NB_MEMCTL) },
-	{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_10H_NB_DRAM) },
-	{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_NB_F2) },
-	{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_M30H_NB_F2) },
-	{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_M60H_NB_F2) },
-	{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_16H_NB_F2) },
-	{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F2) },
-	{0, }
-};
-MODULE_DEVICE_TABLE(pci, amd64_pci_table);
-
-static struct pci_driver amd64_pci_driver = {
-	.name		= EDAC_MOD_STR,
-	.probe		= probe_one_instance,
-	.remove		= remove_one_instance,
-	.id_table	= amd64_pci_table,
-	.driver.probe_type = PROBE_FORCE_SYNCHRONOUS,
-};
-
 static void setup_pci_device(void)
 {
 	struct mem_ctl_info *mci;
@@ -3005,8 +2964,7 @@ static void setup_pci_device(void)
 static int __init amd64_edac_init(void)
 {
 	int err = -ENODEV;
-
-	printk(KERN_INFO "AMD64 EDAC driver v%s\n", EDAC_AMD64_VERSION);
+	int i;
 
 	opstate_init();
 
@@ -3022,13 +2980,14 @@ static int __init amd64_edac_init(void)
 	if (!msrs)
 		goto err_free;
 
-	err = pci_register_driver(&amd64_pci_driver);
-	if (err)
-		goto err_pci;
+	for (i = 0; i < amd_nb_num(); i++)
+		if (probe_one_instance(i)) {
+			/* unwind properly */
+			while (--i >= 0)
+				remove_one_instance(i);
 
-	err = -ENODEV;
-	if (!atomic_read(&drv_instances))
-		goto err_no_instances;
+			goto err_pci;
+		}
 
 	setup_pci_device();
 
@@ -3036,10 +2995,9 @@ static int __init amd64_edac_init(void)
 	amd64_err("%s on 32-bit is unsupported. USE AT YOUR OWN RISK!\n", EDAC_MOD_STR);
 #endif
 
-	return 0;
+	printk(KERN_INFO "AMD64 EDAC driver v%s\n", EDAC_AMD64_VERSION);
 
-err_no_instances:
-	pci_unregister_driver(&amd64_pci_driver);
+	return 0;
 
 err_pci:
 	msrs_free(msrs);
@@ -3055,10 +3013,13 @@ err_ret:
 
 static void __exit amd64_edac_exit(void)
 {
+	int i;
+
 	if (pci_ctl)
 		edac_pci_release_generic_ctl(pci_ctl);
 
-	pci_unregister_driver(&amd64_pci_driver);
+	for (i = 0; i < amd_nb_num(); i++)
+		remove_one_instance(i);
 
 	kfree(ecc_stngs);
 	ecc_stngs = NULL;

+ 1 - 1
drivers/edac/amd64_edac.h

@@ -422,7 +422,7 @@ struct low_ops {
 
 struct amd64_family_type {
 	const char *ctl_name;
-	u16 f1_id, f3_id;
+	u16 f1_id, f2_id;
 	struct low_ops ops;
 };
 

+ 1 - 1
drivers/edac/edac_mc.c

@@ -923,7 +923,7 @@ static void edac_inc_ue_error(struct mem_ctl_info *mci,
 	mci->ue_mc += count;
 
 	if (!enable_per_layer_report) {
-		mci->ce_noinfo_count += count;
+		mci->ue_noinfo_count += count;
 		return;
 	}
 

+ 2 - 1
drivers/edac/edac_mc_sysfs.c

@@ -998,11 +998,12 @@ void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci)
 
 void edac_unregister_sysfs(struct mem_ctl_info *mci)
 {
+	struct bus_type *bus = mci->bus;
 	const char *name = mci->bus->name;
 
 	edac_dbg(1, "Unregistering device %s\n", dev_name(&mci->dev));
 	device_unregister(&mci->dev);
-	bus_unregister(mci->bus);
+	bus_unregister(bus);
 	kfree(name);
 }
 

+ 5 - 76
drivers/edac/i7core_edac.c

@@ -271,16 +271,6 @@ struct i7core_pvt {
 
 	bool		is_registered, enable_scrub;
 
-	/* Fifo double buffers */
-	struct mce		mce_entry[MCE_LOG_LEN];
-	struct mce		mce_outentry[MCE_LOG_LEN];
-
-	/* Fifo in/out counters */
-	unsigned		mce_in, mce_out;
-
-	/* Count indicator to show errors not got */
-	unsigned		mce_overrun;
-
 	/* DCLK Frequency used for computing scrub rate */
 	int			dclk_freq;
 
@@ -1792,56 +1782,15 @@ static void i7core_mce_output_error(struct mem_ctl_info *mci,
  *	i7core_check_error	Retrieve and process errors reported by the
  *				hardware. Called by the Core module.
  */
-static void i7core_check_error(struct mem_ctl_info *mci)
+static void i7core_check_error(struct mem_ctl_info *mci, struct mce *m)
 {
 	struct i7core_pvt *pvt = mci->pvt_info;
-	int i;
-	unsigned count = 0;
-	struct mce *m;
 
-	/*
-	 * MCE first step: Copy all mce errors into a temporary buffer
-	 * We use a double buffering here, to reduce the risk of
-	 * losing an error.
-	 */
-	smp_rmb();
-	count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in)
-		% MCE_LOG_LEN;
-	if (!count)
-		goto check_ce_error;
-
-	m = pvt->mce_outentry;
-	if (pvt->mce_in + count > MCE_LOG_LEN) {
-		unsigned l = MCE_LOG_LEN - pvt->mce_in;
-
-		memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l);
-		smp_wmb();
-		pvt->mce_in = 0;
-		count -= l;
-		m += l;
-	}
-	memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count);
-	smp_wmb();
-	pvt->mce_in += count;
-
-	smp_rmb();
-	if (pvt->mce_overrun) {
-		i7core_printk(KERN_ERR, "Lost %d memory errors\n",
-			      pvt->mce_overrun);
-		smp_wmb();
-		pvt->mce_overrun = 0;
-	}
-
-	/*
-	 * MCE second step: parse errors and display
-	 */
-	for (i = 0; i < count; i++)
-		i7core_mce_output_error(mci, &pvt->mce_outentry[i]);
+	i7core_mce_output_error(mci, m);
 
 	/*
 	 * Now, let's increment CE error counts
 	 */
-check_ce_error:
 	if (!pvt->is_registered)
 		i7core_udimm_check_mc_ecc_err(mci);
 	else
@@ -1849,12 +1798,8 @@ check_ce_error:
 }
 
 /*
- * i7core_mce_check_error	Replicates mcelog routine to get errors
- *				This routine simply queues mcelog errors, and
- *				return. The error itself should be handled later
- *				by i7core_check_error.
- * WARNING: As this routine should be called at NMI time, extra care should
- * be taken to avoid deadlocks, and to be as fast as possible.
+ * Check that logging is enabled and that this is the right type
+ * of error for us to handle.
  */
 static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val,
 				  void *data)
@@ -1882,21 +1827,7 @@ static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val,
 	if (mce->bank != 8)
 		return NOTIFY_DONE;
 
-	smp_rmb();
-	if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
-		smp_wmb();
-		pvt->mce_overrun++;
-		return NOTIFY_DONE;
-	}
-
-	/* Copy memory error at the ringbuffer */
-	memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce));
-	smp_wmb();
-	pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN;
-
-	/* Handle fatal errors immediately */
-	if (mce->mcgstatus & 1)
-		i7core_check_error(mci);
+	i7core_check_error(mci, mce);
 
 	/* Advise mcelog that the errors were handled */
 	return NOTIFY_STOP;
@@ -2243,8 +2174,6 @@ static int i7core_register_mci(struct i7core_dev *i7core_dev)
 	get_dimm_config(mci);
 	/* record ptr to the generic device */
 	mci->pdev = &i7core_dev->pdev[0]->dev;
-	/* Set the function pointer to an actual operation function */
-	mci->edac_check = i7core_check_error;
 
 	/* Enable scrubrate setting */
 	if (pvt->enable_scrub)

+ 90 - 31
drivers/edac/ie31200_edac.c

@@ -17,6 +17,7 @@
  * 015c: Xeon E3-1200 v2/3rd Gen Core processor DRAM Controller
  * 0c04: Xeon E3-1200 v3/4th Gen Core Processor DRAM Controller
  * 0c08: Xeon E3-1200 v3 Processor DRAM Controller
+ * 1918: Xeon E3-1200 v5 Skylake Host Bridge/DRAM Registers
  *
  * Based on Intel specification:
  * http://www.intel.com/content/dam/www/public/us/en/documents/datasheets/xeon-e3-1200v3-vol-2-datasheet.pdf
@@ -55,6 +56,7 @@
 #define PCI_DEVICE_ID_INTEL_IE31200_HB_5 0x015c
 #define PCI_DEVICE_ID_INTEL_IE31200_HB_6 0x0c04
 #define PCI_DEVICE_ID_INTEL_IE31200_HB_7 0x0c08
+#define PCI_DEVICE_ID_INTEL_IE31200_HB_8 0x1918
 
 #define IE31200_DIMMS			4
 #define IE31200_RANKS			8
@@ -105,8 +107,11 @@
  *    1  Multiple Bit Error Status (MERRSTS)
  *    0  Correctable Error Status (CERRSTS)
  */
+
 #define IE31200_C0ECCERRLOG			0x40c8
 #define IE31200_C1ECCERRLOG			0x44c8
+#define IE31200_C0ECCERRLOG_SKL			0x4048
+#define IE31200_C1ECCERRLOG_SKL			0x4448
 #define IE31200_ECCERRLOG_CE			BIT(0)
 #define IE31200_ECCERRLOG_UE			BIT(1)
 #define IE31200_ECCERRLOG_RANK_BITS		GENMASK_ULL(28, 27)
@@ -123,17 +128,28 @@
 #define IE31200_CAPID0_DDPCD		BIT(6)
 #define IE31200_CAPID0_ECC		BIT(1)
 
-#define IE31200_MAD_DIMM_0_OFFSET	0x5004
-#define IE31200_MAD_DIMM_SIZE		GENMASK_ULL(7, 0)
-#define IE31200_MAD_DIMM_A_RANK		BIT(17)
-#define IE31200_MAD_DIMM_A_WIDTH	BIT(19)
-
-#define IE31200_PAGES(n)		(n << (28 - PAGE_SHIFT))
+#define IE31200_MAD_DIMM_0_OFFSET		0x5004
+#define IE31200_MAD_DIMM_0_OFFSET_SKL		0x500C
+#define IE31200_MAD_DIMM_SIZE			GENMASK_ULL(7, 0)
+#define IE31200_MAD_DIMM_A_RANK			BIT(17)
+#define IE31200_MAD_DIMM_A_RANK_SHIFT		17
+#define IE31200_MAD_DIMM_A_RANK_SKL		BIT(10)
+#define IE31200_MAD_DIMM_A_RANK_SKL_SHIFT	10
+#define IE31200_MAD_DIMM_A_WIDTH		BIT(19)
+#define IE31200_MAD_DIMM_A_WIDTH_SHIFT		19
+#define IE31200_MAD_DIMM_A_WIDTH_SKL		GENMASK_ULL(9, 8)
+#define IE31200_MAD_DIMM_A_WIDTH_SKL_SHIFT	8
+
+/* Skylake reports 1GB increments, everything else is 256MB */
+#define IE31200_PAGES(n, skl)	\
+	(n << (28 + (2 * skl) - PAGE_SHIFT))
 
 static int nr_channels;
 
 struct ie31200_priv {
 	void __iomem *window;
+	void __iomem *c0errlog;
+	void __iomem *c1errlog;
 };
 
 enum ie31200_chips {
@@ -157,9 +173,9 @@ static const struct ie31200_dev_info ie31200_devs[] = {
 };
 
 struct dimm_data {
-	u8 size; /* in 256MB multiples */
+	u8 size; /* in multiples of 256MB, except Skylake is 1GB */
 	u8 dual_rank : 1,
-	   x16_width : 1; /* 0 means x8 width */
+	   x16_width : 2; /* 0 means x8 width */
 };
 
 static int how_many_channels(struct pci_dev *pdev)
@@ -197,11 +213,10 @@ static bool ecc_capable(struct pci_dev *pdev)
 	return true;
 }
 
-static int eccerrlog_row(int channel, u64 log)
+static int eccerrlog_row(u64 log)
 {
-	int rank = ((log & IE31200_ECCERRLOG_RANK_BITS) >>
-		IE31200_ECCERRLOG_RANK_SHIFT);
-	return rank | (channel * IE31200_RANKS_PER_CHANNEL);
+	return ((log & IE31200_ECCERRLOG_RANK_BITS) >>
+				IE31200_ECCERRLOG_RANK_SHIFT);
 }
 
 static void ie31200_clear_error_info(struct mem_ctl_info *mci)
@@ -219,7 +234,6 @@ static void ie31200_get_and_clear_error_info(struct mem_ctl_info *mci,
 {
 	struct pci_dev *pdev;
 	struct ie31200_priv *priv = mci->pvt_info;
-	void __iomem *window = priv->window;
 
 	pdev = to_pci_dev(mci->pdev);
 
@@ -232,9 +246,9 @@ static void ie31200_get_and_clear_error_info(struct mem_ctl_info *mci,
 	if (!(info->errsts & IE31200_ERRSTS_BITS))
 		return;
 
-	info->eccerrlog[0] = lo_hi_readq(window + IE31200_C0ECCERRLOG);
+	info->eccerrlog[0] = lo_hi_readq(priv->c0errlog);
 	if (nr_channels == 2)
-		info->eccerrlog[1] = lo_hi_readq(window + IE31200_C1ECCERRLOG);
+		info->eccerrlog[1] = lo_hi_readq(priv->c1errlog);
 
 	pci_read_config_word(pdev, IE31200_ERRSTS, &info->errsts2);
 
@@ -245,10 +259,10 @@ static void ie31200_get_and_clear_error_info(struct mem_ctl_info *mci,
 	 * should be UE info.
 	 */
 	if ((info->errsts ^ info->errsts2) & IE31200_ERRSTS_BITS) {
-		info->eccerrlog[0] = lo_hi_readq(window + IE31200_C0ECCERRLOG);
+		info->eccerrlog[0] = lo_hi_readq(priv->c0errlog);
 		if (nr_channels == 2)
 			info->eccerrlog[1] =
-				lo_hi_readq(window + IE31200_C1ECCERRLOG);
+				lo_hi_readq(priv->c1errlog);
 	}
 
 	ie31200_clear_error_info(mci);
@@ -274,14 +288,14 @@ static void ie31200_process_error_info(struct mem_ctl_info *mci,
 		if (log & IE31200_ECCERRLOG_UE) {
 			edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
 					     0, 0, 0,
-					     eccerrlog_row(channel, log),
+					     eccerrlog_row(log),
 					     channel, -1,
 					     "ie31200 UE", "");
 		} else if (log & IE31200_ECCERRLOG_CE) {
 			edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
 					     0, 0,
 					     IE31200_ECCERRLOG_SYNDROME(log),
-					     eccerrlog_row(channel, log),
+					     eccerrlog_row(log),
 					     channel, -1,
 					     "ie31200 CE", "");
 		}
@@ -326,6 +340,33 @@ static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev)
 	return window;
 }
 
+static void __skl_populate_dimm_info(struct dimm_data *dd, u32 addr_decode,
+				     int chan)
+{
+	dd->size = (addr_decode >> (chan << 4)) & IE31200_MAD_DIMM_SIZE;
+	dd->dual_rank = (addr_decode & (IE31200_MAD_DIMM_A_RANK_SKL << (chan << 4))) ? 1 : 0;
+	dd->x16_width = ((addr_decode & (IE31200_MAD_DIMM_A_WIDTH_SKL << (chan << 4))) >>
+				(IE31200_MAD_DIMM_A_WIDTH_SKL_SHIFT + (chan << 4)));
+}
+
+static void __populate_dimm_info(struct dimm_data *dd, u32 addr_decode,
+				 int chan)
+{
+	dd->size = (addr_decode >> (chan << 3)) & IE31200_MAD_DIMM_SIZE;
+	dd->dual_rank = (addr_decode & (IE31200_MAD_DIMM_A_RANK << chan)) ? 1 : 0;
+	dd->x16_width = (addr_decode & (IE31200_MAD_DIMM_A_WIDTH << chan)) ? 1 : 0;
+}
+
+static void populate_dimm_info(struct dimm_data *dd, u32 addr_decode, int chan,
+			       bool skl)
+{
+	if (skl)
+		__skl_populate_dimm_info(dd, addr_decode, chan);
+	else
+		__populate_dimm_info(dd, addr_decode, chan);
+}
+
+
 static int ie31200_probe1(struct pci_dev *pdev, int dev_idx)
 {
 	int i, j, ret;
@@ -334,7 +375,8 @@ static int ie31200_probe1(struct pci_dev *pdev, int dev_idx)
 	struct dimm_data dimm_info[IE31200_CHANNELS][IE31200_DIMMS_PER_CHANNEL];
 	void __iomem *window;
 	struct ie31200_priv *priv;
-	u32 addr_decode;
+	u32 addr_decode, mad_offset;
+	bool skl = (pdev->device == PCI_DEVICE_ID_INTEL_IE31200_HB_8);
 
 	edac_dbg(0, "MC:\n");
 
@@ -363,7 +405,10 @@ static int ie31200_probe1(struct pci_dev *pdev, int dev_idx)
 
 	edac_dbg(3, "MC: init mci\n");
 	mci->pdev = &pdev->dev;
-	mci->mtype_cap = MEM_FLAG_DDR3;
+	if (skl)
+		mci->mtype_cap = MEM_FLAG_DDR4;
+	else
+		mci->mtype_cap = MEM_FLAG_DDR3;
 	mci->edac_ctl_cap = EDAC_FLAG_SECDED;
 	mci->edac_cap = EDAC_FLAG_SECDED;
 	mci->mod_name = EDAC_MOD_STR;
@@ -374,19 +419,24 @@ static int ie31200_probe1(struct pci_dev *pdev, int dev_idx)
 	mci->ctl_page_to_phys = NULL;
 	priv = mci->pvt_info;
 	priv->window = window;
+	if (skl) {
+		priv->c0errlog = window + IE31200_C0ECCERRLOG_SKL;
+		priv->c1errlog = window + IE31200_C1ECCERRLOG_SKL;
+		mad_offset = IE31200_MAD_DIMM_0_OFFSET_SKL;
+	} else {
+		priv->c0errlog = window + IE31200_C0ECCERRLOG;
+		priv->c1errlog = window + IE31200_C1ECCERRLOG;
+		mad_offset = IE31200_MAD_DIMM_0_OFFSET;
+	}
 
 	/* populate DIMM info */
 	for (i = 0; i < IE31200_CHANNELS; i++) {
-		addr_decode = readl(window + IE31200_MAD_DIMM_0_OFFSET +
+		addr_decode = readl(window + mad_offset +
 					(i * 4));
 		edac_dbg(0, "addr_decode: 0x%x\n", addr_decode);
 		for (j = 0; j < IE31200_DIMMS_PER_CHANNEL; j++) {
-			dimm_info[i][j].size = (addr_decode >> (j * 8)) &
-						IE31200_MAD_DIMM_SIZE;
-			dimm_info[i][j].dual_rank = (addr_decode &
-				(IE31200_MAD_DIMM_A_RANK << j)) ? 1 : 0;
-			dimm_info[i][j].x16_width = (addr_decode &
-				(IE31200_MAD_DIMM_A_WIDTH << j)) ? 1 : 0;
+			populate_dimm_info(&dimm_info[i][j], addr_decode, j,
+					   skl);
 			edac_dbg(0, "size: 0x%x, rank: %d, width: %d\n",
 				 dimm_info[i][j].size,
 				 dimm_info[i][j].dual_rank,
@@ -405,7 +455,7 @@ static int ie31200_probe1(struct pci_dev *pdev, int dev_idx)
 			struct dimm_info *dimm;
 			unsigned long nr_pages;
 
-			nr_pages = IE31200_PAGES(dimm_info[j][i].size);
+			nr_pages = IE31200_PAGES(dimm_info[j][i].size, skl);
 			if (nr_pages == 0)
 				continue;
 
@@ -417,7 +467,10 @@ static int ie31200_probe1(struct pci_dev *pdev, int dev_idx)
 				dimm->nr_pages = nr_pages;
 				edac_dbg(0, "set nr pages: 0x%lx\n", nr_pages);
 				dimm->grain = 8; /* just a guess */
-				dimm->mtype = MEM_DDR3;
+				if (skl)
+					dimm->mtype = MEM_DDR4;
+				else
+					dimm->mtype = MEM_DDR3;
 				dimm->dtype = DEV_UNKNOWN;
 				dimm->edac_mode = EDAC_UNKNOWN;
 			}
@@ -426,7 +479,10 @@ static int ie31200_probe1(struct pci_dev *pdev, int dev_idx)
 			dimm->nr_pages = nr_pages;
 			edac_dbg(0, "set nr pages: 0x%lx\n", nr_pages);
 			dimm->grain = 8; /* same guess */
-			dimm->mtype = MEM_DDR3;
+			if (skl)
+				dimm->mtype = MEM_DDR4;
+			else
+				dimm->mtype = MEM_DDR3;
 			dimm->dtype = DEV_UNKNOWN;
 			dimm->edac_mode = EDAC_UNKNOWN;
 		}
@@ -500,6 +556,9 @@ static const struct pci_device_id ie31200_pci_tbl[] = {
 	{
 		PCI_VEND_DEV(INTEL, IE31200_HB_7), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
 		IE31200},
+	{
+		PCI_VEND_DEV(INTEL, IE31200_HB_8), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+		IE31200},
 	{
 		0,
 	}            /* 0 terminated list. */

+ 39 - 183
drivers/edac/sb_edac.c

@@ -21,6 +21,8 @@
 #include <linux/smp.h>
 #include <linux/bitmap.h>
 #include <linux/math64.h>
+#include <linux/mod_devicetable.h>
+#include <asm/cpu_device_id.h>
 #include <asm/processor.h>
 #include <asm/mce.h>
 
@@ -28,8 +30,6 @@
 
 /* Static vars */
 static LIST_HEAD(sbridge_edac_list);
-static DEFINE_MUTEX(sbridge_edac_lock);
-static int probed;
 
 /*
  * Alter this version for the module when modifications are made
@@ -364,16 +364,6 @@ struct sbridge_pvt {
 	bool			is_mirrored, is_lockstep, is_close_pg;
 	bool			is_chan_hash;
 
-	/* Fifo double buffers */
-	struct mce		mce_entry[MCE_LOG_LEN];
-	struct mce		mce_outentry[MCE_LOG_LEN];
-
-	/* Fifo in/out counters */
-	unsigned		mce_in, mce_out;
-
-	/* Count indicator to show errors not got */
-	unsigned		mce_overrun;
-
 	/* Memory description */
 	u64			tolm, tohm;
 	struct knl_pvt knl;
@@ -662,18 +652,6 @@ static const struct pci_id_table pci_dev_descr_broadwell_table[] = {
 	{0,}			/* 0 terminated list. */
 };
 
-/*
- *	pci_device_id	table for which devices we are looking for
- */
-static const struct pci_device_id sbridge_pci_tbl[] = {
-	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_HA0)},
-	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA)},
-	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0)},
-	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0)},
-	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KNL_IMC_SAD0)},
-	{0,}			/* 0 terminated list. */
-};
-
 
 /****************************************************************************
 			Ancillary status routines
@@ -3097,63 +3075,8 @@ err_parsing:
 }
 
 /*
- *	sbridge_check_error	Retrieve and process errors reported by the
- *				hardware. Called by the Core module.
- */
-static void sbridge_check_error(struct mem_ctl_info *mci)
-{
-	struct sbridge_pvt *pvt = mci->pvt_info;
-	int i;
-	unsigned count = 0;
-	struct mce *m;
-
-	/*
-	 * MCE first step: Copy all mce errors into a temporary buffer
-	 * We use a double buffering here, to reduce the risk of
-	 * loosing an error.
-	 */
-	smp_rmb();
-	count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in)
-		% MCE_LOG_LEN;
-	if (!count)
-		return;
-
-	m = pvt->mce_outentry;
-	if (pvt->mce_in + count > MCE_LOG_LEN) {
-		unsigned l = MCE_LOG_LEN - pvt->mce_in;
-
-		memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l);
-		smp_wmb();
-		pvt->mce_in = 0;
-		count -= l;
-		m += l;
-	}
-	memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count);
-	smp_wmb();
-	pvt->mce_in += count;
-
-	smp_rmb();
-	if (pvt->mce_overrun) {
-		sbridge_printk(KERN_ERR, "Lost %d memory errors\n",
-			      pvt->mce_overrun);
-		smp_wmb();
-		pvt->mce_overrun = 0;
-	}
-
-	/*
-	 * MCE second step: parse errors and display
-	 */
-	for (i = 0; i < count; i++)
-		sbridge_mce_output_error(mci, &pvt->mce_outentry[i]);
-}
-
-/*
- * sbridge_mce_check_error	Replicates mcelog routine to get errors
- *				This routine simply queues mcelog errors, and
- *				return. The error itself should be handled later
- *				by sbridge_check_error.
- * WARNING: As this routine should be called at NMI time, extra care should
- * be taken to avoid deadlocks, and to be as fast as possible.
+ * Check that logging is enabled and that this is the right type
+ * of error for us to handle.
  */
 static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val,
 				   void *data)
@@ -3198,21 +3121,7 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val,
 			  "%u APIC %x\n", mce->cpuvendor, mce->cpuid,
 			  mce->time, mce->socketid, mce->apicid);
 
-	smp_rmb();
-	if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
-		smp_wmb();
-		pvt->mce_overrun++;
-		return NOTIFY_DONE;
-	}
-
-	/* Copy memory error at the ringbuffer */
-	memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce));
-	smp_wmb();
-	pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN;
-
-	/* Handle fatal errors immediately */
-	if (mce->mcgstatus & 1)
-		sbridge_check_error(mci);
+	sbridge_mce_output_error(mci, mce);
 
 	/* Advice mcelog that the error were handled */
 	return NOTIFY_STOP;
@@ -3298,9 +3207,6 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type)
 	mci->dev_name = pci_name(pdev);
 	mci->ctl_page_to_phys = NULL;
 
-	/* Set the function pointer to an actual operation function */
-	mci->edac_check = sbridge_check_error;
-
 	pvt->info.type = type;
 	switch (type) {
 	case IVY_BRIDGE:
@@ -3448,62 +3354,40 @@ fail0:
 	return rc;
 }
 
+#define ICPU(model, table) \
+	{ X86_VENDOR_INTEL, 6, model, 0, (unsigned long)&table }
+
+/* Order here must match "enum type" */
+static const struct x86_cpu_id sbridge_cpuids[] = {
+	ICPU(0x2d, pci_dev_descr_sbridge_table),	/* SANDY_BRIDGE */
+	ICPU(0x3e, pci_dev_descr_ibridge_table),	/* IVY_BRIDGE */
+	ICPU(0x3f, pci_dev_descr_haswell_table),	/* HASWELL */
+	ICPU(0x4f, pci_dev_descr_broadwell_table),	/* BROADWELL */
+	ICPU(0x57, pci_dev_descr_knl_table),		/* KNIGHTS_LANDING */
+	{ }
+};
+MODULE_DEVICE_TABLE(x86cpu, sbridge_cpuids);
+
 /*
- *	sbridge_probe	Probe for ONE instance of device to see if it is
+ *	sbridge_probe	Get all devices and register memory controllers
  *			present.
  *	return:
  *		0 for FOUND a device
  *		< 0 for error code
  */
 
-static int sbridge_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+static int sbridge_probe(const struct x86_cpu_id *id)
 {
 	int rc = -ENODEV;
 	u8 mc, num_mc = 0;
 	struct sbridge_dev *sbridge_dev;
-	enum type type = SANDY_BRIDGE;
+	struct pci_id_table *ptable = (struct pci_id_table *)id->driver_data;
 
 	/* get the pci devices we want to reserve for our use */
-	mutex_lock(&sbridge_edac_lock);
-
-	/*
-	 * All memory controllers are allocated at the first pass.
-	 */
-	if (unlikely(probed >= 1)) {
-		mutex_unlock(&sbridge_edac_lock);
-		return -ENODEV;
-	}
-	probed++;
+	rc = sbridge_get_all_devices(&num_mc, ptable);
 
-	switch (pdev->device) {
-	case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA:
-		rc = sbridge_get_all_devices(&num_mc,
-					pci_dev_descr_ibridge_table);
-		type = IVY_BRIDGE;
-		break;
-	case PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_HA0:
-		rc = sbridge_get_all_devices(&num_mc,
-					pci_dev_descr_sbridge_table);
-		type = SANDY_BRIDGE;
-		break;
-	case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0:
-		rc = sbridge_get_all_devices(&num_mc,
-					pci_dev_descr_haswell_table);
-		type = HASWELL;
-		break;
-	case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0:
-		rc = sbridge_get_all_devices(&num_mc,
-					pci_dev_descr_broadwell_table);
-		type = BROADWELL;
-	    break;
-	case PCI_DEVICE_ID_INTEL_KNL_IMC_SAD0:
-		rc = sbridge_get_all_devices_knl(&num_mc,
-					pci_dev_descr_knl_table);
-		type = KNIGHTS_LANDING;
-		break;
-	}
 	if (unlikely(rc < 0)) {
-		edac_dbg(0, "couldn't get all devices for 0x%x\n", pdev->device);
+		edac_dbg(0, "couldn't get all devices\n");
 		goto fail0;
 	}
 
@@ -3514,14 +3398,13 @@ static int sbridge_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 			 mc, mc + 1, num_mc);
 
 		sbridge_dev->mc = mc++;
-		rc = sbridge_register_mci(sbridge_dev, type);
+		rc = sbridge_register_mci(sbridge_dev, id - sbridge_cpuids);
 		if (unlikely(rc < 0))
 			goto fail1;
 	}
 
 	sbridge_printk(KERN_INFO, "%s\n", SBRIDGE_REVISION);
 
-	mutex_unlock(&sbridge_edac_lock);
 	return 0;
 
 fail1:
@@ -3530,74 +3413,47 @@ fail1:
 
 	sbridge_put_all_devices();
 fail0:
-	mutex_unlock(&sbridge_edac_lock);
 	return rc;
 }
 
 /*
- *	sbridge_remove	destructor for one instance of device
+ *	sbridge_remove	cleanup
  *
  */
-static void sbridge_remove(struct pci_dev *pdev)
+static void sbridge_remove(void)
 {
 	struct sbridge_dev *sbridge_dev;
 
 	edac_dbg(0, "\n");
 
-	/*
-	 * we have a trouble here: pdev value for removal will be wrong, since
-	 * it will point to the X58 register used to detect that the machine
-	 * is a Nehalem or upper design. However, due to the way several PCI
-	 * devices are grouped together to provide MC functionality, we need
-	 * to use a different method for releasing the devices
-	 */
-
-	mutex_lock(&sbridge_edac_lock);
-
-	if (unlikely(!probed)) {
-		mutex_unlock(&sbridge_edac_lock);
-		return;
-	}
-
 	list_for_each_entry(sbridge_dev, &sbridge_edac_list, list)
 		sbridge_unregister_mci(sbridge_dev);
 
 	/* Release PCI resources */
 	sbridge_put_all_devices();
-
-	probed--;
-
-	mutex_unlock(&sbridge_edac_lock);
 }
 
-MODULE_DEVICE_TABLE(pci, sbridge_pci_tbl);
-
-/*
- *	sbridge_driver	pci_driver structure for this module
- *
- */
-static struct pci_driver sbridge_driver = {
-	.name     = "sbridge_edac",
-	.probe    = sbridge_probe,
-	.remove   = sbridge_remove,
-	.id_table = sbridge_pci_tbl,
-};
-
 /*
  *	sbridge_init		Module entry function
  *			Try to initialize this module for its devices
  */
 static int __init sbridge_init(void)
 {
-	int pci_rc;
+	const struct x86_cpu_id *id;
+	int rc;
 
 	edac_dbg(2, "\n");
 
+	id = x86_match_cpu(sbridge_cpuids);
+	if (!id)
+		return -ENODEV;
+
 	/* Ensure that the OPSTATE is set correctly for POLL or NMI */
 	opstate_init();
 
-	pci_rc = pci_register_driver(&sbridge_driver);
-	if (pci_rc >= 0) {
+	rc = sbridge_probe(id);
+
+	if (rc >= 0) {
 		mce_register_decode_chain(&sbridge_mce_dec);
 		if (get_edac_report_status() == EDAC_REPORTING_DISABLED)
 			sbridge_printk(KERN_WARNING, "Loading driver, error reporting disabled.\n");
@@ -3605,9 +3461,9 @@ static int __init sbridge_init(void)
 	}
 
 	sbridge_printk(KERN_ERR, "Failed to register device with error %d.\n",
-		      pci_rc);
+		      rc);
 
-	return pci_rc;
+	return rc;
 }
 
 /*
@@ -3617,7 +3473,7 @@ static int __init sbridge_init(void)
 static void __exit sbridge_exit(void)
 {
 	edac_dbg(2, "\n");
-	pci_unregister_driver(&sbridge_driver);
+	sbridge_remove();
 	mce_unregister_decode_chain(&sbridge_mce_dec);
 }