浏览代码

Merge branch 'pci/aer' into next

* pci/aer:
  PCI/AER: Fix aer_probe() kernel-doc comment
  PCI/AER: Cache capability position
  PCI/AER: Avoid memory allocation in interrupt handling path
  ACPI / APEI: Send correct severity to calculate AER severity
  PCI/AER: Remove duplicate AER severity translation
  PCI/AER: Remove aerdriver.forceload kernel parameter
  PCI/AER: Remove aerdriver.nosourceid kernel parameter
  x86/PCI: VMD: Add quirk for AER to ignore source ID
  PCI/AER: Add bus flag to skip source ID matching

Conflicts:
	drivers/pci/probe.c
Bjorn Helgaas 8 年之前
父节点
当前提交
4dc2db096a

+ 9 - 17
Documentation/PCI/pcieaer-howto.txt

@@ -49,25 +49,17 @@ depends on CONFIG_PCIEPORTBUS, so pls. set CONFIG_PCIEPORTBUS=y and
 CONFIG_PCIEAER = y.
 
 2.2 Load PCI Express AER Root Driver
-There is a case where a system has AER support in BIOS. Enabling the AER
-Root driver and having AER support in BIOS may result unpredictable
-behavior. To avoid this conflict, a successful load of the AER Root driver
-requires ACPI _OSC support in the BIOS to allow the AER Root driver to
-request for native control of AER. See the PCI FW 3.0 Specification for
-details regarding OSC usage. Currently, lots of firmwares don't provide
-_OSC support while they use PCI Express. To support such firmwares,
-forceload, a parameter of type bool, could enable AER to continue to
-be initiated although firmwares have no _OSC support. To enable the
-walkaround, pls. add aerdriver.forceload=y to kernel boot parameter line
-when booting kernel. Note that forceload=n by default.
-
-nosourceid, another parameter of type bool, can be used when broken
-hardware (mostly chipsets) has root ports that cannot obtain the reporting
-source ID. nosourceid=n by default.
+
+Some systems have AER support in firmware. Enabling Linux AER support at
+the same time the firmware handles AER may result in unpredictable
+behavior. Therefore, Linux does not handle AER events unless the firmware
+grants AER control to the OS via the ACPI _OSC method. See the PCI FW 3.0
+Specification for details regarding _OSC usage.
 
 2.3 AER error output
-When a PCI-E AER error is captured, an error message will be outputted to
-console. If it's a correctable error, it is outputted as a warning.
+
+When a PCIe AER error is captured, an error message will be output to
+console. If it's a correctable error, it is output as a warning.
 Otherwise, it is printed as an error. So users could choose different
 log level to filter out correctable error messages.
 

+ 1 - 1
drivers/acpi/apei/ghes.c

@@ -457,7 +457,7 @@ static void ghes_do_proc(struct ghes *ghes,
 
 				devfn = PCI_DEVFN(pcie_err->device_id.device,
 						  pcie_err->device_id.function);
-				aer_severity = cper_severity_to_aer(sev);
+				aer_severity = cper_severity_to_aer(gdata->error_severity);
 
 				/*
 				 * If firmware reset the component to contain

+ 6 - 12
drivers/pci/pcie/aer/aerdrv.c

@@ -66,7 +66,7 @@ static int pcie_aer_disable;
 
 void pci_no_aer(void)
 {
-	pcie_aer_disable = 1;	/* has priority over 'forceload' */
+	pcie_aer_disable = 1;
 }
 
 bool pci_aer_available(void)
@@ -130,7 +130,7 @@ static void aer_enable_rootport(struct aer_rpc *rpc)
 	pcie_capability_clear_word(pdev, PCI_EXP_RTCTL,
 				   SYSTEM_ERROR_INTR_ON_MESG_MASK);
 
-	aer_pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ERR);
+	aer_pos = pdev->aer_cap;
 	/* Clear error status */
 	pci_read_config_dword(pdev, aer_pos + PCI_ERR_ROOT_STATUS, &reg32);
 	pci_write_config_dword(pdev, aer_pos + PCI_ERR_ROOT_STATUS, reg32);
@@ -169,7 +169,7 @@ static void aer_disable_rootport(struct aer_rpc *rpc)
 	 */
 	set_downstream_devices_error_reporting(pdev, false);
 
-	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ERR);
+	pos = pdev->aer_cap;
 	/* Disable Root's interrupt in response to error messages */
 	pci_read_config_dword(pdev, pos + PCI_ERR_ROOT_COMMAND, &reg32);
 	reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
@@ -196,7 +196,7 @@ irqreturn_t aer_irq(int irq, void *context)
 	unsigned long flags;
 	int pos;
 
-	pos = pci_find_ext_capability(pdev->port, PCI_EXT_CAP_ID_ERR);
+	pos = pdev->port->aer_cap;
 	/*
 	 * Must lock access to Root Error Status Reg, Root Error ID Reg,
 	 * and Root error producer/consumer index
@@ -290,7 +290,6 @@ static void aer_remove(struct pcie_device *dev)
 /**
  * aer_probe - initialize resources
  * @dev: pointer to the pcie_dev data structure
- * @id: pointer to the service id data structure
  *
  * Invoked when PCI Express bus loads AER service driver.
  */
@@ -300,11 +299,6 @@ static int aer_probe(struct pcie_device *dev)
 	struct aer_rpc *rpc;
 	struct device *device = &dev->device;
 
-	/* Init */
-	status = aer_init(dev);
-	if (status)
-		return status;
-
 	/* Alloc rpc data structure */
 	rpc = aer_alloc_rpc(dev);
 	if (!rpc) {
@@ -339,7 +333,7 @@ static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
 	u32 reg32;
 	int pos;
 
-	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
+	pos = dev->aer_cap;
 
 	/* Disable Root's interrupt in response to error messages */
 	pci_read_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, &reg32);
@@ -392,7 +386,7 @@ static void aer_error_resume(struct pci_dev *dev)
 	pcie_capability_write_word(dev, PCI_EXP_DEVSTA, reg16);
 
 	/* Clean AER Root Error Status */
-	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
+	pos = dev->aer_cap;
 	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
 	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &mask);
 	if (dev->error_state == pci_channel_io_normal)

+ 1 - 8
drivers/pci/pcie/aer/aerdrv.h

@@ -60,6 +60,7 @@ struct aer_rpc {
 	struct pcie_device *rpd;	/* Root Port device */
 	struct work_struct dpc_handler;
 	struct aer_err_source e_sources[AER_ERROR_SOURCES_MAX];
+	struct aer_err_info e_info;
 	unsigned short prod_idx;	/* Error Producer Index */
 	unsigned short cons_idx;	/* Error Consumer Index */
 	int isr;
@@ -105,7 +106,6 @@ static inline pci_ers_result_t merge_result(enum pci_ers_result orig,
 }
 
 extern struct bus_type pcie_port_bus_type;
-int aer_init(struct pcie_device *dev);
 void aer_isr(struct work_struct *work);
 void aer_print_error(struct pci_dev *dev, struct aer_err_info *info);
 void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info);
@@ -121,11 +121,4 @@ static inline int pcie_aer_get_firmware_first(struct pci_dev *pci_dev)
 	return 0;
 }
 #endif
-
-static inline void pcie_aer_force_firmware_first(struct pci_dev *pci_dev,
-						 int enable)
-{
-	pci_dev->__aer_firmware_first = !!enable;
-	pci_dev->__aer_firmware_first_valid = 1;
-}
 #endif /* _AERDRV_H_ */

+ 19 - 42
drivers/pci/pcie/aer/aerdrv_core.c

@@ -27,11 +27,6 @@
 #include <linux/kfifo.h>
 #include "aerdrv.h"
 
-static bool forceload;
-static bool nosourceid;
-module_param(forceload, bool, 0);
-module_param(nosourceid, bool, 0);
-
 #define	PCI_EXP_AER_FLAGS	(PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE | \
 				 PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE)
 
@@ -40,7 +35,7 @@ int pci_enable_pcie_error_reporting(struct pci_dev *dev)
 	if (pcie_aer_get_firmware_first(dev))
 		return -EIO;
 
-	if (!pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR))
+	if (!dev->aer_cap)
 		return -EIO;
 
 	return pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_AER_FLAGS);
@@ -62,7 +57,7 @@ int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev)
 	int pos;
 	u32 status;
 
-	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
+	pos = dev->aer_cap;
 	if (!pos)
 		return -EIO;
 
@@ -83,7 +78,7 @@ int pci_cleanup_aer_error_status_regs(struct pci_dev *dev)
 	if (!pci_is_pcie(dev))
 		return -ENODEV;
 
-	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
+	pos = dev->aer_cap;
 	if (!pos)
 		return -EIO;
 
@@ -102,6 +97,12 @@ int pci_cleanup_aer_error_status_regs(struct pci_dev *dev)
 	return 0;
 }
 
+int pci_aer_init(struct pci_dev *dev)
+{
+	dev->aer_cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
+	return pci_cleanup_aer_error_status_regs(dev);
+}
+
 /**
  * add_error_device - list device to be handled
  * @e_info: pointer to error info
@@ -132,7 +133,8 @@ static bool is_error_source(struct pci_dev *dev, struct aer_err_info *e_info)
 	 * When bus id is equal to 0, it might be a bad id
 	 * reported by root port.
 	 */
-	if (!nosourceid && (PCI_BUS_NUM(e_info->id) != 0)) {
+	if ((PCI_BUS_NUM(e_info->id) != 0) &&
+	    !(dev->bus->bus_flags & PCI_BUS_FLAGS_NO_AERSID)) {
 		/* Device ID match? */
 		if (e_info->id == ((dev->bus->number << 8) | dev->devfn))
 			return true;
@@ -144,10 +146,10 @@ static bool is_error_source(struct pci_dev *dev, struct aer_err_info *e_info)
 
 	/*
 	 * When either
-	 *      1) nosourceid==y;
-	 *      2) bus id is equal to 0. Some ports might lose the bus
+	 *      1) bus id is equal to 0. Some ports might lose the bus
 	 *              id of error source id;
-	 *      3) There are multiple errors and prior id comparing fails;
+	 *      2) bus flag PCI_BUS_FLAGS_NO_AERSID is set
+	 *      3) There are multiple errors and prior ID comparing fails;
 	 * We check AER status registers to find possible reporter.
 	 */
 	if (atomic_read(&dev->enable_cnt) == 0)
@@ -158,7 +160,7 @@ static bool is_error_source(struct pci_dev *dev, struct aer_err_info *e_info)
 	if (!(reg16 & PCI_EXP_AER_FLAGS))
 		return false;
 
-	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
+	pos = dev->aer_cap;
 	if (!pos)
 		return false;
 
@@ -555,7 +557,7 @@ static void handle_error_source(struct pcie_device *aerdev,
 		 * Correctable error does not need software intervention.
 		 * No need to go through error recovery process.
 		 */
-		pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
+		pos = dev->aer_cap;
 		if (pos)
 			pci_write_config_dword(dev, pos + PCI_ERR_COR_STATUS,
 					info->status);
@@ -647,7 +649,7 @@ static int get_device_error_info(struct pci_dev *dev, struct aer_err_info *info)
 	info->status = 0;
 	info->tlp_header_valid = 0;
 
-	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
+	pos = dev->aer_cap;
 
 	/* The device might not support AER */
 	if (!pos)
@@ -715,15 +717,8 @@ static inline void aer_process_err_devices(struct pcie_device *p_device,
 static void aer_isr_one_error(struct pcie_device *p_device,
 		struct aer_err_source *e_src)
 {
-	struct aer_err_info *e_info;
-
-	/* struct aer_err_info might be big, so we allocate it with slab */
-	e_info = kmalloc(sizeof(struct aer_err_info), GFP_KERNEL);
-	if (!e_info) {
-		dev_printk(KERN_DEBUG, &p_device->port->dev,
-			"Can't allocate mem when processing AER errors\n");
-		return;
-	}
+	struct aer_rpc *rpc = get_service_data(p_device);
+	struct aer_err_info *e_info = &rpc->e_info;
 
 	/*
 	 * There is a possibility that both correctable error and
@@ -762,8 +757,6 @@ static void aer_isr_one_error(struct pcie_device *p_device,
 		if (find_source_device(p_device->port, e_info))
 			aer_process_err_devices(p_device, e_info);
 	}
-
-	kfree(e_info);
 }
 
 /**
@@ -812,19 +805,3 @@ void aer_isr(struct work_struct *work)
 		aer_isr_one_error(p_device, &e_src);
 	mutex_unlock(&rpc->rpc_mutex);
 }
-
-/**
- * aer_init - provide AER initialization
- * @dev: pointer to AER pcie device
- *
- * Invoked when AER service driver is loaded.
- */
-int aer_init(struct pcie_device *dev)
-{
-	if (forceload) {
-		dev_printk(KERN_DEBUG, &dev->device,
-			   "aerdrv forceload requested.\n");
-		pcie_aer_force_firmware_first(dev->port, 0);
-	}
-	return 0;
-}

+ 2 - 4
drivers/pci/pcie/aer/aerdrv_errprint.c

@@ -219,15 +219,13 @@ int cper_severity_to_aer(int cper_severity)
 }
 EXPORT_SYMBOL_GPL(cper_severity_to_aer);
 
-void cper_print_aer(struct pci_dev *dev, int cper_severity,
+void cper_print_aer(struct pci_dev *dev, int aer_severity,
 		    struct aer_capability_regs *aer)
 {
-	int aer_severity, layer, agent, status_strs_size, tlp_header_valid = 0;
+	int layer, agent, status_strs_size, tlp_header_valid = 0;
 	u32 status, mask;
 	const char **status_strs;
 
-	aer_severity = cper_severity_to_aer(cper_severity);
-
 	if (aer_severity == AER_CORRECTABLE) {
 		status = aer->cor_status;
 		mask = aer->cor_mask;

+ 3 - 2
drivers/pci/probe.c

@@ -1666,10 +1666,11 @@ static void pci_init_capabilities(struct pci_dev *dev)
 	/* Enable ACS P2P upstream forwarding */
 	pci_enable_acs(dev);
 
-	pci_cleanup_aer_error_status_regs(dev);
-
 	/* Precision Time Measurement */
 	pci_ptm_init(dev);
+
+	/* Advanced Error Reporting */
+	pci_aer_init(dev);
 }
 
 /*

+ 17 - 0
drivers/pci/quirks.c

@@ -4428,3 +4428,20 @@ static void quirk_intel_qat_vf_cap(struct pci_dev *pdev)
 	}
 }
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x443, quirk_intel_qat_vf_cap);
+
+/*
+ * VMD-enabled root ports will change the source ID for all messages
+ * to the VMD device. Rather than doing device matching with the source
+ * ID, the AER driver should traverse the child device tree, reading
+ * AER registers to find the faulting device.
+ */
+static void quirk_no_aersid(struct pci_dev *pdev)
+{
+	/* VMD Domain */
+	if (pdev->bus->sysdata && pci_domain_nr(pdev->bus) >= 0x10000)
+		pdev->bus->bus_flags |= PCI_BUS_FLAGS_NO_AERSID;
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2030, quirk_no_aersid);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2031, quirk_no_aersid);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2032, quirk_no_aersid);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2033, quirk_no_aersid);

+ 1 - 1
include/linux/aer.h

@@ -63,7 +63,7 @@ static inline int pci_cleanup_aer_error_status_regs(struct pci_dev *dev)
 }
 #endif
 
-void cper_print_aer(struct pci_dev *dev, int cper_severity,
+void cper_print_aer(struct pci_dev *dev, int aer_severity,
 		    struct aer_capability_regs *aer);
 int cper_severity_to_aer(int cper_severity);
 void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn,

+ 8 - 2
include/linux/pci.h

@@ -187,8 +187,9 @@ enum pci_irq_reroute_variant {
 
 typedef unsigned short __bitwise pci_bus_flags_t;
 enum pci_bus_flags {
-	PCI_BUS_FLAGS_NO_MSI   = (__force pci_bus_flags_t) 1,
-	PCI_BUS_FLAGS_NO_MMRBC = (__force pci_bus_flags_t) 2,
+	PCI_BUS_FLAGS_NO_MSI	= (__force pci_bus_flags_t) 1,
+	PCI_BUS_FLAGS_NO_MMRBC	= (__force pci_bus_flags_t) 2,
+	PCI_BUS_FLAGS_NO_AERSID	= (__force pci_bus_flags_t) 4,
 };
 
 /* These values come from the PCI Express Spec */
@@ -268,6 +269,9 @@ struct pci_dev {
 	unsigned int	class;		/* 3 bytes: (base,sub,prog-if) */
 	u8		revision;	/* PCI revision, low byte of class word */
 	u8		hdr_type;	/* PCI header type (`multi' flag masked out) */
+#ifdef CONFIG_PCIEAER
+	u16		aer_cap;	/* AER capability offset */
+#endif
 	u8		pcie_cap;	/* PCIe capability offset */
 	u8		msi_cap;	/* MSI capability offset */
 	u8		msix_cap;	/* MSI-X capability offset */
@@ -1374,9 +1378,11 @@ static inline bool pcie_aspm_support_enabled(void) { return false; }
 #ifdef CONFIG_PCIEAER
 void pci_no_aer(void);
 bool pci_aer_available(void);
+int pci_aer_init(struct pci_dev *dev);
 #else
 static inline void pci_no_aer(void) { }
 static inline bool pci_aer_available(void) { return false; }
+static inline int pci_aer_init(struct pci_dev *d) { return -ENODEV; }
 #endif
 
 #ifdef CONFIG_PCIE_ECRC