  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Implement the AER root port service driver. The driver registers an IRQ
  4. * handler. When a root port triggers an AER interrupt, the IRQ handler
  5. * collects root port status and schedules work.
  6. *
  7. * Copyright (C) 2006 Intel Corp.
  8. * Tom Long Nguyen (tom.l.nguyen@intel.com)
  9. * Zhang Yanmin (yanmin.zhang@intel.com)
  10. *
  11. * (C) Copyright 2009 Hewlett-Packard Development Company, L.P.
  12. * Andrew Patterson <andrew.patterson@hp.com>
  13. */
  14. #include <linux/cper.h>
  15. #include <linux/pci.h>
  16. #include <linux/pci-acpi.h>
  17. #include <linux/sched.h>
  18. #include <linux/kernel.h>
  19. #include <linux/errno.h>
  20. #include <linux/pm.h>
  21. #include <linux/init.h>
  22. #include <linux/interrupt.h>
  23. #include <linux/delay.h>
  24. #include <linux/kfifo.h>
  25. #include <linux/slab.h>
  26. #include <acpi/apei.h>
  27. #include <ras/ras_event.h>
  28. #include "../pci.h"
  29. #include "portdrv.h"
  30. #define AER_ERROR_SOURCES_MAX 100
  31. #define AER_MAX_MULTI_ERR_DEVICES 5 /* Not likely to have more */
/*
 * Decoded information for one reported error.  A single instance (inside
 * struct aer_rpc) is reused for every matched source device.
 */
struct aer_err_info {
	struct pci_dev *dev[AER_MAX_MULTI_ERR_DEVICES];	/* matched source devices */
	int error_dev_num;		/* number of valid entries in dev[] */

	unsigned int id:16;		/* requester ID captured by the Root Port */

	unsigned int severity:2;	/* 0:NONFATAL | 1:FATAL | 2:COR */
	unsigned int __pad1:5;
	unsigned int multi_error_valid:1;	/* more than one error reported */

	unsigned int first_error:5;	/* First Error Pointer (bit index) */
	unsigned int __pad2:2;
	unsigned int tlp_header_valid:1;	/* tlp below holds a captured header */

	unsigned int status;		/* COR/UNCOR Error Status */
	unsigned int mask;		/* COR/UNCOR Error Mask */
	struct aer_header_log_regs tlp;	/* TLP Header */
};
/*
 * One queued error event.  Presumably a snapshot of the Root Port's Error
 * Status and Error Source ID registers — the producer side is not visible
 * in this chunk; confirm against the IRQ handler.
 */
struct aer_err_source {
	unsigned int status;
	unsigned int id;
};
/* Per-Root-Port AER service state: error event ring plus recovery context */
struct aer_rpc {
	struct pci_dev *rpd;		/* Root Port device */
	struct work_struct dpc_handler;	/* deferred error-handling work */
	struct aer_err_source e_sources[AER_ERROR_SOURCES_MAX];	/* event ring */
	struct aer_err_info e_info;	/* scratch decode area, reused per device */
	unsigned short prod_idx;	/* Error Producer Index */
	unsigned short cons_idx;	/* Error Consumer Index */
	int isr;
	spinlock_t e_lock;		/*
					 * Lock access to Error Status/ID Regs
					 * and error producer/consumer index
					 */
	struct mutex rpc_mutex;		/*
					 * only one thread could do
					 * recovery on the same
					 * root port hierarchy
					 */
};
  68. #define AER_LOG_TLP_MASKS (PCI_ERR_UNC_POISON_TLP| \
  69. PCI_ERR_UNC_ECRC| \
  70. PCI_ERR_UNC_UNSUP| \
  71. PCI_ERR_UNC_COMP_ABORT| \
  72. PCI_ERR_UNC_UNX_COMP| \
  73. PCI_ERR_UNC_MALF_TLP)
  74. #define SYSTEM_ERROR_INTR_ON_MESG_MASK (PCI_EXP_RTCTL_SECEE| \
  75. PCI_EXP_RTCTL_SENFEE| \
  76. PCI_EXP_RTCTL_SEFEE)
  77. #define ROOT_PORT_INTR_ON_MESG_MASK (PCI_ERR_ROOT_CMD_COR_EN| \
  78. PCI_ERR_ROOT_CMD_NONFATAL_EN| \
  79. PCI_ERR_ROOT_CMD_FATAL_EN)
  80. #define ERR_COR_ID(d) (d & 0xffff)
  81. #define ERR_UNCOR_ID(d) (d >> 16)
  82. static int pcie_aer_disable;
/* Globally disable native AER handling (checked by pci_aer_available()). */
void pci_no_aer(void)
{
	pcie_aer_disable = 1;
}
  87. bool pci_aer_available(void)
  88. {
  89. return !pcie_aer_disable && pci_msi_enabled();
  90. }
  91. #ifdef CONFIG_PCIE_ECRC
  92. #define ECRC_POLICY_DEFAULT 0 /* ECRC set by BIOS */
  93. #define ECRC_POLICY_OFF 1 /* ECRC off for performance */
  94. #define ECRC_POLICY_ON 2 /* ECRC on for data integrity */
  95. static int ecrc_policy = ECRC_POLICY_DEFAULT;
  96. static const char *ecrc_policy_str[] = {
  97. [ECRC_POLICY_DEFAULT] = "bios",
  98. [ECRC_POLICY_OFF] = "off",
  99. [ECRC_POLICY_ON] = "on"
  100. };
/**
 * enable_ecrc_checking - enable PCIe ECRC checking for a device
 * @dev: the PCI device
 *
 * Turns on ECRC generation and checking only for the features the device
 * advertises as capable (GENC/CHKC bits in the AER capability register).
 *
 * Returns 0 on success, or negative on failure.
 */
static int enable_ecrc_checking(struct pci_dev *dev)
{
	int pos;
	u32 reg32;

	if (!pci_is_pcie(dev))
		return -ENODEV;

	pos = dev->aer_cap;
	if (!pos)
		return -ENODEV;

	pci_read_config_dword(dev, pos + PCI_ERR_CAP, &reg32);
	if (reg32 & PCI_ERR_CAP_ECRC_GENC)	/* generation capable? */
		reg32 |= PCI_ERR_CAP_ECRC_GENE;
	if (reg32 & PCI_ERR_CAP_ECRC_CHKC)	/* checking capable? */
		reg32 |= PCI_ERR_CAP_ECRC_CHKE;
	pci_write_config_dword(dev, pos + PCI_ERR_CAP, reg32);

	return 0;
}
/**
 * disable_ecrc_checking - disables PCIe ECRC checking for a device
 * @dev: the PCI device
 *
 * Clears both the ECRC generation and checking enable bits.
 *
 * Returns 0 on success, or negative on failure.
 */
static int disable_ecrc_checking(struct pci_dev *dev)
{
	int pos;
	u32 reg32;

	if (!pci_is_pcie(dev))
		return -ENODEV;

	pos = dev->aer_cap;
	if (!pos)
		return -ENODEV;

	pci_read_config_dword(dev, pos + PCI_ERR_CAP, &reg32);
	reg32 &= ~(PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE);
	pci_write_config_dword(dev, pos + PCI_ERR_CAP, reg32);

	return 0;
}
  144. /**
  145. * pcie_set_ecrc_checking - set/unset PCIe ECRC checking for a device based on global policy
  146. * @dev: the PCI device
  147. */
  148. void pcie_set_ecrc_checking(struct pci_dev *dev)
  149. {
  150. switch (ecrc_policy) {
  151. case ECRC_POLICY_DEFAULT:
  152. return;
  153. case ECRC_POLICY_OFF:
  154. disable_ecrc_checking(dev);
  155. break;
  156. case ECRC_POLICY_ON:
  157. enable_ecrc_checking(dev);
  158. break;
  159. default:
  160. return;
  161. }
  162. }
/**
 * pcie_ecrc_get_policy - parse kernel command-line ecrc option
 * @str: policy string; "bios", "off" or "on"
 *
 * Unrecognized strings leave the current policy unchanged.
 *
 * NOTE(review): strncmp() with strlen(table entry) is a prefix match, so
 * e.g. "offline" would also select "off" — confirm this is intended.
 */
void pcie_ecrc_get_policy(char *str)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(ecrc_policy_str); i++)
		if (!strncmp(str, ecrc_policy_str[i],
			     strlen(ecrc_policy_str[i])))
			break;
	if (i >= ARRAY_SIZE(ecrc_policy_str))
		return;
	ecrc_policy = i;
}
  177. #endif /* CONFIG_PCIE_ECRC */
  178. #ifdef CONFIG_ACPI_APEI
  179. static inline int hest_match_pci(struct acpi_hest_aer_common *p,
  180. struct pci_dev *pci)
  181. {
  182. return ACPI_HEST_SEGMENT(p->bus) == pci_domain_nr(pci->bus) &&
  183. ACPI_HEST_BUS(p->bus) == pci->bus->number &&
  184. p->device == PCI_SLOT(pci->devfn) &&
  185. p->function == PCI_FUNC(pci->devfn);
  186. }
  187. static inline bool hest_match_type(struct acpi_hest_header *hest_hdr,
  188. struct pci_dev *dev)
  189. {
  190. u16 hest_type = hest_hdr->type;
  191. u8 pcie_type = pci_pcie_type(dev);
  192. if ((hest_type == ACPI_HEST_TYPE_AER_ROOT_PORT &&
  193. pcie_type == PCI_EXP_TYPE_ROOT_PORT) ||
  194. (hest_type == ACPI_HEST_TYPE_AER_ENDPOINT &&
  195. pcie_type == PCI_EXP_TYPE_ENDPOINT) ||
  196. (hest_type == ACPI_HEST_TYPE_AER_BRIDGE &&
  197. (dev->class >> 16) == PCI_BASE_CLASS_BRIDGE))
  198. return true;
  199. return false;
  200. }
/* Context passed through apei_hest_parse() to aer_hest_parse() */
struct aer_hest_parse_info {
	struct pci_dev *pci_dev;	/* device to match, or NULL for "any" */
	int firmware_first;		/* result: firmware owns AER handling */
};
  205. static int hest_source_is_pcie_aer(struct acpi_hest_header *hest_hdr)
  206. {
  207. if (hest_hdr->type == ACPI_HEST_TYPE_AER_ROOT_PORT ||
  208. hest_hdr->type == ACPI_HEST_TYPE_AER_ENDPOINT ||
  209. hest_hdr->type == ACPI_HEST_TYPE_AER_BRIDGE)
  210. return 1;
  211. return 0;
  212. }
/*
 * apei_hest_parse() callback: decide whether this HEST entry marks AER as
 * firmware-first for info->pci_dev (or for any PCIe device when pci_dev
 * is NULL).  Always returns 0 so the HEST walk continues.
 */
static int aer_hest_parse(struct acpi_hest_header *hest_hdr, void *data)
{
	struct aer_hest_parse_info *info = data;
	struct acpi_hest_aer_common *p;
	int ff;

	if (!hest_source_is_pcie_aer(hest_hdr))
		return 0;

	/* The AER-specific record body immediately follows the header */
	p = (struct acpi_hest_aer_common *)(hest_hdr + 1);
	ff = !!(p->flags & ACPI_HEST_FIRMWARE_FIRST);

	/*
	 * If no specific device is supplied, determine whether
	 * FIRMWARE_FIRST is set for *any* PCIe device.
	 */
	if (!info->pci_dev) {
		info->firmware_first |= ff;
		return 0;
	}

	/* Otherwise, check the specific device */
	if (p->flags & ACPI_HEST_GLOBAL) {
		/* global entry: match by device category */
		if (hest_match_type(hest_hdr, info->pci_dev))
			info->firmware_first = ff;
	} else
		/* non-global entry: match by exact segment/bus/dev/fn */
		if (hest_match_pci(p, info->pci_dev))
			info->firmware_first = ff;

	return 0;
}
  239. static void aer_set_firmware_first(struct pci_dev *pci_dev)
  240. {
  241. int rc;
  242. struct aer_hest_parse_info info = {
  243. .pci_dev = pci_dev,
  244. .firmware_first = 0,
  245. };
  246. rc = apei_hest_parse(aer_hest_parse, &info);
  247. if (rc)
  248. pci_dev->__aer_firmware_first = 0;
  249. else
  250. pci_dev->__aer_firmware_first = info.firmware_first;
  251. pci_dev->__aer_firmware_first_valid = 1;
  252. }
/*
 * Return the (cached) firmware-first status for @dev.  Non-PCIe devices
 * report 0; otherwise HEST is parsed once per device and the result is
 * kept in dev->__aer_firmware_first.
 */
int pcie_aer_get_firmware_first(struct pci_dev *dev)
{
	if (!pci_is_pcie(dev))
		return 0;

	if (!dev->__aer_firmware_first_valid)
		aer_set_firmware_first(dev);

	return dev->__aer_firmware_first;
}
  261. #define PCI_EXP_AER_FLAGS (PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE | \
  262. PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE)
  263. static bool aer_firmware_first;
/**
 * aer_acpi_firmware_first - Check if APEI should control AER.
 *
 * Walks HEST once to see whether *any* PCIe device is marked
 * firmware-first; the answer is cached in aer_firmware_first.
 *
 * NOTE(review): the parsed/aer_firmware_first pair is unsynchronized
 * static state — confirm first use is serialized by the caller.
 */
bool aer_acpi_firmware_first(void)
{
	static bool parsed = false;
	struct aer_hest_parse_info info = {
		.pci_dev	= NULL,	/* Check all PCIe devices */
		.firmware_first	= 0,
	};

	if (!parsed) {
		apei_hest_parse(aer_hest_parse, &info);
		aer_firmware_first = info.firmware_first;
		parsed = true;
	}
	return aer_firmware_first;
}
  281. #endif
  282. #define PCI_EXP_AER_FLAGS (PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE | \
  283. PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE)
/*
 * Enable all four AER reporting bits in the Device Control register.
 * Fails with -EIO when firmware owns AER handling or the device has no
 * AER capability.
 */
int pci_enable_pcie_error_reporting(struct pci_dev *dev)
{
	if (pcie_aer_get_firmware_first(dev))
		return -EIO;

	if (!dev->aer_cap)
		return -EIO;

	return pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_AER_FLAGS);
}
EXPORT_SYMBOL_GPL(pci_enable_pcie_error_reporting);
/*
 * Clear the AER reporting bits in the Device Control register.
 * -EIO when firmware owns AER handling.
 *
 * NOTE(review): unlike the enable path, this does not check dev->aer_cap
 * before touching DEVCTL — confirm that asymmetry is intentional.
 */
int pci_disable_pcie_error_reporting(struct pci_dev *dev)
{
	if (pcie_aer_get_firmware_first(dev))
		return -EIO;

	return pcie_capability_clear_word(dev, PCI_EXP_DEVCTL,
					  PCI_EXP_AER_FLAGS);
}
EXPORT_SYMBOL_GPL(pci_disable_pcie_error_reporting);
  301. int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev)
  302. {
  303. int pos;
  304. u32 status;
  305. pos = dev->aer_cap;
  306. if (!pos)
  307. return -EIO;
  308. pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
  309. if (status)
  310. pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status);
  311. return 0;
  312. }
  313. EXPORT_SYMBOL_GPL(pci_cleanup_aer_uncorrect_error_status);
/*
 * Clear all AER status registers of @dev by writing each register's
 * current value back to itself (Root Error Status too, for Root Ports).
 * -ENODEV for non-PCIe devices, -EIO without an AER capability.
 */
int pci_cleanup_aer_error_status_regs(struct pci_dev *dev)
{
	int pos;
	u32 status;
	int port_type;

	if (!pci_is_pcie(dev))
		return -ENODEV;

	pos = dev->aer_cap;
	if (!pos)
		return -EIO;

	port_type = pci_pcie_type(dev);
	if (port_type == PCI_EXP_TYPE_ROOT_PORT) {
		pci_read_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, &status);
		pci_write_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, status);
	}

	pci_read_config_dword(dev, pos + PCI_ERR_COR_STATUS, &status);
	pci_write_config_dword(dev, pos + PCI_ERR_COR_STATUS, status);

	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
	pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status);

	return 0;
}
/*
 * Cache the AER extended-capability offset in dev->aer_cap and clear any
 * stale error status the device carries.
 */
int pci_aer_init(struct pci_dev *dev)
{
	dev->aer_cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
	return pci_cleanup_aer_error_status_regs(dev);
}
  340. #define AER_AGENT_RECEIVER 0
  341. #define AER_AGENT_REQUESTER 1
  342. #define AER_AGENT_COMPLETER 2
  343. #define AER_AGENT_TRANSMITTER 3
  344. #define AER_AGENT_REQUESTER_MASK(t) ((t == AER_CORRECTABLE) ? \
  345. 0 : (PCI_ERR_UNC_COMP_TIME|PCI_ERR_UNC_UNSUP))
  346. #define AER_AGENT_COMPLETER_MASK(t) ((t == AER_CORRECTABLE) ? \
  347. 0 : PCI_ERR_UNC_COMP_ABORT)
  348. #define AER_AGENT_TRANSMITTER_MASK(t) ((t == AER_CORRECTABLE) ? \
  349. (PCI_ERR_COR_REP_ROLL|PCI_ERR_COR_REP_TIMER) : 0)
  350. #define AER_GET_AGENT(t, e) \
  351. ((e & AER_AGENT_COMPLETER_MASK(t)) ? AER_AGENT_COMPLETER : \
  352. (e & AER_AGENT_REQUESTER_MASK(t)) ? AER_AGENT_REQUESTER : \
  353. (e & AER_AGENT_TRANSMITTER_MASK(t)) ? AER_AGENT_TRANSMITTER : \
  354. AER_AGENT_RECEIVER)
  355. #define AER_PHYSICAL_LAYER_ERROR 0
  356. #define AER_DATA_LINK_LAYER_ERROR 1
  357. #define AER_TRANSACTION_LAYER_ERROR 2
  358. #define AER_PHYSICAL_LAYER_ERROR_MASK(t) ((t == AER_CORRECTABLE) ? \
  359. PCI_ERR_COR_RCVR : 0)
  360. #define AER_DATA_LINK_LAYER_ERROR_MASK(t) ((t == AER_CORRECTABLE) ? \
  361. (PCI_ERR_COR_BAD_TLP| \
  362. PCI_ERR_COR_BAD_DLLP| \
  363. PCI_ERR_COR_REP_ROLL| \
  364. PCI_ERR_COR_REP_TIMER) : PCI_ERR_UNC_DLP)
  365. #define AER_GET_LAYER_ERROR(t, e) \
  366. ((e & AER_PHYSICAL_LAYER_ERROR_MASK(t)) ? AER_PHYSICAL_LAYER_ERROR : \
  367. (e & AER_DATA_LINK_LAYER_ERROR_MASK(t)) ? AER_DATA_LINK_LAYER_ERROR : \
  368. AER_TRANSACTION_LAYER_ERROR)
/*
 * AER error strings
 */
/* Indexed by aer_err_info.severity (0:NONFATAL 1:FATAL 2:COR) */
static const char *aer_error_severity_string[] = {
	"Uncorrected (Non-Fatal)",
	"Uncorrected (Fatal)",
	"Corrected"
};

/* Indexed by the AER_*_LAYER_ERROR constants from AER_GET_LAYER_ERROR() */
static const char *aer_error_layer[] = {
	"Physical Layer",
	"Data Link Layer",
	"Transaction Layer"
};

/* Indexed by bit position in the Correctable Error Status register */
static const char *aer_correctable_error_string[] = {
	"Receiver Error",		/* Bit Position 0	*/
	NULL,
	NULL,
	NULL,
	NULL,
	NULL,
	"Bad TLP",			/* Bit Position 6	*/
	"Bad DLLP",			/* Bit Position 7	*/
	"RELAY_NUM Rollover",		/* Bit Position 8	*/
	NULL,
	NULL,
	NULL,
	"Replay Timer Timeout",		/* Bit Position 12	*/
	"Advisory Non-Fatal",		/* Bit Position 13	*/
	"Corrected Internal Error",	/* Bit Position 14	*/
	"Header Log Overflow",		/* Bit Position 15	*/
};

/* Indexed by bit position in the Uncorrectable Error Status register */
static const char *aer_uncorrectable_error_string[] = {
	"Undefined",			/* Bit Position 0	*/
	NULL,
	NULL,
	NULL,
	"Data Link Protocol",		/* Bit Position 4	*/
	"Surprise Down Error",		/* Bit Position 5	*/
	NULL,
	NULL,
	NULL,
	NULL,
	NULL,
	NULL,
	"Poisoned TLP",			/* Bit Position 12	*/
	"Flow Control Protocol",	/* Bit Position 13	*/
	"Completion Timeout",		/* Bit Position 14	*/
	"Completer Abort",		/* Bit Position 15	*/
	"Unexpected Completion",	/* Bit Position 16	*/
	"Receiver Overflow",		/* Bit Position 17	*/
	"Malformed TLP",		/* Bit Position 18	*/
	"ECRC",				/* Bit Position 19	*/
	"Unsupported Request",		/* Bit Position 20	*/
	"ACS Violation",		/* Bit Position 21	*/
	"Uncorrectable Internal Error",	/* Bit Position 22	*/
	"MC Blocked TLP",		/* Bit Position 23	*/
	"AtomicOp Egress Blocked",	/* Bit Position 24	*/
	"TLP Prefix Blocked Error",	/* Bit Position 25	*/
};

/* Indexed by the AER_AGENT_* constants from AER_GET_AGENT() */
static const char *aer_agent_string[] = {
	"Receiver ID",
	"Requester ID",
	"Completer ID",
	"Transmitter ID"
};
/* Dump the four captured TLP header DWORDs for @dev */
static void __print_tlp_header(struct pci_dev *dev,
			       struct aer_header_log_regs *t)
{
	pci_err(dev, " TLP Header: %08x %08x %08x %08x\n",
		t->dw0, t->dw1, t->dw2, t->dw3);
}
  440. static void __aer_print_error(struct pci_dev *dev,
  441. struct aer_err_info *info)
  442. {
  443. int i, status;
  444. const char *errmsg = NULL;
  445. status = (info->status & ~info->mask);
  446. for (i = 0; i < 32; i++) {
  447. if (!(status & (1 << i)))
  448. continue;
  449. if (info->severity == AER_CORRECTABLE)
  450. errmsg = i < ARRAY_SIZE(aer_correctable_error_string) ?
  451. aer_correctable_error_string[i] : NULL;
  452. else
  453. errmsg = i < ARRAY_SIZE(aer_uncorrectable_error_string) ?
  454. aer_uncorrectable_error_string[i] : NULL;
  455. if (errmsg)
  456. pci_err(dev, " [%2d] %-22s%s\n", i, errmsg,
  457. info->first_error == i ? " (First)" : "");
  458. else
  459. pci_err(dev, " [%2d] Unknown Error Bit%s\n",
  460. i, info->first_error == i ? " (First)" : "");
  461. }
  462. }
/*
 * Log one fully decoded AER error for @dev and emit the ras tracepoint.
 * info->status == 0 means the device was identified as a source but no
 * status could be read from it.
 */
static void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
{
	int layer, agent;
	int id = ((dev->bus->number << 8) | dev->devfn);	/* requester ID of @dev */

	if (!info->status) {
		pci_err(dev, "PCIe Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n",
			aer_error_severity_string[info->severity]);
		goto out;
	}

	layer = AER_GET_LAYER_ERROR(info->severity, info->status);
	agent = AER_GET_AGENT(info->severity, info->status);

	pci_err(dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n",
		aer_error_severity_string[info->severity],
		aer_error_layer[layer], aer_agent_string[agent]);

	pci_err(dev, " device [%04x:%04x] error status/mask=%08x/%08x\n",
		dev->vendor, dev->device,
		info->status, info->mask);

	__aer_print_error(dev, info);

	if (info->tlp_header_valid)
		__print_tlp_header(dev, &info->tlp);

out:
	/* Note when @dev is the agent named in the Root Port's source ID */
	if (info->id && info->error_dev_num > 1 && info->id == id)
		pci_err(dev, " Error of this Agent is reported first\n");

	trace_aer_event(dev_name(&dev->dev), (info->status & ~info->mask),
			info->severity, info->tlp_header_valid, &info->tlp);
}
/*
 * Log at the Root Port that an error message arrived, identifying the
 * reporting agent by the bus/devfn packed in info->id.
 */
static void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info)
{
	u8 bus = info->id >> 8;
	u8 devfn = info->id & 0xff;

	pci_info(dev, "AER: %s%s error received: %04x:%02x:%02x.%d\n",
		 info->multi_error_valid ? "Multiple " : "",
		 aer_error_severity_string[info->severity],
		 pci_domain_nr(dev->bus), bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
}
  498. #ifdef CONFIG_ACPI_APEI_PCIEAER
  499. int cper_severity_to_aer(int cper_severity)
  500. {
  501. switch (cper_severity) {
  502. case CPER_SEV_RECOVERABLE:
  503. return AER_NONFATAL;
  504. case CPER_SEV_FATAL:
  505. return AER_FATAL;
  506. default:
  507. return AER_CORRECTABLE;
  508. }
  509. }
  510. EXPORT_SYMBOL_GPL(cper_severity_to_aer);
/*
 * cper_print_aer - log an AER error delivered via an ACPI/APEI CPER record
 * @dev: the error source device
 * @aer_severity: AER_{CORRECTABLE,NONFATAL,FATAL}
 * @aer: snapshot of the device's AER capability registers from the record
 *
 * Mirrors aer_print_error() but decodes from the CPER-supplied register
 * snapshot instead of live config space, and ends with the ras tracepoint.
 */
void cper_print_aer(struct pci_dev *dev, int aer_severity,
		    struct aer_capability_regs *aer)
{
	int layer, agent, tlp_header_valid = 0;
	u32 status, mask;
	struct aer_err_info info;

	/* Pick the register pair matching the severity */
	if (aer_severity == AER_CORRECTABLE) {
		status = aer->cor_status;
		mask = aer->cor_mask;
	} else {
		status = aer->uncor_status;
		mask = aer->uncor_mask;
		/* header log is only captured for these uncorrectable errors */
		tlp_header_valid = status & AER_LOG_TLP_MASKS;
	}

	layer = AER_GET_LAYER_ERROR(aer_severity, status);
	agent = AER_GET_AGENT(aer_severity, status);

	/* Build a minimal aer_err_info so __aer_print_error() can decode */
	memset(&info, 0, sizeof(info));
	info.severity = aer_severity;
	info.status = status;
	info.mask = mask;
	info.first_error = PCI_ERR_CAP_FEP(aer->cap_control);

	pci_err(dev, "aer_status: 0x%08x, aer_mask: 0x%08x\n", status, mask);
	__aer_print_error(dev, &info);
	pci_err(dev, "aer_layer=%s, aer_agent=%s\n",
		aer_error_layer[layer], aer_agent_string[agent]);

	if (aer_severity != AER_CORRECTABLE)
		pci_err(dev, "aer_uncor_severity: 0x%08x\n",
			aer->uncor_severity);

	if (tlp_header_valid)
		__print_tlp_header(dev, &aer->header_log);

	trace_aer_event(dev_name(&dev->dev), (status & ~mask),
			aer_severity, tlp_header_valid, &aer->header_log);
}
  544. #endif
  545. /**
  546. * add_error_device - list device to be handled
  547. * @e_info: pointer to error info
  548. * @dev: pointer to pci_dev to be added
  549. */
  550. static int add_error_device(struct aer_err_info *e_info, struct pci_dev *dev)
  551. {
  552. if (e_info->error_dev_num < AER_MAX_MULTI_ERR_DEVICES) {
  553. e_info->dev[e_info->error_dev_num] = dev;
  554. e_info->error_dev_num++;
  555. return 0;
  556. }
  557. return -ENOSPC;
  558. }
/**
 * is_error_source - check whether the device is source of reported error
 * @dev: pointer to pci_dev to be checked
 * @e_info: pointer to reported error info
 *
 * First tries to match @dev against the requester ID the Root Port
 * captured; when that is inconclusive, falls back to inspecting the
 * device's own AER status registers.
 */
static bool is_error_source(struct pci_dev *dev, struct aer_err_info *e_info)
{
	int pos;
	u32 status, mask;
	u16 reg16;

	/*
	 * When bus id is equal to 0, it might be a bad id
	 * reported by root port.
	 */
	if ((PCI_BUS_NUM(e_info->id) != 0) &&
	    !(dev->bus->bus_flags & PCI_BUS_FLAGS_NO_AERSID)) {
		/* Device ID match? */
		if (e_info->id == ((dev->bus->number << 8) | dev->devfn))
			return true;

		/* Continue id comparing if there is no multiple error */
		if (!e_info->multi_error_valid)
			return false;
	}

	/*
	 * When either
	 * 1) bus id is equal to 0. Some ports might lose the bus
	 *    id of error source id;
	 * 2) bus flag PCI_BUS_FLAGS_NO_AERSID is set
	 * 3) There are multiple errors and prior ID comparing fails;
	 * We check AER status registers to find possible reporter.
	 */
	if (atomic_read(&dev->enable_cnt) == 0)
		return false;	/* disabled device cannot be the reporter */

	/* Check if AER is enabled */
	pcie_capability_read_word(dev, PCI_EXP_DEVCTL, &reg16);
	if (!(reg16 & PCI_EXP_AER_FLAGS))
		return false;

	pos = dev->aer_cap;
	if (!pos)
		return false;

	/* Check if error is recorded */
	if (e_info->severity == AER_CORRECTABLE) {
		pci_read_config_dword(dev, pos + PCI_ERR_COR_STATUS, &status);
		pci_read_config_dword(dev, pos + PCI_ERR_COR_MASK, &mask);
	} else {
		pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
		pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_MASK, &mask);
	}
	if (status & ~mask)
		return true;	/* at least one unmasked error is pending */

	return false;
}
  611. static int find_device_iter(struct pci_dev *dev, void *data)
  612. {
  613. struct aer_err_info *e_info = (struct aer_err_info *)data;
  614. if (is_error_source(dev, e_info)) {
  615. /* List this device */
  616. if (add_error_device(e_info, dev)) {
  617. /* We cannot handle more... Stop iteration */
  618. /* TODO: Should print error message here? */
  619. return 1;
  620. }
  621. /* If there is only a single error, stop iteration */
  622. if (!e_info->multi_error_valid)
  623. return 1;
  624. }
  625. return 0;
  626. }
/**
 * find_source_device - search through device hierarchy for source device
 * @parent: pointer to Root Port pci_dev data structure
 * @e_info: including detailed error information such like id
 *
 * Return true if found.
 *
 * Invoked by DPC when error is detected at the Root Port.
 * Caller of this function must set id, severity, and multi_error_valid of
 * struct aer_err_info pointed by @e_info properly.  This function must fill
 * e_info->error_dev_num and e_info->dev[], based on the given information.
 */
static bool find_source_device(struct pci_dev *parent,
			       struct aer_err_info *e_info)
{
	struct pci_dev *dev = parent;
	int result;

	/* Must reset in this function */
	e_info->error_dev_num = 0;

	/* Is Root Port an agent that sends error message? */
	result = find_device_iter(dev, e_info);
	if (result)
		return true;

	/* Otherwise walk everything below the Root Port */
	pci_walk_bus(parent->subordinate, find_device_iter, e_info);

	if (!e_info->error_dev_num) {
		pci_printk(KERN_DEBUG, parent, "can't find device of ID%04x\n",
			   e_info->id);
		return false;
	}
	return true;
}
/**
 * handle_error_source - handle logging error into an event log
 * @dev: pointer to pci_dev data structure of error source device
 * @info: comprehensive error information
 *
 * Invoked when an error being detected by Root Port.  Correctable errors
 * are only acknowledged in the status register; non-fatal and fatal
 * errors go through the respective PCIe recovery paths.
 */
static void handle_error_source(struct pci_dev *dev, struct aer_err_info *info)
{
	int pos;

	if (info->severity == AER_CORRECTABLE) {
		/*
		 * Correctable error does not need software intervention.
		 * No need to go through error recovery process.
		 */
		pos = dev->aer_cap;
		if (pos)
			/* acknowledge the logged correctable-error bits */
			pci_write_config_dword(dev, pos + PCI_ERR_COR_STATUS,
					       info->status);
	} else if (info->severity == AER_NONFATAL)
		pcie_do_nonfatal_recovery(dev);
	else if (info->severity == AER_FATAL)
		pcie_do_fatal_recovery(dev, PCIE_PORT_SERVICE_AER);
}
  682. #ifdef CONFIG_ACPI_APEI_PCIEAER
  683. #define AER_RECOVER_RING_ORDER 4
  684. #define AER_RECOVER_RING_SIZE (1 << AER_RECOVER_RING_ORDER)
/* One APEI-reported error queued on aer_recover_ring for deferred handling */
struct aer_recover_entry {
	u8	bus;		/* bus number of the error source */
	u8	devfn;		/* device/function of the error source */
	u16	domain;		/* PCI segment */
	int	severity;	/* AER_{CORRECTABLE,NONFATAL,FATAL} */
	struct aer_capability_regs *regs;	/* register snapshot (by pointer) */
};
  692. static DEFINE_KFIFO(aer_recover_ring, struct aer_recover_entry,
  693. AER_RECOVER_RING_SIZE);
/*
 * Work item: drain aer_recover_ring, log each queued error, and run the
 * matching recovery path.  Single consumer — see the ring-lock comment
 * further down.
 */
static void aer_recover_work_func(struct work_struct *work)
{
	struct aer_recover_entry entry;
	struct pci_dev *pdev;

	while (kfifo_get(&aer_recover_ring, &entry)) {
		pdev = pci_get_domain_bus_and_slot(entry.domain, entry.bus,
						   entry.devfn);
		if (!pdev) {
			pr_err("AER recover: Can not find pci_dev for %04x:%02x:%02x:%x\n",
			       entry.domain, entry.bus,
			       PCI_SLOT(entry.devfn), PCI_FUNC(entry.devfn));
			continue;
		}
		cper_print_aer(pdev, entry.severity, entry.regs);
		if (entry.severity == AER_NONFATAL)
			pcie_do_nonfatal_recovery(pdev);
		else if (entry.severity == AER_FATAL)
			pcie_do_fatal_recovery(pdev, PCIE_PORT_SERVICE_AER);
		pci_dev_put(pdev);	/* balance the lookup above */
	}
}
  715. /*
  716. * Mutual exclusion for writers of aer_recover_ring, reader side don't
  717. * need lock, because there is only one reader and lock is not needed
  718. * between reader and writer.
  719. */
  720. static DEFINE_SPINLOCK(aer_recover_ring_lock);
  721. static DECLARE_WORK(aer_recover_work, aer_recover_work_func);
/**
 * aer_recover_queue - queue an APEI-reported AER error for deferred recovery
 * @domain: PCI segment of the error source
 * @bus: bus number of the error source
 * @devfn: device/function of the error source
 * @severity: AER_{CORRECTABLE,NONFATAL,FATAL}
 * @aer_regs: snapshot of the device's AER capability registers
 *
 * Writer side of aer_recover_ring; actual handling happens later in
 * aer_recover_work_func().  On a full ring the error is dropped with a
 * log message.
 *
 * NOTE(review): @aer_regs is stored by pointer, so it must remain valid
 * until the work item consumes it — confirm callers guarantee this.
 */
void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn,
		       int severity, struct aer_capability_regs *aer_regs)
{
	unsigned long flags;
	struct aer_recover_entry entry = {
		.bus		= bus,
		.devfn		= devfn,
		.domain		= domain,
		.severity	= severity,
		.regs		= aer_regs,
	};

	spin_lock_irqsave(&aer_recover_ring_lock, flags);
	if (kfifo_put(&aer_recover_ring, entry))
		schedule_work(&aer_recover_work);
	else
		pr_err("AER recover: Buffer overflow when recovering AER for %04x:%02x:%02x:%x\n",
		       domain, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
	spin_unlock_irqrestore(&aer_recover_ring_lock, flags);
}
EXPORT_SYMBOL_GPL(aer_recover_queue);
  742. #endif
  743. /**
  744. * get_device_error_info - read error status from dev and store it to info
  745. * @dev: pointer to the device expected to have a error record
  746. * @info: pointer to structure to store the error record
  747. *
  748. * Return 1 on success, 0 on error.
  749. *
  750. * Note that @info is reused among all error devices. Clear fields properly.
  751. */
  752. static int get_device_error_info(struct pci_dev *dev, struct aer_err_info *info)
  753. {
  754. int pos, temp;
  755. /* Must reset in this function */
  756. info->status = 0;
  757. info->tlp_header_valid = 0;
  758. pos = dev->aer_cap;
  759. /* The device might not support AER */
  760. if (!pos)
  761. return 0;
  762. if (info->severity == AER_CORRECTABLE) {
  763. pci_read_config_dword(dev, pos + PCI_ERR_COR_STATUS,
  764. &info->status);
  765. pci_read_config_dword(dev, pos + PCI_ERR_COR_MASK,
  766. &info->mask);
  767. if (!(info->status & ~info->mask))
  768. return 0;
  769. } else if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE ||
  770. info->severity == AER_NONFATAL) {
  771. /* Link is still healthy for IO reads */
  772. pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS,
  773. &info->status);
  774. pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_MASK,
  775. &info->mask);
  776. if (!(info->status & ~info->mask))
  777. return 0;
  778. /* Get First Error Pointer */
  779. pci_read_config_dword(dev, pos + PCI_ERR_CAP, &temp);
  780. info->first_error = PCI_ERR_CAP_FEP(temp);
  781. if (info->status & AER_LOG_TLP_MASKS) {
  782. info->tlp_header_valid = 1;
  783. pci_read_config_dword(dev,
  784. pos + PCI_ERR_HEADER_LOG, &info->tlp.dw0);
  785. pci_read_config_dword(dev,
  786. pos + PCI_ERR_HEADER_LOG + 4, &info->tlp.dw1);
  787. pci_read_config_dword(dev,
  788. pos + PCI_ERR_HEADER_LOG + 8, &info->tlp.dw2);
  789. pci_read_config_dword(dev,
  790. pos + PCI_ERR_HEADER_LOG + 12, &info->tlp.dw3);
  791. }
  792. }
  793. return 1;
  794. }
  795. static inline void aer_process_err_devices(struct aer_err_info *e_info)
  796. {
  797. int i;
  798. /* Report all before handle them, not to lost records by reset etc. */
  799. for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) {
  800. if (get_device_error_info(e_info->dev[i], e_info))
  801. aer_print_error(e_info->dev[i], e_info);
  802. }
  803. for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) {
  804. if (get_device_error_info(e_info->dev[i], e_info))
  805. handle_error_source(e_info->dev[i], e_info);
  806. }
  807. }
  808. /**
  809. * aer_isr_one_error - consume an error detected by root port
  810. * @rpc: pointer to the root port which holds an error
  811. * @e_src: pointer to an error source
  812. */
  813. static void aer_isr_one_error(struct aer_rpc *rpc,
  814. struct aer_err_source *e_src)
  815. {
  816. struct pci_dev *pdev = rpc->rpd;
  817. struct aer_err_info *e_info = &rpc->e_info;
  818. /*
  819. * There is a possibility that both correctable error and
  820. * uncorrectable error being logged. Report correctable error first.
  821. */
  822. if (e_src->status & PCI_ERR_ROOT_COR_RCV) {
  823. e_info->id = ERR_COR_ID(e_src->id);
  824. e_info->severity = AER_CORRECTABLE;
  825. if (e_src->status & PCI_ERR_ROOT_MULTI_COR_RCV)
  826. e_info->multi_error_valid = 1;
  827. else
  828. e_info->multi_error_valid = 0;
  829. aer_print_port_info(pdev, e_info);
  830. if (find_source_device(pdev, e_info))
  831. aer_process_err_devices(e_info);
  832. }
  833. if (e_src->status & PCI_ERR_ROOT_UNCOR_RCV) {
  834. e_info->id = ERR_UNCOR_ID(e_src->id);
  835. if (e_src->status & PCI_ERR_ROOT_FATAL_RCV)
  836. e_info->severity = AER_FATAL;
  837. else
  838. e_info->severity = AER_NONFATAL;
  839. if (e_src->status & PCI_ERR_ROOT_MULTI_UNCOR_RCV)
  840. e_info->multi_error_valid = 1;
  841. else
  842. e_info->multi_error_valid = 0;
  843. aer_print_port_info(pdev, e_info);
  844. if (find_source_device(pdev, e_info))
  845. aer_process_err_devices(e_info);
  846. }
  847. }
  848. /**
  849. * get_e_source - retrieve an error source
  850. * @rpc: pointer to the root port which holds an error
  851. * @e_src: pointer to store retrieved error source
  852. *
  853. * Return 1 if an error source is retrieved, otherwise 0.
  854. *
  855. * Invoked by DPC handler to consume an error.
  856. */
  857. static int get_e_source(struct aer_rpc *rpc, struct aer_err_source *e_src)
  858. {
  859. unsigned long flags;
  860. /* Lock access to Root error producer/consumer index */
  861. spin_lock_irqsave(&rpc->e_lock, flags);
  862. if (rpc->prod_idx == rpc->cons_idx) {
  863. spin_unlock_irqrestore(&rpc->e_lock, flags);
  864. return 0;
  865. }
  866. *e_src = rpc->e_sources[rpc->cons_idx];
  867. rpc->cons_idx++;
  868. if (rpc->cons_idx == AER_ERROR_SOURCES_MAX)
  869. rpc->cons_idx = 0;
  870. spin_unlock_irqrestore(&rpc->e_lock, flags);
  871. return 1;
  872. }
  873. /**
  874. * aer_isr - consume errors detected by root port
  875. * @work: definition of this work item
  876. *
  877. * Invoked, as DPC, when root port records new detected error
  878. */
  879. static void aer_isr(struct work_struct *work)
  880. {
  881. struct aer_rpc *rpc = container_of(work, struct aer_rpc, dpc_handler);
  882. struct aer_err_source uninitialized_var(e_src);
  883. mutex_lock(&rpc->rpc_mutex);
  884. while (get_e_source(rpc, &e_src))
  885. aer_isr_one_error(rpc, &e_src);
  886. mutex_unlock(&rpc->rpc_mutex);
  887. }
/**
 * aer_irq - Root Port's ISR
 * @irq: IRQ assigned to Root Port
 * @context: pointer to Root Port data structure
 *
 * Invoked when Root Port detects AER messages.
 */
irqreturn_t aer_irq(int irq, void *context)
{
	unsigned int status, id;
	struct pcie_device *pdev = (struct pcie_device *)context;
	struct aer_rpc *rpc = get_service_data(pdev);
	int next_prod_idx;
	unsigned long flags;
	int pos;

	pos = pdev->port->aer_cap;
	/*
	 * Must lock access to Root Error Status Reg, Root Error ID Reg,
	 * and Root error producer/consumer index
	 */
	spin_lock_irqsave(&rpc->e_lock, flags);

	/* Read error status */
	pci_read_config_dword(pdev->port, pos + PCI_ERR_ROOT_STATUS, &status);
	if (!(status & (PCI_ERR_ROOT_UNCOR_RCV|PCI_ERR_ROOT_COR_RCV))) {
		/* Neither COR nor UNCOR received: not our interrupt (shared IRQ) */
		spin_unlock_irqrestore(&rpc->e_lock, flags);
		return IRQ_NONE;
	}

	/*
	 * Read error source and clear error status.  The source ID must be
	 * read before the status write-back clears the root status bits.
	 */
	pci_read_config_dword(pdev->port, pos + PCI_ERR_ROOT_ERR_SRC, &id);
	pci_write_config_dword(pdev->port, pos + PCI_ERR_ROOT_STATUS, status);

	/* Store error source for later DPC handler */
	next_prod_idx = rpc->prod_idx + 1;
	if (next_prod_idx == AER_ERROR_SOURCES_MAX)
		next_prod_idx = 0;
	if (next_prod_idx == rpc->cons_idx) {
		/*
		 * Error Storm Condition - possibly the same error occurred.
		 * Drop the error.
		 */
		spin_unlock_irqrestore(&rpc->e_lock, flags);
		return IRQ_HANDLED;
	}
	rpc->e_sources[rpc->prod_idx].status = status;
	rpc->e_sources[rpc->prod_idx].id = id;
	rpc->prod_idx = next_prod_idx;
	spin_unlock_irqrestore(&rpc->e_lock, flags);

	/* Invoke DPC handler */
	schedule_work(&rpc->dpc_handler);

	return IRQ_HANDLED;
}
EXPORT_SYMBOL_GPL(aer_irq);
  939. static int set_device_error_reporting(struct pci_dev *dev, void *data)
  940. {
  941. bool enable = *((bool *)data);
  942. int type = pci_pcie_type(dev);
  943. if ((type == PCI_EXP_TYPE_ROOT_PORT) ||
  944. (type == PCI_EXP_TYPE_UPSTREAM) ||
  945. (type == PCI_EXP_TYPE_DOWNSTREAM)) {
  946. if (enable)
  947. pci_enable_pcie_error_reporting(dev);
  948. else
  949. pci_disable_pcie_error_reporting(dev);
  950. }
  951. if (enable)
  952. pcie_set_ecrc_checking(dev);
  953. return 0;
  954. }
  955. /**
  956. * set_downstream_devices_error_reporting - enable/disable the error reporting bits on the root port and its downstream ports.
  957. * @dev: pointer to root port's pci_dev data structure
  958. * @enable: true = enable error reporting, false = disable error reporting.
  959. */
  960. static void set_downstream_devices_error_reporting(struct pci_dev *dev,
  961. bool enable)
  962. {
  963. set_device_error_reporting(dev, &enable);
  964. if (!dev->subordinate)
  965. return;
  966. pci_walk_bus(dev->subordinate, set_device_error_reporting, &enable);
  967. }
/**
 * aer_enable_rootport - enable Root Port's interrupts when receiving messages
 * @rpc: pointer to a Root Port data structure
 *
 * Invoked when PCIe bus loads AER service driver.
 */
static void aer_enable_rootport(struct aer_rpc *rpc)
{
	struct pci_dev *pdev = rpc->rpd;
	int aer_pos;
	u16 reg16;
	u32 reg32;

	/* Clear PCIe Capability's Device Status */
	pcie_capability_read_word(pdev, PCI_EXP_DEVSTA, &reg16);
	pcie_capability_write_word(pdev, PCI_EXP_DEVSTA, reg16);

	/* Disable system error generation in response to error messages */
	pcie_capability_clear_word(pdev, PCI_EXP_RTCTL,
				   SYSTEM_ERROR_INTR_ON_MESG_MASK);

	aer_pos = pdev->aer_cap;
	/*
	 * Clear error status: these registers are write-1-to-clear, so
	 * writing back the value just read clears exactly the set bits.
	 */
	pci_read_config_dword(pdev, aer_pos + PCI_ERR_ROOT_STATUS, &reg32);
	pci_write_config_dword(pdev, aer_pos + PCI_ERR_ROOT_STATUS, reg32);
	pci_read_config_dword(pdev, aer_pos + PCI_ERR_COR_STATUS, &reg32);
	pci_write_config_dword(pdev, aer_pos + PCI_ERR_COR_STATUS, reg32);
	pci_read_config_dword(pdev, aer_pos + PCI_ERR_UNCOR_STATUS, &reg32);
	pci_write_config_dword(pdev, aer_pos + PCI_ERR_UNCOR_STATUS, reg32);

	/*
	 * Enable error reporting for the root port device and downstream port
	 * devices.
	 */
	set_downstream_devices_error_reporting(pdev, true);

	/* Enable Root Port's interrupt in response to error messages */
	pci_read_config_dword(pdev, aer_pos + PCI_ERR_ROOT_COMMAND, &reg32);
	reg32 |= ROOT_PORT_INTR_ON_MESG_MASK;
	pci_write_config_dword(pdev, aer_pos + PCI_ERR_ROOT_COMMAND, reg32);
}
/**
 * aer_disable_rootport - disable Root Port's interrupts when receiving messages
 * @rpc: pointer to a Root Port data structure
 *
 * Invoked when PCIe bus unloads AER service driver.
 */
static void aer_disable_rootport(struct aer_rpc *rpc)
{
	struct pci_dev *pdev = rpc->rpd;
	u32 reg32;
	int pos;

	/*
	 * Disable error reporting for the root port device and downstream port
	 * devices.
	 */
	set_downstream_devices_error_reporting(pdev, false);

	pos = pdev->aer_cap;
	/* Disable Root's interrupt in response to error messages */
	pci_read_config_dword(pdev, pos + PCI_ERR_ROOT_COMMAND, &reg32);
	reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
	pci_write_config_dword(pdev, pos + PCI_ERR_ROOT_COMMAND, reg32);

	/* Clear Root's error status reg (write-1-to-clear semantics) */
	pci_read_config_dword(pdev, pos + PCI_ERR_ROOT_STATUS, &reg32);
	pci_write_config_dword(pdev, pos + PCI_ERR_ROOT_STATUS, reg32);
}
  1029. /**
  1030. * aer_alloc_rpc - allocate Root Port data structure
  1031. * @dev: pointer to the pcie_dev data structure
  1032. *
  1033. * Invoked when Root Port's AER service is loaded.
  1034. */
  1035. static struct aer_rpc *aer_alloc_rpc(struct pcie_device *dev)
  1036. {
  1037. struct aer_rpc *rpc;
  1038. rpc = kzalloc(sizeof(struct aer_rpc), GFP_KERNEL);
  1039. if (!rpc)
  1040. return NULL;
  1041. /* Initialize Root lock access, e_lock, to Root Error Status Reg */
  1042. spin_lock_init(&rpc->e_lock);
  1043. rpc->rpd = dev->port;
  1044. INIT_WORK(&rpc->dpc_handler, aer_isr);
  1045. mutex_init(&rpc->rpc_mutex);
  1046. /* Use PCIe bus function to store rpc into PCIe device */
  1047. set_service_data(dev, rpc);
  1048. return rpc;
  1049. }
  1050. /**
  1051. * aer_remove - clean up resources
  1052. * @dev: pointer to the pcie_dev data structure
  1053. *
  1054. * Invoked when PCI Express bus unloads or AER probe fails.
  1055. */
  1056. static void aer_remove(struct pcie_device *dev)
  1057. {
  1058. struct aer_rpc *rpc = get_service_data(dev);
  1059. if (rpc) {
  1060. /* If register interrupt service, it must be free. */
  1061. if (rpc->isr)
  1062. free_irq(dev->irq, dev);
  1063. flush_work(&rpc->dpc_handler);
  1064. aer_disable_rootport(rpc);
  1065. kfree(rpc);
  1066. set_service_data(dev, NULL);
  1067. }
  1068. }
  1069. /**
  1070. * aer_probe - initialize resources
  1071. * @dev: pointer to the pcie_dev data structure
  1072. *
  1073. * Invoked when PCI Express bus loads AER service driver.
  1074. */
  1075. static int aer_probe(struct pcie_device *dev)
  1076. {
  1077. int status;
  1078. struct aer_rpc *rpc;
  1079. struct device *device = &dev->port->dev;
  1080. /* Alloc rpc data structure */
  1081. rpc = aer_alloc_rpc(dev);
  1082. if (!rpc) {
  1083. dev_printk(KERN_DEBUG, device, "alloc AER rpc failed\n");
  1084. aer_remove(dev);
  1085. return -ENOMEM;
  1086. }
  1087. /* Request IRQ ISR */
  1088. status = request_irq(dev->irq, aer_irq, IRQF_SHARED, "aerdrv", dev);
  1089. if (status) {
  1090. dev_printk(KERN_DEBUG, device, "request AER IRQ %d failed\n",
  1091. dev->irq);
  1092. aer_remove(dev);
  1093. return status;
  1094. }
  1095. rpc->isr = 1;
  1096. aer_enable_rootport(rpc);
  1097. dev_info(device, "AER enabled with IRQ %d\n", dev->irq);
  1098. return 0;
  1099. }
/**
 * aer_root_reset - reset link on Root Port
 * @dev: pointer to Root Port's pci_dev data structure
 *
 * Invoked by Port Bus driver when performing link reset at Root Port.
 */
static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
{
	u32 reg32;
	int pos;

	pos = dev->aer_cap;

	/* Disable Root's interrupt in response to error messages */
	pci_read_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, &reg32);
	reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
	pci_write_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, reg32);

	/* Reset the secondary bus below this Root Port */
	pci_reset_bridge_secondary_bus(dev);
	pci_printk(KERN_DEBUG, dev, "Root Port link has been reset\n");

	/* Clear Root Error Status (write-back of set bits clears them) */
	pci_read_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, &reg32);
	pci_write_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, reg32);

	/* Enable Root Port's interrupt in response to error messages */
	pci_read_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, &reg32);
	reg32 |= ROOT_PORT_INTR_ON_MESG_MASK;
	pci_write_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, reg32);

	return PCI_ERS_RESULT_RECOVERED;
}
  1126. /**
  1127. * aer_error_resume - clean up corresponding error status bits
  1128. * @dev: pointer to Root Port's pci_dev data structure
  1129. *
  1130. * Invoked by Port Bus driver during nonfatal recovery.
  1131. */
  1132. static void aer_error_resume(struct pci_dev *dev)
  1133. {
  1134. int pos;
  1135. u32 status, mask;
  1136. u16 reg16;
  1137. /* Clean up Root device status */
  1138. pcie_capability_read_word(dev, PCI_EXP_DEVSTA, &reg16);
  1139. pcie_capability_write_word(dev, PCI_EXP_DEVSTA, reg16);
  1140. /* Clean AER Root Error Status */
  1141. pos = dev->aer_cap;
  1142. pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
  1143. pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &mask);
  1144. status &= ~mask; /* Clear corresponding nonfatal bits */
  1145. pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status);
  1146. }
/* AER port service driver: binds to Root Ports and wires up the AER service */
static struct pcie_port_service_driver aerdriver = {
	.name		= "aer",
	.port_type	= PCI_EXP_TYPE_ROOT_PORT,
	.service	= PCIE_PORT_SERVICE_AER,

	.probe		= aer_probe,
	.remove		= aer_remove,
	.error_resume	= aer_error_resume,
	.reset_link	= aer_root_reset,
};
  1156. /**
  1157. * aer_service_init - register AER root service driver
  1158. *
  1159. * Invoked when AER root service driver is loaded.
  1160. */
  1161. static int __init aer_service_init(void)
  1162. {
  1163. if (!pci_aer_available() || aer_acpi_firmware_first())
  1164. return -ENXIO;
  1165. return pcie_port_service_register(&aerdriver);
  1166. }
  1167. device_initcall(aer_service_init);