aerdrv_errprint.c 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Format error messages and print them to console.
  4. *
  5. * Copyright (C) 2006 Intel Corp.
  6. * Tom Long Nguyen (tom.l.nguyen@intel.com)
  7. * Zhang Yanmin (yanmin.zhang@intel.com)
  8. */
  9. #include <linux/module.h>
  10. #include <linux/pci.h>
  11. #include <linux/kernel.h>
  12. #include <linux/errno.h>
  13. #include <linux/pm.h>
  14. #include <linux/suspend.h>
  15. #include <linux/cper.h>
  16. #include "aerdrv.h"
  17. #include <ras/ras_event.h>
  18. #define AER_AGENT_RECEIVER 0
  19. #define AER_AGENT_REQUESTER 1
  20. #define AER_AGENT_COMPLETER 2
  21. #define AER_AGENT_TRANSMITTER 3
  22. #define AER_AGENT_REQUESTER_MASK(t) ((t == AER_CORRECTABLE) ? \
  23. 0 : (PCI_ERR_UNC_COMP_TIME|PCI_ERR_UNC_UNSUP))
  24. #define AER_AGENT_COMPLETER_MASK(t) ((t == AER_CORRECTABLE) ? \
  25. 0 : PCI_ERR_UNC_COMP_ABORT)
  26. #define AER_AGENT_TRANSMITTER_MASK(t) ((t == AER_CORRECTABLE) ? \
  27. (PCI_ERR_COR_REP_ROLL|PCI_ERR_COR_REP_TIMER) : 0)
  28. #define AER_GET_AGENT(t, e) \
  29. ((e & AER_AGENT_COMPLETER_MASK(t)) ? AER_AGENT_COMPLETER : \
  30. (e & AER_AGENT_REQUESTER_MASK(t)) ? AER_AGENT_REQUESTER : \
  31. (e & AER_AGENT_TRANSMITTER_MASK(t)) ? AER_AGENT_TRANSMITTER : \
  32. AER_AGENT_RECEIVER)
  33. #define AER_PHYSICAL_LAYER_ERROR 0
  34. #define AER_DATA_LINK_LAYER_ERROR 1
  35. #define AER_TRANSACTION_LAYER_ERROR 2
  36. #define AER_PHYSICAL_LAYER_ERROR_MASK(t) ((t == AER_CORRECTABLE) ? \
  37. PCI_ERR_COR_RCVR : 0)
  38. #define AER_DATA_LINK_LAYER_ERROR_MASK(t) ((t == AER_CORRECTABLE) ? \
  39. (PCI_ERR_COR_BAD_TLP| \
  40. PCI_ERR_COR_BAD_DLLP| \
  41. PCI_ERR_COR_REP_ROLL| \
  42. PCI_ERR_COR_REP_TIMER) : PCI_ERR_UNC_DLP)
  43. #define AER_GET_LAYER_ERROR(t, e) \
  44. ((e & AER_PHYSICAL_LAYER_ERROR_MASK(t)) ? AER_PHYSICAL_LAYER_ERROR : \
  45. (e & AER_DATA_LINK_LAYER_ERROR_MASK(t)) ? AER_DATA_LINK_LAYER_ERROR : \
  46. AER_TRANSACTION_LAYER_ERROR)
  47. /*
  48. * AER error strings
  49. */
  50. static const char *aer_error_severity_string[] = {
  51. "Uncorrected (Non-Fatal)",
  52. "Uncorrected (Fatal)",
  53. "Corrected"
  54. };
  55. static const char *aer_error_layer[] = {
  56. "Physical Layer",
  57. "Data Link Layer",
  58. "Transaction Layer"
  59. };
  60. static const char *aer_correctable_error_string[] = {
  61. "Receiver Error", /* Bit Position 0 */
  62. NULL,
  63. NULL,
  64. NULL,
  65. NULL,
  66. NULL,
  67. "Bad TLP", /* Bit Position 6 */
  68. "Bad DLLP", /* Bit Position 7 */
  69. "RELAY_NUM Rollover", /* Bit Position 8 */
  70. NULL,
  71. NULL,
  72. NULL,
  73. "Replay Timer Timeout", /* Bit Position 12 */
  74. "Advisory Non-Fatal", /* Bit Position 13 */
  75. "Corrected Internal Error", /* Bit Position 14 */
  76. "Header Log Overflow", /* Bit Position 15 */
  77. };
  78. static const char *aer_uncorrectable_error_string[] = {
  79. "Undefined", /* Bit Position 0 */
  80. NULL,
  81. NULL,
  82. NULL,
  83. "Data Link Protocol", /* Bit Position 4 */
  84. "Surprise Down Error", /* Bit Position 5 */
  85. NULL,
  86. NULL,
  87. NULL,
  88. NULL,
  89. NULL,
  90. NULL,
  91. "Poisoned TLP", /* Bit Position 12 */
  92. "Flow Control Protocol", /* Bit Position 13 */
  93. "Completion Timeout", /* Bit Position 14 */
  94. "Completer Abort", /* Bit Position 15 */
  95. "Unexpected Completion", /* Bit Position 16 */
  96. "Receiver Overflow", /* Bit Position 17 */
  97. "Malformed TLP", /* Bit Position 18 */
  98. "ECRC", /* Bit Position 19 */
  99. "Unsupported Request", /* Bit Position 20 */
  100. "ACS Violation", /* Bit Position 21 */
  101. "Uncorrectable Internal Error", /* Bit Position 22 */
  102. "MC Blocked TLP", /* Bit Position 23 */
  103. "AtomicOp Egress Blocked", /* Bit Position 24 */
  104. "TLP Prefix Blocked Error", /* Bit Position 25 */
  105. };
  106. static const char *aer_agent_string[] = {
  107. "Receiver ID",
  108. "Requester ID",
  109. "Completer ID",
  110. "Transmitter ID"
  111. };
  112. static void __print_tlp_header(struct pci_dev *dev,
  113. struct aer_header_log_regs *t)
  114. {
  115. pci_err(dev, " TLP Header: %08x %08x %08x %08x\n",
  116. t->dw0, t->dw1, t->dw2, t->dw3);
  117. }
  118. static void __aer_print_error(struct pci_dev *dev,
  119. struct aer_err_info *info)
  120. {
  121. int i, status;
  122. const char *errmsg = NULL;
  123. status = (info->status & ~info->mask);
  124. for (i = 0; i < 32; i++) {
  125. if (!(status & (1 << i)))
  126. continue;
  127. if (info->severity == AER_CORRECTABLE)
  128. errmsg = i < ARRAY_SIZE(aer_correctable_error_string) ?
  129. aer_correctable_error_string[i] : NULL;
  130. else
  131. errmsg = i < ARRAY_SIZE(aer_uncorrectable_error_string) ?
  132. aer_uncorrectable_error_string[i] : NULL;
  133. if (errmsg)
  134. pci_err(dev, " [%2d] %-22s%s\n", i, errmsg,
  135. info->first_error == i ? " (First)" : "");
  136. else
  137. pci_err(dev, " [%2d] Unknown Error Bit%s\n",
  138. i, info->first_error == i ? " (First)" : "");
  139. }
  140. }
  141. void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
  142. {
  143. int layer, agent;
  144. int id = ((dev->bus->number << 8) | dev->devfn);
  145. if (!info->status) {
  146. pci_err(dev, "PCIe Bus Error: severity=%s, type=Unaccessible, id=%04x(Unregistered Agent ID)\n",
  147. aer_error_severity_string[info->severity], id);
  148. goto out;
  149. }
  150. layer = AER_GET_LAYER_ERROR(info->severity, info->status);
  151. agent = AER_GET_AGENT(info->severity, info->status);
  152. pci_err(dev, "PCIe Bus Error: severity=%s, type=%s, id=%04x(%s)\n",
  153. aer_error_severity_string[info->severity],
  154. aer_error_layer[layer], id, aer_agent_string[agent]);
  155. pci_err(dev, " device [%04x:%04x] error status/mask=%08x/%08x\n",
  156. dev->vendor, dev->device,
  157. info->status, info->mask);
  158. __aer_print_error(dev, info);
  159. if (info->tlp_header_valid)
  160. __print_tlp_header(dev, &info->tlp);
  161. out:
  162. if (info->id && info->error_dev_num > 1 && info->id == id)
  163. pci_err(dev, " Error of this Agent(%04x) is reported first\n", id);
  164. trace_aer_event(dev_name(&dev->dev), (info->status & ~info->mask),
  165. info->severity, info->tlp_header_valid, &info->tlp);
  166. }
  167. void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info)
  168. {
  169. pci_info(dev, "AER: %s%s error received: id=%04x\n",
  170. info->multi_error_valid ? "Multiple " : "",
  171. aer_error_severity_string[info->severity], info->id);
  172. }
  173. #ifdef CONFIG_ACPI_APEI_PCIEAER
  174. int cper_severity_to_aer(int cper_severity)
  175. {
  176. switch (cper_severity) {
  177. case CPER_SEV_RECOVERABLE:
  178. return AER_NONFATAL;
  179. case CPER_SEV_FATAL:
  180. return AER_FATAL;
  181. default:
  182. return AER_CORRECTABLE;
  183. }
  184. }
  185. EXPORT_SYMBOL_GPL(cper_severity_to_aer);
  186. void cper_print_aer(struct pci_dev *dev, int aer_severity,
  187. struct aer_capability_regs *aer)
  188. {
  189. int layer, agent, tlp_header_valid = 0;
  190. u32 status, mask;
  191. struct aer_err_info info;
  192. if (aer_severity == AER_CORRECTABLE) {
  193. status = aer->cor_status;
  194. mask = aer->cor_mask;
  195. } else {
  196. status = aer->uncor_status;
  197. mask = aer->uncor_mask;
  198. tlp_header_valid = status & AER_LOG_TLP_MASKS;
  199. }
  200. layer = AER_GET_LAYER_ERROR(aer_severity, status);
  201. agent = AER_GET_AGENT(aer_severity, status);
  202. memset(&info, 0, sizeof(info));
  203. info.severity = aer_severity;
  204. info.status = status;
  205. info.mask = mask;
  206. info.first_error = PCI_ERR_CAP_FEP(aer->cap_control);
  207. pci_err(dev, "aer_status: 0x%08x, aer_mask: 0x%08x\n", status, mask);
  208. __aer_print_error(dev, &info);
  209. pci_err(dev, "aer_layer=%s, aer_agent=%s\n",
  210. aer_error_layer[layer], aer_agent_string[agent]);
  211. if (aer_severity != AER_CORRECTABLE)
  212. pci_err(dev, "aer_uncor_severity: 0x%08x\n",
  213. aer->uncor_severity);
  214. if (tlp_header_valid)
  215. __print_tlp_header(dev, &aer->header_log);
  216. trace_aer_event(dev_name(&dev->dev), (status & ~mask),
  217. aer_severity, tlp_header_valid, &aer->header_log);
  218. }
  219. #endif