err.c 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * This file implements the error recovery as a core part of PCIe error
  4. * reporting. When a PCIe error is delivered, an error message will be
  5. * collected and printed to console, then, an error recovery procedure
  6. * will be executed by following the PCI error recovery rules.
  7. *
  8. * Copyright (C) 2006 Intel Corp.
  9. * Tom Long Nguyen (tom.l.nguyen@intel.com)
  10. * Zhang Yanmin (yanmin.zhang@intel.com)
  11. */
  12. #include <linux/pci.h>
  13. #include <linux/module.h>
  14. #include <linux/kernel.h>
  15. #include <linux/errno.h>
  16. #include <linux/aer.h>
  17. #include "portdrv.h"
  18. #include "../pci.h"
  19. static pci_ers_result_t merge_result(enum pci_ers_result orig,
  20. enum pci_ers_result new)
  21. {
  22. if (new == PCI_ERS_RESULT_NO_AER_DRIVER)
  23. return PCI_ERS_RESULT_NO_AER_DRIVER;
  24. if (new == PCI_ERS_RESULT_NONE)
  25. return orig;
  26. switch (orig) {
  27. case PCI_ERS_RESULT_CAN_RECOVER:
  28. case PCI_ERS_RESULT_RECOVERED:
  29. orig = new;
  30. break;
  31. case PCI_ERS_RESULT_DISCONNECT:
  32. if (new == PCI_ERS_RESULT_NEED_RESET)
  33. orig = PCI_ERS_RESULT_NEED_RESET;
  34. break;
  35. default:
  36. break;
  37. }
  38. return orig;
  39. }
  40. static int report_error_detected(struct pci_dev *dev,
  41. enum pci_channel_state state,
  42. enum pci_ers_result *result)
  43. {
  44. pci_ers_result_t vote;
  45. const struct pci_error_handlers *err_handler;
  46. device_lock(&dev->dev);
  47. if (!pci_dev_set_io_state(dev, state) ||
  48. !dev->driver ||
  49. !dev->driver->err_handler ||
  50. !dev->driver->err_handler->error_detected) {
  51. /*
  52. * If any device in the subtree does not have an error_detected
  53. * callback, PCI_ERS_RESULT_NO_AER_DRIVER prevents subsequent
  54. * error callbacks of "any" device in the subtree, and will
  55. * exit in the disconnected error state.
  56. */
  57. if (dev->hdr_type != PCI_HEADER_TYPE_BRIDGE)
  58. vote = PCI_ERS_RESULT_NO_AER_DRIVER;
  59. else
  60. vote = PCI_ERS_RESULT_NONE;
  61. } else {
  62. err_handler = dev->driver->err_handler;
  63. vote = err_handler->error_detected(dev, state);
  64. }
  65. pci_uevent_ers(dev, vote);
  66. *result = merge_result(*result, vote);
  67. device_unlock(&dev->dev);
  68. return 0;
  69. }
  70. static int report_frozen_detected(struct pci_dev *dev, void *data)
  71. {
  72. return report_error_detected(dev, pci_channel_io_frozen, data);
  73. }
  74. static int report_normal_detected(struct pci_dev *dev, void *data)
  75. {
  76. return report_error_detected(dev, pci_channel_io_normal, data);
  77. }
  78. static int report_mmio_enabled(struct pci_dev *dev, void *data)
  79. {
  80. pci_ers_result_t vote, *result = data;
  81. const struct pci_error_handlers *err_handler;
  82. device_lock(&dev->dev);
  83. if (!dev->driver ||
  84. !dev->driver->err_handler ||
  85. !dev->driver->err_handler->mmio_enabled)
  86. goto out;
  87. err_handler = dev->driver->err_handler;
  88. vote = err_handler->mmio_enabled(dev);
  89. *result = merge_result(*result, vote);
  90. out:
  91. device_unlock(&dev->dev);
  92. return 0;
  93. }
  94. static int report_slot_reset(struct pci_dev *dev, void *data)
  95. {
  96. pci_ers_result_t vote, *result = data;
  97. const struct pci_error_handlers *err_handler;
  98. device_lock(&dev->dev);
  99. if (!dev->driver ||
  100. !dev->driver->err_handler ||
  101. !dev->driver->err_handler->slot_reset)
  102. goto out;
  103. err_handler = dev->driver->err_handler;
  104. vote = err_handler->slot_reset(dev);
  105. *result = merge_result(*result, vote);
  106. out:
  107. device_unlock(&dev->dev);
  108. return 0;
  109. }
  110. static int report_resume(struct pci_dev *dev, void *data)
  111. {
  112. const struct pci_error_handlers *err_handler;
  113. device_lock(&dev->dev);
  114. if (!pci_dev_set_io_state(dev, pci_channel_io_normal) ||
  115. !dev->driver ||
  116. !dev->driver->err_handler ||
  117. !dev->driver->err_handler->resume)
  118. goto out;
  119. err_handler = dev->driver->err_handler;
  120. err_handler->resume(dev);
  121. out:
  122. pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED);
  123. device_unlock(&dev->dev);
  124. return 0;
  125. }
  126. /**
  127. * default_reset_link - default reset function
  128. * @dev: pointer to pci_dev data structure
  129. *
  130. * Invoked when performing link reset on a Downstream Port or a
  131. * Root Port with no aer driver.
  132. */
  133. static pci_ers_result_t default_reset_link(struct pci_dev *dev)
  134. {
  135. int rc;
  136. rc = pci_bus_error_reset(dev);
  137. pci_printk(KERN_DEBUG, dev, "downstream link has been reset\n");
  138. return rc ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
  139. }
  140. static pci_ers_result_t reset_link(struct pci_dev *dev, u32 service)
  141. {
  142. pci_ers_result_t status;
  143. struct pcie_port_service_driver *driver = NULL;
  144. driver = pcie_port_find_service(dev, service);
  145. if (driver && driver->reset_link) {
  146. status = driver->reset_link(dev);
  147. } else if (dev->has_secondary_link) {
  148. status = default_reset_link(dev);
  149. } else {
  150. pci_printk(KERN_DEBUG, dev, "no link-reset support at upstream device %s\n",
  151. pci_name(dev));
  152. return PCI_ERS_RESULT_DISCONNECT;
  153. }
  154. if (status != PCI_ERS_RESULT_RECOVERED) {
  155. pci_printk(KERN_DEBUG, dev, "link reset at upstream device %s failed\n",
  156. pci_name(dev));
  157. return PCI_ERS_RESULT_DISCONNECT;
  158. }
  159. return status;
  160. }
  161. void pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state state,
  162. u32 service)
  163. {
  164. pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER;
  165. struct pci_bus *bus;
  166. /*
  167. * Error recovery runs on all subordinates of the first downstream port.
  168. * If the downstream port detected the error, it is cleared at the end.
  169. */
  170. if (!(pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT ||
  171. pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM))
  172. dev = dev->bus->self;
  173. bus = dev->subordinate;
  174. pci_dbg(dev, "broadcast error_detected message\n");
  175. if (state == pci_channel_io_frozen)
  176. pci_walk_bus(bus, report_frozen_detected, &status);
  177. else
  178. pci_walk_bus(bus, report_normal_detected, &status);
  179. if (state == pci_channel_io_frozen &&
  180. reset_link(dev, service) != PCI_ERS_RESULT_RECOVERED)
  181. goto failed;
  182. if (status == PCI_ERS_RESULT_CAN_RECOVER) {
  183. status = PCI_ERS_RESULT_RECOVERED;
  184. pci_dbg(dev, "broadcast mmio_enabled message\n");
  185. pci_walk_bus(bus, report_mmio_enabled, &status);
  186. }
  187. if (status == PCI_ERS_RESULT_NEED_RESET) {
  188. /*
  189. * TODO: Should call platform-specific
  190. * functions to reset slot before calling
  191. * drivers' slot_reset callbacks?
  192. */
  193. status = PCI_ERS_RESULT_RECOVERED;
  194. pci_dbg(dev, "broadcast slot_reset message\n");
  195. pci_walk_bus(bus, report_slot_reset, &status);
  196. }
  197. if (status != PCI_ERS_RESULT_RECOVERED)
  198. goto failed;
  199. pci_dbg(dev, "broadcast resume message\n");
  200. pci_walk_bus(bus, report_resume, &status);
  201. pci_aer_clear_device_status(dev);
  202. pci_cleanup_aer_uncorrect_error_status(dev);
  203. pci_info(dev, "AER: Device recovery successful\n");
  204. return;
  205. failed:
  206. pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT);
  207. /* TODO: Should kernel panic here? */
  208. pci_info(dev, "AER: Device recovery failed\n");
  209. }