/*
 * APEI Generic Hardware Error Source support
 *
 * Generic Hardware Error Source provides a way to report platform
 * hardware errors (such as those from a chipset). It works in the
 * so-called "Firmware First" mode: hardware errors are reported to
 * the firmware first, and the firmware then reports them to Linux.
 * This way, non-standard hardware error registers or non-standard
 * hardware links can be checked by the firmware to produce more
 * hardware error information for Linux.
 *
 * For more information about Generic Hardware Error Source, please
 * refer to ACPI Specification version 4.0, section 17.3.2.6
 *
 * Copyright 2010,2011 Intel Corp.
 *   Author: Huang Ying <ying.huang@intel.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version
 * 2 as published by the Free Software Foundation;
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */
#include <linux/kernel.h>
#include <linux/moduleparam.h>
#include <linux/init.h>
#include <linux/acpi.h>
#include <linux/io.h>
#include <linux/interrupt.h>
#include <linux/timer.h>
#include <linux/cper.h>
#include <linux/kdebug.h>
#include <linux/platform_device.h>
#include <linux/mutex.h>
#include <linux/ratelimit.h>
#include <linux/vmalloc.h>
#include <linux/irq_work.h>
#include <linux/llist.h>
#include <linux/genalloc.h>
#include <linux/pci.h>
#include <linux/aer.h>
#include <linux/nmi.h>
#include <linux/sched/clock.h>
#include <linux/uuid.h>
#include <linux/ras.h>

#include <acpi/actbl1.h>
#include <acpi/ghes.h>
#include <acpi/apei.h>
#include <asm/fixmap.h>
#include <asm/tlbflush.h>
#include <ras/ras_event.h>

#include "apei-internal.h"
#define GHES_PFX	"GHES: "

#define GHES_ESTATUS_MAX_SIZE		65536
#define GHES_ESOURCE_PREALLOC_MAX_SIZE	65536

#define GHES_ESTATUS_POOL_MIN_ALLOC_ORDER 3

/* This is just an estimation for memory pool allocation */
#define GHES_ESTATUS_CACHE_AVG_SIZE	512

#define GHES_ESTATUS_CACHES_SIZE	4

#define GHES_ESTATUS_IN_CACHE_MAX_NSEC	10000000000ULL
/* Prevent too many caches from being allocated because of RCU */
#define GHES_ESTATUS_CACHE_ALLOCED_MAX	(GHES_ESTATUS_CACHES_SIZE * 3 / 2)

#define GHES_ESTATUS_CACHE_LEN(estatus_len)			\
	(sizeof(struct ghes_estatus_cache) + (estatus_len))
#define GHES_ESTATUS_FROM_CACHE(estatus_cache)			\
	((struct acpi_hest_generic_status *)			\
	 ((struct ghes_estatus_cache *)(estatus_cache) + 1))

#define GHES_ESTATUS_NODE_LEN(estatus_len)			\
	(sizeof(struct ghes_estatus_node) + (estatus_len))
#define GHES_ESTATUS_FROM_NODE(estatus_node)			\
	((struct acpi_hest_generic_status *)			\
	 ((struct ghes_estatus_node *)(estatus_node) + 1))
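/*
 * Layout note (implied by the macros above, not spelled out in the
 * spec): a ghes_estatus_cache or ghes_estatus_node header is
 * allocated in one chunk together with its error status payload, so
 * the payload begins immediately after the header, and the
 * FROM_CACHE/FROM_NODE macros simply step over the header with
 * pointer arithmetic.
 */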
static inline bool is_hest_type_generic_v2(struct ghes *ghes)
{
	return ghes->generic->header.type == ACPI_HEST_TYPE_GENERIC_ERROR_V2;
}

/*
 * This driver isn't really modular, however for the time being,
 * continuing to use module_param is the easiest way to remain
 * compatible with existing boot arg use cases.
 */
bool ghes_disable;
module_param_named(disable, ghes_disable, bool, 0);

/*
 * All error sources notified with HED (Hardware Error Device) share a
 * single notifier callback, so they need to be linked and checked one
 * by one. This holds true for NMI too.
 *
 * RCU is used for these lists, so ghes_list_mutex is only used for
 * list changing, not for traversing.
 */
static LIST_HEAD(ghes_hed);
static DEFINE_MUTEX(ghes_list_mutex);

/*
 * The memory area used to transfer hardware error information from
 * BIOS to Linux can be determined only in the NMI, IRQ or timer
 * handler, but a general ioremap can not be used in atomic context,
 * so the fixmap is used instead.
 *
 * These 2 spinlocks are used to prevent the fixmap entries from being
 * used simultaneously.
 */
static DEFINE_RAW_SPINLOCK(ghes_ioremap_lock_nmi);
static DEFINE_SPINLOCK(ghes_ioremap_lock_irq);

static struct gen_pool *ghes_estatus_pool;
static unsigned long ghes_estatus_pool_size_request;

static struct ghes_estatus_cache *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE];
static atomic_t ghes_estatus_cache_alloced;

static int ghes_panic_timeout __read_mostly = 30;
static void __iomem *ghes_ioremap_pfn_nmi(u64 pfn)
{
	phys_addr_t paddr;
	pgprot_t prot;

	paddr = pfn << PAGE_SHIFT;
	prot = arch_apei_get_mem_attribute(paddr);
	__set_fixmap(FIX_APEI_GHES_NMI, paddr, prot);

	return (void __iomem *) fix_to_virt(FIX_APEI_GHES_NMI);
}

static void __iomem *ghes_ioremap_pfn_irq(u64 pfn)
{
	phys_addr_t paddr;
	pgprot_t prot;

	paddr = pfn << PAGE_SHIFT;
	prot = arch_apei_get_mem_attribute(paddr);
	__set_fixmap(FIX_APEI_GHES_IRQ, paddr, prot);

	return (void __iomem *) fix_to_virt(FIX_APEI_GHES_IRQ);
}

static void ghes_iounmap_nmi(void)
{
	clear_fixmap(FIX_APEI_GHES_NMI);
}

static void ghes_iounmap_irq(void)
{
	clear_fixmap(FIX_APEI_GHES_IRQ);
}
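/*
 * Why two fixmap slots: an NMI can arrive while the IRQ/timer path
 * still has FIX_APEI_GHES_IRQ mapped, so the NMI path needs its own
 * entry (FIX_APEI_GHES_NMI) and its own lock; otherwise the NMI
 * handler could remap the slot out from under the interrupted code.
 */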
static int ghes_estatus_pool_init(void)
{
	ghes_estatus_pool = gen_pool_create(GHES_ESTATUS_POOL_MIN_ALLOC_ORDER, -1);
	if (!ghes_estatus_pool)
		return -ENOMEM;
	return 0;
}

static void ghes_estatus_pool_free_chunk_page(struct gen_pool *pool,
					      struct gen_pool_chunk *chunk,
					      void *data)
{
	free_page(chunk->start_addr);
}

static void ghes_estatus_pool_exit(void)
{
	gen_pool_for_each_chunk(ghes_estatus_pool,
				ghes_estatus_pool_free_chunk_page, NULL);
	gen_pool_destroy(ghes_estatus_pool);
}

static int ghes_estatus_pool_expand(unsigned long len)
{
	unsigned long i, pages, size, addr;
	int ret;

	ghes_estatus_pool_size_request += PAGE_ALIGN(len);
	size = gen_pool_size(ghes_estatus_pool);
	if (size >= ghes_estatus_pool_size_request)
		return 0;
	pages = (ghes_estatus_pool_size_request - size) / PAGE_SIZE;
	for (i = 0; i < pages; i++) {
		addr = __get_free_page(GFP_KERNEL);
		if (!addr)
			return -ENOMEM;
		ret = gen_pool_add(ghes_estatus_pool, addr, PAGE_SIZE, -1);
		if (ret)
			return ret;
	}

	return 0;
}
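/*
 * Note on the accounting above: ghes_estatus_pool_size_request is a
 * running total of all callers' requests. Expanding only adds whole
 * pages until the pool size catches up with that total, while
 * ghes_estatus_pool_shrink() (further below) merely lowers the
 * request; pages already added stay in the pool.
 */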
static int map_gen_v2(struct ghes *ghes)
{
	return apei_map_generic_address(&ghes->generic_v2->read_ack_register);
}

static void unmap_gen_v2(struct ghes *ghes)
{
	apei_unmap_generic_address(&ghes->generic_v2->read_ack_register);
}

static struct ghes *ghes_new(struct acpi_hest_generic *generic)
{
	struct ghes *ghes;
	unsigned int error_block_length;
	int rc;

	ghes = kzalloc(sizeof(*ghes), GFP_KERNEL);
	if (!ghes)
		return ERR_PTR(-ENOMEM);

	ghes->generic = generic;
	if (is_hest_type_generic_v2(ghes)) {
		rc = map_gen_v2(ghes);
		if (rc)
			goto err_free;
	}

	rc = apei_map_generic_address(&generic->error_status_address);
	if (rc)
		goto err_unmap_read_ack_addr;
	error_block_length = generic->error_block_length;
	if (error_block_length > GHES_ESTATUS_MAX_SIZE) {
		pr_warning(FW_WARN GHES_PFX
			   "Error status block length is too long: %u for "
			   "generic hardware error source: %d.\n",
			   error_block_length, generic->header.source_id);
		error_block_length = GHES_ESTATUS_MAX_SIZE;
	}
	ghes->estatus = kmalloc(error_block_length, GFP_KERNEL);
	if (!ghes->estatus) {
		rc = -ENOMEM;
		goto err_unmap_status_addr;
	}

	return ghes;

err_unmap_status_addr:
	apei_unmap_generic_address(&generic->error_status_address);
err_unmap_read_ack_addr:
	if (is_hest_type_generic_v2(ghes))
		unmap_gen_v2(ghes);
err_free:
	kfree(ghes);
	return ERR_PTR(rc);
}

static void ghes_fini(struct ghes *ghes)
{
	kfree(ghes->estatus);
	apei_unmap_generic_address(&ghes->generic->error_status_address);
	if (is_hest_type_generic_v2(ghes))
		unmap_gen_v2(ghes);
}
static inline int ghes_severity(int severity)
{
	switch (severity) {
	case CPER_SEV_INFORMATIONAL:
		return GHES_SEV_NO;
	case CPER_SEV_CORRECTED:
		return GHES_SEV_CORRECTED;
	case CPER_SEV_RECOVERABLE:
		return GHES_SEV_RECOVERABLE;
	case CPER_SEV_FATAL:
		return GHES_SEV_PANIC;
	default:
		/* Unknown, go panic */
		return GHES_SEV_PANIC;
	}
}
static void ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len,
				  int from_phys)
{
	void __iomem *vaddr;
	unsigned long flags = 0;
	int in_nmi = in_nmi();
	u64 offset;
	u32 trunk;

	while (len > 0) {
		offset = paddr - (paddr & PAGE_MASK);
		if (in_nmi) {
			raw_spin_lock(&ghes_ioremap_lock_nmi);
			vaddr = ghes_ioremap_pfn_nmi(paddr >> PAGE_SHIFT);
		} else {
			spin_lock_irqsave(&ghes_ioremap_lock_irq, flags);
			vaddr = ghes_ioremap_pfn_irq(paddr >> PAGE_SHIFT);
		}
		trunk = PAGE_SIZE - offset;
		trunk = min(trunk, len);
		if (from_phys)
			memcpy_fromio(buffer, vaddr + offset, trunk);
		else
			memcpy_toio(vaddr + offset, buffer, trunk);
		len -= trunk;
		paddr += trunk;
		buffer += trunk;
		if (in_nmi) {
			ghes_iounmap_nmi();
			raw_spin_unlock(&ghes_ioremap_lock_nmi);
		} else {
			ghes_iounmap_irq();
			spin_unlock_irqrestore(&ghes_ioremap_lock_irq, flags);
		}
	}
}
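/*
 * The copy above goes page by page because only one fixmap slot is
 * available per context: each iteration maps a single physical page,
 * copies at most up to the page boundary, then unmaps before moving
 * on, so an error status block may span any number of pages.
 */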
static int ghes_read_estatus(struct ghes *ghes, int silent)
{
	struct acpi_hest_generic *g = ghes->generic;
	u64 buf_paddr;
	u32 len;
	int rc;

	rc = apei_read(&buf_paddr, &g->error_status_address);
	if (rc) {
		if (!silent && printk_ratelimit())
			pr_warning(FW_WARN GHES_PFX
				   "Failed to read error status block address for hardware error source: %d.\n",
				   g->header.source_id);
		return -EIO;
	}
	if (!buf_paddr)
		return -ENOENT;

	ghes_copy_tofrom_phys(ghes->estatus, buf_paddr,
			      sizeof(*ghes->estatus), 1);
	if (!ghes->estatus->block_status)
		return -ENOENT;

	ghes->buffer_paddr = buf_paddr;
	ghes->flags |= GHES_TO_CLEAR;

	rc = -EIO;
	len = cper_estatus_len(ghes->estatus);
	if (len < sizeof(*ghes->estatus))
		goto err_read_block;
	if (len > ghes->generic->error_block_length)
		goto err_read_block;
	if (cper_estatus_check_header(ghes->estatus))
		goto err_read_block;
	ghes_copy_tofrom_phys(ghes->estatus + 1,
			      buf_paddr + sizeof(*ghes->estatus),
			      len - sizeof(*ghes->estatus), 1);
	if (cper_estatus_check(ghes->estatus))
		goto err_read_block;
	rc = 0;

err_read_block:
	if (rc && !silent && printk_ratelimit())
		pr_warning(FW_WARN GHES_PFX
			   "Failed to read error status block!\n");
	return rc;
}
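/*
 * Reading happens in two steps: the fixed-size header is fetched
 * first so that cper_estatus_len() can be trusted (after the length
 * and header checks above), and only then is the variable-length
 * remainder copied in. Clearing, below, writes back just
 * block_status to tell the firmware the record has been consumed.
 */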
static void ghes_clear_estatus(struct ghes *ghes)
{
	ghes->estatus->block_status = 0;
	if (!(ghes->flags & GHES_TO_CLEAR))
		return;
	ghes_copy_tofrom_phys(ghes->estatus, ghes->buffer_paddr,
			      sizeof(ghes->estatus->block_status), 0);
	ghes->flags &= ~GHES_TO_CLEAR;
}
static void ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, int sev)
{
#ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE
	unsigned long pfn;
	int flags = -1;
	int sec_sev = ghes_severity(gdata->error_severity);
	struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);

	if (!(mem_err->validation_bits & CPER_MEM_VALID_PA))
		return;

	pfn = mem_err->physical_addr >> PAGE_SHIFT;
	if (!pfn_valid(pfn)) {
		pr_warn_ratelimited(FW_WARN GHES_PFX
				    "Invalid address in generic error data: %#llx\n",
				    mem_err->physical_addr);
		return;
	}

	/* These are the only two cases that can be handled properly by now */
	if (sec_sev == GHES_SEV_CORRECTED &&
	    (gdata->flags & CPER_SEC_ERROR_THRESHOLD_EXCEEDED))
		flags = MF_SOFT_OFFLINE;
	if (sev == GHES_SEV_RECOVERABLE && sec_sev == GHES_SEV_RECOVERABLE)
		flags = 0;

	if (flags != -1)
		memory_failure_queue(pfn, flags);
#endif
}
static void ghes_do_proc(struct ghes *ghes,
			 const struct acpi_hest_generic_status *estatus)
{
	int sev, sec_sev;
	struct acpi_hest_generic_data *gdata;
	guid_t *sec_type;
	guid_t *fru_id = &NULL_UUID_LE;
	char *fru_text = "";

	sev = ghes_severity(estatus->error_severity);
	apei_estatus_for_each_section(estatus, gdata) {
		sec_type = (guid_t *)gdata->section_type;
		sec_sev = ghes_severity(gdata->error_severity);
		if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
			fru_id = (guid_t *)gdata->fru_id;

		if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
			fru_text = gdata->fru_text;

		if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
			struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);

			ghes_edac_report_mem_error(ghes, sev, mem_err);

			arch_apei_report_mem_error(sev, mem_err);
			ghes_handle_memory_failure(gdata, sev);
		}
#ifdef CONFIG_ACPI_APEI_PCIEAER
		else if (guid_equal(sec_type, &CPER_SEC_PCIE)) {
			struct cper_sec_pcie *pcie_err = acpi_hest_get_payload(gdata);

			if (sev == GHES_SEV_RECOVERABLE &&
			    sec_sev == GHES_SEV_RECOVERABLE &&
			    pcie_err->validation_bits & CPER_PCIE_VALID_DEVICE_ID &&
			    pcie_err->validation_bits & CPER_PCIE_VALID_AER_INFO) {
				unsigned int devfn;
				int aer_severity;

				devfn = PCI_DEVFN(pcie_err->device_id.device,
						  pcie_err->device_id.function);
				aer_severity = cper_severity_to_aer(gdata->error_severity);

				/*
				 * If the firmware has reset the component to
				 * contain the error, we must reinitialize it
				 * before use, so treat it as a fatal AER
				 * error.
				 */
				if (gdata->flags & CPER_SEC_RESET)
					aer_severity = AER_FATAL;

				aer_recover_queue(pcie_err->device_id.segment,
						  pcie_err->device_id.bus,
						  devfn, aer_severity,
						  (struct aer_capability_regs *)
						  pcie_err->aer_info);
			}
		}
#endif
		else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) {
			struct cper_sec_proc_arm *err = acpi_hest_get_payload(gdata);

			log_arm_hw_error(err);
		} else {
			void *err = acpi_hest_get_payload(gdata);

			log_non_standard_event(sec_type, fru_id, fru_text,
					       sec_sev, err,
					       gdata->error_data_length);
		}
	}
}
static void __ghes_print_estatus(const char *pfx,
				 const struct acpi_hest_generic *generic,
				 const struct acpi_hest_generic_status *estatus)
{
	static atomic_t seqno;
	unsigned int curr_seqno;
	char pfx_seq[64];

	if (pfx == NULL) {
		if (ghes_severity(estatus->error_severity) <=
		    GHES_SEV_CORRECTED)
			pfx = KERN_WARNING;
		else
			pfx = KERN_ERR;
	}
	curr_seqno = atomic_inc_return(&seqno);
	snprintf(pfx_seq, sizeof(pfx_seq), "%s{%u}" HW_ERR, pfx, curr_seqno);
	printk("%s""Hardware error from APEI Generic Hardware Error Source: %d\n",
	       pfx_seq, generic->header.source_id);
	cper_estatus_print(pfx_seq, estatus);
}

static int ghes_print_estatus(const char *pfx,
			      const struct acpi_hest_generic *generic,
			      const struct acpi_hest_generic_status *estatus)
{
	/* Not more than 2 messages every 5 seconds */
	static DEFINE_RATELIMIT_STATE(ratelimit_corrected, 5*HZ, 2);
	static DEFINE_RATELIMIT_STATE(ratelimit_uncorrected, 5*HZ, 2);
	struct ratelimit_state *ratelimit;

	if (ghes_severity(estatus->error_severity) <= GHES_SEV_CORRECTED)
		ratelimit = &ratelimit_corrected;
	else
		ratelimit = &ratelimit_uncorrected;
	if (__ratelimit(ratelimit)) {
		__ghes_print_estatus(pfx, generic, estatus);
		return 1;
	}
	return 0;
}
/*
 * GHES error status reporting throttle: cache recently seen status
 * blocks so that more kinds of errors get reported, instead of just
 * the most frequently occurring ones.
 */
static int ghes_estatus_cached(struct acpi_hest_generic_status *estatus)
{
	u32 len;
	int i, cached = 0;
	unsigned long long now;
	struct ghes_estatus_cache *cache;
	struct acpi_hest_generic_status *cache_estatus;

	len = cper_estatus_len(estatus);
	rcu_read_lock();
	for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) {
		cache = rcu_dereference(ghes_estatus_caches[i]);
		if (cache == NULL)
			continue;
		if (len != cache->estatus_len)
			continue;
		cache_estatus = GHES_ESTATUS_FROM_CACHE(cache);
		if (memcmp(estatus, cache_estatus, len))
			continue;
		atomic_inc(&cache->count);
		now = sched_clock();
		if (now - cache->time_in < GHES_ESTATUS_IN_CACHE_MAX_NSEC)
			cached = 1;
		break;
	}
	rcu_read_unlock();
	return cached;
}
static struct ghes_estatus_cache *ghes_estatus_cache_alloc(
	struct acpi_hest_generic *generic,
	struct acpi_hest_generic_status *estatus)
{
	int alloced;
	u32 len, cache_len;
	struct ghes_estatus_cache *cache;
	struct acpi_hest_generic_status *cache_estatus;

	alloced = atomic_add_return(1, &ghes_estatus_cache_alloced);
	if (alloced > GHES_ESTATUS_CACHE_ALLOCED_MAX) {
		atomic_dec(&ghes_estatus_cache_alloced);
		return NULL;
	}
	len = cper_estatus_len(estatus);
	cache_len = GHES_ESTATUS_CACHE_LEN(len);
	cache = (void *)gen_pool_alloc(ghes_estatus_pool, cache_len);
	if (!cache) {
		atomic_dec(&ghes_estatus_cache_alloced);
		return NULL;
	}
	cache_estatus = GHES_ESTATUS_FROM_CACHE(cache);
	memcpy(cache_estatus, estatus, len);
	cache->estatus_len = len;
	atomic_set(&cache->count, 0);
	cache->generic = generic;
	cache->time_in = sched_clock();
	return cache;
}

static void ghes_estatus_cache_free(struct ghes_estatus_cache *cache)
{
	u32 len;

	len = cper_estatus_len(GHES_ESTATUS_FROM_CACHE(cache));
	len = GHES_ESTATUS_CACHE_LEN(len);
	gen_pool_free(ghes_estatus_pool, (unsigned long)cache, len);
	atomic_dec(&ghes_estatus_cache_alloced);
}

static void ghes_estatus_cache_rcu_free(struct rcu_head *head)
{
	struct ghes_estatus_cache *cache;

	cache = container_of(head, struct ghes_estatus_cache, rcu);
	ghes_estatus_cache_free(cache);
}
static void ghes_estatus_cache_add(
	struct acpi_hest_generic *generic,
	struct acpi_hest_generic_status *estatus)
{
	int i, slot = -1, count;
	unsigned long long now, duration, period, max_period = 0;
	struct ghes_estatus_cache *cache, *slot_cache = NULL, *new_cache;

	new_cache = ghes_estatus_cache_alloc(generic, estatus);
	if (new_cache == NULL)
		return;
	rcu_read_lock();
	now = sched_clock();
	for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) {
		cache = rcu_dereference(ghes_estatus_caches[i]);
		if (cache == NULL) {
			slot = i;
			slot_cache = NULL;
			break;
		}
		duration = now - cache->time_in;
		if (duration >= GHES_ESTATUS_IN_CACHE_MAX_NSEC) {
			slot = i;
			slot_cache = cache;
			break;
		}
		count = atomic_read(&cache->count);
		period = duration;
		do_div(period, (count + 1));
		if (period > max_period) {
			max_period = period;
			slot = i;
			slot_cache = cache;
		}
	}
	/* new_cache must be put into array after its contents are written */
	smp_wmb();
	if (slot != -1 && cmpxchg(ghes_estatus_caches + slot,
				  slot_cache, new_cache) == slot_cache) {
		if (slot_cache)
			call_rcu(&slot_cache->rcu, ghes_estatus_cache_rcu_free);
	} else
		ghes_estatus_cache_free(new_cache);
	rcu_read_unlock();
}
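/*
 * Slot selection above is a lockless eviction policy: an empty or
 * expired slot is taken first; otherwise the victim is the entry with
 * the longest average interval between hits (duration / (count + 1)),
 * i.e. the least frequently matched one. The cmpxchg() makes the
 * replacement safe against concurrent adders; a loser simply frees
 * its new_cache.
 */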
static int ghes_ack_error(struct acpi_hest_generic_v2 *gv2)
{
	int rc;
	u64 val = 0;

	rc = apei_read(&val, &gv2->read_ack_register);
	if (rc)
		return rc;

	val &= gv2->read_ack_preserve << gv2->read_ack_register.bit_offset;
	val |= gv2->read_ack_write << gv2->read_ack_register.bit_offset;

	return apei_write(val, &gv2->read_ack_register);
}
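/*
 * Illustrative example (hypothetical register values, not from the
 * spec): with bit_offset = 0, read_ack_preserve = 0x0 and
 * read_ack_write = 0x1, the read-modify-write above clears every bit
 * that was read and then sets bit 0, which is how the firmware learns
 * that the error status block may be reused.
 */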
static void __ghes_panic(struct ghes *ghes)
{
	__ghes_print_estatus(KERN_EMERG, ghes->generic, ghes->estatus);

	/* reboot to log the error! */
	if (!panic_timeout)
		panic_timeout = ghes_panic_timeout;
	panic("Fatal hardware error!");
}
static int ghes_proc(struct ghes *ghes)
{
	int rc;

	rc = ghes_read_estatus(ghes, 0);
	if (rc)
		goto out;

	if (ghes_severity(ghes->estatus->error_severity) >= GHES_SEV_PANIC) {
		__ghes_panic(ghes);
	}

	if (!ghes_estatus_cached(ghes->estatus)) {
		if (ghes_print_estatus(NULL, ghes->generic, ghes->estatus))
			ghes_estatus_cache_add(ghes->generic, ghes->estatus);
	}
	ghes_do_proc(ghes, ghes->estatus);

out:
	ghes_clear_estatus(ghes);

	if (rc == -ENOENT)
		return rc;

	/*
	 * GHESv2 type HEST entries introduce support for error acknowledgment,
	 * so only acknowledge the error if this support is present.
	 */
	if (is_hest_type_generic_v2(ghes))
		return ghes_ack_error(ghes->generic_v2);

	return rc;
}
static void ghes_add_timer(struct ghes *ghes)
{
	struct acpi_hest_generic *g = ghes->generic;
	unsigned long expire;

	if (!g->notify.poll_interval) {
		pr_warning(FW_WARN GHES_PFX "Poll interval is 0 for generic hardware error source: %d, disabled.\n",
			   g->header.source_id);
		return;
	}
	expire = jiffies + msecs_to_jiffies(g->notify.poll_interval);
	ghes->timer.expires = round_jiffies_relative(expire);
	add_timer(&ghes->timer);
}

static void ghes_poll_func(struct timer_list *t)
{
	struct ghes *ghes = from_timer(ghes, t, timer);

	ghes_proc(ghes);
	if (!(ghes->flags & GHES_EXITING))
		ghes_add_timer(ghes);
}

static irqreturn_t ghes_irq_func(int irq, void *data)
{
	struct ghes *ghes = data;
	int rc;

	rc = ghes_proc(ghes);
	if (rc)
		return IRQ_NONE;

	return IRQ_HANDLED;
}

static int ghes_notify_hed(struct notifier_block *this, unsigned long event,
			   void *data)
{
	struct ghes *ghes;
	int ret = NOTIFY_DONE;

	rcu_read_lock();
	list_for_each_entry_rcu(ghes, &ghes_hed, list) {
		if (!ghes_proc(ghes))
			ret = NOTIFY_OK;
	}
	rcu_read_unlock();

	return ret;
}

static struct notifier_block ghes_notifier_hed = {
	.notifier_call = ghes_notify_hed,
};
#ifdef CONFIG_ACPI_APEI_SEA
static LIST_HEAD(ghes_sea);

/*
 * Return 0 only if one of the SEA error sources successfully reported an error
 * record sent from the firmware.
 */
int ghes_notify_sea(void)
{
	struct ghes *ghes;
	int ret = -ENOENT;

	rcu_read_lock();
	list_for_each_entry_rcu(ghes, &ghes_sea, list) {
		if (!ghes_proc(ghes))
			ret = 0;
	}
	rcu_read_unlock();
	return ret;
}

static void ghes_sea_add(struct ghes *ghes)
{
	mutex_lock(&ghes_list_mutex);
	list_add_rcu(&ghes->list, &ghes_sea);
	mutex_unlock(&ghes_list_mutex);
}

static void ghes_sea_remove(struct ghes *ghes)
{
	mutex_lock(&ghes_list_mutex);
	list_del_rcu(&ghes->list);
	mutex_unlock(&ghes_list_mutex);
	synchronize_rcu();
}
#else /* CONFIG_ACPI_APEI_SEA */
static inline void ghes_sea_add(struct ghes *ghes) { }
static inline void ghes_sea_remove(struct ghes *ghes) { }
#endif /* CONFIG_ACPI_APEI_SEA */
#ifdef CONFIG_HAVE_ACPI_APEI_NMI
/*
 * printk is not safe in NMI context. So in the NMI handler, we
 * allocate the required memory from the lock-less memory allocator
 * (ghes_estatus_pool), save the estatus into it, put it onto the
 * lock-less list (ghes_estatus_llist), then delay printk into IRQ
 * context via irq_work (ghes_proc_irq_work).
 * ghes_estatus_pool_size_request records the pool size required by
 * all NMI error sources.
 */
static struct llist_head ghes_estatus_llist;
static struct irq_work ghes_proc_irq_work;

/*
 * An NMI may be triggered on any CPU, so ghes_in_nmi is used to
 * ensure that there is only one concurrent NMI reader.
 */
static atomic_t ghes_in_nmi = ATOMIC_INIT(0);

static LIST_HEAD(ghes_nmi);
static void ghes_proc_in_irq(struct irq_work *irq_work)
{
	struct llist_node *llnode, *next;
	struct ghes_estatus_node *estatus_node;
	struct acpi_hest_generic *generic;
	struct acpi_hest_generic_status *estatus;
	u32 len, node_len;

	llnode = llist_del_all(&ghes_estatus_llist);
	/*
	 * The llist holds the estatus entries in reverse time order,
	 * so revert it back to the proper order.
	 */
	llnode = llist_reverse_order(llnode);
	while (llnode) {
		next = llnode->next;
		estatus_node = llist_entry(llnode, struct ghes_estatus_node,
					   llnode);
		estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
		len = cper_estatus_len(estatus);
		node_len = GHES_ESTATUS_NODE_LEN(len);
		ghes_do_proc(estatus_node->ghes, estatus);
		if (!ghes_estatus_cached(estatus)) {
			generic = estatus_node->generic;
			if (ghes_print_estatus(NULL, generic, estatus))
				ghes_estatus_cache_add(generic, estatus);
		}
		gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node,
			      node_len);
		llnode = next;
	}
}
static void ghes_print_queued_estatus(void)
{
	struct llist_node *llnode;
	struct ghes_estatus_node *estatus_node;
	struct acpi_hest_generic *generic;
	struct acpi_hest_generic_status *estatus;
	u32 len, node_len;

	llnode = llist_del_all(&ghes_estatus_llist);
	/*
	 * The llist holds the estatus entries in reverse time order,
	 * so revert it back to the proper order.
	 */
	llnode = llist_reverse_order(llnode);
	while (llnode) {
		estatus_node = llist_entry(llnode, struct ghes_estatus_node,
					   llnode);
		estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
		len = cper_estatus_len(estatus);
		node_len = GHES_ESTATUS_NODE_LEN(len);
		generic = estatus_node->generic;
		ghes_print_estatus(NULL, generic, estatus);
		llnode = llnode->next;
	}
}
/* Save estatus for further processing in IRQ context */
static void __process_error(struct ghes *ghes)
{
#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
	u32 len, node_len;
	struct ghes_estatus_node *estatus_node;
	struct acpi_hest_generic_status *estatus;

	if (ghes_estatus_cached(ghes->estatus))
		return;

	len = cper_estatus_len(ghes->estatus);
	node_len = GHES_ESTATUS_NODE_LEN(len);

	estatus_node = (void *)gen_pool_alloc(ghes_estatus_pool, node_len);
	if (!estatus_node)
		return;

	estatus_node->ghes = ghes;
	estatus_node->generic = ghes->generic;
	estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
	memcpy(estatus, ghes->estatus, len);
	llist_add(&estatus_node->llnode, &ghes_estatus_llist);
#endif
}
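/*
 * Everything in __process_error() is NMI-safe by construction:
 * gen_pool_alloc() on this pool and llist_add() are both lock-less
 * (given an NMI-safe cmpxchg, hence the #ifdef), which is why only
 * queuing happens here and the heavy lifting is deferred to
 * ghes_proc_in_irq() via irq_work.
 */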
static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs)
{
	struct ghes *ghes;
	int sev, ret = NMI_DONE;

	if (!atomic_add_unless(&ghes_in_nmi, 1, 1))
		return ret;

	list_for_each_entry_rcu(ghes, &ghes_nmi, list) {
		if (ghes_read_estatus(ghes, 1)) {
			ghes_clear_estatus(ghes);
			continue;
		} else {
			ret = NMI_HANDLED;
		}

		sev = ghes_severity(ghes->estatus->error_severity);
		if (sev >= GHES_SEV_PANIC) {
			oops_begin();
			ghes_print_queued_estatus();
			__ghes_panic(ghes);
		}

		if (!(ghes->flags & GHES_TO_CLEAR))
			continue;

		__process_error(ghes);
		ghes_clear_estatus(ghes);
	}

#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
	if (ret == NMI_HANDLED)
		irq_work_queue(&ghes_proc_irq_work);
#endif
	atomic_dec(&ghes_in_nmi);
	return ret;
}
static unsigned long ghes_esource_prealloc_size(
	const struct acpi_hest_generic *generic)
{
	unsigned long block_length, prealloc_records, prealloc_size;

	block_length = min_t(unsigned long, generic->error_block_length,
			     GHES_ESTATUS_MAX_SIZE);
	prealloc_records = max_t(unsigned long,
				 generic->records_to_preallocate, 1);
	prealloc_size = min_t(unsigned long, block_length * prealloc_records,
			      GHES_ESOURCE_PREALLOC_MAX_SIZE);

	return prealloc_size;
}
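/*
 * Worked example (hypothetical numbers): a source advertising an
 * error_block_length of 4 KiB and records_to_preallocate of 4 asks
 * for 16 KiB of pool space; a source advertising 1 MiB blocks would
 * first be clamped to GHES_ESTATUS_MAX_SIZE (64 KiB) per block and
 * then to GHES_ESOURCE_PREALLOC_MAX_SIZE (64 KiB) overall.
 */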
static void ghes_estatus_pool_shrink(unsigned long len)
{
	ghes_estatus_pool_size_request -= PAGE_ALIGN(len);
}
static void ghes_nmi_add(struct ghes *ghes)
{
	unsigned long len;

	len = ghes_esource_prealloc_size(ghes->generic);
	ghes_estatus_pool_expand(len);
	mutex_lock(&ghes_list_mutex);
	if (list_empty(&ghes_nmi))
		register_nmi_handler(NMI_LOCAL, ghes_notify_nmi, 0, "ghes");
	list_add_rcu(&ghes->list, &ghes_nmi);
	mutex_unlock(&ghes_list_mutex);
}

static void ghes_nmi_remove(struct ghes *ghes)
{
	unsigned long len;

	mutex_lock(&ghes_list_mutex);
	list_del_rcu(&ghes->list);
	if (list_empty(&ghes_nmi))
		unregister_nmi_handler(NMI_LOCAL, "ghes");
	mutex_unlock(&ghes_list_mutex);
	/*
	 * To synchronize with NMI handler, ghes can only be
	 * freed after NMI handler finishes.
	 */
	synchronize_rcu();
	len = ghes_esource_prealloc_size(ghes->generic);
	ghes_estatus_pool_shrink(len);
}

static void ghes_nmi_init_cxt(void)
{
	init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq);
}
#else /* CONFIG_HAVE_ACPI_APEI_NMI */
static inline void ghes_nmi_add(struct ghes *ghes) { }
static inline void ghes_nmi_remove(struct ghes *ghes) { }
static inline void ghes_nmi_init_cxt(void) { }
#endif /* CONFIG_HAVE_ACPI_APEI_NMI */
static int ghes_probe(struct platform_device *ghes_dev)
{
	struct acpi_hest_generic *generic;
	struct ghes *ghes = NULL;
	int rc = -EINVAL;

	generic = *(struct acpi_hest_generic **)ghes_dev->dev.platform_data;
	if (!generic->enabled)
		return -ENODEV;

	switch (generic->notify.type) {
	case ACPI_HEST_NOTIFY_POLLED:
	case ACPI_HEST_NOTIFY_EXTERNAL:
	case ACPI_HEST_NOTIFY_SCI:
	case ACPI_HEST_NOTIFY_GSIV:
	case ACPI_HEST_NOTIFY_GPIO:
		break;
	case ACPI_HEST_NOTIFY_SEA:
		if (!IS_ENABLED(CONFIG_ACPI_APEI_SEA)) {
			pr_warn(GHES_PFX "Generic hardware error source: %d notified via SEA is not supported\n",
				generic->header.source_id);
			rc = -ENOTSUPP;
			goto err;
		}
		break;
	case ACPI_HEST_NOTIFY_NMI:
		if (!IS_ENABLED(CONFIG_HAVE_ACPI_APEI_NMI)) {
			pr_warn(GHES_PFX "Generic hardware error source: %d notified via NMI interrupt is not supported!\n",
				generic->header.source_id);
			goto err;
		}
		break;
	case ACPI_HEST_NOTIFY_LOCAL:
		pr_warning(GHES_PFX "Generic hardware error source: %d notified via local interrupt is not supported!\n",
			   generic->header.source_id);
		goto err;
	default:
		pr_warning(FW_WARN GHES_PFX "Unknown notification type: %u for generic hardware error source: %d\n",
			   generic->notify.type, generic->header.source_id);
		goto err;
	}

	rc = -EIO;
	if (generic->error_block_length <
	    sizeof(struct acpi_hest_generic_status)) {
		pr_warning(FW_BUG GHES_PFX "Invalid error block length: %u for generic hardware error source: %d\n",
			   generic->error_block_length,
			   generic->header.source_id);
		goto err;
	}
	ghes = ghes_new(generic);
	if (IS_ERR(ghes)) {
		rc = PTR_ERR(ghes);
		ghes = NULL;
		goto err;
	}

	rc = ghes_edac_register(ghes, &ghes_dev->dev);
	if (rc < 0)
		goto err;

	switch (generic->notify.type) {
	case ACPI_HEST_NOTIFY_POLLED:
		timer_setup(&ghes->timer, ghes_poll_func, TIMER_DEFERRABLE);
		ghes_add_timer(ghes);
		break;
	case ACPI_HEST_NOTIFY_EXTERNAL:
		/* External interrupt vector is GSI */
		rc = acpi_gsi_to_irq(generic->notify.vector, &ghes->irq);
		if (rc) {
			pr_err(GHES_PFX "Failed to map GSI to IRQ for generic hardware error source: %d\n",
			       generic->header.source_id);
			goto err_edac_unreg;
		}
		rc = request_irq(ghes->irq, ghes_irq_func, IRQF_SHARED,
				 "GHES IRQ", ghes);
		if (rc) {
			pr_err(GHES_PFX "Failed to register IRQ for generic hardware error source: %d\n",
			       generic->header.source_id);
			goto err_edac_unreg;
		}
		break;
	case ACPI_HEST_NOTIFY_SCI:
	case ACPI_HEST_NOTIFY_GSIV:
	case ACPI_HEST_NOTIFY_GPIO:
		mutex_lock(&ghes_list_mutex);
		if (list_empty(&ghes_hed))
			register_acpi_hed_notifier(&ghes_notifier_hed);
		list_add_rcu(&ghes->list, &ghes_hed);
		mutex_unlock(&ghes_list_mutex);
		break;
	case ACPI_HEST_NOTIFY_SEA:
		ghes_sea_add(ghes);
		break;
	case ACPI_HEST_NOTIFY_NMI:
		ghes_nmi_add(ghes);
		break;
	default:
		BUG();
	}
	platform_set_drvdata(ghes_dev, ghes);

	/* Handle any pending errors right away */
	ghes_proc(ghes);

	return 0;
err_edac_unreg:
	ghes_edac_unregister(ghes);
err:
	if (ghes) {
		ghes_fini(ghes);
		kfree(ghes);
	}
	return rc;
}
static int ghes_remove(struct platform_device *ghes_dev)
{
	struct ghes *ghes;
	struct acpi_hest_generic *generic;

	ghes = platform_get_drvdata(ghes_dev);
	generic = ghes->generic;

	ghes->flags |= GHES_EXITING;
	switch (generic->notify.type) {
	case ACPI_HEST_NOTIFY_POLLED:
		del_timer_sync(&ghes->timer);
		break;
	case ACPI_HEST_NOTIFY_EXTERNAL:
		free_irq(ghes->irq, ghes);
		break;
	case ACPI_HEST_NOTIFY_SCI:
	case ACPI_HEST_NOTIFY_GSIV:
	case ACPI_HEST_NOTIFY_GPIO:
		mutex_lock(&ghes_list_mutex);
		list_del_rcu(&ghes->list);
		if (list_empty(&ghes_hed))
			unregister_acpi_hed_notifier(&ghes_notifier_hed);
		mutex_unlock(&ghes_list_mutex);
		synchronize_rcu();
		break;
	case ACPI_HEST_NOTIFY_SEA:
		ghes_sea_remove(ghes);
		break;
	case ACPI_HEST_NOTIFY_NMI:
		ghes_nmi_remove(ghes);
		break;
	default:
		BUG();
		break;
	}

	ghes_fini(ghes);

	ghes_edac_unregister(ghes);

	kfree(ghes);

	platform_set_drvdata(ghes_dev, NULL);

	return 0;
}

static struct platform_driver ghes_platform_driver = {
	.driver		= {
		.name	= "GHES",
	},
	.probe		= ghes_probe,
	.remove		= ghes_remove,
};
static int __init ghes_init(void)
{
	int rc;

	if (acpi_disabled)
		return -ENODEV;

	switch (hest_disable) {
	case HEST_NOT_FOUND:
		return -ENODEV;
	case HEST_DISABLED:
		pr_info(GHES_PFX "HEST is not enabled!\n");
		return -EINVAL;
	default:
		break;
	}

	if (ghes_disable) {
		pr_info(GHES_PFX "GHES is not enabled!\n");
		return -EINVAL;
	}

	ghes_nmi_init_cxt();

	rc = ghes_estatus_pool_init();
	if (rc)
		goto err;

	rc = ghes_estatus_pool_expand(GHES_ESTATUS_CACHE_AVG_SIZE *
				      GHES_ESTATUS_CACHE_ALLOCED_MAX);
	if (rc)
		goto err_pool_exit;
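	/*
	 * The initial pool sizing above is only a heuristic: room for
	 * GHES_ESTATUS_CACHE_ALLOCED_MAX cached records of
	 * GHES_ESTATUS_CACHE_AVG_SIZE bytes each (512 * 6 = 3 KiB);
	 * NMI sources grow the pool further via ghes_nmi_add().
	 */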
	rc = platform_driver_register(&ghes_platform_driver);
	if (rc)
		goto err_pool_exit;

	rc = apei_osc_setup();
	if (rc == 0 && osc_sb_apei_support_acked)
		pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit and WHEA _OSC.\n");
	else if (rc == 0 && !osc_sb_apei_support_acked)
		pr_info(GHES_PFX "APEI firmware first mode is enabled by WHEA _OSC.\n");
	else if (rc && osc_sb_apei_support_acked)
		pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit.\n");
	else
		pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n");

	return 0;
err_pool_exit:
	ghes_estatus_pool_exit();
err:
	return rc;
}
device_initcall(ghes_init);