
/*
 * APEI Generic Hardware Error Source support
 *
 * Generic Hardware Error Source provides a way to report platform
 * hardware errors (such as those from the chipset). It works in the
 * so-called "Firmware First" mode: hardware errors are reported to
 * the firmware first, then to Linux by the firmware. This way, some
 * non-standard hardware error registers or non-standard hardware
 * links can be checked by the firmware to produce more hardware
 * error information for Linux.
 *
 * For more information about Generic Hardware Error Source, please
 * refer to ACPI Specification version 4.0, section 17.3.2.6
 *
 * Copyright 2010,2011 Intel Corp.
 * Author: Huang Ying <ying.huang@intel.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version
 * 2 as published by the Free Software Foundation;
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 */
#include <linux/kernel.h>
#include <linux/moduleparam.h>
#include <linux/init.h>
#include <linux/acpi.h>
#include <linux/io.h>
#include <linux/interrupt.h>
#include <linux/timer.h>
#include <linux/cper.h>
#include <linux/kdebug.h>
#include <linux/platform_device.h>
#include <linux/mutex.h>
#include <linux/ratelimit.h>
#include <linux/vmalloc.h>
#include <linux/irq_work.h>
#include <linux/llist.h>
#include <linux/genalloc.h>
#include <linux/pci.h>
#include <linux/aer.h>
#include <linux/nmi.h>
#include <linux/sched/clock.h>
#include <linux/uuid.h>
#include <linux/ras.h>
#include <acpi/actbl1.h>
#include <acpi/ghes.h>
#include <acpi/apei.h>
#include <asm/fixmap.h>
#include <asm/tlbflush.h>
#include <ras/ras_event.h>

#include "apei-internal.h"
#define GHES_PFX "GHES: "

#define GHES_ESTATUS_MAX_SIZE 65536
#define GHES_ESOURCE_PREALLOC_MAX_SIZE 65536

#define GHES_ESTATUS_POOL_MIN_ALLOC_ORDER 3

/* This is just an estimation for memory pool allocation */
#define GHES_ESTATUS_CACHE_AVG_SIZE 512

#define GHES_ESTATUS_CACHES_SIZE 4

#define GHES_ESTATUS_IN_CACHE_MAX_NSEC 10000000000ULL
/* Prevent too many caches from being allocated because of RCU */
#define GHES_ESTATUS_CACHE_ALLOCED_MAX (GHES_ESTATUS_CACHES_SIZE * 3 / 2)
#define GHES_ESTATUS_CACHE_LEN(estatus_len) \
        (sizeof(struct ghes_estatus_cache) + (estatus_len))
#define GHES_ESTATUS_FROM_CACHE(estatus_cache) \
        ((struct acpi_hest_generic_status *) \
         ((struct ghes_estatus_cache *)(estatus_cache) + 1))

#define GHES_ESTATUS_NODE_LEN(estatus_len) \
        (sizeof(struct ghes_estatus_node) + (estatus_len))
#define GHES_ESTATUS_FROM_NODE(estatus_node) \
        ((struct acpi_hest_generic_status *) \
         ((struct ghes_estatus_node *)(estatus_node) + 1))
static inline bool is_hest_type_generic_v2(struct ghes *ghes)
{
        return ghes->generic->header.type == ACPI_HEST_TYPE_GENERIC_ERROR_V2;
}
/*
 * This driver isn't really modular, however for the time being,
 * continuing to use module_param is the easiest way to remain
 * compatible with existing boot arg use cases.
 */
bool ghes_disable;
module_param_named(disable, ghes_disable, bool, 0);
/*
 * All error sources notified with HED (Hardware Error Device) share a
 * single notifier callback, so they need to be linked and checked one
 * by one. This holds true for NMI too.
 *
 * RCU is used for these lists, so ghes_list_mutex is only used for
 * list changing, not for traversing.
 */
static LIST_HEAD(ghes_hed);
static DEFINE_MUTEX(ghes_list_mutex);
/*
 * The memory area used to transfer hardware error information from
 * BIOS to Linux can be determined only in an NMI, IRQ or timer
 * handler, but the general ioremap can not be used in atomic context,
 * so the fixmap is used instead.
 *
 * These two spinlocks are used to prevent the fixmap entries from
 * being used simultaneously.
 */
static DEFINE_RAW_SPINLOCK(ghes_ioremap_lock_nmi);
static DEFINE_SPINLOCK(ghes_ioremap_lock_irq);
static struct gen_pool *ghes_estatus_pool;
static unsigned long ghes_estatus_pool_size_request;

static struct ghes_estatus_cache *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE];
static atomic_t ghes_estatus_cache_alloced;

static int ghes_panic_timeout __read_mostly = 30;
static void __iomem *ghes_ioremap_pfn_nmi(u64 pfn)
{
        phys_addr_t paddr;
        pgprot_t prot;

        paddr = pfn << PAGE_SHIFT;
        prot = arch_apei_get_mem_attribute(paddr);
        __set_fixmap(FIX_APEI_GHES_NMI, paddr, prot);

        return (void __iomem *) fix_to_virt(FIX_APEI_GHES_NMI);
}

static void __iomem *ghes_ioremap_pfn_irq(u64 pfn)
{
        phys_addr_t paddr;
        pgprot_t prot;

        paddr = pfn << PAGE_SHIFT;
        prot = arch_apei_get_mem_attribute(paddr);
        __set_fixmap(FIX_APEI_GHES_IRQ, paddr, prot);

        return (void __iomem *) fix_to_virt(FIX_APEI_GHES_IRQ);
}

static void ghes_iounmap_nmi(void)
{
        clear_fixmap(FIX_APEI_GHES_NMI);
}

static void ghes_iounmap_irq(void)
{
        clear_fixmap(FIX_APEI_GHES_IRQ);
}
static int ghes_estatus_pool_init(void)
{
        ghes_estatus_pool = gen_pool_create(GHES_ESTATUS_POOL_MIN_ALLOC_ORDER, -1);
        if (!ghes_estatus_pool)
                return -ENOMEM;
        return 0;
}

static void ghes_estatus_pool_free_chunk_page(struct gen_pool *pool,
                                              struct gen_pool_chunk *chunk,
                                              void *data)
{
        free_page(chunk->start_addr);
}

static void ghes_estatus_pool_exit(void)
{
        gen_pool_for_each_chunk(ghes_estatus_pool,
                                ghes_estatus_pool_free_chunk_page, NULL);
        gen_pool_destroy(ghes_estatus_pool);
}

static int ghes_estatus_pool_expand(unsigned long len)
{
        unsigned long i, pages, size, addr;
        int ret;

        ghes_estatus_pool_size_request += PAGE_ALIGN(len);
        size = gen_pool_size(ghes_estatus_pool);
        if (size >= ghes_estatus_pool_size_request)
                return 0;
        pages = (ghes_estatus_pool_size_request - size) / PAGE_SIZE;
        for (i = 0; i < pages; i++) {
                addr = __get_free_page(GFP_KERNEL);
                if (!addr)
                        return -ENOMEM;
                ret = gen_pool_add(ghes_estatus_pool, addr, PAGE_SIZE, -1);
                if (ret)
                        return ret;
        }

        return 0;
}
static int map_gen_v2(struct ghes *ghes)
{
        return apei_map_generic_address(&ghes->generic_v2->read_ack_register);
}

static void unmap_gen_v2(struct ghes *ghes)
{
        apei_unmap_generic_address(&ghes->generic_v2->read_ack_register);
}
static struct ghes *ghes_new(struct acpi_hest_generic *generic)
{
        struct ghes *ghes;
        unsigned int error_block_length;
        int rc;

        ghes = kzalloc(sizeof(*ghes), GFP_KERNEL);
        if (!ghes)
                return ERR_PTR(-ENOMEM);

        ghes->generic = generic;
        if (is_hest_type_generic_v2(ghes)) {
                rc = map_gen_v2(ghes);
                if (rc)
                        goto err_free;
        }

        rc = apei_map_generic_address(&generic->error_status_address);
        if (rc)
                goto err_unmap_read_ack_addr;
        error_block_length = generic->error_block_length;
        if (error_block_length > GHES_ESTATUS_MAX_SIZE) {
                pr_warning(FW_WARN GHES_PFX
                           "Error status block length is too long: %u for "
                           "generic hardware error source: %d.\n",
                           error_block_length, generic->header.source_id);
                error_block_length = GHES_ESTATUS_MAX_SIZE;
        }
        ghes->estatus = kmalloc(error_block_length, GFP_KERNEL);
        if (!ghes->estatus) {
                rc = -ENOMEM;
                goto err_unmap_status_addr;
        }

        return ghes;

err_unmap_status_addr:
        apei_unmap_generic_address(&generic->error_status_address);
err_unmap_read_ack_addr:
        if (is_hest_type_generic_v2(ghes))
                unmap_gen_v2(ghes);
err_free:
        kfree(ghes);
        return ERR_PTR(rc);
}
static void ghes_fini(struct ghes *ghes)
{
        kfree(ghes->estatus);
        apei_unmap_generic_address(&ghes->generic->error_status_address);
        if (is_hest_type_generic_v2(ghes))
                unmap_gen_v2(ghes);
}
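
/*
 * Map a CPER severity reported by the firmware to the GHES severity
 * used internally to decide how an error is handled.
 */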
static inline int ghes_severity(int severity)
{
        switch (severity) {
        case CPER_SEV_INFORMATIONAL:
                return GHES_SEV_NO;
        case CPER_SEV_CORRECTED:
                return GHES_SEV_CORRECTED;
        case CPER_SEV_RECOVERABLE:
                return GHES_SEV_RECOVERABLE;
        case CPER_SEV_FATAL:
                return GHES_SEV_PANIC;
        default:
                /* Unknown, go panic */
                return GHES_SEV_PANIC;
        }
}
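
/*
 * Copy between a kernel buffer and physical memory one page at a
 * time: each chunk is mapped through a fixmap slot (the NMI one or
 * the IRQ one, depending on the calling context) and unmapped again
 * before the next chunk is processed.
 */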
static void ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len,
                                  int from_phys)
{
        void __iomem *vaddr;
        unsigned long flags = 0;
        int in_nmi = in_nmi();
        u64 offset;
        u32 trunk;

        while (len > 0) {
                offset = paddr - (paddr & PAGE_MASK);
                if (in_nmi) {
                        raw_spin_lock(&ghes_ioremap_lock_nmi);
                        vaddr = ghes_ioremap_pfn_nmi(paddr >> PAGE_SHIFT);
                } else {
                        spin_lock_irqsave(&ghes_ioremap_lock_irq, flags);
                        vaddr = ghes_ioremap_pfn_irq(paddr >> PAGE_SHIFT);
                }
                trunk = PAGE_SIZE - offset;
                trunk = min(trunk, len);
                if (from_phys)
                        memcpy_fromio(buffer, vaddr + offset, trunk);
                else
                        memcpy_toio(vaddr + offset, buffer, trunk);
                len -= trunk;
                paddr += trunk;
                buffer += trunk;
                if (in_nmi) {
                        ghes_iounmap_nmi();
                        raw_spin_unlock(&ghes_ioremap_lock_nmi);
                } else {
                        ghes_iounmap_irq();
                        spin_unlock_irqrestore(&ghes_ioremap_lock_irq, flags);
                }
        }
}
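
/*
 * Read the error status block in two steps: fetch the fixed-size
 * header first to learn the record length, validate that length
 * against the source's error_block_length, then fetch and check the
 * rest of the record.
 */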
static int ghes_read_estatus(struct ghes *ghes, int silent)
{
        struct acpi_hest_generic *g = ghes->generic;
        u64 buf_paddr;
        u32 len;
        int rc;

        rc = apei_read(&buf_paddr, &g->error_status_address);
        if (rc) {
                if (!silent && printk_ratelimit())
                        pr_warning(FW_WARN GHES_PFX
                                   "Failed to read error status block address for hardware error source: %d.\n",
                                   g->header.source_id);
                return -EIO;
        }
        if (!buf_paddr)
                return -ENOENT;

        ghes_copy_tofrom_phys(ghes->estatus, buf_paddr,
                              sizeof(*ghes->estatus), 1);
        if (!ghes->estatus->block_status)
                return -ENOENT;

        ghes->buffer_paddr = buf_paddr;
        ghes->flags |= GHES_TO_CLEAR;

        rc = -EIO;
        len = cper_estatus_len(ghes->estatus);
        if (len < sizeof(*ghes->estatus))
                goto err_read_block;
        if (len > ghes->generic->error_block_length)
                goto err_read_block;
        if (cper_estatus_check_header(ghes->estatus))
                goto err_read_block;
        ghes_copy_tofrom_phys(ghes->estatus + 1,
                              buf_paddr + sizeof(*ghes->estatus),
                              len - sizeof(*ghes->estatus), 1);
        if (cper_estatus_check(ghes->estatus))
                goto err_read_block;
        rc = 0;

err_read_block:
        if (rc && !silent && printk_ratelimit())
                pr_warning(FW_WARN GHES_PFX
                           "Failed to read error status block!\n");
        return rc;
}
static void ghes_clear_estatus(struct ghes *ghes)
{
        ghes->estatus->block_status = 0;
        if (!(ghes->flags & GHES_TO_CLEAR))
                return;
        ghes_copy_tofrom_phys(ghes->estatus, ghes->buffer_paddr,
                              sizeof(ghes->estatus->block_status), 0);
        ghes->flags &= ~GHES_TO_CLEAR;
}
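
/*
 * Hand a reported memory error to the memory_failure() machinery when
 * a valid physical address is present. A corrected error that
 * exceeded the platform's threshold is soft-offlined; a recoverable
 * error is queued for regular recovery.
 */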
static void ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, int sev)
{
#ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE
        unsigned long pfn;
        int flags = -1;
        int sec_sev = ghes_severity(gdata->error_severity);
        struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);

        if (!(mem_err->validation_bits & CPER_MEM_VALID_PA))
                return;

        pfn = mem_err->physical_addr >> PAGE_SHIFT;
        if (!pfn_valid(pfn)) {
                pr_warn_ratelimited(FW_WARN GHES_PFX
                                    "Invalid address in generic error data: %#llx\n",
                                    mem_err->physical_addr);
                return;
        }

        /* Only the following two error types can be handled properly for now */
        if (sec_sev == GHES_SEV_CORRECTED &&
            (gdata->flags & CPER_SEC_ERROR_THRESHOLD_EXCEEDED))
                flags = MF_SOFT_OFFLINE;
        if (sev == GHES_SEV_RECOVERABLE && sec_sev == GHES_SEV_RECOVERABLE)
                flags = 0;

        if (flags != -1)
                memory_failure_queue(pfn, flags);
#endif
}
/*
 * PCIe AER errors need to be sent to the AER driver for reporting and
 * recovery. The GHES severities map to the following AER severities and
 * require the following handling:
 *
 * GHES_SEV_CORRECTED -> AER_CORRECTABLE
 *     These need to be reported by the AER driver but no recovery is
 *     necessary.
 * GHES_SEV_RECOVERABLE -> AER_NONFATAL
 * GHES_SEV_RECOVERABLE && CPER_SEC_RESET -> AER_FATAL
 *     These both need to be reported and recovered from by the AER driver.
 * GHES_SEV_PANIC does not make it to this handling since the kernel must
 * panic.
 */
static void ghes_handle_aer(struct acpi_hest_generic_data *gdata)
{
#ifdef CONFIG_ACPI_APEI_PCIEAER
        struct cper_sec_pcie *pcie_err = acpi_hest_get_payload(gdata);

        if (pcie_err->validation_bits & CPER_PCIE_VALID_DEVICE_ID &&
            pcie_err->validation_bits & CPER_PCIE_VALID_AER_INFO) {
                unsigned int devfn;
                int aer_severity;

                devfn = PCI_DEVFN(pcie_err->device_id.device,
                                  pcie_err->device_id.function);
                aer_severity = cper_severity_to_aer(gdata->error_severity);

                /*
                 * If firmware reset the component to contain
                 * the error, we must reinitialize it before
                 * use, so treat it as a fatal AER error.
                 */
                if (gdata->flags & CPER_SEC_RESET)
                        aer_severity = AER_FATAL;

                aer_recover_queue(pcie_err->device_id.segment,
                                  pcie_err->device_id.bus,
                                  devfn, aer_severity,
                                  (struct aer_capability_regs *)
                                  pcie_err->aer_info);
        }
#endif
}
static void ghes_do_proc(struct ghes *ghes,
                         const struct acpi_hest_generic_status *estatus)
{
        int sev, sec_sev;
        struct acpi_hest_generic_data *gdata;
        guid_t *sec_type;
        guid_t *fru_id = &NULL_UUID_LE;
        char *fru_text = "";

        sev = ghes_severity(estatus->error_severity);
        apei_estatus_for_each_section(estatus, gdata) {
                sec_type = (guid_t *)gdata->section_type;
                sec_sev = ghes_severity(gdata->error_severity);
                if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
                        fru_id = (guid_t *)gdata->fru_id;

                if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
                        fru_text = gdata->fru_text;

                if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
                        struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);

                        ghes_edac_report_mem_error(sev, mem_err);
                        arch_apei_report_mem_error(sev, mem_err);
                        ghes_handle_memory_failure(gdata, sev);
                } else if (guid_equal(sec_type, &CPER_SEC_PCIE)) {
                        ghes_handle_aer(gdata);
                } else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) {
                        struct cper_sec_proc_arm *err = acpi_hest_get_payload(gdata);

                        log_arm_hw_error(err);
                } else {
                        void *err = acpi_hest_get_payload(gdata);

                        log_non_standard_event(sec_type, fru_id, fru_text,
                                               sec_sev, err,
                                               gdata->error_data_length);
                }
        }
}
static void __ghes_print_estatus(const char *pfx,
                                 const struct acpi_hest_generic *generic,
                                 const struct acpi_hest_generic_status *estatus)
{
        static atomic_t seqno;
        unsigned int curr_seqno;
        char pfx_seq[64];

        if (pfx == NULL) {
                if (ghes_severity(estatus->error_severity) <=
                    GHES_SEV_CORRECTED)
                        pfx = KERN_WARNING;
                else
                        pfx = KERN_ERR;
        }
        curr_seqno = atomic_inc_return(&seqno);
        snprintf(pfx_seq, sizeof(pfx_seq), "%s{%u}" HW_ERR, pfx, curr_seqno);
        printk("%s""Hardware error from APEI Generic Hardware Error Source: %d\n",
               pfx_seq, generic->header.source_id);
        cper_estatus_print(pfx_seq, estatus);
}
static int ghes_print_estatus(const char *pfx,
                              const struct acpi_hest_generic *generic,
                              const struct acpi_hest_generic_status *estatus)
{
        /* Not more than 2 messages every 5 seconds */
        static DEFINE_RATELIMIT_STATE(ratelimit_corrected, 5*HZ, 2);
        static DEFINE_RATELIMIT_STATE(ratelimit_uncorrected, 5*HZ, 2);
        struct ratelimit_state *ratelimit;

        if (ghes_severity(estatus->error_severity) <= GHES_SEV_CORRECTED)
                ratelimit = &ratelimit_corrected;
        else
                ratelimit = &ratelimit_uncorrected;
        if (__ratelimit(ratelimit)) {
                __ghes_print_estatus(pfx, generic, estatus);
                return 1;
        }

        return 0;
}
/*
 * GHES error status reporting throttle, so that more kinds of errors
 * get reported, instead of just the most frequently occurring ones.
 */
static int ghes_estatus_cached(struct acpi_hest_generic_status *estatus)
{
        u32 len;
        int i, cached = 0;
        unsigned long long now;
        struct ghes_estatus_cache *cache;
        struct acpi_hest_generic_status *cache_estatus;

        len = cper_estatus_len(estatus);
        rcu_read_lock();
        for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) {
                cache = rcu_dereference(ghes_estatus_caches[i]);
                if (cache == NULL)
                        continue;
                if (len != cache->estatus_len)
                        continue;
                cache_estatus = GHES_ESTATUS_FROM_CACHE(cache);
                if (memcmp(estatus, cache_estatus, len))
                        continue;
                atomic_inc(&cache->count);
                now = sched_clock();
                if (now - cache->time_in < GHES_ESTATUS_IN_CACHE_MAX_NSEC)
                        cached = 1;
                break;
        }
        rcu_read_unlock();
        return cached;
}
static struct ghes_estatus_cache *ghes_estatus_cache_alloc(
        struct acpi_hest_generic *generic,
        struct acpi_hest_generic_status *estatus)
{
        int alloced;
        u32 len, cache_len;
        struct ghes_estatus_cache *cache;
        struct acpi_hest_generic_status *cache_estatus;

        alloced = atomic_add_return(1, &ghes_estatus_cache_alloced);
        if (alloced > GHES_ESTATUS_CACHE_ALLOCED_MAX) {
                atomic_dec(&ghes_estatus_cache_alloced);
                return NULL;
        }
        len = cper_estatus_len(estatus);
        cache_len = GHES_ESTATUS_CACHE_LEN(len);
        cache = (void *)gen_pool_alloc(ghes_estatus_pool, cache_len);
        if (!cache) {
                atomic_dec(&ghes_estatus_cache_alloced);
                return NULL;
        }
        cache_estatus = GHES_ESTATUS_FROM_CACHE(cache);
        memcpy(cache_estatus, estatus, len);
        cache->estatus_len = len;
        atomic_set(&cache->count, 0);
        cache->generic = generic;
        cache->time_in = sched_clock();
        return cache;
}
static void ghes_estatus_cache_free(struct ghes_estatus_cache *cache)
{
        u32 len;

        len = cper_estatus_len(GHES_ESTATUS_FROM_CACHE(cache));
        len = GHES_ESTATUS_CACHE_LEN(len);
        gen_pool_free(ghes_estatus_pool, (unsigned long)cache, len);
        atomic_dec(&ghes_estatus_cache_alloced);
}

static void ghes_estatus_cache_rcu_free(struct rcu_head *head)
{
        struct ghes_estatus_cache *cache;

        cache = container_of(head, struct ghes_estatus_cache, rcu);
        ghes_estatus_cache_free(cache);
}
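
/*
 * Pick a cache slot for the new record: an empty slot if one exists,
 * otherwise an expired entry, otherwise the entry with the longest
 * average period between hits, i.e. the least frequently seen error.
 */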
static void ghes_estatus_cache_add(
        struct acpi_hest_generic *generic,
        struct acpi_hest_generic_status *estatus)
{
        int i, slot = -1, count;
        unsigned long long now, duration, period, max_period = 0;
        struct ghes_estatus_cache *cache, *slot_cache = NULL, *new_cache;

        new_cache = ghes_estatus_cache_alloc(generic, estatus);
        if (new_cache == NULL)
                return;
        rcu_read_lock();
        now = sched_clock();
        for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) {
                cache = rcu_dereference(ghes_estatus_caches[i]);
                if (cache == NULL) {
                        slot = i;
                        slot_cache = NULL;
                        break;
                }
                duration = now - cache->time_in;
                if (duration >= GHES_ESTATUS_IN_CACHE_MAX_NSEC) {
                        slot = i;
                        slot_cache = cache;
                        break;
                }
                count = atomic_read(&cache->count);
                period = duration;
                do_div(period, (count + 1));
                if (period > max_period) {
                        max_period = period;
                        slot = i;
                        slot_cache = cache;
                }
        }
        /* new_cache must be put into array after its contents are written */
        smp_wmb();
        if (slot != -1 && cmpxchg(ghes_estatus_caches + slot,
                                  slot_cache, new_cache) == slot_cache) {
                if (slot_cache)
                        call_rcu(&slot_cache->rcu, ghes_estatus_cache_rcu_free);
        } else
                ghes_estatus_cache_free(new_cache);
        rcu_read_unlock();
}
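
/*
 * Write the GHESv2 Read Ack register to tell the firmware that the
 * error status block has been consumed: keep the bits selected by
 * read_ack_preserve and set the bits selected by read_ack_write.
 */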
static int ghes_ack_error(struct acpi_hest_generic_v2 *gv2)
{
        int rc;
        u64 val = 0;

        rc = apei_read(&val, &gv2->read_ack_register);
        if (rc)
                return rc;

        val &= gv2->read_ack_preserve << gv2->read_ack_register.bit_offset;
        val |= gv2->read_ack_write << gv2->read_ack_register.bit_offset;

        return apei_write(val, &gv2->read_ack_register);
}
static void __ghes_panic(struct ghes *ghes)
{
        __ghes_print_estatus(KERN_EMERG, ghes->generic, ghes->estatus);

        /* reboot to log the error! */
        if (!panic_timeout)
                panic_timeout = ghes_panic_timeout;
        panic("Fatal hardware error!");
}
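
/*
 * Process a single error source: read its error status block, panic
 * on a fatal error, otherwise print the record (rate-limited and
 * deduplicated via the estatus cache) and handle each section, then
 * clear the block and, for GHESv2 sources, acknowledge it.
 */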
static int ghes_proc(struct ghes *ghes)
{
        int rc;

        rc = ghes_read_estatus(ghes, 0);
        if (rc)
                goto out;

        if (ghes_severity(ghes->estatus->error_severity) >= GHES_SEV_PANIC)
                __ghes_panic(ghes);

        if (!ghes_estatus_cached(ghes->estatus)) {
                if (ghes_print_estatus(NULL, ghes->generic, ghes->estatus))
                        ghes_estatus_cache_add(ghes->generic, ghes->estatus);
        }
        ghes_do_proc(ghes, ghes->estatus);

out:
        ghes_clear_estatus(ghes);

        if (rc == -ENOENT)
                return rc;

        /*
         * GHESv2 type HEST entries introduce support for error acknowledgment,
         * so only acknowledge the error if this support is present.
         */
        if (is_hest_type_generic_v2(ghes))
                return ghes_ack_error(ghes->generic_v2);

        return rc;
}
static void ghes_add_timer(struct ghes *ghes)
{
        struct acpi_hest_generic *g = ghes->generic;
        unsigned long expire;

        if (!g->notify.poll_interval) {
                pr_warning(FW_WARN GHES_PFX "Poll interval is 0 for generic hardware error source: %d, disabled.\n",
                           g->header.source_id);
                return;
        }
        expire = jiffies + msecs_to_jiffies(g->notify.poll_interval);
        ghes->timer.expires = round_jiffies_relative(expire);
        add_timer(&ghes->timer);
}

static void ghes_poll_func(struct timer_list *t)
{
        struct ghes *ghes = from_timer(ghes, t, timer);

        ghes_proc(ghes);
        if (!(ghes->flags & GHES_EXITING))
                ghes_add_timer(ghes);
}
static irqreturn_t ghes_irq_func(int irq, void *data)
{
        struct ghes *ghes = data;
        int rc;

        rc = ghes_proc(ghes);
        if (rc)
                return IRQ_NONE;

        return IRQ_HANDLED;
}

static int ghes_notify_hed(struct notifier_block *this, unsigned long event,
                           void *data)
{
        struct ghes *ghes;
        int ret = NOTIFY_DONE;

        rcu_read_lock();
        list_for_each_entry_rcu(ghes, &ghes_hed, list) {
                if (!ghes_proc(ghes))
                        ret = NOTIFY_OK;
        }
        rcu_read_unlock();

        return ret;
}

static struct notifier_block ghes_notifier_hed = {
        .notifier_call = ghes_notify_hed,
};
#ifdef CONFIG_ACPI_APEI_SEA
static LIST_HEAD(ghes_sea);

/*
 * Return 0 only if one of the SEA error sources successfully reported an error
 * record sent from the firmware.
 */
int ghes_notify_sea(void)
{
        struct ghes *ghes;
        int ret = -ENOENT;

        rcu_read_lock();
        list_for_each_entry_rcu(ghes, &ghes_sea, list) {
                if (!ghes_proc(ghes))
                        ret = 0;
        }
        rcu_read_unlock();
        return ret;
}

static void ghes_sea_add(struct ghes *ghes)
{
        mutex_lock(&ghes_list_mutex);
        list_add_rcu(&ghes->list, &ghes_sea);
        mutex_unlock(&ghes_list_mutex);
}

static void ghes_sea_remove(struct ghes *ghes)
{
        mutex_lock(&ghes_list_mutex);
        list_del_rcu(&ghes->list);
        mutex_unlock(&ghes_list_mutex);
        synchronize_rcu();
}
#else /* CONFIG_ACPI_APEI_SEA */
static inline void ghes_sea_add(struct ghes *ghes) { }
static inline void ghes_sea_remove(struct ghes *ghes) { }
#endif /* CONFIG_ACPI_APEI_SEA */
#ifdef CONFIG_HAVE_ACPI_APEI_NMI
/*
 * printk is not safe in NMI context. So in an NMI handler, we
 * allocate the required memory from the lock-less memory allocator
 * (ghes_estatus_pool), save the estatus into it, put it onto the
 * lock-less list (ghes_estatus_llist), then delay the printk into
 * IRQ context via irq_work (ghes_proc_irq_work).
 * ghes_estatus_pool_size_request records the pool size required by
 * all NMI error sources.
 */
static struct llist_head ghes_estatus_llist;
static struct irq_work ghes_proc_irq_work;

/*
 * NMI may be triggered on any CPU, so ghes_in_nmi is used to allow
 * only one concurrent reader.
 */
static atomic_t ghes_in_nmi = ATOMIC_INIT(0);

static LIST_HEAD(ghes_nmi);
static void ghes_proc_in_irq(struct irq_work *irq_work)
{
        struct llist_node *llnode, *next;
        struct ghes_estatus_node *estatus_node;
        struct acpi_hest_generic *generic;
        struct acpi_hest_generic_status *estatus;
        u32 len, node_len;

        llnode = llist_del_all(&ghes_estatus_llist);
        /*
         * The list is in reversed time order, so revert it back to
         * the proper order.
         */
        llnode = llist_reverse_order(llnode);
        while (llnode) {
                next = llnode->next;
                estatus_node = llist_entry(llnode, struct ghes_estatus_node,
                                           llnode);
                estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
                len = cper_estatus_len(estatus);
                node_len = GHES_ESTATUS_NODE_LEN(len);
                ghes_do_proc(estatus_node->ghes, estatus);
                if (!ghes_estatus_cached(estatus)) {
                        generic = estatus_node->generic;
                        if (ghes_print_estatus(NULL, generic, estatus))
                                ghes_estatus_cache_add(generic, estatus);
                }
                gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node,
                              node_len);
                llnode = next;
        }
}
static void ghes_print_queued_estatus(void)
{
        struct llist_node *llnode;
        struct ghes_estatus_node *estatus_node;
        struct acpi_hest_generic *generic;
        struct acpi_hest_generic_status *estatus;

        llnode = llist_del_all(&ghes_estatus_llist);
        /*
         * The list is in reversed time order, so revert it back to
         * the proper order.
         */
        llnode = llist_reverse_order(llnode);
        while (llnode) {
                estatus_node = llist_entry(llnode, struct ghes_estatus_node,
                                           llnode);
                estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
                generic = estatus_node->generic;
                ghes_print_estatus(NULL, generic, estatus);
                llnode = llnode->next;
        }
}
/* Save estatus for further processing in IRQ context */
static void __process_error(struct ghes *ghes)
{
#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
        u32 len, node_len;
        struct ghes_estatus_node *estatus_node;
        struct acpi_hest_generic_status *estatus;

        if (ghes_estatus_cached(ghes->estatus))
                return;

        len = cper_estatus_len(ghes->estatus);
        node_len = GHES_ESTATUS_NODE_LEN(len);

        estatus_node = (void *)gen_pool_alloc(ghes_estatus_pool, node_len);
        if (!estatus_node)
                return;

        estatus_node->ghes = ghes;
        estatus_node->generic = ghes->generic;
        estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
        memcpy(estatus, ghes->estatus, len);
        llist_add(&estatus_node->llnode, &ghes_estatus_llist);
#endif
}
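
/*
 * The NMI handler serializes readers via ghes_in_nmi, checks every
 * NMI-notified error source, panics on fatal errors, and defers all
 * other processing to IRQ context via irq_work.
 */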
static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs)
{
        struct ghes *ghes;
        int sev, ret = NMI_DONE;

        if (!atomic_add_unless(&ghes_in_nmi, 1, 1))
                return ret;

        list_for_each_entry_rcu(ghes, &ghes_nmi, list) {
                if (ghes_read_estatus(ghes, 1)) {
                        ghes_clear_estatus(ghes);
                        continue;
                } else {
                        ret = NMI_HANDLED;
                }

                sev = ghes_severity(ghes->estatus->error_severity);
                if (sev >= GHES_SEV_PANIC) {
                        oops_begin();
                        ghes_print_queued_estatus();
                        __ghes_panic(ghes);
                }

                if (!(ghes->flags & GHES_TO_CLEAR))
                        continue;

                __process_error(ghes);
                ghes_clear_estatus(ghes);
        }

#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
        if (ret == NMI_HANDLED)
                irq_work_queue(&ghes_proc_irq_work);
#endif
        atomic_dec(&ghes_in_nmi);
        return ret;
}
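
/*
 * Estimate the pool memory one error source needs: one error status
 * block for each record the firmware may queue, capped at
 * GHES_ESOURCE_PREALLOC_MAX_SIZE.
 */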
static unsigned long ghes_esource_prealloc_size(
        const struct acpi_hest_generic *generic)
{
        unsigned long block_length, prealloc_records, prealloc_size;

        block_length = min_t(unsigned long, generic->error_block_length,
                             GHES_ESTATUS_MAX_SIZE);
        prealloc_records = max_t(unsigned long,
                                 generic->records_to_preallocate, 1);
        prealloc_size = min_t(unsigned long, block_length * prealloc_records,
                              GHES_ESOURCE_PREALLOC_MAX_SIZE);

        return prealloc_size;
}

static void ghes_estatus_pool_shrink(unsigned long len)
{
        ghes_estatus_pool_size_request -= PAGE_ALIGN(len);
}
static void ghes_nmi_add(struct ghes *ghes)
{
        unsigned long len;

        len = ghes_esource_prealloc_size(ghes->generic);
        ghes_estatus_pool_expand(len);
        mutex_lock(&ghes_list_mutex);
        if (list_empty(&ghes_nmi))
                register_nmi_handler(NMI_LOCAL, ghes_notify_nmi, 0, "ghes");
        list_add_rcu(&ghes->list, &ghes_nmi);
        mutex_unlock(&ghes_list_mutex);
}

static void ghes_nmi_remove(struct ghes *ghes)
{
        unsigned long len;

        mutex_lock(&ghes_list_mutex);
        list_del_rcu(&ghes->list);
        if (list_empty(&ghes_nmi))
                unregister_nmi_handler(NMI_LOCAL, "ghes");
        mutex_unlock(&ghes_list_mutex);
        /*
         * To synchronize with NMI handler, ghes can only be
         * freed after NMI handler finishes.
         */
        synchronize_rcu();
        len = ghes_esource_prealloc_size(ghes->generic);
        ghes_estatus_pool_shrink(len);
}

static void ghes_nmi_init_cxt(void)
{
        init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq);
}
#else /* CONFIG_HAVE_ACPI_APEI_NMI */
static inline void ghes_nmi_add(struct ghes *ghes) { }
static inline void ghes_nmi_remove(struct ghes *ghes) { }
static inline void ghes_nmi_init_cxt(void) { }
#endif /* CONFIG_HAVE_ACPI_APEI_NMI */
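
/*
 * Bind one HEST generic error source, passed in via the platform
 * device's platform data, to a new GHES instance and wire up its
 * notification method.
 */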
static int ghes_probe(struct platform_device *ghes_dev)
{
        struct acpi_hest_generic *generic;
        struct ghes *ghes = NULL;
        int rc = -EINVAL;

        generic = *(struct acpi_hest_generic **)ghes_dev->dev.platform_data;
        if (!generic->enabled)
                return -ENODEV;

        switch (generic->notify.type) {
        case ACPI_HEST_NOTIFY_POLLED:
        case ACPI_HEST_NOTIFY_EXTERNAL:
        case ACPI_HEST_NOTIFY_SCI:
        case ACPI_HEST_NOTIFY_GSIV:
        case ACPI_HEST_NOTIFY_GPIO:
                break;

        case ACPI_HEST_NOTIFY_SEA:
                if (!IS_ENABLED(CONFIG_ACPI_APEI_SEA)) {
                        pr_warn(GHES_PFX "Generic hardware error source: %d notified via SEA is not supported\n",
                                generic->header.source_id);
                        rc = -ENOTSUPP;
                        goto err;
                }
                break;
        case ACPI_HEST_NOTIFY_NMI:
                if (!IS_ENABLED(CONFIG_HAVE_ACPI_APEI_NMI)) {
                        pr_warn(GHES_PFX "Generic hardware error source: %d notified via NMI interrupt is not supported!\n",
                                generic->header.source_id);
                        goto err;
                }
                break;
        case ACPI_HEST_NOTIFY_LOCAL:
                pr_warning(GHES_PFX "Generic hardware error source: %d notified via local interrupt is not supported!\n",
                           generic->header.source_id);
                goto err;
        default:
                pr_warning(FW_WARN GHES_PFX "Unknown notification type: %u for generic hardware error source: %d\n",
                           generic->notify.type, generic->header.source_id);
                goto err;
        }

        rc = -EIO;
        if (generic->error_block_length <
            sizeof(struct acpi_hest_generic_status)) {
                pr_warning(FW_BUG GHES_PFX "Invalid error block length: %u for generic hardware error source: %d\n",
                           generic->error_block_length,
                           generic->header.source_id);
                goto err;
        }
        ghes = ghes_new(generic);
        if (IS_ERR(ghes)) {
                rc = PTR_ERR(ghes);
                ghes = NULL;
                goto err;
        }

        switch (generic->notify.type) {
        case ACPI_HEST_NOTIFY_POLLED:
                timer_setup(&ghes->timer, ghes_poll_func, TIMER_DEFERRABLE);
                ghes_add_timer(ghes);
                break;
        case ACPI_HEST_NOTIFY_EXTERNAL:
                /* External interrupt vector is GSI */
                rc = acpi_gsi_to_irq(generic->notify.vector, &ghes->irq);
                if (rc) {
                        pr_err(GHES_PFX "Failed to map GSI to IRQ for generic hardware error source: %d\n",
                               generic->header.source_id);
                        goto err;
                }
                rc = request_irq(ghes->irq, ghes_irq_func, IRQF_SHARED,
                                 "GHES IRQ", ghes);
                if (rc) {
                        pr_err(GHES_PFX "Failed to register IRQ for generic hardware error source: %d\n",
                               generic->header.source_id);
                        goto err;
                }
                break;

        case ACPI_HEST_NOTIFY_SCI:
        case ACPI_HEST_NOTIFY_GSIV:
        case ACPI_HEST_NOTIFY_GPIO:
                mutex_lock(&ghes_list_mutex);
                if (list_empty(&ghes_hed))
                        register_acpi_hed_notifier(&ghes_notifier_hed);
                list_add_rcu(&ghes->list, &ghes_hed);
                mutex_unlock(&ghes_list_mutex);
                break;

        case ACPI_HEST_NOTIFY_SEA:
                ghes_sea_add(ghes);
                break;
        case ACPI_HEST_NOTIFY_NMI:
                ghes_nmi_add(ghes);
                break;
        default:
                BUG();
        }

        platform_set_drvdata(ghes_dev, ghes);
        ghes_edac_register(ghes, &ghes_dev->dev);

        /* Handle any pending errors right away */
        ghes_proc(ghes);

        return 0;

err:
        if (ghes) {
                ghes_fini(ghes);
                kfree(ghes);
        }
        return rc;
}
static int ghes_remove(struct platform_device *ghes_dev)
{
        struct ghes *ghes;
        struct acpi_hest_generic *generic;

        ghes = platform_get_drvdata(ghes_dev);
        generic = ghes->generic;

        ghes->flags |= GHES_EXITING;
        switch (generic->notify.type) {
        case ACPI_HEST_NOTIFY_POLLED:
                del_timer_sync(&ghes->timer);
                break;
        case ACPI_HEST_NOTIFY_EXTERNAL:
                free_irq(ghes->irq, ghes);
                break;
        case ACPI_HEST_NOTIFY_SCI:
        case ACPI_HEST_NOTIFY_GSIV:
        case ACPI_HEST_NOTIFY_GPIO:
                mutex_lock(&ghes_list_mutex);
                list_del_rcu(&ghes->list);
                if (list_empty(&ghes_hed))
                        unregister_acpi_hed_notifier(&ghes_notifier_hed);
                mutex_unlock(&ghes_list_mutex);
                synchronize_rcu();
                break;
        case ACPI_HEST_NOTIFY_SEA:
                ghes_sea_remove(ghes);
                break;
        case ACPI_HEST_NOTIFY_NMI:
                ghes_nmi_remove(ghes);
                break;
        default:
                BUG();
                break;
        }

        ghes_fini(ghes);

        ghes_edac_unregister(ghes);

        kfree(ghes);

        platform_set_drvdata(ghes_dev, NULL);

        return 0;
}
static struct platform_driver ghes_platform_driver = {
        .driver = {
                .name = "GHES",
        },
        .probe = ghes_probe,
        .remove = ghes_remove,
};
static int __init ghes_init(void)
{
        int rc;

        if (acpi_disabled)
                return -ENODEV;

        switch (hest_disable) {
        case HEST_NOT_FOUND:
                return -ENODEV;
        case HEST_DISABLED:
                pr_info(GHES_PFX "HEST is not enabled!\n");
                return -EINVAL;
        default:
                break;
        }

        if (ghes_disable) {
                pr_info(GHES_PFX "GHES is not enabled!\n");
                return -EINVAL;
        }

        ghes_nmi_init_cxt();

        rc = ghes_estatus_pool_init();
        if (rc)
                goto err;

        rc = ghes_estatus_pool_expand(GHES_ESTATUS_CACHE_AVG_SIZE *
                                      GHES_ESTATUS_CACHE_ALLOCED_MAX);
        if (rc)
                goto err_pool_exit;

        rc = platform_driver_register(&ghes_platform_driver);
        if (rc)
                goto err_pool_exit;

        rc = apei_osc_setup();
        if (rc == 0 && osc_sb_apei_support_acked)
                pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit and WHEA _OSC.\n");
        else if (rc == 0 && !osc_sb_apei_support_acked)
                pr_info(GHES_PFX "APEI firmware first mode is enabled by WHEA _OSC.\n");
        else if (rc && osc_sb_apei_support_acked)
                pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit.\n");
        else
                pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n");

        return 0;

err_pool_exit:
        ghes_estatus_pool_exit();
err:
        return rc;
}
device_initcall(ghes_init);