ghes.c 34 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323
  1. /*
  2. * APEI Generic Hardware Error Source support
  3. *
  4. * Generic Hardware Error Source provides a way to report platform
  5. * hardware errors (such as that from chipset). It works in so called
  6. * "Firmware First" mode, that is, hardware errors are reported to
  7. * firmware firstly, then reported to Linux by firmware. This way,
  8. * some non-standard hardware error registers or non-standard hardware
  9. * link can be checked by firmware to produce more hardware error
  10. * information for Linux.
  11. *
  12. * For more information about Generic Hardware Error Source, please
  13. * refer to ACPI Specification version 4.0, section 17.3.2.6
  14. *
  15. * Copyright 2010,2011 Intel Corp.
  16. * Author: Huang Ying <ying.huang@intel.com>
  17. *
  18. * This program is free software; you can redistribute it and/or
  19. * modify it under the terms of the GNU General Public License version
  20. * 2 as published by the Free Software Foundation;
  21. *
  22. * This program is distributed in the hope that it will be useful,
  23. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  24. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  25. * GNU General Public License for more details.
  26. */
  27. #include <linux/kernel.h>
  28. #include <linux/moduleparam.h>
  29. #include <linux/init.h>
  30. #include <linux/acpi.h>
  31. #include <linux/io.h>
  32. #include <linux/interrupt.h>
  33. #include <linux/timer.h>
  34. #include <linux/cper.h>
  35. #include <linux/kdebug.h>
  36. #include <linux/platform_device.h>
  37. #include <linux/mutex.h>
  38. #include <linux/ratelimit.h>
  39. #include <linux/vmalloc.h>
  40. #include <linux/irq_work.h>
  41. #include <linux/llist.h>
  42. #include <linux/genalloc.h>
  43. #include <linux/pci.h>
  44. #include <linux/aer.h>
  45. #include <linux/nmi.h>
  46. #include <linux/sched/clock.h>
  47. #include <linux/uuid.h>
  48. #include <linux/ras.h>
  49. #include <acpi/actbl1.h>
  50. #include <acpi/ghes.h>
  51. #include <acpi/apei.h>
  52. #include <asm/tlbflush.h>
  53. #include <ras/ras_event.h>
  54. #include "apei-internal.h"
#define GHES_PFX "GHES: "

/* Upper bound on a single error status block we will buffer or accept. */
#define GHES_ESTATUS_MAX_SIZE 65536
#define GHES_ESOURCE_PREALLOC_MAX_SIZE 65536

/* Minimum allocation granularity of the estatus pool: 1 << 3 bytes. */
#define GHES_ESTATUS_POOL_MIN_ALLOC_ORDER 3

/* This is just an estimation for memory pool allocation */
#define GHES_ESTATUS_CACHE_AVG_SIZE 512

#define GHES_ESTATUS_CACHES_SIZE 4

/* Cache entries older than this (10 s in ns) are considered expired. */
#define GHES_ESTATUS_IN_CACHE_MAX_NSEC 10000000000ULL
/* Prevent too many caches are allocated because of RCU */
#define GHES_ESTATUS_CACHE_ALLOCED_MAX (GHES_ESTATUS_CACHES_SIZE * 3 / 2)

/* Total size of a cache entry: header plus trailing estatus payload. */
#define GHES_ESTATUS_CACHE_LEN(estatus_len) \
	(sizeof(struct ghes_estatus_cache) + (estatus_len))
/* The estatus payload is stored immediately after the cache header. */
#define GHES_ESTATUS_FROM_CACHE(estatus_cache) \
	((struct acpi_hest_generic_status *) \
	 ((struct ghes_estatus_cache *)(estatus_cache) + 1))

/* Same layout for NMI queue nodes: llist node header, then payload. */
#define GHES_ESTATUS_NODE_LEN(estatus_len) \
	(sizeof(struct ghes_estatus_node) + (estatus_len))
#define GHES_ESTATUS_FROM_NODE(estatus_node) \
	((struct acpi_hest_generic_status *) \
	 ((struct ghes_estatus_node *)(estatus_node) + 1))
  75. static inline bool is_hest_type_generic_v2(struct ghes *ghes)
  76. {
  77. return ghes->generic->header.type == ACPI_HEST_TYPE_GENERIC_ERROR_V2;
  78. }
/*
 * This driver isn't really modular, however for the time being,
 * continuing to use module_param is the easiest way to remain
 * compatible with existing boot arg use cases.
 */
/* Set via the "ghes.disable" boot parameter to turn GHES off entirely. */
bool ghes_disable;
module_param_named(disable, ghes_disable, bool, 0);

/*
 * All error sources notified with HED (Hardware Error Device) share a
 * single notifier callback, so they need to be linked and checked one
 * by one. This holds true for NMI too.
 *
 * RCU is used for these lists, so ghes_list_mutex is only used for
 * list changing, not for traversing.
 */
static LIST_HEAD(ghes_hed);
static DEFINE_MUTEX(ghes_list_mutex);
/*
 * Because the memory area used to transfer hardware error information
 * from BIOS to Linux can be determined only in NMI, IRQ or timer
 * handler, but general ioremap can not be used in atomic context, so
 * a special version of atomic ioremap is implemented for that.
 */

/*
 * Two virtual pages are used, one for IRQ/PROCESS context, the other for
 * NMI context (optionally).
 */
#define GHES_IOREMAP_PAGES 2
#define GHES_IOREMAP_IRQ_PAGE(base) (base)
#define GHES_IOREMAP_NMI_PAGE(base) ((base) + PAGE_SIZE)

/* virtual memory area for atomic ioremap */
static struct vm_struct *ghes_ioremap_area;

/*
 * These 2 spinlocks are used to prevent the atomic ioremap virtual
 * memory area from being mapped simultaneously.
 */
static DEFINE_RAW_SPINLOCK(ghes_ioremap_lock_nmi);
static DEFINE_SPINLOCK(ghes_ioremap_lock_irq);

/* Lock-less pool backing cached and NMI-queued error status blocks. */
static struct gen_pool *ghes_estatus_pool;
static unsigned long ghes_estatus_pool_size_request;

/* Small RCU-managed cache used to throttle duplicate error reports. */
static struct ghes_estatus_cache *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE];
static atomic_t ghes_estatus_cache_alloced;

/* Fallback panic_timeout (seconds) used when panicking on a fatal error. */
static int ghes_panic_timeout __read_mostly = 30;
  122. static int ghes_ioremap_init(void)
  123. {
  124. ghes_ioremap_area = __get_vm_area(PAGE_SIZE * GHES_IOREMAP_PAGES,
  125. VM_IOREMAP, VMALLOC_START, VMALLOC_END);
  126. if (!ghes_ioremap_area) {
  127. pr_err(GHES_PFX "Failed to allocate virtual memory area for atomic ioremap.\n");
  128. return -ENOMEM;
  129. }
  130. return 0;
  131. }
/* Tear down the virtual area reserved by ghes_ioremap_init(). */
static void ghes_ioremap_exit(void)
{
	free_vm_area(ghes_ioremap_area);
}
/*
 * Map @pfn at the dedicated NMI scratch page and return its virtual
 * address.  Caller must hold ghes_ioremap_lock_nmi.
 */
static void __iomem *ghes_ioremap_pfn_nmi(u64 pfn)
{
	unsigned long vaddr;
	phys_addr_t paddr;
	pgprot_t prot;

	vaddr = (unsigned long)GHES_IOREMAP_NMI_PAGE(ghes_ioremap_area->addr);
	paddr = pfn << PAGE_SHIFT;
	/* Honor firmware-specified memory attributes for this region. */
	prot = arch_apei_get_mem_attribute(paddr);
	/*
	 * NOTE(review): return value is ignored — presumably mapping a
	 * single pre-reserved page cannot fail here; worth confirming.
	 */
	ioremap_page_range(vaddr, vaddr + PAGE_SIZE, paddr, prot);

	return (void __iomem *)vaddr;
}
  147. static void __iomem *ghes_ioremap_pfn_irq(u64 pfn)
  148. {
  149. unsigned long vaddr, paddr;
  150. pgprot_t prot;
  151. vaddr = (unsigned long)GHES_IOREMAP_IRQ_PAGE(ghes_ioremap_area->addr);
  152. paddr = pfn << PAGE_SHIFT;
  153. prot = arch_apei_get_mem_attribute(paddr);
  154. ioremap_page_range(vaddr, vaddr + PAGE_SIZE, paddr, prot);
  155. return (void __iomem *)vaddr;
  156. }
/* Undo ghes_ioremap_pfn_nmi(): unmap the NMI scratch page and flush it. */
static void ghes_iounmap_nmi(void __iomem *vaddr_ptr)
{
	unsigned long vaddr = (unsigned long __force)vaddr_ptr;
	void *base = ghes_ioremap_area->addr;

	/* Catch any attempt to unmap something other than the NMI page. */
	BUG_ON(vaddr != (unsigned long)GHES_IOREMAP_NMI_PAGE(base));
	unmap_kernel_range_noflush(vaddr, PAGE_SIZE);
	/* Evict the now-stale translation for this single page. */
	arch_apei_flush_tlb_one(vaddr);
}
/* Undo ghes_ioremap_pfn_irq(): unmap the IRQ scratch page and flush it. */
static void ghes_iounmap_irq(void __iomem *vaddr_ptr)
{
	unsigned long vaddr = (unsigned long __force)vaddr_ptr;
	void *base = ghes_ioremap_area->addr;

	/* Catch any attempt to unmap something other than the IRQ page. */
	BUG_ON(vaddr != (unsigned long)GHES_IOREMAP_IRQ_PAGE(base));
	unmap_kernel_range_noflush(vaddr, PAGE_SIZE);
	/* Evict the now-stale translation for this single page. */
	arch_apei_flush_tlb_one(vaddr);
}
  173. static int ghes_estatus_pool_init(void)
  174. {
  175. ghes_estatus_pool = gen_pool_create(GHES_ESTATUS_POOL_MIN_ALLOC_ORDER, -1);
  176. if (!ghes_estatus_pool)
  177. return -ENOMEM;
  178. return 0;
  179. }
/* gen_pool_for_each_chunk() callback: return one backing page to the buddy. */
static void ghes_estatus_pool_free_chunk_page(struct gen_pool *pool,
					      struct gen_pool_chunk *chunk,
					      void *data)
{
	free_page(chunk->start_addr);
}
/* Free every backing page of the pool, then destroy the pool itself. */
static void ghes_estatus_pool_exit(void)
{
	gen_pool_for_each_chunk(ghes_estatus_pool,
				ghes_estatus_pool_free_chunk_page, NULL);
	gen_pool_destroy(ghes_estatus_pool);
}
  192. static int ghes_estatus_pool_expand(unsigned long len)
  193. {
  194. unsigned long i, pages, size, addr;
  195. int ret;
  196. ghes_estatus_pool_size_request += PAGE_ALIGN(len);
  197. size = gen_pool_size(ghes_estatus_pool);
  198. if (size >= ghes_estatus_pool_size_request)
  199. return 0;
  200. pages = (ghes_estatus_pool_size_request - size) / PAGE_SIZE;
  201. for (i = 0; i < pages; i++) {
  202. addr = __get_free_page(GFP_KERNEL);
  203. if (!addr)
  204. return -ENOMEM;
  205. ret = gen_pool_add(ghes_estatus_pool, addr, PAGE_SIZE, -1);
  206. if (ret)
  207. return ret;
  208. }
  209. return 0;
  210. }
/* Map the GHESv2 read-ack register for later apei_read()/apei_write(). */
static int map_gen_v2(struct ghes *ghes)
{
	return apei_map_generic_address(&ghes->generic_v2->read_ack_register);
}
/* Undo map_gen_v2(): release the read-ack register mapping. */
static void unmap_gen_v2(struct ghes *ghes)
{
	apei_unmap_generic_address(&ghes->generic_v2->read_ack_register);
}
/*
 * Allocate and initialize a struct ghes for one Generic Error Source:
 * map the error status address register (and, for GHESv2 sources, the
 * read-ack register) and allocate a bounce buffer for the error status
 * block.  Returns the new object or an ERR_PTR() on failure.
 */
static struct ghes *ghes_new(struct acpi_hest_generic *generic)
{
	struct ghes *ghes;
	unsigned int error_block_length;
	int rc;

	ghes = kzalloc(sizeof(*ghes), GFP_KERNEL);
	if (!ghes)
		return ERR_PTR(-ENOMEM);

	ghes->generic = generic;
	if (is_hest_type_generic_v2(ghes)) {
		rc = map_gen_v2(ghes);
		if (rc)
			goto err_free;
	}

	rc = apei_map_generic_address(&generic->error_status_address);
	if (rc)
		goto err_unmap_read_ack_addr;
	error_block_length = generic->error_block_length;
	if (error_block_length > GHES_ESTATUS_MAX_SIZE) {
		/* Clamp a bogus firmware-provided length to a sane maximum. */
		pr_warning(FW_WARN GHES_PFX
			   "Error status block length is too long: %u for "
			   "generic hardware error source: %d.\n",
			   error_block_length, generic->header.source_id);
		error_block_length = GHES_ESTATUS_MAX_SIZE;
	}
	ghes->estatus = kmalloc(error_block_length, GFP_KERNEL);
	if (!ghes->estatus) {
		rc = -ENOMEM;
		goto err_unmap_status_addr;
	}

	return ghes;

err_unmap_status_addr:
	apei_unmap_generic_address(&generic->error_status_address);
err_unmap_read_ack_addr:
	if (is_hest_type_generic_v2(ghes))
		unmap_gen_v2(ghes);
err_free:
	kfree(ghes);
	return ERR_PTR(rc);
}
/* Release everything ghes_new() set up: buffer and register mappings. */
static void ghes_fini(struct ghes *ghes)
{
	kfree(ghes->estatus);
	apei_unmap_generic_address(&ghes->generic->error_status_address);
	if (is_hest_type_generic_v2(ghes))
		unmap_gen_v2(ghes);
}
  266. static inline int ghes_severity(int severity)
  267. {
  268. switch (severity) {
  269. case CPER_SEV_INFORMATIONAL:
  270. return GHES_SEV_NO;
  271. case CPER_SEV_CORRECTED:
  272. return GHES_SEV_CORRECTED;
  273. case CPER_SEV_RECOVERABLE:
  274. return GHES_SEV_RECOVERABLE;
  275. case CPER_SEV_FATAL:
  276. return GHES_SEV_PANIC;
  277. default:
  278. /* Unknown, go panic */
  279. return GHES_SEV_PANIC;
  280. }
  281. }
/*
 * Copy @len bytes between @buffer and physical address @paddr, one page
 * at a time, using the atomic ioremap scratch pages.  Direction is
 * physical->buffer when @from_phys is non-zero, buffer->physical
 * otherwise.  Works in NMI, IRQ and process context; the NMI and
 * IRQ/process paths use separate scratch pages and separate locks so
 * an NMI arriving during an IRQ-context copy cannot corrupt it.
 */
static void ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len,
				  int from_phys)
{
	void __iomem *vaddr;
	unsigned long flags = 0;
	int in_nmi = in_nmi();
	u64 offset;
	u32 trunk;

	while (len > 0) {
		/* Byte offset of paddr within its page. */
		offset = paddr - (paddr & PAGE_MASK);
		if (in_nmi) {
			raw_spin_lock(&ghes_ioremap_lock_nmi);
			vaddr = ghes_ioremap_pfn_nmi(paddr >> PAGE_SHIFT);
		} else {
			spin_lock_irqsave(&ghes_ioremap_lock_irq, flags);
			vaddr = ghes_ioremap_pfn_irq(paddr >> PAGE_SHIFT);
		}
		/* Copy at most up to the end of the current page. */
		trunk = PAGE_SIZE - offset;
		trunk = min(trunk, len);
		if (from_phys)
			memcpy_fromio(buffer, vaddr + offset, trunk);
		else
			memcpy_toio(vaddr + offset, buffer, trunk);
		len -= trunk;
		paddr += trunk;
		buffer += trunk;
		if (in_nmi) {
			ghes_iounmap_nmi(vaddr);
			raw_spin_unlock(&ghes_ioremap_lock_nmi);
		} else {
			ghes_iounmap_irq(vaddr);
			spin_unlock_irqrestore(&ghes_ioremap_lock_irq, flags);
		}
	}
}
/*
 * Read the firmware's error status block into ghes->estatus, validating
 * it as it goes (header first, then the full record).  Returns 0 on
 * success, -ENOENT when no error is pending, -EIO on read/validation
 * failure.  On success GHES_TO_CLEAR is set so the caller later clears
 * the firmware buffer via ghes_clear_estatus().  @silent suppresses the
 * warning printks (used on the NMI path).
 */
static int ghes_read_estatus(struct ghes *ghes, int silent)
{
	struct acpi_hest_generic *g = ghes->generic;
	u64 buf_paddr;
	u32 len;
	int rc;

	rc = apei_read(&buf_paddr, &g->error_status_address);
	if (rc) {
		if (!silent && printk_ratelimit())
			pr_warning(FW_WARN GHES_PFX
				   "Failed to read error status block address for hardware error source: %d.\n",
				   g->header.source_id);
		return -EIO;
	}
	/* A NULL pointer means firmware has nothing for us. */
	if (!buf_paddr)
		return -ENOENT;

	/* First fetch just the fixed header... */
	ghes_copy_tofrom_phys(ghes->estatus, buf_paddr,
			      sizeof(*ghes->estatus), 1);
	if (!ghes->estatus->block_status)
		return -ENOENT;

	ghes->buffer_paddr = buf_paddr;
	ghes->flags |= GHES_TO_CLEAR;

	rc = -EIO;
	len = cper_estatus_len(ghes->estatus);
	if (len < sizeof(*ghes->estatus))
		goto err_read_block;
	if (len > ghes->generic->error_block_length)
		goto err_read_block;
	if (cper_estatus_check_header(ghes->estatus))
		goto err_read_block;
	/* ...then the variable-length remainder, once the length is sane. */
	ghes_copy_tofrom_phys(ghes->estatus + 1,
			      buf_paddr + sizeof(*ghes->estatus),
			      len - sizeof(*ghes->estatus), 1);
	if (cper_estatus_check(ghes->estatus))
		goto err_read_block;
	rc = 0;

err_read_block:
	if (rc && !silent && printk_ratelimit())
		pr_warning(FW_WARN GHES_PFX
			   "Failed to read error status block!\n");
	return rc;
}
/*
 * Invalidate the local copy of the error status block and, if it was
 * read from the firmware buffer (GHES_TO_CLEAR set), write the zeroed
 * block_status back so firmware can reuse the buffer.
 */
static void ghes_clear_estatus(struct ghes *ghes)
{
	ghes->estatus->block_status = 0;
	if (!(ghes->flags & GHES_TO_CLEAR))
		return;
	ghes_copy_tofrom_phys(ghes->estatus, ghes->buffer_paddr,
			      sizeof(ghes->estatus->block_status), 0);
	ghes->flags &= ~GHES_TO_CLEAR;
}
/*
 * Hand a platform-memory error section to the memory_failure machinery
 * so the affected page can be soft-offlined (corrected error that
 * exceeded its threshold) or recovered (recoverable error).  Compiled
 * out unless CONFIG_ACPI_APEI_MEMORY_FAILURE is enabled; a no-op when
 * no valid physical address is reported.
 */
static void ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, int sev)
{
#ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE
	unsigned long pfn;
	int flags = -1;
	int sec_sev = ghes_severity(gdata->error_severity);
	struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);

	if (!(mem_err->validation_bits & CPER_MEM_VALID_PA))
		return;

	pfn = mem_err->physical_addr >> PAGE_SHIFT;
	if (!pfn_valid(pfn)) {
		pr_warn_ratelimited(FW_WARN GHES_PFX
		"Invalid address in generic error data: %#llx\n",
		mem_err->physical_addr);
		return;
	}

	/* iff following two events can be handled properly by now */
	if (sec_sev == GHES_SEV_CORRECTED &&
	    (gdata->flags & CPER_SEC_ERROR_THRESHOLD_EXCEEDED))
		flags = MF_SOFT_OFFLINE;
	if (sev == GHES_SEV_RECOVERABLE && sec_sev == GHES_SEV_RECOVERABLE)
		flags = 0;

	if (flags != -1)
		memory_failure_queue(pfn, 0, flags);
#endif
}
/*
 * Walk every section of an error status block and dispatch it to the
 * matching handler: platform memory errors, PCIe AER errors, ARM
 * processor errors, or a generic "non-standard" log event for any
 * unrecognized section type.
 */
static void ghes_do_proc(struct ghes *ghes,
			 const struct acpi_hest_generic_status *estatus)
{
	int sev, sec_sev;
	struct acpi_hest_generic_data *gdata;
	guid_t *sec_type;
	guid_t *fru_id = &NULL_UUID_LE;
	char *fru_text = "";

	sev = ghes_severity(estatus->error_severity);
	apei_estatus_for_each_section(estatus, gdata) {
		sec_type = (guid_t *)gdata->section_type;
		sec_sev = ghes_severity(gdata->error_severity);
		/* FRU id/text, if present, carry over to the log event. */
		if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
			fru_id = (guid_t *)gdata->fru_id;
		if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
			fru_text = gdata->fru_text;
		if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
			struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);

			ghes_edac_report_mem_error(ghes, sev, mem_err);
			arch_apei_report_mem_error(sev, mem_err);
			ghes_handle_memory_failure(gdata, sev);
		}
#ifdef CONFIG_ACPI_APEI_PCIEAER
		else if (guid_equal(sec_type, &CPER_SEC_PCIE)) {
			struct cper_sec_pcie *pcie_err = acpi_hest_get_payload(gdata);

			/* Only queue recovery when the device and AER info are valid. */
			if (sev == GHES_SEV_RECOVERABLE &&
			    sec_sev == GHES_SEV_RECOVERABLE &&
			    pcie_err->validation_bits & CPER_PCIE_VALID_DEVICE_ID &&
			    pcie_err->validation_bits & CPER_PCIE_VALID_AER_INFO) {
				unsigned int devfn;
				int aer_severity;

				devfn = PCI_DEVFN(pcie_err->device_id.device,
						  pcie_err->device_id.function);
				aer_severity = cper_severity_to_aer(gdata->error_severity);

				/*
				 * If firmware reset the component to contain
				 * the error, we must reinitialize it before
				 * use, so treat it as a fatal AER error.
				 */
				if (gdata->flags & CPER_SEC_RESET)
					aer_severity = AER_FATAL;

				aer_recover_queue(pcie_err->device_id.segment,
						  pcie_err->device_id.bus,
						  devfn, aer_severity,
						  (struct aer_capability_regs *)
						  pcie_err->aer_info);
			}
		}
#endif
		else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) {
			struct cper_sec_proc_arm *err = acpi_hest_get_payload(gdata);

			log_arm_hw_error(err);
		} else {
			void *err = acpi_hest_get_payload(gdata);

			log_non_standard_event(sec_type, fru_id, fru_text,
					       sec_sev, err,
					       gdata->error_data_length);
		}
	}
}
  454. static void __ghes_print_estatus(const char *pfx,
  455. const struct acpi_hest_generic *generic,
  456. const struct acpi_hest_generic_status *estatus)
  457. {
  458. static atomic_t seqno;
  459. unsigned int curr_seqno;
  460. char pfx_seq[64];
  461. if (pfx == NULL) {
  462. if (ghes_severity(estatus->error_severity) <=
  463. GHES_SEV_CORRECTED)
  464. pfx = KERN_WARNING;
  465. else
  466. pfx = KERN_ERR;
  467. }
  468. curr_seqno = atomic_inc_return(&seqno);
  469. snprintf(pfx_seq, sizeof(pfx_seq), "%s{%u}" HW_ERR, pfx, curr_seqno);
  470. printk("%s""Hardware error from APEI Generic Hardware Error Source: %d\n",
  471. pfx_seq, generic->header.source_id);
  472. cper_estatus_print(pfx_seq, estatus);
  473. }
  474. static int ghes_print_estatus(const char *pfx,
  475. const struct acpi_hest_generic *generic,
  476. const struct acpi_hest_generic_status *estatus)
  477. {
  478. /* Not more than 2 messages every 5 seconds */
  479. static DEFINE_RATELIMIT_STATE(ratelimit_corrected, 5*HZ, 2);
  480. static DEFINE_RATELIMIT_STATE(ratelimit_uncorrected, 5*HZ, 2);
  481. struct ratelimit_state *ratelimit;
  482. if (ghes_severity(estatus->error_severity) <= GHES_SEV_CORRECTED)
  483. ratelimit = &ratelimit_corrected;
  484. else
  485. ratelimit = &ratelimit_uncorrected;
  486. if (__ratelimit(ratelimit)) {
  487. __ghes_print_estatus(pfx, generic, estatus);
  488. return 1;
  489. }
  490. return 0;
  491. }
  492. /*
  493. * GHES error status reporting throttle, to report more kinds of
  494. * errors, instead of just most frequently occurred errors.
  495. */
/*
 * Check whether a byte-identical copy of @estatus was reported recently.
 * Returns 1 if a matching, still-fresh cache entry exists (caller may
 * then skip printing), 0 otherwise.  Lock-less readers under RCU.
 */
static int ghes_estatus_cached(struct acpi_hest_generic_status *estatus)
{
	u32 len;
	int i, cached = 0;
	unsigned long long now;
	struct ghes_estatus_cache *cache;
	struct acpi_hest_generic_status *cache_estatus;

	len = cper_estatus_len(estatus);
	rcu_read_lock();
	for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) {
		cache = rcu_dereference(ghes_estatus_caches[i]);
		if (cache == NULL)
			continue;
		/* Cheap length check before the full byte comparison. */
		if (len != cache->estatus_len)
			continue;
		cache_estatus = GHES_ESTATUS_FROM_CACHE(cache);
		if (memcmp(estatus, cache_estatus, len))
			continue;
		/* Count the hit even if the entry turns out to be expired. */
		atomic_inc(&cache->count);
		now = sched_clock();
		if (now - cache->time_in < GHES_ESTATUS_IN_CACHE_MAX_NSEC)
			cached = 1;
		break;
	}
	rcu_read_unlock();
	return cached;
}
/*
 * Allocate a cache entry from the lock-less pool and fill it with a
 * copy of @estatus.  Returns NULL when the global entry quota
 * (GHES_ESTATUS_CACHE_ALLOCED_MAX) is reached or the pool is exhausted.
 */
static struct ghes_estatus_cache *ghes_estatus_cache_alloc(
	struct acpi_hest_generic *generic,
	struct acpi_hest_generic_status *estatus)
{
	int alloced;
	u32 len, cache_len;
	struct ghes_estatus_cache *cache;
	struct acpi_hest_generic_status *cache_estatus;

	/* Reserve a quota slot first; roll back on any failure below. */
	alloced = atomic_add_return(1, &ghes_estatus_cache_alloced);
	if (alloced > GHES_ESTATUS_CACHE_ALLOCED_MAX) {
		atomic_dec(&ghes_estatus_cache_alloced);
		return NULL;
	}
	len = cper_estatus_len(estatus);
	cache_len = GHES_ESTATUS_CACHE_LEN(len);
	cache = (void *)gen_pool_alloc(ghes_estatus_pool, cache_len);
	if (!cache) {
		atomic_dec(&ghes_estatus_cache_alloced);
		return NULL;
	}
	cache_estatus = GHES_ESTATUS_FROM_CACHE(cache);
	memcpy(cache_estatus, estatus, len);
	cache->estatus_len = len;
	atomic_set(&cache->count, 0);
	cache->generic = generic;
	cache->time_in = sched_clock();
	return cache;
}
  551. static void ghes_estatus_cache_free(struct ghes_estatus_cache *cache)
  552. {
  553. u32 len;
  554. len = cper_estatus_len(GHES_ESTATUS_FROM_CACHE(cache));
  555. len = GHES_ESTATUS_CACHE_LEN(len);
  556. gen_pool_free(ghes_estatus_pool, (unsigned long)cache, len);
  557. atomic_dec(&ghes_estatus_cache_alloced);
  558. }
  559. static void ghes_estatus_cache_rcu_free(struct rcu_head *head)
  560. {
  561. struct ghes_estatus_cache *cache;
  562. cache = container_of(head, struct ghes_estatus_cache, rcu);
  563. ghes_estatus_cache_free(cache);
  564. }
/*
 * Publish a new cache entry, evicting (in priority order) an empty
 * slot, an expired entry, or the entry with the longest average time
 * between hits.  Lock-less: the slot is claimed with cmpxchg(); on a
 * lost race the new entry is simply discarded, and an evicted entry is
 * freed only after an RCU grace period.
 */
static void ghes_estatus_cache_add(
	struct acpi_hest_generic *generic,
	struct acpi_hest_generic_status *estatus)
{
	int i, slot = -1, count;
	unsigned long long now, duration, period, max_period = 0;
	struct ghes_estatus_cache *cache, *slot_cache = NULL, *new_cache;

	new_cache = ghes_estatus_cache_alloc(generic, estatus);
	if (new_cache == NULL)
		return;
	rcu_read_lock();
	now = sched_clock();
	for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) {
		cache = rcu_dereference(ghes_estatus_caches[i]);
		if (cache == NULL) {
			slot = i;
			slot_cache = NULL;
			break;
		}
		duration = now - cache->time_in;
		if (duration >= GHES_ESTATUS_IN_CACHE_MAX_NSEC) {
			slot = i;
			slot_cache = cache;
			break;
		}
		count = atomic_read(&cache->count);
		period = duration;
		do_div(period, (count + 1));
		if (period > max_period) {
			max_period = period;
			slot = i;
			slot_cache = cache;
		}
	}
	/* new_cache must be put into array after its contents are written */
	smp_wmb();
	if (slot != -1 && cmpxchg(ghes_estatus_caches + slot,
				  slot_cache, new_cache) == slot_cache) {
		if (slot_cache)
			call_rcu(&slot_cache->rcu, ghes_estatus_cache_rcu_free);
	} else
		ghes_estatus_cache_free(new_cache);
	rcu_read_unlock();
}
/*
 * GHESv2 read-ack: tell firmware the error status block has been
 * consumed so it may reuse the buffer.  Read-modify-write the ack
 * register, masking with the preserve bits and OR-ing the write bits,
 * both shifted to the register's bit offset.
 */
static int ghes_ack_error(struct acpi_hest_generic_v2 *gv2)
{
	int rc;
	u64 val = 0;

	rc = apei_read(&val, &gv2->read_ack_register);
	if (rc)
		return rc;

	val &= gv2->read_ack_preserve << gv2->read_ack_register.bit_offset;
	val |= gv2->read_ack_write << gv2->read_ack_register.bit_offset;

	return apei_write(val, &gv2->read_ack_register);
}
  620. static void __ghes_panic(struct ghes *ghes)
  621. {
  622. __ghes_print_estatus(KERN_EMERG, ghes->generic, ghes->estatus);
  623. /* reboot to log the error! */
  624. if (!panic_timeout)
  625. panic_timeout = ghes_panic_timeout;
  626. panic("Fatal hardware error!");
  627. }
/*
 * Process one error source end to end: read the status block, panic on
 * fatal severity, print (rate-limited and de-duplicated), dispatch to
 * the section handlers, clear the firmware buffer, and for GHESv2
 * sources acknowledge the record.  Returns 0 on success, -ENOENT when
 * nothing was pending, other negatives on failure.
 */
static int ghes_proc(struct ghes *ghes)
{
	int rc;

	rc = ghes_read_estatus(ghes, 0);
	if (rc)
		goto out;

	if (ghes_severity(ghes->estatus->error_severity) >= GHES_SEV_PANIC) {
		__ghes_panic(ghes);
	}

	/* Throttle duplicate reports; remember whatever was printed. */
	if (!ghes_estatus_cached(ghes->estatus)) {
		if (ghes_print_estatus(NULL, ghes->generic, ghes->estatus))
			ghes_estatus_cache_add(ghes->generic, ghes->estatus);
	}
	ghes_do_proc(ghes, ghes->estatus);

out:
	ghes_clear_estatus(ghes);

	/* No pending record: nothing to acknowledge. */
	if (rc == -ENOENT)
		return rc;

	/*
	 * GHESv2 type HEST entries introduce support for error acknowledgment,
	 * so only acknowledge the error if this support is present.
	 */
	if (is_hest_type_generic_v2(ghes))
		return ghes_ack_error(ghes->generic_v2);

	return rc;
}
/*
 * (Re)arm the polling timer for a polled error source using the
 * firmware-specified interval.  A zero interval disables polling (with
 * a warning), since it would otherwise busy-loop the timer.
 */
static void ghes_add_timer(struct ghes *ghes)
{
	struct acpi_hest_generic *g = ghes->generic;
	unsigned long expire;

	if (!g->notify.poll_interval) {
		pr_warning(FW_WARN GHES_PFX "Poll interval is 0 for generic hardware error source: %d, disabled.\n",
			   g->header.source_id);
		return;
	}
	expire = jiffies + msecs_to_jiffies(g->notify.poll_interval);
	/* Round so periodic timers batch together and save wakeups. */
	ghes->timer.expires = round_jiffies_relative(expire);
	add_timer(&ghes->timer);
}
  667. static void ghes_poll_func(unsigned long data)
  668. {
  669. struct ghes *ghes = (void *)data;
  670. ghes_proc(ghes);
  671. if (!(ghes->flags & GHES_EXITING))
  672. ghes_add_timer(ghes);
  673. }
  674. static irqreturn_t ghes_irq_func(int irq, void *data)
  675. {
  676. struct ghes *ghes = data;
  677. int rc;
  678. rc = ghes_proc(ghes);
  679. if (rc)
  680. return IRQ_NONE;
  681. return IRQ_HANDLED;
  682. }
  683. static int ghes_notify_hed(struct notifier_block *this, unsigned long event,
  684. void *data)
  685. {
  686. struct ghes *ghes;
  687. int ret = NOTIFY_DONE;
  688. rcu_read_lock();
  689. list_for_each_entry_rcu(ghes, &ghes_hed, list) {
  690. if (!ghes_proc(ghes))
  691. ret = NOTIFY_OK;
  692. }
  693. rcu_read_unlock();
  694. return ret;
  695. }
/* Registered with the HED driver; fans out to all HED-notified sources. */
static struct notifier_block ghes_notifier_hed = {
	.notifier_call = ghes_notify_hed,
};
  699. #ifdef CONFIG_ACPI_APEI_SEA
  700. static LIST_HEAD(ghes_sea);
  701. /*
  702. * Return 0 only if one of the SEA error sources successfully reported an error
  703. * record sent from the firmware.
  704. */
  705. int ghes_notify_sea(void)
  706. {
  707. struct ghes *ghes;
  708. int ret = -ENOENT;
  709. rcu_read_lock();
  710. list_for_each_entry_rcu(ghes, &ghes_sea, list) {
  711. if (!ghes_proc(ghes))
  712. ret = 0;
  713. }
  714. rcu_read_unlock();
  715. return ret;
  716. }
/* Publish @ghes on the RCU-protected SEA notification list. */
static void ghes_sea_add(struct ghes *ghes)
{
	mutex_lock(&ghes_list_mutex);
	list_add_rcu(&ghes->list, &ghes_sea);
	mutex_unlock(&ghes_list_mutex);
}
/*
 * Unlink @ghes from the SEA list, then wait out all in-flight RCU
 * readers (ghes_notify_sea) before the caller may free it.
 */
static void ghes_sea_remove(struct ghes *ghes)
{
	mutex_lock(&ghes_list_mutex);
	list_del_rcu(&ghes->list);
	mutex_unlock(&ghes_list_mutex);
	synchronize_rcu();
}
  730. #else /* CONFIG_ACPI_APEI_SEA */
/* Stub used when CONFIG_ACPI_APEI_SEA is disabled: SEA cannot be wired up. */
static inline void ghes_sea_add(struct ghes *ghes)
{
	pr_err(GHES_PFX "ID: %d, trying to add SEA notification which is not supported\n",
	       ghes->generic->header.source_id);
}
/* Stub used when CONFIG_ACPI_APEI_SEA is disabled: nothing to unregister. */
static inline void ghes_sea_remove(struct ghes *ghes)
{
	pr_err(GHES_PFX "ID: %d, trying to remove SEA notification which is not supported\n",
	       ghes->generic->header.source_id);
}
  741. #endif /* CONFIG_ACPI_APEI_SEA */
  742. #ifdef CONFIG_HAVE_ACPI_APEI_NMI
  743. /*
  744. * printk is not safe in NMI context. So in NMI handler, we allocate
  745. * required memory from lock-less memory allocator
  746. * (ghes_estatus_pool), save estatus into it, put them into lock-less
  747. * list (ghes_estatus_llist), then delay printk into IRQ context via
  748. * irq_work (ghes_proc_irq_work). ghes_estatus_size_request record
  749. * required pool size by all NMI error source.
  750. */
/* Lock-less list of estatus records queued from NMI context. */
static struct llist_head ghes_estatus_llist;
/* Deferred work that drains ghes_estatus_llist in IRQ context. */
static struct irq_work ghes_proc_irq_work;

/*
 * NMI may be triggered on any CPU, so ghes_in_nmi is used for
 * having only one concurrent reader.
 */
static atomic_t ghes_in_nmi = ATOMIC_INIT(0);

/* GHES sources notified via NMI; traversed by ghes_notify_nmi(). */
static LIST_HEAD(ghes_nmi);
/*
 * IRQ-context worker: drain the lock-less list filled by the NMI
 * handler, process and (if not already cached) print each saved
 * estatus, then return the node's memory to ghes_estatus_pool.
 */
static void ghes_proc_in_irq(struct irq_work *irq_work)
{
	struct llist_node *llnode, *next;
	struct ghes_estatus_node *estatus_node;
	struct acpi_hest_generic *generic;
	struct acpi_hest_generic_status *estatus;
	u32 len, node_len;

	llnode = llist_del_all(&ghes_estatus_llist);
	/*
	 * Because the time order of estatus in list is reversed,
	 * revert it back to proper order.
	 */
	llnode = llist_reverse_order(llnode);
	while (llnode) {
		/* Save the successor now: the node itself is freed below. */
		next = llnode->next;
		estatus_node = llist_entry(llnode, struct ghes_estatus_node,
					   llnode);
		estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
		len = cper_estatus_len(estatus);
		node_len = GHES_ESTATUS_NODE_LEN(len);
		ghes_do_proc(estatus_node->ghes, estatus);
		/* Print and cache only records not seen recently. */
		if (!ghes_estatus_cached(estatus)) {
			generic = estatus_node->generic;
			if (ghes_print_estatus(NULL, generic, estatus))
				ghes_estatus_cache_add(generic, estatus);
		}
		gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node,
			      node_len);
		llnode = next;
	}
}
  790. static void ghes_print_queued_estatus(void)
  791. {
  792. struct llist_node *llnode;
  793. struct ghes_estatus_node *estatus_node;
  794. struct acpi_hest_generic *generic;
  795. struct acpi_hest_generic_status *estatus;
  796. u32 len, node_len;
  797. llnode = llist_del_all(&ghes_estatus_llist);
  798. /*
  799. * Because the time order of estatus in list is reversed,
  800. * revert it back to proper order.
  801. */
  802. llnode = llist_reverse_order(llnode);
  803. while (llnode) {
  804. estatus_node = llist_entry(llnode, struct ghes_estatus_node,
  805. llnode);
  806. estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
  807. len = cper_estatus_len(estatus);
  808. node_len = GHES_ESTATUS_NODE_LEN(len);
  809. generic = estatus_node->generic;
  810. ghes_print_estatus(NULL, generic, estatus);
  811. llnode = llnode->next;
  812. }
  813. }
  814. /* Save estatus for further processing in IRQ context */
static void __process_error(struct ghes *ghes)
{
#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
	u32 len, node_len;
	struct ghes_estatus_node *estatus_node;
	struct acpi_hest_generic_status *estatus;

	/* Skip records that were reported recently enough to be cached. */
	if (ghes_estatus_cached(ghes->estatus))
		return;

	len = cper_estatus_len(ghes->estatus);
	node_len = GHES_ESTATUS_NODE_LEN(len);

	/*
	 * Only the lock-less gen_pool allocator is NMI-safe; on
	 * allocation failure the record is silently dropped.
	 */
	estatus_node = (void *)gen_pool_alloc(ghes_estatus_pool, node_len);
	if (!estatus_node)
		return;

	estatus_node->ghes = ghes;
	estatus_node->generic = ghes->generic;
	estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
	/* Copy the whole estatus so the source block can be cleared. */
	memcpy(estatus, ghes->estatus, len);
	llist_add(&estatus_node->llnode, &ghes_estatus_llist);
#endif
}
/*
 * NMI handler for all NMI-notified GHES sources.  Reads each source's
 * estatus block, panics on fatal severity, otherwise queues the record
 * for processing in IRQ context (printk is not NMI-safe).
 */
static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs)
{
	struct ghes *ghes;
	int sev, ret = NMI_DONE;

	/* Admit only one concurrent reader (see ghes_in_nmi comment). */
	if (!atomic_add_unless(&ghes_in_nmi, 1, 1))
		return ret;

	list_for_each_entry_rcu(ghes, &ghes_nmi, list) {
		if (ghes_read_estatus(ghes, 1)) {
			ghes_clear_estatus(ghes);
			continue;
		} else {
			ret = NMI_HANDLED;
		}

		sev = ghes_severity(ghes->estatus->error_severity);
		if (sev >= GHES_SEV_PANIC) {
			/* Fatal: flush queued records, then panic. */
			oops_begin();
			ghes_print_queued_estatus();
			__ghes_panic(ghes);
		}

		if (!(ghes->flags & GHES_TO_CLEAR))
			continue;

		/* Queue the record; actual handling happens in IRQ context. */
		__process_error(ghes);
		ghes_clear_estatus(ghes);
	}

#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
	if (ret == NMI_HANDLED)
		irq_work_queue(&ghes_proc_irq_work);
#endif
	atomic_dec(&ghes_in_nmi);
	return ret;
}
  866. static unsigned long ghes_esource_prealloc_size(
  867. const struct acpi_hest_generic *generic)
  868. {
  869. unsigned long block_length, prealloc_records, prealloc_size;
  870. block_length = min_t(unsigned long, generic->error_block_length,
  871. GHES_ESTATUS_MAX_SIZE);
  872. prealloc_records = max_t(unsigned long,
  873. generic->records_to_preallocate, 1);
  874. prealloc_size = min_t(unsigned long, block_length * prealloc_records,
  875. GHES_ESOURCE_PREALLOC_MAX_SIZE);
  876. return prealloc_size;
  877. }
/* Drop @len (rounded up to a page) from the requested estatus pool size. */
static void ghes_estatus_pool_shrink(unsigned long len)
{
	ghes_estatus_pool_size_request -= PAGE_ALIGN(len);
}
/* Hook up an NMI-notified source: grow the pool, then join the NMI list. */
static void ghes_nmi_add(struct ghes *ghes)
{
	unsigned long len;

	/* Expand the pool so NMI context can allocate records lock-lessly. */
	len = ghes_esource_prealloc_size(ghes->generic);
	ghes_estatus_pool_expand(len);

	mutex_lock(&ghes_list_mutex);
	/* Only the first NMI source registers the shared NMI handler. */
	if (list_empty(&ghes_nmi))
		register_nmi_handler(NMI_LOCAL, ghes_notify_nmi, 0, "ghes");
	list_add_rcu(&ghes->list, &ghes_nmi);
	mutex_unlock(&ghes_list_mutex);
}
/* Detach an NMI-notified source and release its share of the pool. */
static void ghes_nmi_remove(struct ghes *ghes)
{
	unsigned long len;

	mutex_lock(&ghes_list_mutex);
	list_del_rcu(&ghes->list);
	/* Last NMI source gone: drop the shared NMI handler. */
	if (list_empty(&ghes_nmi))
		unregister_nmi_handler(NMI_LOCAL, "ghes");
	mutex_unlock(&ghes_list_mutex);
	/*
	 * To synchronize with NMI handler, ghes can only be
	 * freed after NMI handler finishes.
	 */
	synchronize_rcu();
	len = ghes_esource_prealloc_size(ghes->generic);
	ghes_estatus_pool_shrink(len);
}
/* One-time setup of the IRQ work that drains NMI-queued estatus records. */
static void ghes_nmi_init_cxt(void)
{
	init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq);
}
  913. #else /* CONFIG_HAVE_ACPI_APEI_NMI */
/*
 * Stub used when CONFIG_HAVE_ACPI_APEI_NMI is off.  ghes_probe()
 * refuses NMI-notified sources in that case, so reaching this stub
 * indicates a kernel bug — hence BUG().
 */
static inline void ghes_nmi_add(struct ghes *ghes)
{
	pr_err(GHES_PFX "ID: %d, trying to add NMI notification which is not supported!\n",
	       ghes->generic->header.source_id);
	BUG();
}
/* Stub counterpart of ghes_nmi_remove(); unreachable unless buggy. */
static inline void ghes_nmi_remove(struct ghes *ghes)
{
	pr_err(GHES_PFX "ID: %d, trying to remove NMI notification which is not supported!\n",
	       ghes->generic->header.source_id);
	BUG();
}
/* Nothing to set up when NMI notification support is compiled out. */
static inline void ghes_nmi_init_cxt(void)
{
}
  929. #endif /* CONFIG_HAVE_ACPI_APEI_NMI */
  930. static int ghes_probe(struct platform_device *ghes_dev)
  931. {
  932. struct acpi_hest_generic *generic;
  933. struct ghes *ghes = NULL;
  934. int rc = -EINVAL;
  935. generic = *(struct acpi_hest_generic **)ghes_dev->dev.platform_data;
  936. if (!generic->enabled)
  937. return -ENODEV;
  938. switch (generic->notify.type) {
  939. case ACPI_HEST_NOTIFY_POLLED:
  940. case ACPI_HEST_NOTIFY_EXTERNAL:
  941. case ACPI_HEST_NOTIFY_SCI:
  942. case ACPI_HEST_NOTIFY_GSIV:
  943. case ACPI_HEST_NOTIFY_GPIO:
  944. break;
  945. case ACPI_HEST_NOTIFY_SEA:
  946. if (!IS_ENABLED(CONFIG_ACPI_APEI_SEA)) {
  947. pr_warn(GHES_PFX "Generic hardware error source: %d notified via SEA is not supported\n",
  948. generic->header.source_id);
  949. rc = -ENOTSUPP;
  950. goto err;
  951. }
  952. break;
  953. case ACPI_HEST_NOTIFY_NMI:
  954. if (!IS_ENABLED(CONFIG_HAVE_ACPI_APEI_NMI)) {
  955. pr_warn(GHES_PFX "Generic hardware error source: %d notified via NMI interrupt is not supported!\n",
  956. generic->header.source_id);
  957. goto err;
  958. }
  959. break;
  960. case ACPI_HEST_NOTIFY_LOCAL:
  961. pr_warning(GHES_PFX "Generic hardware error source: %d notified via local interrupt is not supported!\n",
  962. generic->header.source_id);
  963. goto err;
  964. default:
  965. pr_warning(FW_WARN GHES_PFX "Unknown notification type: %u for generic hardware error source: %d\n",
  966. generic->notify.type, generic->header.source_id);
  967. goto err;
  968. }
  969. rc = -EIO;
  970. if (generic->error_block_length <
  971. sizeof(struct acpi_hest_generic_status)) {
  972. pr_warning(FW_BUG GHES_PFX "Invalid error block length: %u for generic hardware error source: %d\n",
  973. generic->error_block_length,
  974. generic->header.source_id);
  975. goto err;
  976. }
  977. ghes = ghes_new(generic);
  978. if (IS_ERR(ghes)) {
  979. rc = PTR_ERR(ghes);
  980. ghes = NULL;
  981. goto err;
  982. }
  983. rc = ghes_edac_register(ghes, &ghes_dev->dev);
  984. if (rc < 0)
  985. goto err;
  986. switch (generic->notify.type) {
  987. case ACPI_HEST_NOTIFY_POLLED:
  988. setup_deferrable_timer(&ghes->timer, ghes_poll_func,
  989. (unsigned long)ghes);
  990. ghes_add_timer(ghes);
  991. break;
  992. case ACPI_HEST_NOTIFY_EXTERNAL:
  993. /* External interrupt vector is GSI */
  994. rc = acpi_gsi_to_irq(generic->notify.vector, &ghes->irq);
  995. if (rc) {
  996. pr_err(GHES_PFX "Failed to map GSI to IRQ for generic hardware error source: %d\n",
  997. generic->header.source_id);
  998. goto err_edac_unreg;
  999. }
  1000. rc = request_irq(ghes->irq, ghes_irq_func, IRQF_SHARED,
  1001. "GHES IRQ", ghes);
  1002. if (rc) {
  1003. pr_err(GHES_PFX "Failed to register IRQ for generic hardware error source: %d\n",
  1004. generic->header.source_id);
  1005. goto err_edac_unreg;
  1006. }
  1007. break;
  1008. case ACPI_HEST_NOTIFY_SCI:
  1009. case ACPI_HEST_NOTIFY_GSIV:
  1010. case ACPI_HEST_NOTIFY_GPIO:
  1011. mutex_lock(&ghes_list_mutex);
  1012. if (list_empty(&ghes_hed))
  1013. register_acpi_hed_notifier(&ghes_notifier_hed);
  1014. list_add_rcu(&ghes->list, &ghes_hed);
  1015. mutex_unlock(&ghes_list_mutex);
  1016. break;
  1017. case ACPI_HEST_NOTIFY_SEA:
  1018. ghes_sea_add(ghes);
  1019. break;
  1020. case ACPI_HEST_NOTIFY_NMI:
  1021. ghes_nmi_add(ghes);
  1022. break;
  1023. default:
  1024. BUG();
  1025. }
  1026. platform_set_drvdata(ghes_dev, ghes);
  1027. /* Handle any pending errors right away */
  1028. ghes_proc(ghes);
  1029. return 0;
  1030. err_edac_unreg:
  1031. ghes_edac_unregister(ghes);
  1032. err:
  1033. if (ghes) {
  1034. ghes_fini(ghes);
  1035. kfree(ghes);
  1036. }
  1037. return rc;
  1038. }
/*
 * Tear down one GHES platform device: detach it from its notification
 * mechanism, then release everything ghes_probe() set up.
 */
static int ghes_remove(struct platform_device *ghes_dev)
{
	struct ghes *ghes;
	struct acpi_hest_generic *generic;

	ghes = platform_get_drvdata(ghes_dev);
	generic = ghes->generic;

	/* Signal concurrent handlers that this source is going away. */
	ghes->flags |= GHES_EXITING;
	switch (generic->notify.type) {
	case ACPI_HEST_NOTIFY_POLLED:
		del_timer_sync(&ghes->timer);
		break;
	case ACPI_HEST_NOTIFY_EXTERNAL:
		free_irq(ghes->irq, ghes);
		break;
	case ACPI_HEST_NOTIFY_SCI:
	case ACPI_HEST_NOTIFY_GSIV:
	case ACPI_HEST_NOTIFY_GPIO:
		mutex_lock(&ghes_list_mutex);
		list_del_rcu(&ghes->list);
		/* Drop the HED notifier once the last such source is gone. */
		if (list_empty(&ghes_hed))
			unregister_acpi_hed_notifier(&ghes_notifier_hed);
		mutex_unlock(&ghes_list_mutex);
		/* Wait for RCU readers before ghes is freed below. */
		synchronize_rcu();
		break;
	case ACPI_HEST_NOTIFY_SEA:
		ghes_sea_remove(ghes);
		break;
	case ACPI_HEST_NOTIFY_NMI:
		ghes_nmi_remove(ghes);
		break;
	default:
		BUG();
		break;
	}

	ghes_fini(ghes);
	ghes_edac_unregister(ghes);
	kfree(ghes);

	platform_set_drvdata(ghes_dev, NULL);

	return 0;
}
/* Platform driver bound to the "GHES" devices created for HEST entries. */
static struct platform_driver ghes_platform_driver = {
	.driver = {
		.name = "GHES",
	},
	.probe = ghes_probe,
	.remove = ghes_remove,
};
/*
 * Module init: bail out when ACPI/HEST/GHES is unavailable or disabled,
 * set up NMI context, ioremap areas and the estatus pool, register the
 * platform driver, then report the APEI firmware-first _OSC outcome.
 */
static int __init ghes_init(void)
{
	int rc;

	if (acpi_disabled)
		return -ENODEV;

	switch (hest_disable) {
	case HEST_NOT_FOUND:
		return -ENODEV;
	case HEST_DISABLED:
		pr_info(GHES_PFX "HEST is not enabled!\n");
		return -EINVAL;
	default:
		break;
	}

	if (ghes_disable) {
		pr_info(GHES_PFX "GHES is not enabled!\n");
		return -EINVAL;
	}

	ghes_nmi_init_cxt();

	rc = ghes_ioremap_init();
	if (rc)
		goto err;

	rc = ghes_estatus_pool_init();
	if (rc)
		goto err_ioremap_exit;

	/* Reserve pool space for the cached-estatus entries up front. */
	rc = ghes_estatus_pool_expand(GHES_ESTATUS_CACHE_AVG_SIZE *
				      GHES_ESTATUS_CACHE_ALLOCED_MAX);
	if (rc)
		goto err_pool_exit;

	rc = platform_driver_register(&ghes_platform_driver);
	if (rc)
		goto err_pool_exit;

	/* Report how (or whether) firmware-first mode got enabled. */
	rc = apei_osc_setup();
	if (rc == 0 && osc_sb_apei_support_acked)
		pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit and WHEA _OSC.\n");
	else if (rc == 0 && !osc_sb_apei_support_acked)
		pr_info(GHES_PFX "APEI firmware first mode is enabled by WHEA _OSC.\n");
	else if (rc && osc_sb_apei_support_acked)
		pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit.\n");
	else
		pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n");

	return 0;

err_pool_exit:
	ghes_estatus_pool_exit();
err_ioremap_exit:
	ghes_ioremap_exit();
err:
	return rc;
}
device_initcall(ghes_init);