ghes.c
/*
 * APEI Generic Hardware Error Source support
 *
 * Generic Hardware Error Source provides a way to report platform
 * hardware errors (such as those from the chipset). It works in the
 * so-called "Firmware First" mode: hardware errors are reported to
 * the firmware first, then forwarded to Linux by the firmware. This
 * way, the firmware can check non-standard hardware error registers
 * or non-standard hardware links to produce richer hardware error
 * information for Linux.
 *
 * For more information about Generic Hardware Error Source, please
 * refer to ACPI Specification version 4.0, section 17.3.2.6.
 *
 * Copyright 2010,2011 Intel Corp.
 * Author: Huang Ying <ying.huang@intel.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version
 * 2 as published by the Free Software Foundation;
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 */
#include <linux/kernel.h>
#include <linux/moduleparam.h>
#include <linux/init.h>
#include <linux/acpi.h>
#include <linux/io.h>
#include <linux/interrupt.h>
#include <linux/timer.h>
#include <linux/cper.h>
#include <linux/kdebug.h>
#include <linux/platform_device.h>
#include <linux/mutex.h>
#include <linux/ratelimit.h>
#include <linux/vmalloc.h>
#include <linux/irq_work.h>
#include <linux/llist.h>
#include <linux/genalloc.h>
#include <linux/pci.h>
#include <linux/aer.h>
#include <linux/nmi.h>
#include <linux/sched/clock.h>
#include <linux/uuid.h>
#include <linux/ras.h>

#include <acpi/actbl1.h>
#include <acpi/ghes.h>
#include <acpi/apei.h>
#include <asm/fixmap.h>
#include <asm/tlbflush.h>
#include <ras/ras_event.h>

#include "apei-internal.h"
#define GHES_PFX        "GHES: "

#define GHES_ESTATUS_MAX_SIZE           65536
#define GHES_ESOURCE_PREALLOC_MAX_SIZE  65536

#define GHES_ESTATUS_POOL_MIN_ALLOC_ORDER 3

/* This is just an estimation for memory pool allocation */
#define GHES_ESTATUS_CACHE_AVG_SIZE     512

#define GHES_ESTATUS_CACHES_SIZE        4

#define GHES_ESTATUS_IN_CACHE_MAX_NSEC  10000000000ULL
/* Prevent too many caches from being allocated because of RCU */
#define GHES_ESTATUS_CACHE_ALLOCED_MAX  (GHES_ESTATUS_CACHES_SIZE * 3 / 2)

#define GHES_ESTATUS_CACHE_LEN(estatus_len)                     \
        (sizeof(struct ghes_estatus_cache) + (estatus_len))
#define GHES_ESTATUS_FROM_CACHE(estatus_cache)                  \
        ((struct acpi_hest_generic_status *)                    \
         ((struct ghes_estatus_cache *)(estatus_cache) + 1))

#define GHES_ESTATUS_NODE_LEN(estatus_len)                      \
        (sizeof(struct ghes_estatus_node) + (estatus_len))
#define GHES_ESTATUS_FROM_NODE(estatus_node)                    \
        ((struct acpi_hest_generic_status *)                    \
         ((struct ghes_estatus_node *)(estatus_node) + 1))
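/*
 * A GHESv2 (ACPI_HEST_TYPE_GENERIC_ERROR_V2) error source carries an
 * extra Read Ack register that the OS must write after consuming an
 * error block; plain GHES sources have no such handshake.
 */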
static inline bool is_hest_type_generic_v2(struct ghes *ghes)
{
        return ghes->generic->header.type == ACPI_HEST_TYPE_GENERIC_ERROR_V2;
}

/*
 * This driver isn't really modular, however for the time being,
 * continuing to use module_param is the easiest way to remain
 * compatible with existing boot arg use cases.
 */
bool ghes_disable;
module_param_named(disable, ghes_disable, bool, 0);

/*
 * All error sources notified with HED (Hardware Error Device) share a
 * single notifier callback, so they need to be linked and checked one
 * by one. This holds true for NMI too.
 *
 * RCU is used for these lists, so ghes_list_mutex is only used for
 * list changing, not for traversing.
 */
static LIST_HEAD(ghes_hed);
static DEFINE_MUTEX(ghes_list_mutex);
/*
 * Because the memory area used to transfer hardware error information
 * from BIOS to Linux can be determined only in an NMI, IRQ or timer
 * handler, and the general ioremap can not be used in atomic context,
 * the fixmap is used instead.
 *
 * These 2 spinlocks are used to prevent the fixmap entries from being used
 * simultaneously.
 */
static DEFINE_RAW_SPINLOCK(ghes_ioremap_lock_nmi);
static DEFINE_SPINLOCK(ghes_ioremap_lock_irq);

static struct gen_pool *ghes_estatus_pool;
static unsigned long ghes_estatus_pool_size_request;

static struct ghes_estatus_cache *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE];
static atomic_t ghes_estatus_cache_alloced;

static int ghes_panic_timeout __read_mostly = 30;
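/*
 * Map one page of the BIOS-owned error status block into the dedicated
 * NMI fixmap slot. The caller must hold ghes_ioremap_lock_nmi so only
 * one mapping uses the slot at a time.
 */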
static void __iomem *ghes_ioremap_pfn_nmi(u64 pfn)
{
        phys_addr_t paddr;
        pgprot_t prot;

        paddr = pfn << PAGE_SHIFT;
        prot = arch_apei_get_mem_attribute(paddr);
        __set_fixmap(FIX_APEI_GHES_NMI, paddr, prot);

        return (void __iomem *) fix_to_virt(FIX_APEI_GHES_NMI);
}

static void __iomem *ghes_ioremap_pfn_irq(u64 pfn)
{
        phys_addr_t paddr;
        pgprot_t prot;

        paddr = pfn << PAGE_SHIFT;
        prot = arch_apei_get_mem_attribute(paddr);
        __set_fixmap(FIX_APEI_GHES_IRQ, paddr, prot);

        return (void __iomem *) fix_to_virt(FIX_APEI_GHES_IRQ);
}

static void ghes_iounmap_nmi(void)
{
        clear_fixmap(FIX_APEI_GHES_NMI);
}

static void ghes_iounmap_irq(void)
{
        clear_fixmap(FIX_APEI_GHES_IRQ);
}

static int ghes_estatus_pool_init(void)
{
        ghes_estatus_pool = gen_pool_create(GHES_ESTATUS_POOL_MIN_ALLOC_ORDER, -1);
        if (!ghes_estatus_pool)
                return -ENOMEM;
        return 0;
}

static void ghes_estatus_pool_free_chunk_page(struct gen_pool *pool,
                                              struct gen_pool_chunk *chunk,
                                              void *data)
{
        free_page(chunk->start_addr);
}

static void ghes_estatus_pool_exit(void)
{
        gen_pool_for_each_chunk(ghes_estatus_pool,
                                ghes_estatus_pool_free_chunk_page, NULL);
        gen_pool_destroy(ghes_estatus_pool);
}
static int ghes_estatus_pool_expand(unsigned long len)
{
        unsigned long i, pages, size, addr;
        int ret;

        ghes_estatus_pool_size_request += PAGE_ALIGN(len);
        size = gen_pool_size(ghes_estatus_pool);
        if (size >= ghes_estatus_pool_size_request)
                return 0;
        pages = (ghes_estatus_pool_size_request - size) / PAGE_SIZE;
        for (i = 0; i < pages; i++) {
                addr = __get_free_page(GFP_KERNEL);
                if (!addr)
                        return -ENOMEM;
                ret = gen_pool_add(ghes_estatus_pool, addr, PAGE_SIZE, -1);
                if (ret)
                        return ret;
        }

        return 0;
}

static int map_gen_v2(struct ghes *ghes)
{
        return apei_map_generic_address(&ghes->generic_v2->read_ack_register);
}

static void unmap_gen_v2(struct ghes *ghes)
{
        apei_unmap_generic_address(&ghes->generic_v2->read_ack_register);
}

static struct ghes *ghes_new(struct acpi_hest_generic *generic)
{
        struct ghes *ghes;
        unsigned int error_block_length;
        int rc;

        ghes = kzalloc(sizeof(*ghes), GFP_KERNEL);
        if (!ghes)
                return ERR_PTR(-ENOMEM);

        ghes->generic = generic;
        if (is_hest_type_generic_v2(ghes)) {
                rc = map_gen_v2(ghes);
                if (rc)
                        goto err_free;
        }

        rc = apei_map_generic_address(&generic->error_status_address);
        if (rc)
                goto err_unmap_read_ack_addr;
        error_block_length = generic->error_block_length;
        if (error_block_length > GHES_ESTATUS_MAX_SIZE) {
                pr_warning(FW_WARN GHES_PFX
                           "Error status block length is too long: %u for "
                           "generic hardware error source: %d.\n",
                           error_block_length, generic->header.source_id);
                error_block_length = GHES_ESTATUS_MAX_SIZE;
        }
        ghes->estatus = kmalloc(error_block_length, GFP_KERNEL);
        if (!ghes->estatus) {
                rc = -ENOMEM;
                goto err_unmap_status_addr;
        }

        return ghes;

err_unmap_status_addr:
        apei_unmap_generic_address(&generic->error_status_address);
err_unmap_read_ack_addr:
        if (is_hest_type_generic_v2(ghes))
                unmap_gen_v2(ghes);
err_free:
        kfree(ghes);
        return ERR_PTR(rc);
}

static void ghes_fini(struct ghes *ghes)
{
        kfree(ghes->estatus);
        apei_unmap_generic_address(&ghes->generic->error_status_address);
        if (is_hest_type_generic_v2(ghes))
                unmap_gen_v2(ghes);
}

static inline int ghes_severity(int severity)
{
        switch (severity) {
        case CPER_SEV_INFORMATIONAL:
                return GHES_SEV_NO;
        case CPER_SEV_CORRECTED:
                return GHES_SEV_CORRECTED;
        case CPER_SEV_RECOVERABLE:
                return GHES_SEV_RECOVERABLE;
        case CPER_SEV_FATAL:
                return GHES_SEV_PANIC;
        default:
                /* Unknown, go panic */
                return GHES_SEV_PANIC;
        }
}
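/*
 * Copy between a kernel buffer and physical memory in page-sized
 * chunks, remapping the fixmap slot for each page touched. This runs
 * in NMI, IRQ or timer context, hence the fixmap instead of ioremap().
 */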
static void ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len,
                                  int from_phys)
{
        void __iomem *vaddr;
        unsigned long flags = 0;
        int in_nmi = in_nmi();
        u64 offset;
        u32 trunk;

        while (len > 0) {
                offset = paddr - (paddr & PAGE_MASK);
                if (in_nmi) {
                        raw_spin_lock(&ghes_ioremap_lock_nmi);
                        vaddr = ghes_ioremap_pfn_nmi(paddr >> PAGE_SHIFT);
                } else {
                        spin_lock_irqsave(&ghes_ioremap_lock_irq, flags);
                        vaddr = ghes_ioremap_pfn_irq(paddr >> PAGE_SHIFT);
                }
                trunk = PAGE_SIZE - offset;
                trunk = min(trunk, len);
                if (from_phys)
                        memcpy_fromio(buffer, vaddr + offset, trunk);
                else
                        memcpy_toio(vaddr + offset, buffer, trunk);
                len -= trunk;
                paddr += trunk;
                buffer += trunk;
                if (in_nmi) {
                        ghes_iounmap_nmi();
                        raw_spin_unlock(&ghes_ioremap_lock_nmi);
                } else {
                        ghes_iounmap_irq();
                        spin_unlock_irqrestore(&ghes_ioremap_lock_irq, flags);
                }
        }
}
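/*
 * Read the error status block in two steps: first the fixed-size
 * header, to learn and sanity-check the record length, then the rest
 * of the record. Returns -ENOENT if no error is pending.
 */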
static int ghes_read_estatus(struct ghes *ghes, int silent)
{
        struct acpi_hest_generic *g = ghes->generic;
        u64 buf_paddr;
        u32 len;
        int rc;

        rc = apei_read(&buf_paddr, &g->error_status_address);
        if (rc) {
                if (!silent && printk_ratelimit())
                        pr_warning(FW_WARN GHES_PFX
                                   "Failed to read error status block address for hardware error source: %d.\n",
                                   g->header.source_id);
                return -EIO;
        }
        if (!buf_paddr)
                return -ENOENT;

        ghes_copy_tofrom_phys(ghes->estatus, buf_paddr,
                              sizeof(*ghes->estatus), 1);
        if (!ghes->estatus->block_status)
                return -ENOENT;

        ghes->buffer_paddr = buf_paddr;
        ghes->flags |= GHES_TO_CLEAR;

        rc = -EIO;
        len = cper_estatus_len(ghes->estatus);
        if (len < sizeof(*ghes->estatus))
                goto err_read_block;
        if (len > ghes->generic->error_block_length)
                goto err_read_block;
        if (cper_estatus_check_header(ghes->estatus))
                goto err_read_block;
        ghes_copy_tofrom_phys(ghes->estatus + 1,
                              buf_paddr + sizeof(*ghes->estatus),
                              len - sizeof(*ghes->estatus), 1);
        if (cper_estatus_check(ghes->estatus))
                goto err_read_block;
        rc = 0;

err_read_block:
        if (rc && !silent && printk_ratelimit())
                pr_warning(FW_WARN GHES_PFX
                           "Failed to read error status block!\n");
        return rc;
}

static void ghes_clear_estatus(struct ghes *ghes)
{
        ghes->estatus->block_status = 0;
        if (!(ghes->flags & GHES_TO_CLEAR))
                return;
        ghes_copy_tofrom_phys(ghes->estatus, ghes->buffer_paddr,
                              sizeof(ghes->estatus->block_status), 0);
        ghes->flags &= ~GHES_TO_CLEAR;
}
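/*
 * Feed a platform memory error into the memory-failure machinery:
 * corrected errors over the firmware threshold get the page
 * soft-offlined, recoverable ones go through full memory_failure()
 * recovery.
 */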
static void ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, int sev)
{
#ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE
        unsigned long pfn;
        int flags = -1;
        int sec_sev = ghes_severity(gdata->error_severity);
        struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);

        if (!(mem_err->validation_bits & CPER_MEM_VALID_PA))
                return;

        pfn = mem_err->physical_addr >> PAGE_SHIFT;
        if (!pfn_valid(pfn)) {
                pr_warn_ratelimited(FW_WARN GHES_PFX
                "Invalid address in generic error data: %#llx\n",
                mem_err->physical_addr);
                return;
        }

        /* Only the following two cases can be handled properly for now */
        if (sec_sev == GHES_SEV_CORRECTED &&
            (gdata->flags & CPER_SEC_ERROR_THRESHOLD_EXCEEDED))
                flags = MF_SOFT_OFFLINE;
        if (sev == GHES_SEV_RECOVERABLE && sec_sev == GHES_SEV_RECOVERABLE)
                flags = 0;

        if (flags != -1)
                memory_failure_queue(pfn, flags);
#endif
}
/*
 * PCIe AER errors need to be sent to the AER driver for reporting and
 * recovery. The GHES severities map to the following AER severities and
 * require the following handling:
 *
 * GHES_SEV_CORRECTED -> AER_CORRECTABLE
 *     These need to be reported by the AER driver but no recovery is
 *     necessary.
 * GHES_SEV_RECOVERABLE -> AER_NONFATAL
 * GHES_SEV_RECOVERABLE && CPER_SEC_RESET -> AER_FATAL
 *     These both need to be reported and recovered from by the AER driver.
 * GHES_SEV_PANIC does not make it to this handling since the kernel must
 *     panic.
 */
static void ghes_handle_aer(struct acpi_hest_generic_data *gdata)
{
#ifdef CONFIG_ACPI_APEI_PCIEAER
        struct cper_sec_pcie *pcie_err = acpi_hest_get_payload(gdata);

        if (pcie_err->validation_bits & CPER_PCIE_VALID_DEVICE_ID &&
            pcie_err->validation_bits & CPER_PCIE_VALID_AER_INFO) {
                unsigned int devfn;
                int aer_severity;

                devfn = PCI_DEVFN(pcie_err->device_id.device,
                                  pcie_err->device_id.function);
                aer_severity = cper_severity_to_aer(gdata->error_severity);

                /*
                 * If firmware reset the component to contain
                 * the error, we must reinitialize it before
                 * use, so treat it as a fatal AER error.
                 */
                if (gdata->flags & CPER_SEC_RESET)
                        aer_severity = AER_FATAL;

                aer_recover_queue(pcie_err->device_id.segment,
                                  pcie_err->device_id.bus,
                                  devfn, aer_severity,
                                  (struct aer_capability_regs *)
                                  pcie_err->aer_info);
        }
#endif
}
static void ghes_do_proc(struct ghes *ghes,
                         const struct acpi_hest_generic_status *estatus)
{
        int sev, sec_sev;
        struct acpi_hest_generic_data *gdata;
        guid_t *sec_type;
        guid_t *fru_id = &NULL_UUID_LE;
        char *fru_text = "";

        sev = ghes_severity(estatus->error_severity);
        apei_estatus_for_each_section(estatus, gdata) {
                sec_type = (guid_t *)gdata->section_type;
                sec_sev = ghes_severity(gdata->error_severity);
                if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
                        fru_id = (guid_t *)gdata->fru_id;

                if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
                        fru_text = gdata->fru_text;

                if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
                        struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);

                        ghes_edac_report_mem_error(ghes, sev, mem_err);

                        arch_apei_report_mem_error(sev, mem_err);
                        ghes_handle_memory_failure(gdata, sev);
                }
                else if (guid_equal(sec_type, &CPER_SEC_PCIE)) {
                        ghes_handle_aer(gdata);
                }
                else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) {
                        struct cper_sec_proc_arm *err = acpi_hest_get_payload(gdata);

                        log_arm_hw_error(err);
                } else {
                        void *err = acpi_hest_get_payload(gdata);

                        log_non_standard_event(sec_type, fru_id, fru_text,
                                               sec_sev, err,
                                               gdata->error_data_length);
                }
        }
}
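/*
 * Print one error status block with a "{seqno}" prefix so the
 * multi-line CPER output of concurrent reports can be correlated.
 */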
static void __ghes_print_estatus(const char *pfx,
                                 const struct acpi_hest_generic *generic,
                                 const struct acpi_hest_generic_status *estatus)
{
        static atomic_t seqno;
        unsigned int curr_seqno;
        char pfx_seq[64];

        if (pfx == NULL) {
                if (ghes_severity(estatus->error_severity) <=
                    GHES_SEV_CORRECTED)
                        pfx = KERN_WARNING;
                else
                        pfx = KERN_ERR;
        }
        curr_seqno = atomic_inc_return(&seqno);
        snprintf(pfx_seq, sizeof(pfx_seq), "%s{%u}" HW_ERR, pfx, curr_seqno);
        printk("%s""Hardware error from APEI Generic Hardware Error Source: %d\n",
               pfx_seq, generic->header.source_id);
        cper_estatus_print(pfx_seq, estatus);
}

static int ghes_print_estatus(const char *pfx,
                              const struct acpi_hest_generic *generic,
                              const struct acpi_hest_generic_status *estatus)
{
        /* Not more than 2 messages every 5 seconds */
        static DEFINE_RATELIMIT_STATE(ratelimit_corrected, 5*HZ, 2);
        static DEFINE_RATELIMIT_STATE(ratelimit_uncorrected, 5*HZ, 2);
        struct ratelimit_state *ratelimit;

        if (ghes_severity(estatus->error_severity) <= GHES_SEV_CORRECTED)
                ratelimit = &ratelimit_corrected;
        else
                ratelimit = &ratelimit_uncorrected;
        if (__ratelimit(ratelimit)) {
                __ghes_print_estatus(pfx, generic, estatus);
                return 1;
        }

        return 0;
}

/*
 * GHES error status reporting throttle, to report more kinds of
 * errors instead of just the most frequently occurring ones.
 */
static int ghes_estatus_cached(struct acpi_hest_generic_status *estatus)
{
        u32 len;
        int i, cached = 0;
        unsigned long long now;
        struct ghes_estatus_cache *cache;
        struct acpi_hest_generic_status *cache_estatus;

        len = cper_estatus_len(estatus);
        rcu_read_lock();
        for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) {
                cache = rcu_dereference(ghes_estatus_caches[i]);
                if (cache == NULL)
                        continue;
                if (len != cache->estatus_len)
                        continue;
                cache_estatus = GHES_ESTATUS_FROM_CACHE(cache);
                if (memcmp(estatus, cache_estatus, len))
                        continue;
                atomic_inc(&cache->count);
                now = sched_clock();
                if (now - cache->time_in < GHES_ESTATUS_IN_CACHE_MAX_NSEC)
                        cached = 1;
                break;
        }
        rcu_read_unlock();

        return cached;
}
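/*
 * Allocate a cache entry from the lock-less estatus pool (safe in NMI
 * context, unlike kmalloc()) and copy the record into it.
 */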
static struct ghes_estatus_cache *ghes_estatus_cache_alloc(
        struct acpi_hest_generic *generic,
        struct acpi_hest_generic_status *estatus)
{
        int alloced;
        u32 len, cache_len;
        struct ghes_estatus_cache *cache;
        struct acpi_hest_generic_status *cache_estatus;

        alloced = atomic_add_return(1, &ghes_estatus_cache_alloced);
        if (alloced > GHES_ESTATUS_CACHE_ALLOCED_MAX) {
                atomic_dec(&ghes_estatus_cache_alloced);
                return NULL;
        }
        len = cper_estatus_len(estatus);
        cache_len = GHES_ESTATUS_CACHE_LEN(len);
        cache = (void *)gen_pool_alloc(ghes_estatus_pool, cache_len);
        if (!cache) {
                atomic_dec(&ghes_estatus_cache_alloced);
                return NULL;
        }
        cache_estatus = GHES_ESTATUS_FROM_CACHE(cache);
        memcpy(cache_estatus, estatus, len);
        cache->estatus_len = len;
        atomic_set(&cache->count, 0);
        cache->generic = generic;
        cache->time_in = sched_clock();

        return cache;
}

static void ghes_estatus_cache_free(struct ghes_estatus_cache *cache)
{
        u32 len;

        len = cper_estatus_len(GHES_ESTATUS_FROM_CACHE(cache));
        len = GHES_ESTATUS_CACHE_LEN(len);
        gen_pool_free(ghes_estatus_pool, (unsigned long)cache, len);
        atomic_dec(&ghes_estatus_cache_alloced);
}

static void ghes_estatus_cache_rcu_free(struct rcu_head *head)
{
        struct ghes_estatus_cache *cache;

        cache = container_of(head, struct ghes_estatus_cache, rcu);
        ghes_estatus_cache_free(cache);
}

static void ghes_estatus_cache_add(
        struct acpi_hest_generic *generic,
        struct acpi_hest_generic_status *estatus)
{
        int i, slot = -1, count;
        unsigned long long now, duration, period, max_period = 0;
        struct ghes_estatus_cache *cache, *slot_cache = NULL, *new_cache;

        new_cache = ghes_estatus_cache_alloc(generic, estatus);
        if (new_cache == NULL)
                return;
        rcu_read_lock();
        now = sched_clock();
        for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) {
                cache = rcu_dereference(ghes_estatus_caches[i]);
                if (cache == NULL) {
                        slot = i;
                        slot_cache = NULL;
                        break;
                }
                duration = now - cache->time_in;
                if (duration >= GHES_ESTATUS_IN_CACHE_MAX_NSEC) {
                        slot = i;
                        slot_cache = cache;
                        break;
                }
                count = atomic_read(&cache->count);
                period = duration;
                do_div(period, (count + 1));
                if (period > max_period) {
                        max_period = period;
                        slot = i;
                        slot_cache = cache;
                }
        }
        /* new_cache must be put into array after its contents are written */
        smp_wmb();
        if (slot != -1 && cmpxchg(ghes_estatus_caches + slot,
                                  slot_cache, new_cache) == slot_cache) {
                if (slot_cache)
                        call_rcu(&slot_cache->rcu, ghes_estatus_cache_rcu_free);
        } else
                ghes_estatus_cache_free(new_cache);
        rcu_read_unlock();
}
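/*
 * Acknowledge a GHESv2 error with a read-modify-write of the Read Ack
 * register: the preserve mask keeps firmware-owned bits, the write
 * mask tells the firmware that the error block may be reused.
 */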
static int ghes_ack_error(struct acpi_hest_generic_v2 *gv2)
{
        int rc;
        u64 val = 0;

        rc = apei_read(&val, &gv2->read_ack_register);
        if (rc)
                return rc;

        val &= gv2->read_ack_preserve << gv2->read_ack_register.bit_offset;
        val |= gv2->read_ack_write    << gv2->read_ack_register.bit_offset;

        return apei_write(val, &gv2->read_ack_register);
}

static void __ghes_panic(struct ghes *ghes)
{
        __ghes_print_estatus(KERN_EMERG, ghes->generic, ghes->estatus);

        /* reboot to log the error! */
        if (!panic_timeout)
                panic_timeout = ghes_panic_timeout;
        panic("Fatal hardware error!");
}

static int ghes_proc(struct ghes *ghes)
{
        int rc;

        rc = ghes_read_estatus(ghes, 0);
        if (rc)
                goto out;

        if (ghes_severity(ghes->estatus->error_severity) >= GHES_SEV_PANIC) {
                __ghes_panic(ghes);
        }

        if (!ghes_estatus_cached(ghes->estatus)) {
                if (ghes_print_estatus(NULL, ghes->generic, ghes->estatus))
                        ghes_estatus_cache_add(ghes->generic, ghes->estatus);
        }
        ghes_do_proc(ghes, ghes->estatus);

out:
        ghes_clear_estatus(ghes);

        if (rc == -ENOENT)
                return rc;

        /*
         * GHESv2 type HEST entries introduce support for error acknowledgment,
         * so only acknowledge the error if this support is present.
         */
        if (is_hest_type_generic_v2(ghes))
                return ghes_ack_error(ghes->generic_v2);

        return rc;
}
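/*
 * For polled sources, re-arm a deferrable timer with the firmware
 * supplied poll interval; a zero interval would mean busy polling, so
 * such a source is disabled instead.
 */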
static void ghes_add_timer(struct ghes *ghes)
{
        struct acpi_hest_generic *g = ghes->generic;
        unsigned long expire;

        if (!g->notify.poll_interval) {
                pr_warning(FW_WARN GHES_PFX "Poll interval is 0 for generic hardware error source: %d, disabled.\n",
                           g->header.source_id);
                return;
        }
        expire = jiffies + msecs_to_jiffies(g->notify.poll_interval);
        ghes->timer.expires = round_jiffies_relative(expire);
        add_timer(&ghes->timer);
}

static void ghes_poll_func(struct timer_list *t)
{
        struct ghes *ghes = from_timer(ghes, t, timer);

        ghes_proc(ghes);
        if (!(ghes->flags & GHES_EXITING))
                ghes_add_timer(ghes);
}

static irqreturn_t ghes_irq_func(int irq, void *data)
{
        struct ghes *ghes = data;
        int rc;

        rc = ghes_proc(ghes);
        if (rc)
                return IRQ_NONE;

        return IRQ_HANDLED;
}

static int ghes_notify_hed(struct notifier_block *this, unsigned long event,
                           void *data)
{
        struct ghes *ghes;
        int ret = NOTIFY_DONE;

        rcu_read_lock();
        list_for_each_entry_rcu(ghes, &ghes_hed, list) {
                if (!ghes_proc(ghes))
                        ret = NOTIFY_OK;
        }
        rcu_read_unlock();

        return ret;
}

static struct notifier_block ghes_notifier_hed = {
        .notifier_call = ghes_notify_hed,
};
#ifdef CONFIG_ACPI_APEI_SEA
static LIST_HEAD(ghes_sea);

/*
 * Return 0 only if one of the SEA error sources successfully reported an error
 * record sent from the firmware.
 */
int ghes_notify_sea(void)
{
        struct ghes *ghes;
        int ret = -ENOENT;

        rcu_read_lock();
        list_for_each_entry_rcu(ghes, &ghes_sea, list) {
                if (!ghes_proc(ghes))
                        ret = 0;
        }
        rcu_read_unlock();
        return ret;
}

static void ghes_sea_add(struct ghes *ghes)
{
        mutex_lock(&ghes_list_mutex);
        list_add_rcu(&ghes->list, &ghes_sea);
        mutex_unlock(&ghes_list_mutex);
}

static void ghes_sea_remove(struct ghes *ghes)
{
        mutex_lock(&ghes_list_mutex);
        list_del_rcu(&ghes->list);
        mutex_unlock(&ghes_list_mutex);
        synchronize_rcu();
}
#else /* CONFIG_ACPI_APEI_SEA */
static inline void ghes_sea_add(struct ghes *ghes) { }
static inline void ghes_sea_remove(struct ghes *ghes) { }
#endif /* CONFIG_ACPI_APEI_SEA */

#ifdef CONFIG_HAVE_ACPI_APEI_NMI
/*
 * printk is not safe in NMI context. So in the NMI handler, we
 * allocate the required memory from the lock-less memory allocator
 * (ghes_estatus_pool), save the estatus into it, put it on the
 * lock-less list (ghes_estatus_llist), then delay printk into IRQ
 * context via irq_work (ghes_proc_irq_work).
 * ghes_estatus_pool_size_request records the pool size required by
 * all NMI error sources.
 */
static struct llist_head ghes_estatus_llist;
static struct irq_work ghes_proc_irq_work;

/*
 * NMI may be triggered on any CPU, so ghes_in_nmi is used for
 * having only one concurrent reader.
 */
static atomic_t ghes_in_nmi = ATOMIC_INIT(0);

static LIST_HEAD(ghes_nmi);
static void ghes_proc_in_irq(struct irq_work *irq_work)
{
        struct llist_node *llnode, *next;
        struct ghes_estatus_node *estatus_node;
        struct acpi_hest_generic *generic;
        struct acpi_hest_generic_status *estatus;
        u32 len, node_len;

        llnode = llist_del_all(&ghes_estatus_llist);
        /*
         * Because the time order of estatus in list is reversed,
         * revert it back to proper order.
         */
        llnode = llist_reverse_order(llnode);
        while (llnode) {
                next = llnode->next;
                estatus_node = llist_entry(llnode, struct ghes_estatus_node,
                                           llnode);
                estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
                len = cper_estatus_len(estatus);
                node_len = GHES_ESTATUS_NODE_LEN(len);
                ghes_do_proc(estatus_node->ghes, estatus);
                if (!ghes_estatus_cached(estatus)) {
                        generic = estatus_node->generic;
                        if (ghes_print_estatus(NULL, generic, estatus))
                                ghes_estatus_cache_add(generic, estatus);
                }
                gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node,
                              node_len);
                llnode = next;
        }
}

static void ghes_print_queued_estatus(void)
{
        struct llist_node *llnode;
        struct ghes_estatus_node *estatus_node;
        struct acpi_hest_generic *generic;
        struct acpi_hest_generic_status *estatus;

        llnode = llist_del_all(&ghes_estatus_llist);
        /*
         * Because the time order of estatus in list is reversed,
         * revert it back to proper order.
         */
        llnode = llist_reverse_order(llnode);
        while (llnode) {
                estatus_node = llist_entry(llnode, struct ghes_estatus_node,
                                           llnode);
                estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
                generic = estatus_node->generic;
                ghes_print_estatus(NULL, generic, estatus);
                llnode = llnode->next;
        }
}

/* Save estatus for further processing in IRQ context */
static void __process_error(struct ghes *ghes)
{
#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
        u32 len, node_len;
        struct ghes_estatus_node *estatus_node;
        struct acpi_hest_generic_status *estatus;

        if (ghes_estatus_cached(ghes->estatus))
                return;

        len = cper_estatus_len(ghes->estatus);
        node_len = GHES_ESTATUS_NODE_LEN(len);

        estatus_node = (void *)gen_pool_alloc(ghes_estatus_pool, node_len);
        if (!estatus_node)
                return;

        estatus_node->ghes = ghes;
        estatus_node->generic = ghes->generic;
        estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
        memcpy(estatus, ghes->estatus, len);
        llist_add(&estatus_node->llnode, &ghes_estatus_llist);
#endif
}
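/*
 * NMI handler: read and queue the estatus blocks of all NMI-notified
 * sources, panic right away on fatal severity, and defer printing and
 * recovery to IRQ context via irq_work.
 */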
static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs)
{
        struct ghes *ghes;
        int sev, ret = NMI_DONE;

        if (!atomic_add_unless(&ghes_in_nmi, 1, 1))
                return ret;

        list_for_each_entry_rcu(ghes, &ghes_nmi, list) {
                if (ghes_read_estatus(ghes, 1)) {
                        ghes_clear_estatus(ghes);
                        continue;
                } else {
                        ret = NMI_HANDLED;
                }

                sev = ghes_severity(ghes->estatus->error_severity);
                if (sev >= GHES_SEV_PANIC) {
                        oops_begin();
                        ghes_print_queued_estatus();
                        __ghes_panic(ghes);
                }

                if (!(ghes->flags & GHES_TO_CLEAR))
                        continue;

                __process_error(ghes);
                ghes_clear_estatus(ghes);
        }

#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
        if (ret == NMI_HANDLED)
                irq_work_queue(&ghes_proc_irq_work);
#endif
        atomic_dec(&ghes_in_nmi);
        return ret;
}

static unsigned long ghes_esource_prealloc_size(
        const struct acpi_hest_generic *generic)
{
        unsigned long block_length, prealloc_records, prealloc_size;

        block_length = min_t(unsigned long, generic->error_block_length,
                             GHES_ESTATUS_MAX_SIZE);
        prealloc_records = max_t(unsigned long,
                                 generic->records_to_preallocate, 1);
        prealloc_size = min_t(unsigned long, block_length * prealloc_records,
                              GHES_ESOURCE_PREALLOC_MAX_SIZE);

        return prealloc_size;
}

static void ghes_estatus_pool_shrink(unsigned long len)
{
        ghes_estatus_pool_size_request -= PAGE_ALIGN(len);
}

static void ghes_nmi_add(struct ghes *ghes)
{
        unsigned long len;

        len = ghes_esource_prealloc_size(ghes->generic);
        ghes_estatus_pool_expand(len);
        mutex_lock(&ghes_list_mutex);
        if (list_empty(&ghes_nmi))
                register_nmi_handler(NMI_LOCAL, ghes_notify_nmi, 0, "ghes");
        list_add_rcu(&ghes->list, &ghes_nmi);
        mutex_unlock(&ghes_list_mutex);
}

static void ghes_nmi_remove(struct ghes *ghes)
{
        unsigned long len;

        mutex_lock(&ghes_list_mutex);
        list_del_rcu(&ghes->list);
        if (list_empty(&ghes_nmi))
                unregister_nmi_handler(NMI_LOCAL, "ghes");
        mutex_unlock(&ghes_list_mutex);
        /*
         * To synchronize with NMI handler, ghes can only be
         * freed after NMI handler finishes.
         */
        synchronize_rcu();
        len = ghes_esource_prealloc_size(ghes->generic);
        ghes_estatus_pool_shrink(len);
}

static void ghes_nmi_init_cxt(void)
{
        init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq);
}
#else /* CONFIG_HAVE_ACPI_APEI_NMI */
static inline void ghes_nmi_add(struct ghes *ghes) { }
static inline void ghes_nmi_remove(struct ghes *ghes) { }
static inline void ghes_nmi_init_cxt(void) { }
#endif /* CONFIG_HAVE_ACPI_APEI_NMI */
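/*
 * One platform device is created per enabled HEST generic error
 * source; probe wires the source up to its notification method.
 */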
static int ghes_probe(struct platform_device *ghes_dev)
{
        struct acpi_hest_generic *generic;
        struct ghes *ghes = NULL;
        int rc = -EINVAL;

        generic = *(struct acpi_hest_generic **)ghes_dev->dev.platform_data;
        if (!generic->enabled)
                return -ENODEV;

        switch (generic->notify.type) {
        case ACPI_HEST_NOTIFY_POLLED:
        case ACPI_HEST_NOTIFY_EXTERNAL:
        case ACPI_HEST_NOTIFY_SCI:
        case ACPI_HEST_NOTIFY_GSIV:
        case ACPI_HEST_NOTIFY_GPIO:
                break;

        case ACPI_HEST_NOTIFY_SEA:
                if (!IS_ENABLED(CONFIG_ACPI_APEI_SEA)) {
                        pr_warn(GHES_PFX "Generic hardware error source: %d notified via SEA is not supported\n",
                                generic->header.source_id);
                        rc = -ENOTSUPP;
                        goto err;
                }
                break;
        case ACPI_HEST_NOTIFY_NMI:
                if (!IS_ENABLED(CONFIG_HAVE_ACPI_APEI_NMI)) {
                        pr_warn(GHES_PFX "Generic hardware error source: %d notified via NMI interrupt is not supported!\n",
                                generic->header.source_id);
                        goto err;
                }
                break;
        case ACPI_HEST_NOTIFY_LOCAL:
                pr_warning(GHES_PFX "Generic hardware error source: %d notified via local interrupt is not supported!\n",
                           generic->header.source_id);
                goto err;
        default:
                pr_warning(FW_WARN GHES_PFX "Unknown notification type: %u for generic hardware error source: %d\n",
                           generic->notify.type, generic->header.source_id);
                goto err;
        }

        rc = -EIO;
        if (generic->error_block_length <
            sizeof(struct acpi_hest_generic_status)) {
                pr_warning(FW_BUG GHES_PFX "Invalid error block length: %u for generic hardware error source: %d\n",
                           generic->error_block_length,
                           generic->header.source_id);
                goto err;
        }
        ghes = ghes_new(generic);
        if (IS_ERR(ghes)) {
                rc = PTR_ERR(ghes);
                ghes = NULL;
                goto err;
        }

        rc = ghes_edac_register(ghes, &ghes_dev->dev);
        if (rc < 0)
                goto err;

        switch (generic->notify.type) {
        case ACPI_HEST_NOTIFY_POLLED:
                timer_setup(&ghes->timer, ghes_poll_func, TIMER_DEFERRABLE);
                ghes_add_timer(ghes);
                break;
        case ACPI_HEST_NOTIFY_EXTERNAL:
                /* External interrupt vector is GSI */
                rc = acpi_gsi_to_irq(generic->notify.vector, &ghes->irq);
                if (rc) {
                        pr_err(GHES_PFX "Failed to map GSI to IRQ for generic hardware error source: %d\n",
                               generic->header.source_id);
                        goto err_edac_unreg;
                }
                rc = request_irq(ghes->irq, ghes_irq_func, IRQF_SHARED,
                                 "GHES IRQ", ghes);
                if (rc) {
                        pr_err(GHES_PFX "Failed to register IRQ for generic hardware error source: %d\n",
                               generic->header.source_id);
                        goto err_edac_unreg;
                }
                break;

        case ACPI_HEST_NOTIFY_SCI:
        case ACPI_HEST_NOTIFY_GSIV:
        case ACPI_HEST_NOTIFY_GPIO:
                mutex_lock(&ghes_list_mutex);
                if (list_empty(&ghes_hed))
                        register_acpi_hed_notifier(&ghes_notifier_hed);
                list_add_rcu(&ghes->list, &ghes_hed);
                mutex_unlock(&ghes_list_mutex);
                break;

        case ACPI_HEST_NOTIFY_SEA:
                ghes_sea_add(ghes);
                break;
        case ACPI_HEST_NOTIFY_NMI:
                ghes_nmi_add(ghes);
                break;
        default:
                BUG();
        }
        platform_set_drvdata(ghes_dev, ghes);

        /* Handle any pending errors right away */
        ghes_proc(ghes);

        return 0;
err_edac_unreg:
        ghes_edac_unregister(ghes);
err:
        if (ghes) {
                ghes_fini(ghes);
                kfree(ghes);
        }
        return rc;
}
static int ghes_remove(struct platform_device *ghes_dev)
{
        struct ghes *ghes;
        struct acpi_hest_generic *generic;

        ghes = platform_get_drvdata(ghes_dev);
        generic = ghes->generic;

        ghes->flags |= GHES_EXITING;
        switch (generic->notify.type) {
        case ACPI_HEST_NOTIFY_POLLED:
                del_timer_sync(&ghes->timer);
                break;
        case ACPI_HEST_NOTIFY_EXTERNAL:
                free_irq(ghes->irq, ghes);
                break;
        case ACPI_HEST_NOTIFY_SCI:
        case ACPI_HEST_NOTIFY_GSIV:
        case ACPI_HEST_NOTIFY_GPIO:
                mutex_lock(&ghes_list_mutex);
                list_del_rcu(&ghes->list);
                if (list_empty(&ghes_hed))
                        unregister_acpi_hed_notifier(&ghes_notifier_hed);
                mutex_unlock(&ghes_list_mutex);
                synchronize_rcu();
                break;
        case ACPI_HEST_NOTIFY_SEA:
                ghes_sea_remove(ghes);
                break;
        case ACPI_HEST_NOTIFY_NMI:
                ghes_nmi_remove(ghes);
                break;
        default:
                BUG();
                break;
        }

        ghes_fini(ghes);

        ghes_edac_unregister(ghes);

        kfree(ghes);

        platform_set_drvdata(ghes_dev, NULL);

        return 0;
}

static struct platform_driver ghes_platform_driver = {
        .driver         = {
                .name   = "GHES",
        },
        .probe          = ghes_probe,
        .remove         = ghes_remove,
};
static int __init ghes_init(void)
{
        int rc;

        if (acpi_disabled)
                return -ENODEV;

        switch (hest_disable) {
        case HEST_NOT_FOUND:
                return -ENODEV;
        case HEST_DISABLED:
                pr_info(GHES_PFX "HEST is not enabled!\n");
                return -EINVAL;
        default:
                break;
        }

        if (ghes_disable) {
                pr_info(GHES_PFX "GHES is not enabled!\n");
                return -EINVAL;
        }

        ghes_nmi_init_cxt();

        rc = ghes_estatus_pool_init();
        if (rc)
                goto err;

        rc = ghes_estatus_pool_expand(GHES_ESTATUS_CACHE_AVG_SIZE *
                                      GHES_ESTATUS_CACHE_ALLOCED_MAX);
        if (rc)
                goto err_pool_exit;

        rc = platform_driver_register(&ghes_platform_driver);
        if (rc)
                goto err_pool_exit;

        rc = apei_osc_setup();
        if (rc == 0 && osc_sb_apei_support_acked)
                pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit and WHEA _OSC.\n");
        else if (rc == 0 && !osc_sb_apei_support_acked)
                pr_info(GHES_PFX "APEI firmware first mode is enabled by WHEA _OSC.\n");
        else if (rc && osc_sb_apei_support_acked)
                pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit.\n");
        else
                pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n");

        return 0;
err_pool_exit:
        ghes_estatus_pool_exit();
err:
        return rc;
}
device_initcall(ghes_init);