mce_power.c 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619
  1. /*
  2. * Machine check exception handling CPU-side for power7 and power8
  3. *
  4. * This program is free software; you can redistribute it and/or modify
  5. * it under the terms of the GNU General Public License as published by
  6. * the Free Software Foundation; either version 2 of the License, or
  7. * (at your option) any later version.
  8. *
  9. * This program is distributed in the hope that it will be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. * GNU General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program; if not, write to the Free Software
  16. * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  17. *
  18. * Copyright 2013 IBM Corporation
  19. * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
  20. */
  21. #undef DEBUG
  22. #define pr_fmt(fmt) "mce_power: " fmt
  23. #include <linux/types.h>
  24. #include <linux/ptrace.h>
  25. #include <asm/mmu.h>
  26. #include <asm/mce.h>
  27. #include <asm/machdep.h>
  28. #include <asm/pgtable.h>
  29. #include <asm/pte-walk.h>
  30. #include <asm/sstep.h>
  31. #include <asm/exception-64s.h>
  32. /*
  33. * Convert an address related to an mm to a PFN. NOTE: we are in real
  34. * mode, we could potentially race with page table updates.
  35. */
  36. static unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr)
  37. {
  38. pte_t *ptep;
  39. unsigned long flags;
  40. struct mm_struct *mm;
  41. if (user_mode(regs))
  42. mm = current->mm;
  43. else
  44. mm = &init_mm;
  45. local_irq_save(flags);
  46. if (mm == current->mm)
  47. ptep = find_current_mm_pte(mm->pgd, addr, NULL, NULL);
  48. else
  49. ptep = find_init_mm_pte(addr, NULL);
  50. local_irq_restore(flags);
  51. if (!ptep || pte_special(*ptep))
  52. return ULONG_MAX;
  53. return pte_pfn(*ptep);
  54. }
  55. /* flush SLBs and reload */
  56. #ifdef CONFIG_PPC_BOOK3S_64
  57. void flush_and_reload_slb(void)
  58. {
  59. /* Invalidate all SLBs */
  60. slb_flush_all_realmode();
  61. #ifdef CONFIG_KVM_BOOK3S_HANDLER
  62. /*
  63. * If machine check is hit when in guest or in transition, we will
  64. * only flush the SLBs and continue.
  65. */
  66. if (get_paca()->kvm_hstate.in_guest)
  67. return;
  68. #endif
  69. if (early_radix_enabled())
  70. return;
  71. /*
  72. * This probably shouldn't happen, but it may be possible it's
  73. * called in early boot before SLB shadows are allocated.
  74. */
  75. if (!get_slb_shadow())
  76. return;
  77. slb_restore_bolted_realmode();
  78. }
  79. #endif
  80. static void flush_erat(void)
  81. {
  82. #ifdef CONFIG_PPC_BOOK3S_64
  83. if (!early_cpu_has_feature(CPU_FTR_ARCH_300)) {
  84. flush_and_reload_slb();
  85. return;
  86. }
  87. #endif
  88. /* PPC_INVALIDATE_ERAT can only be used on ISA v3 and newer */
  89. asm volatile(PPC_INVALIDATE_ERAT : : :"memory");
  90. }
  91. #define MCE_FLUSH_SLB 1
  92. #define MCE_FLUSH_TLB 2
  93. #define MCE_FLUSH_ERAT 3
  94. static int mce_flush(int what)
  95. {
  96. #ifdef CONFIG_PPC_BOOK3S_64
  97. if (what == MCE_FLUSH_SLB) {
  98. flush_and_reload_slb();
  99. return 1;
  100. }
  101. #endif
  102. if (what == MCE_FLUSH_ERAT) {
  103. flush_erat();
  104. return 1;
  105. }
  106. if (what == MCE_FLUSH_TLB) {
  107. tlbiel_all();
  108. return 1;
  109. }
  110. return 0;
  111. }
  112. #define SRR1_MC_LOADSTORE(srr1) ((srr1) & PPC_BIT(42))
  113. struct mce_ierror_table {
  114. unsigned long srr1_mask;
  115. unsigned long srr1_value;
  116. bool nip_valid; /* nip is a valid indicator of faulting address */
  117. unsigned int error_type;
  118. unsigned int error_subtype;
  119. unsigned int initiator;
  120. unsigned int severity;
  121. };
  122. static const struct mce_ierror_table mce_p7_ierror_table[] = {
  123. { 0x00000000001c0000, 0x0000000000040000, true,
  124. MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH,
  125. MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
  126. { 0x00000000001c0000, 0x0000000000080000, true,
  127. MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY,
  128. MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
  129. { 0x00000000001c0000, 0x00000000000c0000, true,
  130. MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT,
  131. MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
  132. { 0x00000000001c0000, 0x0000000000100000, true,
  133. MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_INDETERMINATE, /* BOTH */
  134. MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
  135. { 0x00000000001c0000, 0x0000000000140000, true,
  136. MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT,
  137. MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
  138. { 0x00000000001c0000, 0x0000000000180000, true,
  139. MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH,
  140. MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
  141. { 0x00000000001c0000, 0x00000000001c0000, true,
  142. MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH,
  143. MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
  144. { 0, 0, 0, 0, 0, 0 } };
  145. static const struct mce_ierror_table mce_p8_ierror_table[] = {
  146. { 0x00000000081c0000, 0x0000000000040000, true,
  147. MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH,
  148. MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
  149. { 0x00000000081c0000, 0x0000000000080000, true,
  150. MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY,
  151. MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
  152. { 0x00000000081c0000, 0x00000000000c0000, true,
  153. MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT,
  154. MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
  155. { 0x00000000081c0000, 0x0000000000100000, true,
  156. MCE_ERROR_TYPE_ERAT,MCE_ERAT_ERROR_MULTIHIT,
  157. MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
  158. { 0x00000000081c0000, 0x0000000000140000, true,
  159. MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT,
  160. MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
  161. { 0x00000000081c0000, 0x0000000000180000, true,
  162. MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH,
  163. MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
  164. { 0x00000000081c0000, 0x00000000001c0000, true,
  165. MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH,
  166. MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
  167. { 0x00000000081c0000, 0x0000000008000000, true,
  168. MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_IFETCH_TIMEOUT,
  169. MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
  170. { 0x00000000081c0000, 0x0000000008040000, true,
  171. MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT,
  172. MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
  173. { 0, 0, 0, 0, 0, 0 } };
  174. static const struct mce_ierror_table mce_p9_ierror_table[] = {
  175. { 0x00000000081c0000, 0x0000000000040000, true,
  176. MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH,
  177. MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
  178. { 0x00000000081c0000, 0x0000000000080000, true,
  179. MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY,
  180. MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
  181. { 0x00000000081c0000, 0x00000000000c0000, true,
  182. MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT,
  183. MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
  184. { 0x00000000081c0000, 0x0000000000100000, true,
  185. MCE_ERROR_TYPE_ERAT,MCE_ERAT_ERROR_MULTIHIT,
  186. MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
  187. { 0x00000000081c0000, 0x0000000000140000, true,
  188. MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT,
  189. MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
  190. { 0x00000000081c0000, 0x0000000000180000, true,
  191. MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH,
  192. MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
  193. { 0x00000000081c0000, 0x00000000001c0000, true,
  194. MCE_ERROR_TYPE_RA, MCE_RA_ERROR_IFETCH_FOREIGN,
  195. MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
  196. { 0x00000000081c0000, 0x0000000008000000, true,
  197. MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_IFETCH_TIMEOUT,
  198. MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
  199. { 0x00000000081c0000, 0x0000000008040000, true,
  200. MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT,
  201. MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
  202. { 0x00000000081c0000, 0x00000000080c0000, true,
  203. MCE_ERROR_TYPE_RA, MCE_RA_ERROR_IFETCH,
  204. MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
  205. { 0x00000000081c0000, 0x0000000008100000, true,
  206. MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH,
  207. MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
  208. { 0x00000000081c0000, 0x0000000008140000, false,
  209. MCE_ERROR_TYPE_RA, MCE_RA_ERROR_STORE,
  210. MCE_INITIATOR_CPU, MCE_SEV_FATAL, }, /* ASYNC is fatal */
  211. { 0x00000000081c0000, 0x0000000008180000, false,
  212. MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_STORE_TIMEOUT,
  213. MCE_INITIATOR_CPU, MCE_SEV_FATAL, }, /* ASYNC is fatal */
  214. { 0x00000000081c0000, 0x00000000081c0000, true,
  215. MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN,
  216. MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
  217. { 0, 0, 0, 0, 0, 0 } };
  218. struct mce_derror_table {
  219. unsigned long dsisr_value;
  220. bool dar_valid; /* dar is a valid indicator of faulting address */
  221. unsigned int error_type;
  222. unsigned int error_subtype;
  223. unsigned int initiator;
  224. unsigned int severity;
  225. };
  226. static const struct mce_derror_table mce_p7_derror_table[] = {
  227. { 0x00008000, false,
  228. MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE,
  229. MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
  230. { 0x00004000, true,
  231. MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE,
  232. MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
  233. { 0x00000800, true,
  234. MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT,
  235. MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
  236. { 0x00000400, true,
  237. MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT,
  238. MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
  239. { 0x00000080, true,
  240. MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, /* Before PARITY */
  241. MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
  242. { 0x00000100, true,
  243. MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY,
  244. MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
  245. { 0x00000040, true,
  246. MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_INDETERMINATE, /* BOTH */
  247. MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
  248. { 0, false, 0, 0, 0, 0 } };
  249. static const struct mce_derror_table mce_p8_derror_table[] = {
  250. { 0x00008000, false,
  251. MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE,
  252. MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
  253. { 0x00004000, true,
  254. MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE,
  255. MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
  256. { 0x00002000, true,
  257. MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_LOAD_TIMEOUT,
  258. MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
  259. { 0x00001000, true,
  260. MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT,
  261. MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
  262. { 0x00000800, true,
  263. MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT,
  264. MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
  265. { 0x00000400, true,
  266. MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT,
  267. MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
  268. { 0x00000200, true,
  269. MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, /* SECONDARY ERAT */
  270. MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
  271. { 0x00000080, true,
  272. MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, /* Before PARITY */
  273. MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
  274. { 0x00000100, true,
  275. MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY,
  276. MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
  277. { 0, false, 0, 0, 0, 0 } };
  278. static const struct mce_derror_table mce_p9_derror_table[] = {
  279. { 0x00008000, false,
  280. MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE,
  281. MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
  282. { 0x00004000, true,
  283. MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE,
  284. MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
  285. { 0x00002000, true,
  286. MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_LOAD_TIMEOUT,
  287. MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
  288. { 0x00001000, true,
  289. MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT,
  290. MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
  291. { 0x00000800, true,
  292. MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT,
  293. MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
  294. { 0x00000400, true,
  295. MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT,
  296. MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
  297. { 0x00000200, false,
  298. MCE_ERROR_TYPE_USER, MCE_USER_ERROR_TLBIE,
  299. MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
  300. { 0x00000080, true,
  301. MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, /* Before PARITY */
  302. MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
  303. { 0x00000100, true,
  304. MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY,
  305. MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
  306. { 0x00000040, true,
  307. MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD,
  308. MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
  309. { 0x00000020, false,
  310. MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE,
  311. MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
  312. { 0x00000010, false,
  313. MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN,
  314. MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
  315. { 0x00000008, false,
  316. MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD_STORE_FOREIGN,
  317. MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
  318. { 0, false, 0, 0, 0, 0 } };
  319. static int mce_find_instr_ea_and_pfn(struct pt_regs *regs, uint64_t *addr,
  320. uint64_t *phys_addr)
  321. {
  322. /*
  323. * Carefully look at the NIP to determine
  324. * the instruction to analyse. Reading the NIP
  325. * in real-mode is tricky and can lead to recursive
  326. * faults
  327. */
  328. int instr;
  329. unsigned long pfn, instr_addr;
  330. struct instruction_op op;
  331. struct pt_regs tmp = *regs;
  332. pfn = addr_to_pfn(regs, regs->nip);
  333. if (pfn != ULONG_MAX) {
  334. instr_addr = (pfn << PAGE_SHIFT) + (regs->nip & ~PAGE_MASK);
  335. instr = *(unsigned int *)(instr_addr);
  336. if (!analyse_instr(&op, &tmp, instr)) {
  337. pfn = addr_to_pfn(regs, op.ea);
  338. *addr = op.ea;
  339. *phys_addr = (pfn << PAGE_SHIFT);
  340. return 0;
  341. }
  342. /*
  343. * analyse_instr() might fail if the instruction
  344. * is not a load/store, although this is unexpected
  345. * for load/store errors or if we got the NIP
  346. * wrong
  347. */
  348. }
  349. *addr = 0;
  350. return -1;
  351. }
  352. static int mce_handle_ierror(struct pt_regs *regs,
  353. const struct mce_ierror_table table[],
  354. struct mce_error_info *mce_err, uint64_t *addr,
  355. uint64_t *phys_addr)
  356. {
  357. uint64_t srr1 = regs->msr;
  358. int handled = 0;
  359. int i;
  360. *addr = 0;
  361. for (i = 0; table[i].srr1_mask; i++) {
  362. if ((srr1 & table[i].srr1_mask) != table[i].srr1_value)
  363. continue;
  364. /* attempt to correct the error */
  365. switch (table[i].error_type) {
  366. case MCE_ERROR_TYPE_SLB:
  367. handled = mce_flush(MCE_FLUSH_SLB);
  368. break;
  369. case MCE_ERROR_TYPE_ERAT:
  370. handled = mce_flush(MCE_FLUSH_ERAT);
  371. break;
  372. case MCE_ERROR_TYPE_TLB:
  373. handled = mce_flush(MCE_FLUSH_TLB);
  374. break;
  375. }
  376. /* now fill in mce_error_info */
  377. mce_err->error_type = table[i].error_type;
  378. switch (table[i].error_type) {
  379. case MCE_ERROR_TYPE_UE:
  380. mce_err->u.ue_error_type = table[i].error_subtype;
  381. break;
  382. case MCE_ERROR_TYPE_SLB:
  383. mce_err->u.slb_error_type = table[i].error_subtype;
  384. break;
  385. case MCE_ERROR_TYPE_ERAT:
  386. mce_err->u.erat_error_type = table[i].error_subtype;
  387. break;
  388. case MCE_ERROR_TYPE_TLB:
  389. mce_err->u.tlb_error_type = table[i].error_subtype;
  390. break;
  391. case MCE_ERROR_TYPE_USER:
  392. mce_err->u.user_error_type = table[i].error_subtype;
  393. break;
  394. case MCE_ERROR_TYPE_RA:
  395. mce_err->u.ra_error_type = table[i].error_subtype;
  396. break;
  397. case MCE_ERROR_TYPE_LINK:
  398. mce_err->u.link_error_type = table[i].error_subtype;
  399. break;
  400. }
  401. mce_err->severity = table[i].severity;
  402. mce_err->initiator = table[i].initiator;
  403. if (table[i].nip_valid) {
  404. *addr = regs->nip;
  405. if (mce_err->severity == MCE_SEV_ERROR_SYNC &&
  406. table[i].error_type == MCE_ERROR_TYPE_UE) {
  407. unsigned long pfn;
  408. if (get_paca()->in_mce < MAX_MCE_DEPTH) {
  409. pfn = addr_to_pfn(regs, regs->nip);
  410. if (pfn != ULONG_MAX) {
  411. *phys_addr =
  412. (pfn << PAGE_SHIFT);
  413. }
  414. }
  415. }
  416. }
  417. return handled;
  418. }
  419. mce_err->error_type = MCE_ERROR_TYPE_UNKNOWN;
  420. mce_err->severity = MCE_SEV_ERROR_SYNC;
  421. mce_err->initiator = MCE_INITIATOR_CPU;
  422. return 0;
  423. }
  424. static int mce_handle_derror(struct pt_regs *regs,
  425. const struct mce_derror_table table[],
  426. struct mce_error_info *mce_err, uint64_t *addr,
  427. uint64_t *phys_addr)
  428. {
  429. uint64_t dsisr = regs->dsisr;
  430. int handled = 0;
  431. int found = 0;
  432. int i;
  433. *addr = 0;
  434. for (i = 0; table[i].dsisr_value; i++) {
  435. if (!(dsisr & table[i].dsisr_value))
  436. continue;
  437. /* attempt to correct the error */
  438. switch (table[i].error_type) {
  439. case MCE_ERROR_TYPE_SLB:
  440. if (mce_flush(MCE_FLUSH_SLB))
  441. handled = 1;
  442. break;
  443. case MCE_ERROR_TYPE_ERAT:
  444. if (mce_flush(MCE_FLUSH_ERAT))
  445. handled = 1;
  446. break;
  447. case MCE_ERROR_TYPE_TLB:
  448. if (mce_flush(MCE_FLUSH_TLB))
  449. handled = 1;
  450. break;
  451. }
  452. /*
  453. * Attempt to handle multiple conditions, but only return
  454. * one. Ensure uncorrectable errors are first in the table
  455. * to match.
  456. */
  457. if (found)
  458. continue;
  459. /* now fill in mce_error_info */
  460. mce_err->error_type = table[i].error_type;
  461. switch (table[i].error_type) {
  462. case MCE_ERROR_TYPE_UE:
  463. mce_err->u.ue_error_type = table[i].error_subtype;
  464. break;
  465. case MCE_ERROR_TYPE_SLB:
  466. mce_err->u.slb_error_type = table[i].error_subtype;
  467. break;
  468. case MCE_ERROR_TYPE_ERAT:
  469. mce_err->u.erat_error_type = table[i].error_subtype;
  470. break;
  471. case MCE_ERROR_TYPE_TLB:
  472. mce_err->u.tlb_error_type = table[i].error_subtype;
  473. break;
  474. case MCE_ERROR_TYPE_USER:
  475. mce_err->u.user_error_type = table[i].error_subtype;
  476. break;
  477. case MCE_ERROR_TYPE_RA:
  478. mce_err->u.ra_error_type = table[i].error_subtype;
  479. break;
  480. case MCE_ERROR_TYPE_LINK:
  481. mce_err->u.link_error_type = table[i].error_subtype;
  482. break;
  483. }
  484. mce_err->severity = table[i].severity;
  485. mce_err->initiator = table[i].initiator;
  486. if (table[i].dar_valid)
  487. *addr = regs->dar;
  488. else if (mce_err->severity == MCE_SEV_ERROR_SYNC &&
  489. table[i].error_type == MCE_ERROR_TYPE_UE) {
  490. /*
  491. * We do a maximum of 4 nested MCE calls, see
  492. * kernel/exception-64s.h
  493. */
  494. if (get_paca()->in_mce < MAX_MCE_DEPTH)
  495. mce_find_instr_ea_and_pfn(regs, addr, phys_addr);
  496. }
  497. found = 1;
  498. }
  499. if (found)
  500. return handled;
  501. mce_err->error_type = MCE_ERROR_TYPE_UNKNOWN;
  502. mce_err->severity = MCE_SEV_ERROR_SYNC;
  503. mce_err->initiator = MCE_INITIATOR_CPU;
  504. return 0;
  505. }
  506. static long mce_handle_ue_error(struct pt_regs *regs)
  507. {
  508. long handled = 0;
  509. /*
  510. * On specific SCOM read via MMIO we may get a machine check
  511. * exception with SRR0 pointing inside opal. If that is the
  512. * case OPAL may have recovery address to re-read SCOM data in
  513. * different way and hence we can recover from this MC.
  514. */
  515. if (ppc_md.mce_check_early_recovery) {
  516. if (ppc_md.mce_check_early_recovery(regs))
  517. handled = 1;
  518. }
  519. return handled;
  520. }
  521. static long mce_handle_error(struct pt_regs *regs,
  522. const struct mce_derror_table dtable[],
  523. const struct mce_ierror_table itable[])
  524. {
  525. struct mce_error_info mce_err = { 0 };
  526. uint64_t addr, phys_addr = ULONG_MAX;
  527. uint64_t srr1 = regs->msr;
  528. long handled;
  529. if (SRR1_MC_LOADSTORE(srr1))
  530. handled = mce_handle_derror(regs, dtable, &mce_err, &addr,
  531. &phys_addr);
  532. else
  533. handled = mce_handle_ierror(regs, itable, &mce_err, &addr,
  534. &phys_addr);
  535. if (!handled && mce_err.error_type == MCE_ERROR_TYPE_UE)
  536. handled = mce_handle_ue_error(regs);
  537. save_mce_event(regs, handled, &mce_err, regs->nip, addr, phys_addr);
  538. return handled;
  539. }
  540. long __machine_check_early_realmode_p7(struct pt_regs *regs)
  541. {
  542. /* P7 DD1 leaves top bits of DSISR undefined */
  543. regs->dsisr &= 0x0000ffff;
  544. return mce_handle_error(regs, mce_p7_derror_table, mce_p7_ierror_table);
  545. }
  546. long __machine_check_early_realmode_p8(struct pt_regs *regs)
  547. {
  548. return mce_handle_error(regs, mce_p8_derror_table, mce_p8_ierror_table);
  549. }
  550. long __machine_check_early_realmode_p9(struct pt_regs *regs)
  551. {
  552. /*
  553. * On POWER9 DD2.1 and below, it's possible to get a machine check
  554. * caused by a paste instruction where only DSISR bit 25 is set. This
  555. * will result in the MCE handler seeing an unknown event and the kernel
  556. * crashing. An MCE that occurs like this is spurious, so we don't need
  557. * to do anything in terms of servicing it. If there is something that
  558. * needs to be serviced, the CPU will raise the MCE again with the
  559. * correct DSISR so that it can be serviced properly. So detect this
  560. * case and mark it as handled.
  561. */
  562. if (SRR1_MC_LOADSTORE(regs->msr) && regs->dsisr == 0x02000000)
  563. return 1;
  564. return mce_handle_error(regs, mce_p9_derror_table, mce_p9_ierror_table);
  565. }