/*
 * Machine check exception handling CPU-side for power7 and power8
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright 2013 IBM Corporation
 * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
 */
  21. #undef DEBUG
  22. #define pr_fmt(fmt) "mce_power: " fmt
  23. #include <linux/types.h>
  24. #include <linux/ptrace.h>
  25. #include <asm/mmu.h>
  26. #include <asm/mce.h>
  27. #include <asm/machdep.h>
  28. /* flush SLBs and reload */
  29. static void flush_and_reload_slb(void)
  30. {
  31. struct slb_shadow *slb;
  32. unsigned long i, n;
  33. /* Invalidate all SLBs */
  34. asm volatile("slbmte %0,%0; slbia" : : "r" (0));
  35. #ifdef CONFIG_KVM_BOOK3S_HANDLER
  36. /*
  37. * If machine check is hit when in guest or in transition, we will
  38. * only flush the SLBs and continue.
  39. */
  40. if (get_paca()->kvm_hstate.in_guest)
  41. return;
  42. #endif
  43. /* For host kernel, reload the SLBs from shadow SLB buffer. */
  44. slb = get_slb_shadow();
  45. if (!slb)
  46. return;
  47. n = min_t(u32, be32_to_cpu(slb->persistent), SLB_MIN_SIZE);
  48. /* Load up the SLB entries from shadow SLB */
  49. for (i = 0; i < n; i++) {
  50. unsigned long rb = be64_to_cpu(slb->save_area[i].esid);
  51. unsigned long rs = be64_to_cpu(slb->save_area[i].vsid);
  52. rb = (rb & ~0xFFFul) | i;
  53. asm volatile("slbmte %0,%1" : : "r" (rs), "r" (rb));
  54. }
  55. }
  56. static long mce_handle_derror(uint64_t dsisr, uint64_t slb_error_bits)
  57. {
  58. long handled = 1;
  59. /*
  60. * flush and reload SLBs for SLB errors and flush TLBs for TLB errors.
  61. * reset the error bits whenever we handle them so that at the end
  62. * we can check whether we handled all of them or not.
  63. * */
  64. if (dsisr & slb_error_bits) {
  65. flush_and_reload_slb();
  66. /* reset error bits */
  67. dsisr &= ~(slb_error_bits);
  68. }
  69. if (dsisr & P7_DSISR_MC_TLB_MULTIHIT_MFTLB) {
  70. if (cur_cpu_spec && cur_cpu_spec->flush_tlb)
  71. cur_cpu_spec->flush_tlb(TLBIEL_INVAL_PAGE);
  72. /* reset error bits */
  73. dsisr &= ~P7_DSISR_MC_TLB_MULTIHIT_MFTLB;
  74. }
  75. /* Any other errors we don't understand? */
  76. if (dsisr & 0xffffffffUL)
  77. handled = 0;
  78. return handled;
  79. }
  80. static long mce_handle_derror_p7(uint64_t dsisr)
  81. {
  82. return mce_handle_derror(dsisr, P7_DSISR_MC_SLB_ERRORS);
  83. }
  84. static long mce_handle_common_ierror(uint64_t srr1)
  85. {
  86. long handled = 0;
  87. switch (P7_SRR1_MC_IFETCH(srr1)) {
  88. case 0:
  89. break;
  90. case P7_SRR1_MC_IFETCH_SLB_PARITY:
  91. case P7_SRR1_MC_IFETCH_SLB_MULTIHIT:
  92. /* flush and reload SLBs for SLB errors. */
  93. flush_and_reload_slb();
  94. handled = 1;
  95. break;
  96. case P7_SRR1_MC_IFETCH_TLB_MULTIHIT:
  97. if (cur_cpu_spec && cur_cpu_spec->flush_tlb) {
  98. cur_cpu_spec->flush_tlb(TLBIEL_INVAL_PAGE);
  99. handled = 1;
  100. }
  101. break;
  102. default:
  103. break;
  104. }
  105. return handled;
  106. }
  107. static long mce_handle_ierror_p7(uint64_t srr1)
  108. {
  109. long handled = 0;
  110. handled = mce_handle_common_ierror(srr1);
  111. if (P7_SRR1_MC_IFETCH(srr1) == P7_SRR1_MC_IFETCH_SLB_BOTH) {
  112. flush_and_reload_slb();
  113. handled = 1;
  114. }
  115. return handled;
  116. }
  117. static void mce_get_common_ierror(struct mce_error_info *mce_err, uint64_t srr1)
  118. {
  119. switch (P7_SRR1_MC_IFETCH(srr1)) {
  120. case P7_SRR1_MC_IFETCH_SLB_PARITY:
  121. mce_err->error_type = MCE_ERROR_TYPE_SLB;
  122. mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY;
  123. break;
  124. case P7_SRR1_MC_IFETCH_SLB_MULTIHIT:
  125. mce_err->error_type = MCE_ERROR_TYPE_SLB;
  126. mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
  127. break;
  128. case P7_SRR1_MC_IFETCH_TLB_MULTIHIT:
  129. mce_err->error_type = MCE_ERROR_TYPE_TLB;
  130. mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
  131. break;
  132. case P7_SRR1_MC_IFETCH_UE:
  133. case P7_SRR1_MC_IFETCH_UE_IFU_INTERNAL:
  134. mce_err->error_type = MCE_ERROR_TYPE_UE;
  135. mce_err->u.ue_error_type = MCE_UE_ERROR_IFETCH;
  136. break;
  137. case P7_SRR1_MC_IFETCH_UE_TLB_RELOAD:
  138. mce_err->error_type = MCE_ERROR_TYPE_UE;
  139. mce_err->u.ue_error_type =
  140. MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH;
  141. break;
  142. }
  143. }
  144. static void mce_get_ierror_p7(struct mce_error_info *mce_err, uint64_t srr1)
  145. {
  146. mce_get_common_ierror(mce_err, srr1);
  147. if (P7_SRR1_MC_IFETCH(srr1) == P7_SRR1_MC_IFETCH_SLB_BOTH) {
  148. mce_err->error_type = MCE_ERROR_TYPE_SLB;
  149. mce_err->u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE;
  150. }
  151. }
  152. static void mce_get_derror_p7(struct mce_error_info *mce_err, uint64_t dsisr)
  153. {
  154. if (dsisr & P7_DSISR_MC_UE) {
  155. mce_err->error_type = MCE_ERROR_TYPE_UE;
  156. mce_err->u.ue_error_type = MCE_UE_ERROR_LOAD_STORE;
  157. } else if (dsisr & P7_DSISR_MC_UE_TABLEWALK) {
  158. mce_err->error_type = MCE_ERROR_TYPE_UE;
  159. mce_err->u.ue_error_type =
  160. MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE;
  161. } else if (dsisr & P7_DSISR_MC_ERAT_MULTIHIT) {
  162. mce_err->error_type = MCE_ERROR_TYPE_ERAT;
  163. mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
  164. } else if (dsisr & P7_DSISR_MC_SLB_MULTIHIT) {
  165. mce_err->error_type = MCE_ERROR_TYPE_SLB;
  166. mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
  167. } else if (dsisr & P7_DSISR_MC_SLB_PARITY_MFSLB) {
  168. mce_err->error_type = MCE_ERROR_TYPE_SLB;
  169. mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY;
  170. } else if (dsisr & P7_DSISR_MC_TLB_MULTIHIT_MFTLB) {
  171. mce_err->error_type = MCE_ERROR_TYPE_TLB;
  172. mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
  173. } else if (dsisr & P7_DSISR_MC_SLB_MULTIHIT_PARITY) {
  174. mce_err->error_type = MCE_ERROR_TYPE_SLB;
  175. mce_err->u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE;
  176. }
  177. }
  178. static long mce_handle_ue_error(struct pt_regs *regs)
  179. {
  180. long handled = 0;
  181. /*
  182. * On specific SCOM read via MMIO we may get a machine check
  183. * exception with SRR0 pointing inside opal. If that is the
  184. * case OPAL may have recovery address to re-read SCOM data in
  185. * different way and hence we can recover from this MC.
  186. */
  187. if (ppc_md.mce_check_early_recovery) {
  188. if (ppc_md.mce_check_early_recovery(regs))
  189. handled = 1;
  190. }
  191. return handled;
  192. }
  193. long __machine_check_early_realmode_p7(struct pt_regs *regs)
  194. {
  195. uint64_t srr1, nip, addr;
  196. long handled = 1;
  197. struct mce_error_info mce_error_info = { 0 };
  198. srr1 = regs->msr;
  199. nip = regs->nip;
  200. /*
  201. * Handle memory errors depending whether this was a load/store or
  202. * ifetch exception. Also, populate the mce error_type and
  203. * type-specific error_type from either SRR1 or DSISR, depending
  204. * whether this was a load/store or ifetch exception
  205. */
  206. if (P7_SRR1_MC_LOADSTORE(srr1)) {
  207. handled = mce_handle_derror_p7(regs->dsisr);
  208. mce_get_derror_p7(&mce_error_info, regs->dsisr);
  209. addr = regs->dar;
  210. } else {
  211. handled = mce_handle_ierror_p7(srr1);
  212. mce_get_ierror_p7(&mce_error_info, srr1);
  213. addr = regs->nip;
  214. }
  215. /* Handle UE error. */
  216. if (mce_error_info.error_type == MCE_ERROR_TYPE_UE)
  217. handled = mce_handle_ue_error(regs);
  218. save_mce_event(regs, handled, &mce_error_info, nip, addr);
  219. return handled;
  220. }
  221. static void mce_get_ierror_p8(struct mce_error_info *mce_err, uint64_t srr1)
  222. {
  223. mce_get_common_ierror(mce_err, srr1);
  224. if (P7_SRR1_MC_IFETCH(srr1) == P8_SRR1_MC_IFETCH_ERAT_MULTIHIT) {
  225. mce_err->error_type = MCE_ERROR_TYPE_ERAT;
  226. mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
  227. }
  228. }
  229. static void mce_get_derror_p8(struct mce_error_info *mce_err, uint64_t dsisr)
  230. {
  231. mce_get_derror_p7(mce_err, dsisr);
  232. if (dsisr & P8_DSISR_MC_ERAT_MULTIHIT_SEC) {
  233. mce_err->error_type = MCE_ERROR_TYPE_ERAT;
  234. mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
  235. }
  236. }
  237. static long mce_handle_ierror_p8(uint64_t srr1)
  238. {
  239. long handled = 0;
  240. handled = mce_handle_common_ierror(srr1);
  241. if (P7_SRR1_MC_IFETCH(srr1) == P8_SRR1_MC_IFETCH_ERAT_MULTIHIT) {
  242. flush_and_reload_slb();
  243. handled = 1;
  244. }
  245. return handled;
  246. }
  247. static long mce_handle_derror_p8(uint64_t dsisr)
  248. {
  249. return mce_handle_derror(dsisr, P8_DSISR_MC_SLB_ERRORS);
  250. }
  251. long __machine_check_early_realmode_p8(struct pt_regs *regs)
  252. {
  253. uint64_t srr1, nip, addr;
  254. long handled = 1;
  255. struct mce_error_info mce_error_info = { 0 };
  256. srr1 = regs->msr;
  257. nip = regs->nip;
  258. if (P7_SRR1_MC_LOADSTORE(srr1)) {
  259. handled = mce_handle_derror_p8(regs->dsisr);
  260. mce_get_derror_p8(&mce_error_info, regs->dsisr);
  261. addr = regs->dar;
  262. } else {
  263. handled = mce_handle_ierror_p8(srr1);
  264. mce_get_ierror_p8(&mce_error_info, srr1);
  265. addr = regs->nip;
  266. }
  267. /* Handle UE error. */
  268. if (mce_error_info.error_type == MCE_ERROR_TYPE_UE)
  269. handled = mce_handle_ue_error(regs);
  270. save_mce_event(regs, handled, &mce_error_info, nip, addr);
  271. return handled;
  272. }