/* arch/s390/kernel/nmi.c */
  1. /*
  2. * Machine check handler
  3. *
  4. * Copyright IBM Corp. 2000, 2009
  5. * Author(s): Ingo Adlung <adlung@de.ibm.com>,
  6. * Martin Schwidefsky <schwidefsky@de.ibm.com>,
  7. * Cornelia Huck <cornelia.huck@de.ibm.com>,
  8. * Heiko Carstens <heiko.carstens@de.ibm.com>,
  9. */
  10. #include <linux/kernel_stat.h>
  11. #include <linux/init.h>
  12. #include <linux/errno.h>
  13. #include <linux/hardirq.h>
  14. #include <linux/log2.h>
  15. #include <linux/kprobes.h>
  16. #include <linux/slab.h>
  17. #include <linux/time.h>
  18. #include <linux/module.h>
  19. #include <linux/sched/signal.h>
  20. #include <linux/export.h>
  21. #include <asm/lowcore.h>
  22. #include <asm/smp.h>
  23. #include <asm/stp.h>
  24. #include <asm/cputime.h>
  25. #include <asm/nmi.h>
  26. #include <asm/crw.h>
  27. #include <asm/switch_to.h>
  28. #include <asm/ctl_reg.h>
  29. #include <asm/asm-offsets.h>
  30. #include <linux/kvm_host.h>
/*
 * Per-cpu accumulator for machine check conditions. Filled by the
 * machine check handler s390_do_machine_check() and consumed (and
 * cleared) later by s390_handle_mcck().
 */
struct mcck_struct {
	unsigned int kill_task : 1;	/* terminate the current task */
	unsigned int channel_report : 1; /* channel report word pending */
	unsigned int warning : 1;	/* warning machine check pending */
	unsigned int stp_queue : 1;	/* STP work needs to be queued */
	unsigned long mcck_code;	/* saved machine check interruption code */
};

static DEFINE_PER_CPU(struct mcck_struct, cpu_mcck);
/* Slab cache for the machine-check extended save areas */
static struct kmem_cache *mcesa_cache;
/* ilog2() of the save area size, ORed into each lowcore mcesad value */
static unsigned long mcesa_origin_lc;
  41. static inline int nmi_needs_mcesa(void)
  42. {
  43. return MACHINE_HAS_VX || MACHINE_HAS_GS;
  44. }
  45. static inline unsigned long nmi_get_mcesa_size(void)
  46. {
  47. if (MACHINE_HAS_GS)
  48. return MCESA_MAX_SIZE;
  49. return MCESA_MIN_SIZE;
  50. }
/*
 * The initial machine check extended save area for the boot CPU.
 * It will be replaced by nmi_init() with an allocated structure.
 * The structure is required for machine checks happening early in
 * the boot process.
 * __initdata is safe here: nmi_init() runs as an early_initcall and
 * swaps in the slab-allocated area before init memory is released.
 */
static struct mcesa boot_mcesa __initdata __aligned(MCESA_MAX_SIZE);
  58. void __init nmi_alloc_boot_cpu(struct lowcore *lc)
  59. {
  60. if (!nmi_needs_mcesa())
  61. return;
  62. lc->mcesad = (unsigned long) &boot_mcesa;
  63. if (MACHINE_HAS_GS)
  64. lc->mcesad |= ilog2(MCESA_MAX_SIZE);
  65. }
/*
 * Allocate the final machine check extended save area for the boot CPU
 * and replace the static boot_mcesa set up by nmi_alloc_boot_cpu().
 * Runs as an early initcall, before secondary CPUs are brought up.
 * Panics on failure, since machine checks cannot be handled without
 * the save area once the facilities are in use.
 */
static int __init nmi_init(void)
{
	unsigned long origin, cr0, size;

	if (!nmi_needs_mcesa())
		return 0;
	size = nmi_get_mcesa_size();
	/* Remember the encoded size to OR into every lowcore mcesad value */
	if (size > MCESA_MIN_SIZE)
		mcesa_origin_lc = ilog2(size);
	/* create slab cache for the machine-check-extended-save-areas */
	mcesa_cache = kmem_cache_create("nmi_save_areas", size, size, 0, NULL);
	if (!mcesa_cache)
		panic("Couldn't create nmi save area cache");
	origin = (unsigned long) kmem_cache_alloc(mcesa_cache, GFP_KERNEL);
	if (!origin)
		panic("Couldn't allocate nmi save area");
	/* The pointer is stored with mcesa_bits ORed in */
	kmemleak_not_leak((void *) origin);
	__ctl_store(cr0, 0, 0);
	__ctl_clear_bit(0, 28); /* disable lowcore protection */
	/* Replace boot_mcesa on the boot CPU */
	S390_lowcore.mcesad = origin | mcesa_origin_lc;
	__ctl_load(cr0, 0, 0);	/* restore previous lowcore protection state */
	return 0;
}
early_initcall(nmi_init);
  91. int nmi_alloc_per_cpu(struct lowcore *lc)
  92. {
  93. unsigned long origin;
  94. if (!nmi_needs_mcesa())
  95. return 0;
  96. origin = (unsigned long) kmem_cache_alloc(mcesa_cache, GFP_KERNEL);
  97. if (!origin)
  98. return -ENOMEM;
  99. /* The pointer is stored with mcesa_bits ORed in */
  100. kmemleak_not_leak((void *) origin);
  101. lc->mcesad = origin | mcesa_origin_lc;
  102. return 0;
  103. }
  104. void nmi_free_per_cpu(struct lowcore *lc)
  105. {
  106. if (!nmi_needs_mcesa())
  107. return;
  108. kmem_cache_free(mcesa_cache, (void *)(lc->mcesad & MCESA_ORIGIN_MASK));
  109. }
/*
 * Unrecoverable machine check: stop all other CPUs and enter a
 * disabled wait with the caller's address as wait PSW, so the failing
 * location can be identified in a dump. Never returns.
 */
static notrace void s390_handle_damage(void)
{
	smp_emergency_stop();
	disabled_wait((unsigned long) __builtin_return_address(0));
	while (1);
}
NOKPROBE_SYMBOL(s390_handle_damage);
/*
 * Main machine check handler function. Will be called with interrupts enabled
 * or disabled and machine checks enabled or disabled.
 * Processes the per-cpu state accumulated by s390_do_machine_check().
 */
void s390_handle_mcck(void)
{
	unsigned long flags;
	struct mcck_struct mcck;

	/*
	 * Disable machine checks and get the current state of accumulated
	 * machine checks. Afterwards delete the old state and enable machine
	 * checks again. The snapshot+reset must be atomic with respect to
	 * further machine checks, hence local_mcck_disable().
	 */
	local_irq_save(flags);
	local_mcck_disable();
	mcck = *this_cpu_ptr(&cpu_mcck);
	memset(this_cpu_ptr(&cpu_mcck), 0, sizeof(mcck));
	clear_cpu_flag(CIF_MCCK_PENDING);
	local_mcck_enable();
	local_irq_restore(flags);
	if (mcck.channel_report)
		crw_handle_channel_report();
	/*
	 * A warning may remain for a prolonged period on the bare iron.
	 * (actually until the machine is powered off, or the problem is gone)
	 * So we just stop listening for the WARNING MCH and avoid continuously
	 * being interrupted. One caveat is however, that we must do this per
	 * processor and cannot use the smp version of ctl_clear_bit().
	 * On VM we only get one interrupt per virtually presented machinecheck.
	 * Though one suffices, we may get one interrupt per (virtual) cpu.
	 */
	if (mcck.warning) {	/* WARNING pending ? */
		static int mchchk_wng_posted = 0;

		/* Use single cpu clear, as we cannot handle smp here. */
		__ctl_clear_bit(14, 24);	/* Disable WARNING MCH */
		/* xchg() ensures the power warning is posted exactly once */
		if (xchg(&mchchk_wng_posted, 1) == 0)
			kill_cad_pid(SIGPWR, 1);
	}
	if (mcck.stp_queue)
		stp_queue_work();
	if (mcck.kill_task) {
		local_irq_enable();
		printk(KERN_EMERG "mcck: Terminating task because of machine "
		       "malfunction (code 0x%016lx).\n", mcck.mcck_code);
		printk(KERN_EMERG "mcck: task: %s, pid: %d.\n",
		       current->comm, current->pid);
		do_exit(SIGSEGV);
	}
}
EXPORT_SYMBOL_GPL(s390_handle_mcck);
/*
 * Check the validity bits of the machine check interruption code to
 * see which register contents could be revalidated.
 * returns 0 if all required registers are available
 * returns 1 if the current (user space) task has to be terminated
 * Stops the whole machine via s390_handle_damage() when the kernel
 * itself cannot continue with the lost state.
 */
static int notrace s390_check_registers(union mci mci, int umode)
{
	union ctlreg2 cr2;
	int kill_task;

	kill_task = 0;
	/* Check for lost general purpose registers */
	if (!mci.gr) {
		/*
		 * General purpose registers couldn't be restored and have
		 * unknown contents. Stop system or terminate process.
		 */
		if (!umode)
			s390_handle_damage();
		kill_task = 1;
	}
	/* Check control registers */
	if (!mci.cr) {
		/*
		 * Control registers have unknown contents.
		 * Can't recover and therefore stopping machine.
		 */
		s390_handle_damage();
	}
	if (!mci.fp) {
		/*
		 * Floating point registers can't be restored. If the
		 * kernel currently uses floating point registers the
		 * system is stopped. If the process has its floating
		 * point registers loaded it is terminated.
		 */
		if (S390_lowcore.fpu_flags & KERNEL_VXR_V0V7)
			s390_handle_damage();
		if (!test_cpu_flag(CIF_FPU))
			kill_task = 1;
	}
	if (!mci.fc) {
		/*
		 * Floating point control register can't be restored.
		 * If the kernel currently uses the floating point
		 * registers and needs the FPC register the system is
		 * stopped. If the process has its floating point
		 * registers loaded it is terminated.
		 */
		if (S390_lowcore.fpu_flags & KERNEL_FPC)
			s390_handle_damage();
		if (!test_cpu_flag(CIF_FPU))
			kill_task = 1;
	}
	if (MACHINE_HAS_VX) {
		if (!mci.vr) {
			/*
			 * Vector registers can't be restored. If the kernel
			 * currently uses vector registers the system is
			 * stopped. If the process has its vector registers
			 * loaded it is terminated.
			 */
			if (S390_lowcore.fpu_flags & KERNEL_VXR)
				s390_handle_damage();
			if (!test_cpu_flag(CIF_FPU))
				kill_task = 1;
		}
	}
	/* Check if access registers are valid */
	if (!mci.ar) {
		/*
		 * Access registers have unknown contents.
		 * Terminating task.
		 */
		kill_task = 1;
	}
	/* Check guarded storage registers */
	cr2.val = S390_lowcore.cregs_save_area[2];
	if (cr2.gse) {
		if (!mci.gs) {
			/*
			 * Guarded storage registers can't be restored and
			 * the current process uses guarded storage.
			 * It has to be terminated.
			 */
			kill_task = 1;
		}
	}
	/* Check if old PSW is valid */
	if (!mci.wp) {
		/*
		 * Can't tell if we come from user or kernel mode
		 * -> stopping machine.
		 */
		s390_handle_damage();
	}
	/* Check for invalid kernel instruction address */
	if (!mci.ia && !umode) {
		/*
		 * The instruction address got lost while running
		 * in the kernel -> stopping machine.
		 */
		s390_handle_damage();
	}
	/* Lost PSW mask/state bits in user mode -> terminate the task */
	if (!mci.ms || !mci.pm || !mci.ia)
		kill_task = 1;

	return kill_task;
}
NOKPROBE_SYMBOL(s390_check_registers);
/*
 * Backup the guest's machine check info to its description block,
 * so the hypervisor code can reinject it into the guest later.
 */
static void notrace s390_backup_mcck_info(struct pt_regs *regs)
{
	struct mcck_volatile_info *mcck_backup;
	struct sie_page *sie_page;

	/* r14 contains the sie block, which was set in sie64a */
	struct kvm_s390_sie_block *sie_block =
		(struct kvm_s390_sie_block *) regs->gprs[14];

	if (sie_block == NULL)
		/* Something's seriously wrong, stop system. */
		s390_handle_damage();

	sie_page = container_of(sie_block, struct sie_page, sie_block);
	mcck_backup = &sie_page->mcck_info;
	/* Strip the host-only subclasses (channel report, external damage) */
	mcck_backup->mcic = S390_lowcore.mcck_interruption_code &
				~(MCCK_CODE_CP | MCCK_CODE_EXT_DAMAGE);
	mcck_backup->ext_damage_code = S390_lowcore.external_damage_code;
	mcck_backup->failing_storage_address
			= S390_lowcore.failing_storage_address;
}
NOKPROBE_SYMBOL(s390_backup_mcck_info);
/* Limits for retrying instructions after instruction-processing damage */
#define MAX_IPD_COUNT	29
#define MAX_IPD_TIME	(5 * 60 * USEC_PER_SEC) /* 5 minutes */

#define ED_STP_ISLAND	6	/* External damage STP island check */
#define ED_STP_SYNC	7	/* External damage STP sync check */

/* Subclasses that are handled by the host and never reinjected into a guest */
#define MCCK_CODE_NO_GUEST	(MCCK_CODE_CP | MCCK_CODE_EXT_DAMAGE)

/*
 * machine check handler. Runs in NMI context; decides per subclass
 * whether to stop the machine, mark the current task for termination,
 * defer work via the per-cpu mcck_struct, or reinject into a KVM guest.
 */
void notrace s390_do_machine_check(struct pt_regs *regs)
{
	static int ipd_count;
	static DEFINE_SPINLOCK(ipd_lock);
	static unsigned long long last_ipd;
	struct mcck_struct *mcck;
	unsigned long long tmp;
	union mci mci;
	unsigned long mcck_dam_code;

	nmi_enter();
	inc_irq_stat(NMI_NMI);
	mci.val = S390_lowcore.mcck_interruption_code;
	mcck = this_cpu_ptr(&cpu_mcck);

	if (mci.sd) {
		/* System damage -> stopping machine */
		s390_handle_damage();
	}

	/*
	 * Reinject the instruction processing damages' machine checks
	 * including Delayed Access Exception into the guest
	 * instead of damaging the host if they happen in the guest.
	 */
	if (mci.pd && !test_cpu_flag(CIF_MCCK_GUEST)) {
		if (mci.b) {
			/* Processing backup -> verify if we can survive this */
			u64 z_mcic, o_mcic, t_mcic;

			/* z_mcic: bits that must be zero to survive */
			z_mcic = (1ULL<<63 | 1ULL<<59 | 1ULL<<29);
			/* o_mcic: validity bits that must all be one to survive */
			o_mcic = (1ULL<<43 | 1ULL<<42 | 1ULL<<41 | 1ULL<<40 |
				  1ULL<<36 | 1ULL<<35 | 1ULL<<34 | 1ULL<<32 |
				  1ULL<<30 | 1ULL<<21 | 1ULL<<20 | 1ULL<<17 |
				  1ULL<<16);
			t_mcic = mci.val;

			if (((t_mcic & z_mcic) != 0) ||
			    ((t_mcic & o_mcic) != o_mcic)) {
				s390_handle_damage();
			}

			/*
			 * Nullifying exigent condition, therefore we might
			 * retry this instruction.
			 */
			spin_lock(&ipd_lock);
			tmp = get_tod_clock();
			/* TOD >> 12 yields microseconds, matching MAX_IPD_TIME */
			if (((tmp - last_ipd) >> 12) < MAX_IPD_TIME)
				ipd_count++;
			else
				ipd_count = 1;
			last_ipd = tmp;
			/* Too many retries within the window -> give up */
			if (ipd_count == MAX_IPD_COUNT)
				s390_handle_damage();
			spin_unlock(&ipd_lock);
		} else {
			/* Processing damage -> stopping machine */
			s390_handle_damage();
		}
	}
	if (s390_check_registers(mci, user_mode(regs))) {
		/*
		 * Couldn't restore all register contents for the
		 * user space process -> mark task for termination.
		 */
		mcck->kill_task = 1;
		mcck->mcck_code = mci.val;
		set_cpu_flag(CIF_MCCK_PENDING);
	}

	/*
	 * Backup the machine check's info if it happens when the guest
	 * is running.
	 */
	if (test_cpu_flag(CIF_MCCK_GUEST))
		s390_backup_mcck_info(regs);

	if (mci.cd) {
		/* Timing facility damage */
		s390_handle_damage();
	}
	if (mci.ed && mci.ec) {
		/* External damage: evaluate the STP-related damage codes */
		if (S390_lowcore.external_damage_code & (1U << ED_STP_SYNC))
			mcck->stp_queue |= stp_sync_check();
		if (S390_lowcore.external_damage_code & (1U << ED_STP_ISLAND))
			mcck->stp_queue |= stp_island_check();
		if (mcck->stp_queue)
			set_cpu_flag(CIF_MCCK_PENDING);
	}

	/*
	 * Reinject storage related machine checks into the guest if they
	 * happen when the guest is running.
	 */
	if (!test_cpu_flag(CIF_MCCK_GUEST)) {
		if (mci.se)
			/* Storage error uncorrected */
			s390_handle_damage();
		if (mci.ke)
			/* Storage key-error uncorrected */
			s390_handle_damage();
		if (mci.ds && mci.fa)
			/* Storage degradation */
			s390_handle_damage();
	}
	if (mci.cp) {
		/* Channel report word pending */
		mcck->channel_report = 1;
		set_cpu_flag(CIF_MCCK_PENDING);
	}
	if (mci.w) {
		/* Warning pending */
		mcck->warning = 1;
		set_cpu_flag(CIF_MCCK_PENDING);
	}

	/*
	 * If there are only Channel Report Pending and External Damage
	 * machine checks, they will not be reinjected into the guest
	 * because they refer to host conditions only.
	 */
	mcck_dam_code = (mci.val & MCIC_SUBCLASS_MASK);
	if (test_cpu_flag(CIF_MCCK_GUEST) &&
	    (mcck_dam_code & MCCK_CODE_NO_GUEST) != mcck_dam_code) {
		/* Set exit reason code for host's later handling */
		*((long *)(regs->gprs[15] + __SF_SIE_REASON)) = -EINTR;
	}
	clear_cpu_flag(CIF_MCCK_GUEST);
	nmi_exit();
}
NOKPROBE_SYMBOL(s390_do_machine_check);
/*
 * Enable the machine check subclasses this handler deals with by
 * setting the corresponding mask bits in control register 14.
 */
static int __init machine_check_init(void)
{
	ctl_set_bit(14, 25);	/* enable external damage MCH */
	ctl_set_bit(14, 27);	/* enable system recovery MCH */
	ctl_set_bit(14, 24);	/* enable warning MCH */
	return 0;
}
early_initcall(machine_check_init);