|
@@ -772,23 +772,25 @@ EXPORT_SYMBOL_GPL(machine_check_poll);
|
|
|
static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
|
|
|
struct pt_regs *regs)
|
|
|
{
|
|
|
- int i, ret = 0;
|
|
|
char *tmp;
|
|
|
+ int i;
|
|
|
|
|
|
for (i = 0; i < mca_cfg.banks; i++) {
|
|
|
m->status = mce_rdmsrl(msr_ops.status(i));
|
|
|
- if (m->status & MCI_STATUS_VAL) {
|
|
|
- __set_bit(i, validp);
|
|
|
- if (quirk_no_way_out)
|
|
|
- quirk_no_way_out(i, m, regs);
|
|
|
- }
|
|
|
+ if (!(m->status & MCI_STATUS_VAL))
|
|
|
+ continue;
|
|
|
+
|
|
|
+ __set_bit(i, validp);
|
|
|
+ if (quirk_no_way_out)
|
|
|
+ quirk_no_way_out(i, m, regs);
|
|
|
|
|
|
if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) {
|
|
|
+ mce_read_aux(m, i);
|
|
|
*msg = tmp;
|
|
|
- ret = 1;
|
|
|
+ return 1;
|
|
|
}
|
|
|
}
|
|
|
- return ret;
|
|
|
+ return 0;
|
|
|
}
|
|
|
|
|
|
/*
|
|
@@ -1205,13 +1207,18 @@ void do_machine_check(struct pt_regs *regs, long error_code)
|
|
|
lmce = m.mcgstatus & MCG_STATUS_LMCES;
|
|
|
|
|
|
/*
|
|
|
+ * Local machine check may already know that we have to panic.
|
|
|
+ * Broadcast machine check begins rendezvous in mce_start().
|
|
|
* Go through all banks in exclusion of the other CPUs. This way we
|
|
|
* don't report duplicated events on shared banks because the first one
|
|
|
- * to see it will clear it. If this is a Local MCE, then no need to
|
|
|
- * perform rendezvous.
|
|
|
+ * to see it will clear it.
|
|
|
*/
|
|
|
- if (!lmce)
|
|
|
+ if (lmce) {
|
|
|
+ if (no_way_out)
|
|
|
+ mce_panic("Fatal local machine check", &m, msg);
|
|
|
+ } else {
|
|
|
order = mce_start(&no_way_out);
|
|
|
+ }
|
|
|
|
|
|
for (i = 0; i < cfg->banks; i++) {
|
|
|
__clear_bit(i, toclear);
|
|
@@ -1287,12 +1294,17 @@ void do_machine_check(struct pt_regs *regs, long error_code)
|
|
|
no_way_out = worst >= MCE_PANIC_SEVERITY;
|
|
|
} else {
|
|
|
/*
|
|
|
- * Local MCE skipped calling mce_reign()
|
|
|
- * If we found a fatal error, we need to panic here.
|
|
|
+ * If there was a fatal machine check we should have
|
|
|
+ * already called mce_panic() earlier in this function.
|
|
|
+ * Since we re-read the banks, we might have found
|
|
|
+ * something new. Check again to see if we found a
|
|
|
+ * fatal error. We call "mce_severity()" again to
|
|
|
+ * make sure we have the right "msg".
|
|
|
*/
|
|
|
- if (worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3)
|
|
|
- mce_panic("Machine check from unknown source",
|
|
|
- NULL, NULL);
|
|
|
+ if (worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3) {
|
|
|
+ mce_severity(&m, cfg->tolerant, &msg, true);
|
|
|
+ mce_panic("Local fatal machine check!", &m, msg);
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
/*
|