|
@@ -114,6 +114,7 @@ static struct work_struct mce_work;
|
|
|
static struct irq_work mce_irq_work;
|
|
|
|
|
|
static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs);
|
|
|
+static int mce_usable_address(struct mce *m); /* prototype only: called by code this patch adds; the definition is outside the hunks shown */
|
|
|
|
|
|
/*
|
|
|
* CPU/chipset specific EDAC code can register a notifier call here to print
|
|
@@ -234,11 +235,18 @@ static void drain_mcelog_buffer(void)
|
|
|
} while (next != prev);
|
|
|
}
|
|
|
|
|
|
+static struct notifier_block mce_srao_nb; /* declared early so mce_register_decode_chain() can compare against its address; initialized further down */
|
|
|
|
|
|
-void mce_register_decode_chain(struct notifier_block *nb)
|
|
|
+void mce_register_decode_chain(struct notifier_block *nb, bool drain) /* @nb: notifier to add to x86_mce_decoder_chain; @drain: also drain the mcelog buffer afterwards */
|
|
|
{
|
|
|
+ /* Ensure SRAO notifier has the highest priority in the decode chain. */
|
|
|
+ if (nb != &mce_srao_nb && nb->priority == INT_MAX)
|
|
|
+ nb->priority -= 1; /* note: modifies the caller's notifier_block so only mce_srao_nb keeps INT_MAX */
|
|
|
+
|
|
|
atomic_notifier_chain_register(&x86_mce_decoder_chain, nb);
|
|
|
- drain_mcelog_buffer();
|
|
|
+
|
|
|
+ if (drain) /* draining used to be unconditional; it is now the caller's choice */
|
|
|
+ drain_mcelog_buffer();
|
|
|
}
|
|
|
EXPORT_SYMBOL_GPL(mce_register_decode_chain);
|
|
|
|
|
@@ -462,61 +470,6 @@ static inline void mce_gather_info(struct mce *m, struct pt_regs *regs)
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-/*
|
|
|
- * Simple lockless ring to communicate PFNs from the exception handler with the
|
|
|
- * process context work function. This is vastly simplified because there's
|
|
|
- * only a single reader and a single writer.
|
|
|
- */
|
|
|
-#define MCE_RING_SIZE 16 /* we use one entry less */
|
|
|
-
|
|
|
-struct mce_ring {
|
|
|
- unsigned short start;
|
|
|
- unsigned short end;
|
|
|
- unsigned long ring[MCE_RING_SIZE];
|
|
|
-};
|
|
|
-static DEFINE_PER_CPU(struct mce_ring, mce_ring);
|
|
|
-
|
|
|
-/* Runs with CPU affinity in workqueue */
|
|
|
-static int mce_ring_empty(void)
|
|
|
-{
|
|
|
- struct mce_ring *r = this_cpu_ptr(&mce_ring);
|
|
|
-
|
|
|
- return r->start == r->end;
|
|
|
-}
|
|
|
-
|
|
|
-static int mce_ring_get(unsigned long *pfn)
|
|
|
-{
|
|
|
- struct mce_ring *r;
|
|
|
- int ret = 0;
|
|
|
-
|
|
|
- *pfn = 0;
|
|
|
- get_cpu();
|
|
|
- r = this_cpu_ptr(&mce_ring);
|
|
|
- if (r->start == r->end)
|
|
|
- goto out;
|
|
|
- *pfn = r->ring[r->start];
|
|
|
- r->start = (r->start + 1) % MCE_RING_SIZE;
|
|
|
- ret = 1;
|
|
|
-out:
|
|
|
- put_cpu();
|
|
|
- return ret;
|
|
|
-}
|
|
|
-
|
|
|
-/* Always runs in MCE context with preempt off */
|
|
|
-static int mce_ring_add(unsigned long pfn)
|
|
|
-{
|
|
|
- struct mce_ring *r = this_cpu_ptr(&mce_ring);
|
|
|
- unsigned next;
|
|
|
-
|
|
|
- next = (r->end + 1) % MCE_RING_SIZE;
|
|
|
- if (next == r->start)
|
|
|
- return -1;
|
|
|
- r->ring[r->end] = pfn;
|
|
|
- wmb();
|
|
|
- r->end = next;
|
|
|
- return 0;
|
|
|
-}
|
|
|
-
|
|
|
int mce_available(struct cpuinfo_x86 *c)
|
|
|
{
|
|
|
if (mca_cfg.disabled)
|
|
@@ -526,7 +479,7 @@ int mce_available(struct cpuinfo_x86 *c)
|
|
|
|
|
|
static void mce_schedule_work(void)
|
|
|
{
|
|
|
- if (!mce_ring_empty())
|
|
|
+ if (!mce_gen_pool_empty() && keventd_up()) /* queue mce_work only when the genpool is non-empty; keventd_up() presumably reports that workqueues are usable - TODO confirm */
|
|
|
schedule_work(&mce_work);
|
|
|
}
|
|
|
|
|
@@ -553,6 +506,27 @@ static void mce_report_event(struct pt_regs *regs)
|
|
|
irq_work_queue(&mce_irq_work);
|
|
|
}
|
|
|
|
|
|
+static int srao_decode_notifier(struct notifier_block *nb, unsigned long val,
|
|
|
+ void *data) /* @data: the struct mce being decoded, may be NULL; @nb and @val are not used */
|
|
|
+{ /* notifier callback (installed through mce_srao_nb): passes action-optional errors with a usable address to memory_failure() */
|
|
|
+ struct mce *mce = (struct mce *)data;
|
|
|
+ unsigned long pfn;
|
|
|
+
|
|
|
+ if (!mce)
|
|
|
+ return NOTIFY_DONE; /* no record supplied: nothing to act on */
|
|
|
+
|
|
|
+ if (mce->usable_addr && (mce->severity == MCE_AO_SEVERITY)) { /* both fields are filled in by the producers before mce_gen_pool_add() */
|
|
|
+ pfn = mce->addr >> PAGE_SHIFT; /* reported address -> page frame number */
|
|
|
+ memory_failure(pfn, MCE_VECTOR, 0); /* return value is not checked */
|
|
|
+ }
|
|
|
+
|
|
|
+ return NOTIFY_OK; /* returned for every non-NULL record, whether or not memory_failure() ran */
|
|
|
+}
|
|
|
+static struct notifier_block mce_srao_nb = { /* notifier block that mcheck_init() registers on the decode chain */
|
|
|
+ .notifier_call = srao_decode_notifier,
|
|
|
+ .priority = INT_MAX, /* mce_register_decode_chain() lowers any other notifier that asks for INT_MAX */
|
|
|
+};
|
|
|
+
|
|
|
/*
|
|
|
* Read ADDR and MISC registers.
|
|
|
*/
|
|
@@ -671,8 +645,11 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
|
|
|
*/
|
|
|
if (severity == MCE_DEFERRED_SEVERITY && memory_error(&m)) {
|
|
|
if (m.status & MCI_STATUS_ADDRV) {
|
|
|
- mce_ring_add(m.addr >> PAGE_SHIFT);
|
|
|
- mce_schedule_work();
|
|
|
+ m.severity = severity;
|
|
|
+ m.usable_addr = mce_usable_address(&m);
|
|
|
+
|
|
|
+ if (!mce_gen_pool_add(&m))
|
|
|
+ mce_schedule_work();
|
|
|
}
|
|
|
}
|
|
|
|
|
@@ -1142,15 +1119,10 @@ void do_machine_check(struct pt_regs *regs, long error_code)
|
|
|
|
|
|
mce_read_aux(&m, i);
|
|
|
|
|
|
- /*
|
|
|
- * Action optional error. Queue address for later processing.
|
|
|
- * When the ring overflows we just ignore the AO error.
|
|
|
- * RED-PEN add some logging mechanism when
|
|
|
- * usable_address or mce_add_ring fails.
|
|
|
- * RED-PEN don't ignore overflow for mca_cfg.tolerant == 0
|
|
|
- */
|
|
|
- if (severity == MCE_AO_SEVERITY && mce_usable_address(&m))
|
|
|
- mce_ring_add(m.addr >> PAGE_SHIFT);
|
|
|
+ /* assuming valid severity level != 0 */
|
|
|
+ m.severity = severity;
|
|
|
+ m.usable_addr = mce_usable_address(&m);
|
|
|
+ mce_gen_pool_add(&m);
|
|
|
|
|
|
mce_log(&m);
|
|
|
|
|
@@ -1246,14 +1218,11 @@ int memory_failure(unsigned long pfn, int vector, int flags)
|
|
|
/*
|
|
|
* Action optional processing happens here (picking up
|
|
|
* from the list of faulting pages that do_machine_check()
|
|
|
- * placed into the "ring").
|
|
|
+ * placed into the genpool).
|
|
|
*/
|
|
|
static void mce_process_work(struct work_struct *dummy)
|
|
|
{
|
|
|
- unsigned long pfn;
|
|
|
-
|
|
|
- while (mce_ring_get(&pfn))
|
|
|
- memory_failure(pfn, MCE_VECTOR, 0);
|
|
|
+ mce_gen_pool_process(); /* replaces the ring loop: memory_failure() is no longer called directly from this work function */
|
|
|
}
|
|
|
|
|
|
#ifdef CONFIG_X86_MCE_INTEL
|
|
@@ -2059,6 +2028,7 @@ __setup("mce", mcheck_enable);
|
|
|
int __init mcheck_init(void)
|
|
|
{
|
|
|
mcheck_intel_therm_init();
|
|
|
+ mce_register_decode_chain(&mce_srao_nb, false);
|
|
|
mcheck_vendor_init_severity();
|
|
|
|
|
|
INIT_WORK(&mce_work, mce_process_work);
|
|
@@ -2597,5 +2567,20 @@ static int __init mcheck_debugfs_init(void)
|
|
|
|
|
|
return 0;
|
|
|
}
|
|
|
-late_initcall(mcheck_debugfs_init);
|
|
|
+#else
|
|
|
+static int __init mcheck_debugfs_init(void) { return -EINVAL; } /* #else stub so mcheck_late_init() can call it unconditionally; that caller ignores the result */
|
|
|
#endif
|
|
|
+
|
|
|
+static int __init mcheck_late_init(void)
|
|
|
+{ /* late initcall: runs mcheck_debugfs_init(), then schedules any MCE work that is already pending */
|
|
|
+ mcheck_debugfs_init(); /* return value is ignored */
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Flush out everything that has been logged during early boot, now that
|
|
|
+ * everything has been initialized (workqueues, decoders, ...).
|
|
|
+ */
|
|
|
+ mce_schedule_work();
|
|
|
+
|
|
|
+ return 0; /* always 0, independent of the mcheck_debugfs_init() result */
|
|
|
+}
|
|
|
+late_initcall(mcheck_late_init); /* takes over the late_initcall slot previously held by mcheck_debugfs_init */
|