@@ -52,11 +52,11 @@

 static DEFINE_MUTEX(mce_chrdev_read_mutex);

-#define rcu_dereference_check_mce(p) \
+#define mce_log_get_idx_check(p) \
 ({ \
 	RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \
 			 !lockdep_is_held(&mce_chrdev_read_mutex), \
-			 "suspicious rcu_dereference_check_mce() usage"); \
+			 "suspicious mce_log_get_idx_check() usage"); \
 	smp_load_acquire(&(p)); \
 })

@@ -110,15 +110,17 @@ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
  */
 mce_banks_t mce_banks_ce_disabled;

-static DEFINE_PER_CPU(struct work_struct, mce_work);
+static struct work_struct mce_work;
+static struct irq_work mce_irq_work;

 static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs);
+static int mce_usable_address(struct mce *m);

 /*
  * CPU/chipset specific EDAC code can register a notifier call here to print
  * MCE errors in a human-readable form.
  */
-static ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain);
+ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain);

 /* Do initial initialization of a struct mce */
 void mce_setup(struct mce *m)
@@ -157,12 +159,13 @@ void mce_log(struct mce *mce)
 	/* Emit the trace record: */
 	trace_mce_record(mce);

-	atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, mce);
+	if (!mce_gen_pool_add(mce))
+		irq_work_queue(&mce_irq_work);

 	mce->finished = 0;
 	wmb();
 	for (;;) {
-		entry = rcu_dereference_check_mce(mcelog.next);
+		entry = mce_log_get_idx_check(mcelog.next);
 		for (;;) {

 			/*
@@ -196,48 +199,23 @@ void mce_log(struct mce *mce)
 	set_bit(0, &mce_need_notify);
 }

-static void drain_mcelog_buffer(void)
+void mce_inject_log(struct mce *m)
 {
-	unsigned int next, i, prev = 0;
-
-	next = ACCESS_ONCE(mcelog.next);
-
-	do {
-		struct mce *m;
-
-		/* drain what was logged during boot */
-		for (i = prev; i < next; i++) {
-			unsigned long start = jiffies;
-			unsigned retries = 1;
-
-			m = &mcelog.entry[i];
-
-			while (!m->finished) {
-				if (time_after_eq(jiffies, start + 2*retries))
-					retries++;
-
-				cpu_relax();
-
-				if (!m->finished && retries >= 4) {
-					pr_err("skipping error being logged currently!\n");
-					break;
-				}
-			}
-			smp_rmb();
-			atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m);
-		}
-
-		memset(mcelog.entry + prev, 0, (next - prev) * sizeof(*m));
-		prev = next;
-		next = cmpxchg(&mcelog.next, prev, 0);
-	} while (next != prev);
+	mutex_lock(&mce_chrdev_read_mutex);
+	mce_log(m);
+	mutex_unlock(&mce_chrdev_read_mutex);
 }
+EXPORT_SYMBOL_GPL(mce_inject_log);
+
+static struct notifier_block mce_srao_nb;

 void mce_register_decode_chain(struct notifier_block *nb)
 {
+	/* Ensure SRAO notifier has the highest priority in the decode chain. */
+	if (nb != &mce_srao_nb && nb->priority == INT_MAX)
+		nb->priority -= 1;
+
 	atomic_notifier_chain_register(&x86_mce_decoder_chain, nb);
-	drain_mcelog_buffer();
 }
 EXPORT_SYMBOL_GPL(mce_register_decode_chain);
@@ -461,61 +439,6 @@ static inline void mce_gather_info(struct mce *m, struct pt_regs *regs)
 	}
 }

-/*
- * Simple lockless ring to communicate PFNs from the exception handler with the
- * process context work function. This is vastly simplified because there's
- * only a single reader and a single writer.
- */
-#define MCE_RING_SIZE 16	/* we use one entry less */
-
-struct mce_ring {
-	unsigned short start;
-	unsigned short end;
-	unsigned long ring[MCE_RING_SIZE];
-};
-static DEFINE_PER_CPU(struct mce_ring, mce_ring);
-
-/* Runs with CPU affinity in workqueue */
-static int mce_ring_empty(void)
-{
-	struct mce_ring *r = this_cpu_ptr(&mce_ring);
-
-	return r->start == r->end;
-}
-
-static int mce_ring_get(unsigned long *pfn)
-{
-	struct mce_ring *r;
-	int ret = 0;
-
-	*pfn = 0;
-	get_cpu();
-	r = this_cpu_ptr(&mce_ring);
-	if (r->start == r->end)
-		goto out;
-	*pfn = r->ring[r->start];
-	r->start = (r->start + 1) % MCE_RING_SIZE;
-	ret = 1;
-out:
-	put_cpu();
-	return ret;
-}
-
-/* Always runs in MCE context with preempt off */
-static int mce_ring_add(unsigned long pfn)
-{
-	struct mce_ring *r = this_cpu_ptr(&mce_ring);
-	unsigned next;
-
-	next = (r->end + 1) % MCE_RING_SIZE;
-	if (next == r->start)
-		return -1;
-	r->ring[r->end] = pfn;
-	wmb();
-	r->end = next;
-	return 0;
-}
-
 int mce_available(struct cpuinfo_x86 *c)
 {
 	if (mca_cfg.disabled)
@@ -525,12 +448,10 @@ int mce_available(struct cpuinfo_x86 *c)

 static void mce_schedule_work(void)
 {
-	if (!mce_ring_empty())
-		schedule_work(this_cpu_ptr(&mce_work));
+	if (!mce_gen_pool_empty() && keventd_up())
+		schedule_work(&mce_work);
 }

-static DEFINE_PER_CPU(struct irq_work, mce_irq_work);
-
 static void mce_irq_work_cb(struct irq_work *entry)
 {
 	mce_notify_irq();
@@ -551,8 +472,29 @@ static void mce_report_event(struct pt_regs *regs)
 		return;
 	}

-	irq_work_queue(this_cpu_ptr(&mce_irq_work));
+	irq_work_queue(&mce_irq_work);
+}
+
+static int srao_decode_notifier(struct notifier_block *nb, unsigned long val,
+				void *data)
+{
+	struct mce *mce = (struct mce *)data;
+	unsigned long pfn;
+
+	if (!mce)
+		return NOTIFY_DONE;
+
+	if (mce->usable_addr && (mce->severity == MCE_AO_SEVERITY)) {
+		pfn = mce->addr >> PAGE_SHIFT;
+		memory_failure(pfn, MCE_VECTOR, 0);
+	}
+
+	return NOTIFY_OK;
 }
+static struct notifier_block mce_srao_nb = {
+	.notifier_call = srao_decode_notifier,
+	.priority = INT_MAX,
+};

 /*
  * Read ADDR and MISC registers.
@@ -672,8 +614,11 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 		 */
 		if (severity == MCE_DEFERRED_SEVERITY && memory_error(&m)) {
 			if (m.status & MCI_STATUS_ADDRV) {
-				mce_ring_add(m.addr >> PAGE_SHIFT);
-				mce_schedule_work();
+				m.severity = severity;
+				m.usable_addr = mce_usable_address(&m);
+
+				if (!mce_gen_pool_add(&m))
+					mce_schedule_work();
 			}
 		}

@@ -1143,15 +1088,9 @@ void do_machine_check(struct pt_regs *regs, long error_code)

 		mce_read_aux(&m, i);

-		/*
-		 * Action optional error. Queue address for later processing.
-		 * When the ring overflows we just ignore the AO error.
-		 * RED-PEN add some logging mechanism when
-		 * usable_address or mce_add_ring fails.
-		 * RED-PEN don't ignore overflow for mca_cfg.tolerant == 0
-		 */
-		if (severity == MCE_AO_SEVERITY && mce_usable_address(&m))
-			mce_ring_add(m.addr >> PAGE_SHIFT);
+		/* assuming valid severity level != 0 */
+		m.severity = severity;
+		m.usable_addr = mce_usable_address(&m);

 		mce_log(&m);

@@ -1247,14 +1186,11 @@ int memory_failure(unsigned long pfn, int vector, int flags)
 /*
  * Action optional processing happens here (picking up
  * from the list of faulting pages that do_machine_check()
- * placed into the "ring").
+ * placed into the genpool).
  */
 static void mce_process_work(struct work_struct *dummy)
 {
-	unsigned long pfn;
-
-	while (mce_ring_get(&pfn))
-		memory_failure(pfn, MCE_VECTOR, 0);
+	mce_gen_pool_process();
 }

 #ifdef CONFIG_X86_MCE_INTEL
@@ -1678,6 +1614,17 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
 	}
 }

+static void __mcheck_cpu_clear_vendor(struct cpuinfo_x86 *c)
+{
+	switch (c->x86_vendor) {
+	case X86_VENDOR_INTEL:
+		mce_intel_feature_clear(c);
+		break;
+	default:
+		break;
+	}
+}
+
 static void mce_start_timer(unsigned int cpu, struct timer_list *t)
 {
 	unsigned long iv = check_interval * HZ;
@@ -1731,13 +1678,36 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c)
 		return;
 	}

+	if (mce_gen_pool_init()) {
+		mca_cfg.disabled = true;
+		pr_emerg("Couldn't allocate MCE records pool!\n");
+		return;
+	}
+
 	machine_check_vector = do_machine_check;

 	__mcheck_cpu_init_generic();
 	__mcheck_cpu_init_vendor(c);
 	__mcheck_cpu_init_timer();
-	INIT_WORK(this_cpu_ptr(&mce_work), mce_process_work);
-	init_irq_work(this_cpu_ptr(&mce_irq_work), &mce_irq_work_cb);
+}
+
+/*
+ * Called for each booted CPU to clear some machine checks opt-ins
+ */
+void mcheck_cpu_clear(struct cpuinfo_x86 *c)
+{
+	if (mca_cfg.disabled)
+		return;
+
+	if (!mce_available(c))
+		return;
+
+	/*
+	 * Possibly to clear general settings generic to x86
+	 * __mcheck_cpu_clear_generic(c);
+	 */
+	__mcheck_cpu_clear_vendor(c);
+
 }

 /*
@@ -1850,7 +1820,7 @@ static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf,
 		goto out;
 	}

-	next = rcu_dereference_check_mce(mcelog.next);
+	next = mce_log_get_idx_check(mcelog.next);

 	/* Only supports full reads right now */
 	err = -EINVAL;
@@ -2056,8 +2026,12 @@ __setup("mce", mcheck_enable);
 int __init mcheck_init(void)
 {
 	mcheck_intel_therm_init();
+	mce_register_decode_chain(&mce_srao_nb);
 	mcheck_vendor_init_severity();

+	INIT_WORK(&mce_work, mce_process_work);
+	init_irq_work(&mce_irq_work, mce_irq_work_cb);
+
 	return 0;
 }

@@ -2591,5 +2565,20 @@ static int __init mcheck_debugfs_init(void)

 	return 0;
 }
-late_initcall(mcheck_debugfs_init);
+#else
+static int __init mcheck_debugfs_init(void) { return -EINVAL; }
 #endif
+
+static int __init mcheck_late_init(void)
+{
+	mcheck_debugfs_init();
+
+	/*
+	 * Flush out everything that has been logged during early boot, now that
+	 * everything has been initialized (workqueues, decoders, ...).
+	 */
+	mce_schedule_work();
+
+	return 0;
+}
+late_initcall(mcheck_late_init);