Browse Source

x86/mce: Add infrastructure to support Local MCE

Initialize and prepare for handling LMCEs. Add a boot-time
option to disable LMCEs.

Signed-off-by: Ashok Raj <ashok.raj@intel.com>
[ Simplify stuff, align statements for better readability, reflow comments; kill
  unused lmce_clear(); save us an MSR write if LMCE is already enabled. ]
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Link: http://lkml.kernel.org/r/1433436928-31903-16-git-send-email-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Ashok Raj 10 years ago
parent
commit
88d538672e

+ 3 - 0
Documentation/x86/x86_64/boot-options.txt

@@ -31,6 +31,9 @@ Machine check
 		(e.g. BIOS or hardware monitoring applications), conflicting
 		(e.g. BIOS or hardware monitoring applications), conflicting
 		with OS's error handling, and you cannot deactivate the agent,
 		with OS's error handling, and you cannot deactivate the agent,
 		then this option will be a help.
 		then this option will be a help.
+   mce=no_lmce
+		Do not opt in to Local MCE delivery. Use the legacy method
+		of broadcasting MCEs instead.
    mce=bootlog
    mce=bootlog
 		Enable logging of machine checks left over from booting.
 		Enable logging of machine checks left over from booting.
 		Disabled by default on AMD because some BIOS leave bogus ones.
 		Disabled by default on AMD because some BIOS leave bogus ones.

+ 5 - 0
arch/x86/include/asm/mce.h

@@ -109,6 +109,7 @@ struct mce_log {
 struct mca_config {
 struct mca_config {
 	bool dont_log_ce;
 	bool dont_log_ce;
 	bool cmci_disabled;
 	bool cmci_disabled;
+	bool lmce_disabled;
 	bool ignore_ce;
 	bool ignore_ce;
 	bool disabled;
 	bool disabled;
 	bool ser;
 	bool ser;
@@ -184,12 +185,16 @@ void cmci_clear(void);
 void cmci_reenable(void);
 void cmci_reenable(void);
 void cmci_rediscover(void);
 void cmci_rediscover(void);
 void cmci_recheck(void);
 void cmci_recheck(void);
+void lmce_clear(void);
+void lmce_enable(void);
 #else
 #else
 static inline void mce_intel_feature_init(struct cpuinfo_x86 *c) { }
 static inline void mce_intel_feature_init(struct cpuinfo_x86 *c) { }
 static inline void cmci_clear(void) {}
 static inline void cmci_clear(void) {}
 static inline void cmci_reenable(void) {}
 static inline void cmci_reenable(void) {}
 static inline void cmci_rediscover(void) {}
 static inline void cmci_rediscover(void) {}
 static inline void cmci_recheck(void) {}
 static inline void cmci_recheck(void) {}
+static inline void lmce_clear(void) {}
+static inline void lmce_enable(void) {}
 #endif
 #endif
 
 
 #ifdef CONFIG_X86_MCE_AMD
 #ifdef CONFIG_X86_MCE_AMD

+ 3 - 0
arch/x86/kernel/cpu/mcheck/mce.c

@@ -1982,6 +1982,7 @@ void mce_disable_bank(int bank)
 /*
 /*
  * mce=off Disables machine check
  * mce=off Disables machine check
  * mce=no_cmci Disables CMCI
  * mce=no_cmci Disables CMCI
+ * mce=no_lmce Disables LMCE
  * mce=dont_log_ce Clears corrected events silently, no log created for CEs.
  * mce=dont_log_ce Clears corrected events silently, no log created for CEs.
  * mce=ignore_ce Disables polling and CMCI, corrected events are not cleared.
  * mce=ignore_ce Disables polling and CMCI, corrected events are not cleared.
  * mce=TOLERANCELEVEL[,monarchtimeout] (number, see above)
  * mce=TOLERANCELEVEL[,monarchtimeout] (number, see above)
@@ -2005,6 +2006,8 @@ static int __init mcheck_enable(char *str)
 		cfg->disabled = true;
 		cfg->disabled = true;
 	else if (!strcmp(str, "no_cmci"))
 	else if (!strcmp(str, "no_cmci"))
 		cfg->cmci_disabled = true;
 		cfg->cmci_disabled = true;
+	else if (!strcmp(str, "no_lmce"))
+		cfg->lmce_disabled = true;
 	else if (!strcmp(str, "dont_log_ce"))
 	else if (!strcmp(str, "dont_log_ce"))
 		cfg->dont_log_ce = true;
 		cfg->dont_log_ce = true;
 	else if (!strcmp(str, "ignore_ce"))
 	else if (!strcmp(str, "ignore_ce"))

+ 43 - 0
arch/x86/kernel/cpu/mcheck/mce_intel.c

@@ -91,6 +91,36 @@ static int cmci_supported(int *banks)
 	return !!(cap & MCG_CMCI_P);
 	return !!(cap & MCG_CMCI_P);
 }
 }
 
 
+/* Return true only when LMCE is not disabled and the MSRs report it usable. */
+static bool lmce_supported(void)
+{
+	const u64 cap_mask = MCG_SER_P | MCG_LMCE_P;
+	const u64 ctl_mask = FEATURE_CONTROL_LOCKED | FEATURE_CONTROL_LMCE;
+	u64 msr;
+
+	/* LMCE has been turned off in the MCA configuration. */
+	if (mca_cfg.lmce_disabled)
+		return false;
+
+	/*
+	 * LMCE builds on the processor's recovery support, so MCG_CAP
+	 * has to advertise MCG_SER_P and MCG_LMCE_P together.
+	 */
+	rdmsrl(MSR_IA32_MCG_CAP, msr);
+	if ((msr & cap_mask) != cap_mask)
+		return false;
+
+	/*
+	 * The BIOS signals LMCE support by setting bit 20 of
+	 * IA32_FEATURE_CONTROL; without it, touching MCG_EXT_CTL raises
+	 * a #GP fault. Besides that bit, the lock bit is checked too, so
+	 * both have to be set for LMCE to count as supported.
+	 */
+	rdmsrl(MSR_IA32_FEATURE_CONTROL, msr);
+
+	return (msr & ctl_mask) == ctl_mask;
+}
+
 bool mce_intel_cmci_poll(void)
 bool mce_intel_cmci_poll(void)
 {
 {
 	if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE)
 	if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE)
@@ -405,6 +435,19 @@ static void intel_init_cmci(void)
 	cmci_recheck();
 	cmci_recheck();
 }
 }
 
 
+/* Opt in to LMCE: set MCG_EXT_CTL_LMCE_EN unless it is already set. */
+void intel_init_lmce(void)
+{
+	u64 ctl;
+
+	if (lmce_supported()) {
+		rdmsrl(MSR_IA32_MCG_EXT_CTL, ctl);
+		/* Skip the MSR write when the enable bit is already on. */
+		if (!(ctl & MCG_EXT_CTL_LMCE_EN))
+			wrmsrl(MSR_IA32_MCG_EXT_CTL, ctl | MCG_EXT_CTL_LMCE_EN);
+	}
+}
+
 void mce_intel_feature_init(struct cpuinfo_x86 *c)
 void mce_intel_feature_init(struct cpuinfo_x86 *c)
 {
 {
 	intel_init_thermal(c);
 	intel_init_thermal(c);