10 年之前 · 243d657eaf
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1047,6 +1047,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 
				 	char *msg = "Unknown";
			
 
				 	u64 recover_paddr = ~0ull;
			
 
				 	int flags = MF_ACTION_REQUIRED;
			
 
				+	int lmce = 0;
			
 
				 
			
 
				 	prev_state = ist_enter(regs);
			
 
				 
			
@@ -1074,11 +1075,20 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 
				 		kill_it = 1;
			
 
				 
			
 
				 	/*
			
 
				-	 * Go through all the banks in exclusion of the other CPUs.
			
 
				-	 * This way we don't report duplicated events on shared banks
			
 
				-	 * because the first one to see it will clear it.
			
 
				+	 * Check if this MCE is signaled to only this logical processor
			
 
				 	 */
			
 
				-	order = mce_start(&no_way_out);
			
 
				+	if (m.mcgstatus & MCG_STATUS_LMCES)
			
 
				+		lmce = 1;
			
 
				+	else {
			
 
				+		/*
			
 
				+		 * Go through all the banks in exclusion of the other CPUs.
			
 
				+		 * This way we don't report duplicated events on shared banks
			
 
				+		 * because the first one to see it will clear it.
			
 
				+		 * If this is a Local MCE, then no need to perform rendezvous.
			
 
				+		 */
			
 
				+		order = mce_start(&no_way_out);
			
 
				+	}
			
 
				+
			
 
				 	for (i = 0; i < cfg->banks; i++) {
			
 
				 		__clear_bit(i, toclear);
			
 
				 		if (!test_bit(i, valid_banks))
			
@@ -1155,8 +1165,18 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 
				 	 * Do most of the synchronization with other CPUs.
			
 
				 	 * When there's any problem use only local no_way_out state.
			
 
				 	 */
			
 
				-	if (mce_end(order) < 0)
			
 
				-		no_way_out = worst >= MCE_PANIC_SEVERITY;
			
 
				+	if (!lmce) {
			
 
				+		if (mce_end(order) < 0)
			
 
				+			no_way_out = worst >= MCE_PANIC_SEVERITY;
			
 
				+	} else {
			
 
				+		/*
			
 
				+		 * Local MCE skipped calling mce_reign()
			
 
				+		 * If we found a fatal error, we need to panic here.
			
 
				+		 */
			
 
				+		 if (worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3)
			
 
				+			mce_panic("Machine check from unknown source",
			
 
				+				NULL, NULL);
			
 
				+	}
			
 
				 
			
 
				 	/*
			
 
				 	 * At insane "tolerant" levels we take no action. Otherwise
			
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -452,4 +452,5 @@ void mce_intel_feature_init(struct cpuinfo_x86 *c)
 
				 {
			
 
				 	intel_init_thermal(c);
			
 
				 	intel_init_cmci();
			
 
				+	intel_init_lmce();
			
 
				 }