|
@@ -49,6 +49,7 @@
|
|
#include <asm/tlbflush.h>
|
|
#include <asm/tlbflush.h>
|
|
#include <asm/mce.h>
|
|
#include <asm/mce.h>
|
|
#include <asm/msr.h>
|
|
#include <asm/msr.h>
|
|
|
|
+#include <asm/reboot.h>
|
|
|
|
|
|
#include "mce-internal.h"
|
|
#include "mce-internal.h"
|
|
|
|
|
|
@@ -1127,9 +1128,22 @@ void do_machine_check(struct pt_regs *regs, long error_code)
|
|
* on Intel.
|
|
* on Intel.
|
|
*/
|
|
*/
|
|
int lmce = 1;
|
|
int lmce = 1;
|
|
|
|
+ int cpu = smp_processor_id();
|
|
|
|
|
|
- /* If this CPU is offline, just bail out. */
|
|
|
|
- if (cpu_is_offline(smp_processor_id())) {
|
|
|
|
|
|
+ /*
|
|
|
|
+ * Cases where we avoid rendezvous handler timeout:
|
|
|
|
+ * 1) If this CPU is offline.
|
|
|
|
+ *
|
|
|
|
+ * 2) If crashing_cpu was set, e.g. we're entering kdump and we need to
|
|
|
|
+ * skip those CPUs which remain looping in the 1st kernel - see
|
|
|
|
+ * crash_nmi_callback().
|
|
|
|
+ *
|
|
|
|
+ * Note: there still is a small window between kexec-ing and the new,
|
|
|
|
+ * kdump kernel establishing a new #MC handler where a broadcasted MCE
|
|
|
|
+ * might not get handled properly.
|
|
|
|
+ */
|
|
|
|
+ if (cpu_is_offline(cpu) ||
|
|
|
|
+ (crashing_cpu != -1 && crashing_cpu != cpu)) {
|
|
u64 mcgstatus;
|
|
u64 mcgstatus;
|
|
|
|
|
|
mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
|
|
mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
|