
x86/speculation/mds: Clear CPU buffers on exit to user

commit 04dcbdb8057827b043b3c71aa397c4c63e67d086 upstream

Add a static key which controls the invocation of the CPU buffer clear
mechanism on exit to user space, and add the call to
prepare_exit_to_usermode() and do_nmi() right before actually returning.

Add documentation describing which kernel to user space transitions this
covers, and explain why some corner cases are not mitigated.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Reviewed-by: Jon Masters <jcm@redhat.com>
Tested-by: Jon Masters <jcm@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thomas Gleixner, 6 years ago, commit e4fa775b56
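
For readers unfamiliar with the static-key pattern the patch relies on, the
following self-contained userspace sketch models the control flow with a
plain bool standing in for the key. The real kernel uses
DEFINE_STATIC_KEY_FALSE()/static_branch_likely(), which patch the branch
site itself (a NOP becomes a jump), and the actual clear is a VERW
instruction rather than a printf; this is a model, not kernel code.

    #include <stdbool.h>
    #include <stdio.h>

    /* Stands in for DEFINE_STATIC_KEY_FALSE(mds_user_clear): off by
     * default, flipped on at boot when the mitigation is selected. */
    static bool mds_user_clear;

    static void mds_clear_cpu_buffers(void)
    {
            printf("buffers cleared\n");        /* VERW in the kernel */
    }

    static void mds_user_clear_cpu_buffers(void)
    {
            if (mds_user_clear)                 /* static_branch_likely() */
                    mds_clear_cpu_buffers();
    }

    /* Models the tail of prepare_exit_to_usermode(). */
    static void prepare_exit_to_usermode(void)
    {
            /* ... regular exit work ... */
            mds_user_clear_cpu_buffers();
    }

    int main(void)
    {
            prepare_exit_to_usermode();         /* key off: no clear */
            mds_user_clear = true;              /* models static_branch_enable() */
            prepare_exit_to_usermode();         /* key on: clear runs */
            return 0;
    }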

Documentation/x86/mds.rst (+52, -0)

@@ -97,3 +97,55 @@ According to current knowledge additional mitigations inside the kernel
 itself are not required because the necessary gadgets to expose the leaked
 data cannot be controlled in a way which allows exploitation from malicious
 user space or VM guests.
+
+Mitigation points
+-----------------
+
+1. Return to user space
+^^^^^^^^^^^^^^^^^^^^^^^
+
+   When transitioning from kernel to user space, the CPU buffers are
+   flushed on affected CPUs unless the mitigation is disabled on the
+   kernel command line. The mitigation is enabled through the static
+   key mds_user_clear.
+
+   The mitigation is invoked in prepare_exit_to_usermode(), which covers
+   most of the kernel to user space transitions. There are a few
+   exceptions which do not invoke prepare_exit_to_usermode() on return
+   to user space. These exceptions use the paranoid exit code.
+
+   - Non-Maskable Interrupt (NMI):
+
+     Access to sensitive data such as keys or credentials in the NMI
+     context is mostly theoretical: the CPU can do prefetching or execute
+     a misspeculated code path and thereby fetch data which might end up
+     leaking through a buffer.
+
+     But for mounting other attacks, the kernel stack address of the task is
+     already valuable information. So in full mitigation mode, the NMI is
+     mitigated on the return from do_nmi() to provide almost complete
+     coverage.
+
+   - Double fault (#DF):
+
+     A double fault is usually fatal, but the ESPFIX workaround, which can
+     be triggered from user space through modify_ldt(2), is a recoverable
+     double fault. #DF uses the paranoid exit path, so explicit mitigation
+     in the double fault handler is required.
+
+   - Machine Check Exception (#MC):
+
+     Another corner case is a #MC which hits between the CPU buffer clear
+     invocation and the actual return to user space. The #MC handler
+     repopulates the buffers to some extent, and as this still is in
+     kernel space it takes the paranoid exit path, which does not clear
+     the CPU buffers again. Machine checks are not reliably controllable
+     and the window is extremely small, so a mitigation would just tick a
+     checkbox that this theoretical corner case is covered. To keep the
+     number of special cases small, ignore #MC.
+
+   - Debug Exception (#DB):
+
+     This takes the paranoid exit path only when the INT1 breakpoint is in
+     kernel space. #DB on a user space address takes the regular exit path,
+     so no extra mitigation is required.
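
Taken together, the mitigation points above reduce to a small decision
table. The sketch below is a hypothetical summary, not kernel code (the
enum and function exist only for illustration); kernel-address #DB is
omitted because it returns to kernel space and is not a user transition.

    #include <stdbool.h>

    enum exit_path {
            REGULAR_EXIT,           /* prepare_exit_to_usermode(), incl. user #DB */
            NMI_RETURN,             /* paranoid exit, explicit clear added */
            DF_ESPFIX_RETURN,       /* paranoid exit, explicit clear added */
            MC_RETURN,              /* paranoid exit, deliberately ignored */
    };

    static bool buffers_cleared(enum exit_path p)
    {
            switch (p) {
            case REGULAR_EXIT:      return true;    /* common.c hunk below */
            case NMI_RETURN:        return true;    /* nmi.c hunk below */
            case DF_ESPFIX_RETURN:  return true;    /* traps.c hunk below */
            case MC_RETURN:         return false;   /* window deemed too small */
            }
            return false;
    }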

arch/x86/entry/common.c (+3, -0)

@@ -31,6 +31,7 @@
 #include <asm/vdso.h>
 #include <linux/uaccess.h>
 #include <asm/cpufeature.h>
+#include <asm/nospec-branch.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/syscalls.h>
@@ -212,6 +213,8 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
 #endif
 
 	user_enter_irqoff();
+
+	mds_user_clear_cpu_buffers();
 }
 
 #define SYSCALL_EXIT_WORK_FLAGS				\

arch/x86/include/asm/nospec-branch.h (+13, -0)

@@ -317,6 +317,8 @@ DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp);
 DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
 DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
 
+DECLARE_STATIC_KEY_FALSE(mds_user_clear);
+
 #include <asm/segment.h>
 
 /**
@@ -342,6 +344,17 @@ static inline void mds_clear_cpu_buffers(void)
 	asm volatile("verw %[ds]" : : [ds] "m" (ds) : "cc");
 }
 
+/**
+ * mds_user_clear_cpu_buffers - Mitigation for MDS vulnerability
+ *
+ * Clear CPU buffers if the corresponding static key is enabled
+ */
+static inline void mds_user_clear_cpu_buffers(void)
+{
+	if (static_branch_likely(&mds_user_clear))
+		mds_clear_cpu_buffers();
+}
+
 #endif /* __ASSEMBLY__ */
 
 /*
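
mds_clear_cpu_buffers(), visible in the context above, issues VERW with a
memory operand holding __KERNEL_DS. VERW is not privileged, so the same
idiom can be exercised from user space against a user data segment. The
sketch below assumes the x86-64 Linux __USER_DS selector value 0x2b, and
the buffer-clearing side effect only exists on CPUs with the MD_CLEAR
microcode update; without it, VERW is just a segment verification.

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            /* 0x2b is __USER_DS on x86-64 Linux (GDT entry 5, RPL 3);
             * any valid writable data segment selector would do. */
            static const uint16_t ds = 0x2b;

            /* Same idiom as mds_clear_cpu_buffers(): with MD_CLEAR
             * microcode, VERW overwrites the affected CPU buffers as a
             * side effect of verifying the segment is writable. */
            asm volatile("verw %[ds]" : : [ds] "m" (ds) : "cc");

            puts("verw executed");
            return 0;
    }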

arch/x86/kernel/cpu/bugs.c (+3, -0)

@@ -61,6 +61,9 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
 /* Control unconditional IBPB in switch_mm() */
 DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
 
+/* Control MDS CPU buffer clear before returning to user space */
+DEFINE_STATIC_KEY_FALSE(mds_user_clear);
+
 void __init check_bugs(void)
 {
 	identify_boot_cpu();
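
Note that this hunk only defines the key in its default-off state; nothing
in this patch enables it. The switch-on happens in a follow-up patch in
the series, roughly along the lines sketched below. mds_mitigation and the
MDS_MITIGATION_* states come from that later patch, so treat this as an
outline of the enable site, not the exact upstream code.

    /* Sketch of the later enablement site in
     * arch/x86/kernel/cpu/bugs.c (follow-up patch). */
    static void __init mds_select_mitigation(void)
    {
            if (!boot_cpu_has_bug(X86_BUG_MDS)) {
                    mds_mitigation = MDS_MITIGATION_OFF;
                    return;
            }

            if (mds_mitigation == MDS_MITIGATION_FULL) {
                    if (boot_cpu_has(X86_FEATURE_MD_CLEAR))
                            static_branch_enable(&mds_user_clear);
                    else
                            mds_mitigation = MDS_MITIGATION_OFF;
            }
    }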

arch/x86/kernel/nmi.c (+4, -0)

@@ -34,6 +34,7 @@
 #include <asm/x86_init.h>
 #include <asm/reboot.h>
 #include <asm/cache.h>
+#include <asm/nospec-branch.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/nmi.h>
@@ -533,6 +534,9 @@ nmi_restart:
 		write_cr2(this_cpu_read(nmi_cr2));
 	if (this_cpu_dec_return(nmi_state))
 		goto nmi_restart;
+
+	if (user_mode(regs))
+		mds_user_clear_cpu_buffers();
 }
 NOKPROBE_SYMBOL(do_nmi);
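
The user_mode(regs) check is what restricts the clear to real
kernel-to-user transitions: an NMI that interrupted kernel code IRETs back
into the kernel, where the buffers refill with kernel data immediately
anyway. A standalone model of that decision follows; struct pt_regs, the
helpers, and the selector values are stubs here, and only the CPL test in
user_mode() mirrors the kernel logic.

    #include <stdbool.h>
    #include <stdio.h>

    /* Stub of the kernel's struct pt_regs; only cs matters here. */
    struct pt_regs { unsigned long cs; };

    /* The low two bits of the saved CS are the privilege level the
     * NMI interrupted: 3 is user mode, 0 is kernel mode. */
    static bool user_mode(struct pt_regs *regs)
    {
            return (regs->cs & 3) == 3;
    }

    static void mds_user_clear_cpu_buffers(void)
    {
            puts("buffers cleared before IRET to user");
    }

    /* Models the tail of do_nmi() after the nmi_restart loop. */
    static void nmi_exit_tail(struct pt_regs *regs)
    {
            if (user_mode(regs))
                    mds_user_clear_cpu_buffers();
            /* else: returning to kernel, no user transition to protect */
    }

    int main(void)
    {
            struct pt_regs user = { .cs = 0x33 };   /* __USER_CS, CPL 3 */
            struct pt_regs kern = { .cs = 0x10 };   /* __KERNEL_CS, CPL 0 */

            nmi_exit_tail(&user);   /* clears */
            nmi_exit_tail(&kern);   /* does not clear */
            return 0;
    }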

arch/x86/kernel/traps.c (+8, -0)

@@ -58,6 +58,7 @@
 #include <asm/alternative.h>
 #include <asm/fpu/xstate.h>
 #include <asm/trace/mpx.h>
+#include <asm/nospec-branch.h>
 #include <asm/mpx.h>
 #include <asm/vm86.h>
 #include <asm/umip.h>
@@ -387,6 +388,13 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
 		regs->ip = (unsigned long)general_protection;
 		regs->sp = (unsigned long)&gpregs->orig_ax;
 
+		/*
+		 * This situation can be triggered by userspace via
+		 * modify_ldt(2) and the return does not take the regular
+		 * user space exit, so a CPU buffer clear is required when
+		 * MDS mitigation is enabled.
+		 */
+		mds_user_clear_cpu_buffers();
 		return;
 	}
 #endif