Эх сурвалжийг харах

x86/entry/64: Use the TSS sp2 slot for SYSCALL/SYSRET scratch space

In the non-trampoline SYSCALL64 path, a percpu variable is used to
temporarily store the user RSP value.

Instead of a separate variable, use the otherwise unused sp2 slot in the
TSS.  This will improve cache locality, as the sp1 slot is already used in
the same code to find the kernel stack.  It will also simplify a future
change to make the non-trampoline path work in PTI mode.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Borislav Petkov <bp@suse.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Joerg Roedel <joro@8bytes.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lkml.kernel.org/r/08e769a0023dbad4bac6f34f3631dbaf8ad59f4f.1536015544.git.luto@kernel.org
Andy Lutomirski 7 жил өмнө
parent
commit
98f05b5138

+ 9 - 7
arch/x86/entry/entry_64.S

@@ -215,18 +215,20 @@ ENTRY(entry_SYSCALL_64)
 	/*
 	 * This path is only taken when PAGE_TABLE_ISOLATION is disabled so it
 	 * is not required to switch CR3.
+	 *
+	 * tss.sp2 is scratch space.
 	 */
-	movq	%rsp, PER_CPU_VAR(rsp_scratch)
+	movq	%rsp, PER_CPU_VAR(cpu_tss_rw + TSS_sp2)
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
 
 	/* Construct struct pt_regs on stack */
-	pushq	$__USER_DS			/* pt_regs->ss */
-	pushq	PER_CPU_VAR(rsp_scratch)	/* pt_regs->sp */
-	pushq	%r11				/* pt_regs->flags */
-	pushq	$__USER_CS			/* pt_regs->cs */
-	pushq	%rcx				/* pt_regs->ip */
+	pushq	$__USER_DS				/* pt_regs->ss */
+	pushq	PER_CPU_VAR(cpu_tss_rw + TSS_sp2)	/* pt_regs->sp */
+	pushq	%r11					/* pt_regs->flags */
+	pushq	$__USER_CS				/* pt_regs->cs */
+	pushq	%rcx					/* pt_regs->ip */
 GLOBAL(entry_SYSCALL_64_after_hwframe)
-	pushq	%rax				/* pt_regs->orig_ax */
+	pushq	%rax					/* pt_regs->orig_ax */
 
 	PUSH_AND_CLEAR_REGS rax=$-ENOSYS
 

+ 6 - 0
arch/x86/include/asm/processor.h

@@ -315,7 +315,13 @@ struct x86_hw_tss {
 	 */
 	u64			sp1;
 
+	/*
+	 * Since Linux does not use ring 2, the 'sp2' slot is unused by
+	 * hardware.  entry_SYSCALL_64 uses it as scratch space to stash
+	 * the user RSP value.
+	 */
 	u64			sp2;
+
 	u64			reserved2;
 	u64			ist[7];
 	u32			reserved3;

+ 2 - 1
arch/x86/kernel/asm-offsets.c

@@ -105,7 +105,8 @@ void common(void) {
 	DEFINE(SIZEOF_entry_stack, sizeof(struct entry_stack));
 	DEFINE(MASK_entry_stack, (~(sizeof(struct entry_stack) - 1)));
 
-	/* Offset for sp0 and sp1 into the tss_struct */
+	/* Offset for fields in tss_struct */
 	OFFSET(TSS_sp0, tss_struct, x86_tss.sp0);
 	OFFSET(TSS_sp1, tss_struct, x86_tss.sp1);
+	OFFSET(TSS_sp2, tss_struct, x86_tss.sp2);
 }

+ 0 - 2
arch/x86/kernel/process_64.c

@@ -59,8 +59,6 @@
 #include <asm/unistd_32_ia32.h>
 #endif
 
-__visible DEFINE_PER_CPU(unsigned long, rsp_scratch);
-
 /* Prints also some state that isn't saved in the pt_regs */
 void __show_regs(struct pt_regs *regs, int all)
 {

+ 5 - 3
arch/x86/xen/xen-asm_64.S

@@ -91,13 +91,15 @@ ENTRY(xen_iret)
 ENTRY(xen_sysret64)
 	/*
 	 * We're already on the usermode stack at this point, but
-	 * still with the kernel gs, so we can easily switch back
+	 * still with the kernel gs, so we can easily switch back.
+	 *
+	 * tss.sp2 is scratch space.
 	 */
-	movq %rsp, PER_CPU_VAR(rsp_scratch)
+	movq %rsp, PER_CPU_VAR(cpu_tss_rw + TSS_sp2)
 	movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
 
 	pushq $__USER_DS
-	pushq PER_CPU_VAR(rsp_scratch)
+	pushq PER_CPU_VAR(cpu_tss_rw + TSS_sp2)
 	pushq %r11
 	pushq $__USER_CS
 	pushq %rcx