Browse source

x86: replace privileged instructions with paravirt macros

The assembly code in entry_64.S issues a number of privileged instructions,
such as cli, sti, and swapgs. Paravirt guests are forbidden to execute these
directly, so we replace them with macros that will do the right thing.

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Glauber de Oliveira Costa, 17 years ago
parent
commit
72fe485854
1 changed file with 59 additions and 42 deletions
  1. 59 42
      arch/x86/kernel/entry_64.S

+ 59 - 42
arch/x86/kernel/entry_64.S

@@ -50,6 +50,7 @@
 #include <asm/hw_irq.h>
 #include <asm/hw_irq.h>
 #include <asm/page.h>
 #include <asm/page.h>
 #include <asm/irqflags.h>
 #include <asm/irqflags.h>
+#include <asm/paravirt.h>
 
 
 	.code64
 	.code64
 
 
@@ -57,6 +58,13 @@
 #define retint_kernel retint_restore_args
 #define retint_kernel retint_restore_args
 #endif	
 #endif	
 
 
+#ifdef CONFIG_PARAVIRT
+ENTRY(native_irq_enable_syscall_ret)
+	movq	%gs:pda_oldrsp,%rsp
+	swapgs
+	sysretq
+#endif /* CONFIG_PARAVIRT */
+
 
 
 .macro TRACE_IRQS_IRETQ offset=ARGOFFSET
 .macro TRACE_IRQS_IRETQ offset=ARGOFFSET
 #ifdef CONFIG_TRACE_IRQFLAGS
 #ifdef CONFIG_TRACE_IRQFLAGS
@@ -216,14 +224,21 @@ ENTRY(system_call)
 	CFI_DEF_CFA	rsp,PDA_STACKOFFSET
 	CFI_DEF_CFA	rsp,PDA_STACKOFFSET
 	CFI_REGISTER	rip,rcx
 	CFI_REGISTER	rip,rcx
 	/*CFI_REGISTER	rflags,r11*/
 	/*CFI_REGISTER	rflags,r11*/
-	swapgs
+	SWAPGS_UNSAFE_STACK
+	/*
+	 * A hypervisor implementation might want to use a label
+	 * after the swapgs, so that it can do the swapgs
+	 * for the guest and jump here on syscall.
+	 */
+ENTRY(system_call_after_swapgs)
+
 	movq	%rsp,%gs:pda_oldrsp 
 	movq	%rsp,%gs:pda_oldrsp 
 	movq	%gs:pda_kernelstack,%rsp
 	movq	%gs:pda_kernelstack,%rsp
 	/*
 	/*
 	 * No need to follow this irqs off/on section - it's straight
 	 * No need to follow this irqs off/on section - it's straight
 	 * and short:
 	 * and short:
 	 */
 	 */
-	sti					
+	ENABLE_INTERRUPTS(CLBR_NONE)
 	SAVE_ARGS 8,1
 	SAVE_ARGS 8,1
 	movq  %rax,ORIG_RAX-ARGOFFSET(%rsp) 
 	movq  %rax,ORIG_RAX-ARGOFFSET(%rsp) 
 	movq  %rcx,RIP-ARGOFFSET(%rsp)
 	movq  %rcx,RIP-ARGOFFSET(%rsp)
@@ -246,7 +261,7 @@ ret_from_sys_call:
 sysret_check:		
 sysret_check:		
 	LOCKDEP_SYS_EXIT
 	LOCKDEP_SYS_EXIT
 	GET_THREAD_INFO(%rcx)
 	GET_THREAD_INFO(%rcx)
-	cli
+	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
 	TRACE_IRQS_OFF
 	movl threadinfo_flags(%rcx),%edx
 	movl threadinfo_flags(%rcx),%edx
 	andl %edi,%edx
 	andl %edi,%edx
@@ -260,9 +275,7 @@ sysret_check:
 	CFI_REGISTER	rip,rcx
 	CFI_REGISTER	rip,rcx
 	RESTORE_ARGS 0,-ARG_SKIP,1
 	RESTORE_ARGS 0,-ARG_SKIP,1
 	/*CFI_REGISTER	rflags,r11*/
 	/*CFI_REGISTER	rflags,r11*/
-	movq	%gs:pda_oldrsp,%rsp
-	swapgs
-	sysretq
+	ENABLE_INTERRUPTS_SYSCALL_RET
 
 
 	CFI_RESTORE_STATE
 	CFI_RESTORE_STATE
 	/* Handle reschedules */
 	/* Handle reschedules */
@@ -271,7 +284,7 @@ sysret_careful:
 	bt $TIF_NEED_RESCHED,%edx
 	bt $TIF_NEED_RESCHED,%edx
 	jnc sysret_signal
 	jnc sysret_signal
 	TRACE_IRQS_ON
 	TRACE_IRQS_ON
-	sti
+	ENABLE_INTERRUPTS(CLBR_NONE)
 	pushq %rdi
 	pushq %rdi
 	CFI_ADJUST_CFA_OFFSET 8
 	CFI_ADJUST_CFA_OFFSET 8
 	call schedule
 	call schedule
@@ -282,7 +295,7 @@ sysret_careful:
 	/* Handle a signal */ 
 	/* Handle a signal */ 
 sysret_signal:
 sysret_signal:
 	TRACE_IRQS_ON
 	TRACE_IRQS_ON
-	sti
+	ENABLE_INTERRUPTS(CLBR_NONE)
 	testl $_TIF_DO_NOTIFY_MASK,%edx
 	testl $_TIF_DO_NOTIFY_MASK,%edx
 	jz    1f
 	jz    1f
 
 
@@ -295,7 +308,7 @@ sysret_signal:
 1:	movl $_TIF_NEED_RESCHED,%edi
 1:	movl $_TIF_NEED_RESCHED,%edi
 	/* Use IRET because user could have changed frame. This
 	/* Use IRET because user could have changed frame. This
 	   works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
 	   works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
-	cli
+	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
 	TRACE_IRQS_OFF
 	jmp int_with_check
 	jmp int_with_check
 	
 	
@@ -327,7 +340,7 @@ tracesys:
  */
  */
 	.globl int_ret_from_sys_call
 	.globl int_ret_from_sys_call
 int_ret_from_sys_call:
 int_ret_from_sys_call:
-	cli
+	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
 	TRACE_IRQS_OFF
 	testl $3,CS-ARGOFFSET(%rsp)
 	testl $3,CS-ARGOFFSET(%rsp)
 	je retint_restore_args
 	je retint_restore_args
@@ -349,20 +362,20 @@ int_careful:
 	bt $TIF_NEED_RESCHED,%edx
 	bt $TIF_NEED_RESCHED,%edx
 	jnc  int_very_careful
 	jnc  int_very_careful
 	TRACE_IRQS_ON
 	TRACE_IRQS_ON
-	sti
+	ENABLE_INTERRUPTS(CLBR_NONE)
 	pushq %rdi
 	pushq %rdi
 	CFI_ADJUST_CFA_OFFSET 8
 	CFI_ADJUST_CFA_OFFSET 8
 	call schedule
 	call schedule
 	popq %rdi
 	popq %rdi
 	CFI_ADJUST_CFA_OFFSET -8
 	CFI_ADJUST_CFA_OFFSET -8
-	cli
+	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
 	TRACE_IRQS_OFF
 	jmp int_with_check
 	jmp int_with_check
 
 
 	/* handle signals and tracing -- both require a full stack frame */
 	/* handle signals and tracing -- both require a full stack frame */
 int_very_careful:
 int_very_careful:
 	TRACE_IRQS_ON
 	TRACE_IRQS_ON
-	sti
+	ENABLE_INTERRUPTS(CLBR_NONE)
 	SAVE_REST
 	SAVE_REST
 	/* Check for syscall exit trace */	
 	/* Check for syscall exit trace */	
 	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
 	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
@@ -385,7 +398,7 @@ int_signal:
 1:	movl $_TIF_NEED_RESCHED,%edi	
 1:	movl $_TIF_NEED_RESCHED,%edi	
 int_restore_rest:
 int_restore_rest:
 	RESTORE_REST
 	RESTORE_REST
-	cli
+	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
 	TRACE_IRQS_OFF
 	jmp int_with_check
 	jmp int_with_check
 	CFI_ENDPROC
 	CFI_ENDPROC
@@ -506,7 +519,7 @@ END(stub_rt_sigreturn)
 	CFI_DEF_CFA_REGISTER	rbp
 	CFI_DEF_CFA_REGISTER	rbp
 	testl $3,CS(%rdi)
 	testl $3,CS(%rdi)
 	je 1f
 	je 1f
-	swapgs	
+	SWAPGS
 	/* irqcount is used to check if a CPU is already on an interrupt
 	/* irqcount is used to check if a CPU is already on an interrupt
 	   stack or not. While this is essentially redundant with preempt_count
 	   stack or not. While this is essentially redundant with preempt_count
 	   it is a little cheaper to use a separate counter in the PDA
 	   it is a little cheaper to use a separate counter in the PDA
@@ -527,7 +540,7 @@ ENTRY(common_interrupt)
 	interrupt do_IRQ
 	interrupt do_IRQ
 	/* 0(%rsp): oldrsp-ARGOFFSET */
 	/* 0(%rsp): oldrsp-ARGOFFSET */
 ret_from_intr:
 ret_from_intr:
-	cli	
+	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
 	TRACE_IRQS_OFF
 	decl %gs:pda_irqcount
 	decl %gs:pda_irqcount
 	leaveq
 	leaveq
@@ -556,13 +569,13 @@ retint_swapgs:		/* return to user-space */
 	/*
 	/*
 	 * The iretq could re-enable interrupts:
 	 * The iretq could re-enable interrupts:
 	 */
 	 */
-	cli
+	DISABLE_INTERRUPTS(CLBR_ANY)
 	TRACE_IRQS_IRETQ
 	TRACE_IRQS_IRETQ
-	swapgs 
+	SWAPGS
 	jmp restore_args
 	jmp restore_args
 
 
 retint_restore_args:	/* return to kernel space */
 retint_restore_args:	/* return to kernel space */
-	cli
+	DISABLE_INTERRUPTS(CLBR_ANY)
 	/*
 	/*
 	 * The iretq could re-enable interrupts:
 	 * The iretq could re-enable interrupts:
 	 */
 	 */
@@ -570,10 +583,14 @@ retint_restore_args:	/* return to kernel space */
 restore_args:
 restore_args:
 	RESTORE_ARGS 0,8,0						
 	RESTORE_ARGS 0,8,0						
 iret_label:	
 iret_label:	
+#ifdef CONFIG_PARAVIRT
+	INTERRUPT_RETURN
+#endif
+ENTRY(native_iret)
 	iretq
 	iretq
 
 
 	.section __ex_table,"a"
 	.section __ex_table,"a"
-	.quad iret_label,bad_iret	
+	.quad native_iret, bad_iret
 	.previous
 	.previous
 	.section .fixup,"ax"
 	.section .fixup,"ax"
 	/* force a signal here? this matches i386 behaviour */
 	/* force a signal here? this matches i386 behaviour */
@@ -581,24 +598,24 @@ iret_label:
 bad_iret:
 bad_iret:
 	movq $11,%rdi	/* SIGSEGV */
 	movq $11,%rdi	/* SIGSEGV */
 	TRACE_IRQS_ON
 	TRACE_IRQS_ON
-	sti
-	jmp do_exit			
-	.previous	
-	
+	ENABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI))
+	jmp do_exit
+	.previous
+
 	/* edi: workmask, edx: work */
 	/* edi: workmask, edx: work */
 retint_careful:
 retint_careful:
 	CFI_RESTORE_STATE
 	CFI_RESTORE_STATE
 	bt    $TIF_NEED_RESCHED,%edx
 	bt    $TIF_NEED_RESCHED,%edx
 	jnc   retint_signal
 	jnc   retint_signal
 	TRACE_IRQS_ON
 	TRACE_IRQS_ON
-	sti
+	ENABLE_INTERRUPTS(CLBR_NONE)
 	pushq %rdi
 	pushq %rdi
 	CFI_ADJUST_CFA_OFFSET	8
 	CFI_ADJUST_CFA_OFFSET	8
 	call  schedule
 	call  schedule
 	popq %rdi		
 	popq %rdi		
 	CFI_ADJUST_CFA_OFFSET	-8
 	CFI_ADJUST_CFA_OFFSET	-8
 	GET_THREAD_INFO(%rcx)
 	GET_THREAD_INFO(%rcx)
-	cli
+	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
 	TRACE_IRQS_OFF
 	jmp retint_check
 	jmp retint_check
 	
 	
@@ -606,14 +623,14 @@ retint_signal:
 	testl $_TIF_DO_NOTIFY_MASK,%edx
 	testl $_TIF_DO_NOTIFY_MASK,%edx
 	jz    retint_swapgs
 	jz    retint_swapgs
 	TRACE_IRQS_ON
 	TRACE_IRQS_ON
-	sti
+	ENABLE_INTERRUPTS(CLBR_NONE)
 	SAVE_REST
 	SAVE_REST
 	movq $-1,ORIG_RAX(%rsp) 			
 	movq $-1,ORIG_RAX(%rsp) 			
 	xorl %esi,%esi		# oldset
 	xorl %esi,%esi		# oldset
 	movq %rsp,%rdi		# &pt_regs
 	movq %rsp,%rdi		# &pt_regs
 	call do_notify_resume
 	call do_notify_resume
 	RESTORE_REST
 	RESTORE_REST
-	cli
+	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
 	TRACE_IRQS_OFF
 	movl $_TIF_NEED_RESCHED,%edi
 	movl $_TIF_NEED_RESCHED,%edi
 	GET_THREAD_INFO(%rcx)
 	GET_THREAD_INFO(%rcx)
@@ -731,7 +748,7 @@ END(spurious_interrupt)
 	rdmsr
 	rdmsr
 	testl %edx,%edx
 	testl %edx,%edx
 	js    1f
 	js    1f
-	swapgs
+	SWAPGS
 	xorl  %ebx,%ebx
 	xorl  %ebx,%ebx
 1:
 1:
 	.if \ist
 	.if \ist
@@ -747,7 +764,7 @@ END(spurious_interrupt)
 	.if \ist
 	.if \ist
 	addq	$EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
 	addq	$EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
 	.endif
 	.endif
-	cli
+	DISABLE_INTERRUPTS(CLBR_NONE)
 	.if \irqtrace
 	.if \irqtrace
 	TRACE_IRQS_OFF
 	TRACE_IRQS_OFF
 	.endif
 	.endif
@@ -776,10 +793,10 @@ paranoid_swapgs\trace:
 	.if \trace
 	.if \trace
 	TRACE_IRQS_IRETQ 0
 	TRACE_IRQS_IRETQ 0
 	.endif
 	.endif
-	swapgs
+	SWAPGS_UNSAFE_STACK
 paranoid_restore\trace:
 paranoid_restore\trace:
 	RESTORE_ALL 8
 	RESTORE_ALL 8
-	iretq
+	INTERRUPT_RETURN
 paranoid_userspace\trace:
 paranoid_userspace\trace:
 	GET_THREAD_INFO(%rcx)
 	GET_THREAD_INFO(%rcx)
 	movl threadinfo_flags(%rcx),%ebx
 	movl threadinfo_flags(%rcx),%ebx
@@ -794,11 +811,11 @@ paranoid_userspace\trace:
 	.if \trace
 	.if \trace
 	TRACE_IRQS_ON
 	TRACE_IRQS_ON
 	.endif
 	.endif
-	sti
+	ENABLE_INTERRUPTS(CLBR_NONE)
 	xorl %esi,%esi 			/* arg2: oldset */
 	xorl %esi,%esi 			/* arg2: oldset */
 	movq %rsp,%rdi 			/* arg1: &pt_regs */
 	movq %rsp,%rdi 			/* arg1: &pt_regs */
 	call do_notify_resume
 	call do_notify_resume
-	cli
+	DISABLE_INTERRUPTS(CLBR_NONE)
 	.if \trace
 	.if \trace
 	TRACE_IRQS_OFF
 	TRACE_IRQS_OFF
 	.endif
 	.endif
@@ -807,9 +824,9 @@ paranoid_schedule\trace:
 	.if \trace
 	.if \trace
 	TRACE_IRQS_ON
 	TRACE_IRQS_ON
 	.endif
 	.endif
-	sti
+	ENABLE_INTERRUPTS(CLBR_ANY)
 	call schedule
 	call schedule
-	cli
+	DISABLE_INTERRUPTS(CLBR_ANY)
 	.if \trace
 	.if \trace
 	TRACE_IRQS_OFF
 	TRACE_IRQS_OFF
 	.endif
 	.endif
@@ -862,7 +879,7 @@ KPROBE_ENTRY(error_entry)
 	testl $3,CS(%rsp)
 	testl $3,CS(%rsp)
 	je  error_kernelspace
 	je  error_kernelspace
 error_swapgs:	
 error_swapgs:	
-	swapgs
+	SWAPGS
 error_sti:	
 error_sti:	
 	movq %rdi,RDI(%rsp) 	
 	movq %rdi,RDI(%rsp) 	
 	CFI_REL_OFFSET	rdi,RDI
 	CFI_REL_OFFSET	rdi,RDI
@@ -874,7 +891,7 @@ error_sti:
 error_exit:
 error_exit:
 	movl %ebx,%eax
 	movl %ebx,%eax
 	RESTORE_REST
 	RESTORE_REST
-	cli
+	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
 	TRACE_IRQS_OFF
 	GET_THREAD_INFO(%rcx)	
 	GET_THREAD_INFO(%rcx)	
 	testl %eax,%eax
 	testl %eax,%eax
@@ -911,12 +928,12 @@ ENTRY(load_gs_index)
 	CFI_STARTPROC
 	CFI_STARTPROC
 	pushf
 	pushf
 	CFI_ADJUST_CFA_OFFSET 8
 	CFI_ADJUST_CFA_OFFSET 8
-	cli
-        swapgs
+	DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI))
+        SWAPGS
 gs_change:     
 gs_change:     
         movl %edi,%gs   
         movl %edi,%gs   
 2:	mfence		/* workaround */
 2:	mfence		/* workaround */
-	swapgs
+	SWAPGS
         popf
         popf
 	CFI_ADJUST_CFA_OFFSET -8
 	CFI_ADJUST_CFA_OFFSET -8
         ret
         ret
@@ -930,7 +947,7 @@ ENDPROC(load_gs_index)
         .section .fixup,"ax"
         .section .fixup,"ax"
 	/* running with kernelgs */
 	/* running with kernelgs */
 bad_gs: 
 bad_gs: 
-	swapgs			/* switch back to user gs */
+	SWAPGS			/* switch back to user gs */
 	xorl %eax,%eax
 	xorl %eax,%eax
         movl %eax,%gs
         movl %eax,%gs
         jmp  2b
         jmp  2b