@@ -361,15 +361,12 @@ system_call_fastpath:
  * Has incomplete stack frame and undefined top of stack.
  */
 ret_from_sys_call:
-	movl $_TIF_ALLWORK_MASK,%edi
-	/* edi: flagmask */
-sysret_check:
+	testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	jnz int_ret_from_sys_call_fixup	/* Go to the slow path */
+
 	LOCKDEP_SYS_EXIT
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
-	movl TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET),%edx
-	andl %edi,%edx
-	jnz sysret_careful
 	CFI_REMEMBER_STATE
 	/*
 	 * sysretq will re-enable interrupts:
@@ -383,49 +380,10 @@ sysret_check:
 	USERGS_SYSRET64

 	CFI_RESTORE_STATE
-	/* Handle reschedules */
-	/* edx: work, edi: workmask */
-sysret_careful:
-	bt $TIF_NEED_RESCHED,%edx
-	jnc sysret_signal
-	TRACE_IRQS_ON
-	ENABLE_INTERRUPTS(CLBR_NONE)
-	pushq_cfi %rdi
-	SCHEDULE_USER
-	popq_cfi %rdi
-	jmp sysret_check
-
-	/* Handle a signal */
-sysret_signal:
-	TRACE_IRQS_ON
-	ENABLE_INTERRUPTS(CLBR_NONE)
-#ifdef CONFIG_AUDITSYSCALL
-	bt $TIF_SYSCALL_AUDIT,%edx
-	jc sysret_audit
-#endif
-	/*
-	 * We have a signal, or exit tracing or single-step.
-	 * These all wind up with the iret return path anyway,
-	 * so just join that path right now.
-	 */
+int_ret_from_sys_call_fixup:
 	FIXUP_TOP_OF_STACK %r11, -ARGOFFSET
-	jmp int_check_syscall_exit_work
-
-#ifdef CONFIG_AUDITSYSCALL
-	/*
-	 * Return fast path for syscall audit. Call __audit_syscall_exit()
-	 * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT
-	 * masked off.
-	 */
-sysret_audit:
-	movq RAX-ARGOFFSET(%rsp),%rsi	/* second arg, syscall return value */
-	cmpq $-MAX_ERRNO,%rsi		/* is it < -MAX_ERRNO? */
-	setbe %al			/* 1 if so, 0 if not */
-	movzbl %al,%edi			/* zero-extend that into %edi */
-	call __audit_syscall_exit
-	movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
-	jmp sysret_check
-#endif /* CONFIG_AUDITSYSCALL */
+	jmp int_ret_from_sys_call

 	/* Do syscall tracing */
tracesys:
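In C terms, the two hunks above collapse the old per-case exit work handling (sysret_careful, sysret_signal, sysret_audit) into a single test: if any TIF work bit is set, take the shared int_ret_from_sys_call slow path; otherwise fall through to the SYSRET fast path. A rough sketch of that control flow, assuming a made-up has_exit_work() helper and stub return paths; only _TIF_ALLWORK_MASK names a real kernel symbol, and its value here is illustrative:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define _TIF_ALLWORK_MASK 0x0000ffffu      /* illustrative mask value */

struct thread_info { uint32_t flags; };

/* The single test that replaces the old %edi work-mask bookkeeping. */
static bool has_exit_work(const struct thread_info *ti)
{
	return (ti->flags & _TIF_ALLWORK_MASK) != 0;
}

/* Stubs standing in for the assembly return paths. */
static void sysret_fast_path(void)      { puts("USERGS_SYSRET64"); }
static void int_ret_from_sys_call(void) { puts("iret slow path"); }

static void ret_from_sys_call(const struct thread_info *ti)
{
	if (has_exit_work(ti)) {
		/* Reschedule, signals, audit, exit tracing, and
		 * single-step all funnel through the one slow path. */
		int_ret_from_sys_call();
		return;
	}
	sysret_fast_path();
}

int main(void)
{
	struct thread_info ti = { .flags = 0 };
	ret_from_sys_call(&ti);   /* no pending work: fast path */
	ti.flags = 1;             /* pretend a TIF work bit is set */
	ret_from_sys_call(&ti);   /* pending work: slow path */
	return 0;
}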
@@ -794,6 +752,60 @@ retint_swapgs:		/* return to user-space */
 	 */
 	DISABLE_INTERRUPTS(CLBR_ANY)
 	TRACE_IRQS_IRETQ
+
+	/*
+	 * Try to use SYSRET instead of IRET if we're returning to
+	 * a completely clean 64-bit userspace context.
+	 */
+	movq (RCX-R11)(%rsp), %rcx
+	cmpq %rcx,(RIP-R11)(%rsp)		/* RCX == RIP */
+	jne opportunistic_sysret_failed
+
+	/*
+	 * On Intel CPUs, sysret with non-canonical RCX/RIP will #GP
+	 * in kernel space. This essentially lets the user take over
+	 * the kernel, since userspace controls RSP. It's not worth
+	 * testing for canonicalness exactly -- this check detects any
+	 * of the 17 high bits set, which is true for non-canonical
+	 * or kernel addresses. (This will pessimize vsyscall=native.
+	 * Big deal.)
+	 *
+	 * If virtual addresses ever become wider, this will need
+	 * to be updated to remain correct on both old and new CPUs.
+	 */
+	.ifne __VIRTUAL_MASK_SHIFT - 47
+	.error "virtual address width changed -- sysret checks need update"
+	.endif
+	shr $__VIRTUAL_MASK_SHIFT, %rcx
+	jnz opportunistic_sysret_failed
+
+	cmpq $__USER_CS,(CS-R11)(%rsp)		/* CS must match SYSRET */
+	jne opportunistic_sysret_failed
+
+	movq (R11-ARGOFFSET)(%rsp), %r11
+	cmpq %r11,(EFLAGS-ARGOFFSET)(%rsp)	/* R11 == RFLAGS */
+	jne opportunistic_sysret_failed
+
+	testq $X86_EFLAGS_RF,%r11		/* sysret can't restore RF */
+	jnz opportunistic_sysret_failed
+
+	/* nothing to check for RSP */
+
+	cmpq $__USER_DS,(SS-ARGOFFSET)(%rsp)	/* SS must match SYSRET */
+	jne opportunistic_sysret_failed
+
+	/*
+	 * We win! This label is here just for ease of understanding
+	 * perf profiles. Nothing jumps here.
+	 */
+irq_return_via_sysret:
+	CFI_REMEMBER_STATE
+	RESTORE_ARGS 1,8,1
+	movq (RSP-RIP)(%rsp),%rsp
+	USERGS_SYSRET64
+	CFI_RESTORE_STATE
+
+opportunistic_sysret_failed:
SWAPGS
jmp restore_args
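The eligibility checks in this hunk translate almost line for line into C. Below is a sketch under the assumption of a simplified regs struct and invented function names; the constants (__USER_CS, __USER_DS, X86_EFLAGS_RF, __VIRTUAL_MASK_SHIFT) do match their x86_64 values:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define __VIRTUAL_MASK_SHIFT 47       /* x86_64: 48-bit virtual addresses */
#define X86_EFLAGS_RF (1u << 16)
#define __USER_CS 0x33u               /* 64-bit user code segment selector */
#define __USER_DS 0x2bu               /* user data/stack segment selector */

/* Subset of the saved pt_regs that this hunk inspects. */
struct regs {
	uint64_t rip, rcx, r11, rflags;
	uint16_t cs, ss;
};

/*
 * Mirrors the `shr $__VIRTUAL_MASK_SHIFT, %rcx; jnz fail` test: any of
 * the 17 high bits set means a non-canonical or kernel address.
 */
static bool addr_ok_for_sysret(uint64_t rip)
{
	return (rip >> __VIRTUAL_MASK_SHIFT) == 0;
}

/* True when every SYSRET precondition from the hunk holds. */
static bool can_use_sysret(const struct regs *r)
{
	if (r->rcx != r->rip)            /* SYSRET loads RIP from RCX */
		return false;
	if (!addr_ok_for_sysret(r->rip)) /* Intel #GPs in kernel mode */
		return false;
	if (r->cs != __USER_CS)          /* CS must match SYSRET */
		return false;
	if (r->r11 != r->rflags)         /* SYSRET loads RFLAGS from R11 */
		return false;
	if (r->rflags & X86_EFLAGS_RF)   /* sysret can't restore RF */
		return false;
	/* nothing to check for RSP: the kernel pops it itself */
	if (r->ss != __USER_DS)          /* SS must match SYSRET */
		return false;
	return true;
}

int main(void)
{
	struct regs r = { .rip = 0x400080, .rcx = 0x400080,
			  .r11 = 0x246, .rflags = 0x246,
			  .cs = __USER_CS, .ss = __USER_DS };
	printf("sysret eligible: %d\n", can_use_sysret(&r)); /* prints 1 */
	return 0;
}

If any check fails, the code falls back to opportunistic_sysret_failed and the ordinary IRET path, so the optimization is purely opportunistic: correctness never depends on SYSRET being taken.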