@@ -101,119 +101,13 @@ sysenter_flags_fixed:
 	movl	$11, %edi
 	call	do_exit

-	/*
-	 * Re-enable interrupts. IRQ tracing already thinks that IRQs are
-	 * on (since we treat user mode as having IRQs on), and the
-	 * prologue above is too short for it to be worth adding a
-	 * tracing round trip.
-	 */
-	ENABLE_INTERRUPTS(CLBR_NONE)
-
-	/*
-	 * No need to do an access_ok() check here because RBP has been
-	 * 32-bit zero extended:
-	 */
-	ASM_STAC
-1:	movl	(%rbp), %ebp
-	_ASM_EXTABLE(1b, ia32_badarg)
-	ASM_CLAC
-
-	orl	$TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
-	testl	$_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
-	jnz	sysenter_tracesys
-
-sysenter_do_call:
-	/* 32-bit syscall -> 64-bit C ABI argument conversion */
-	movl	%edi, %r8d		/* arg5 */
-	movl	%ebp, %r9d		/* arg6 */
-	xchg	%ecx, %esi		/* rsi:arg2, rcx:arg4 */
-	movl	%ebx, %edi		/* arg1 */
-	movl	%edx, %edx		/* arg3 (zero extension) */
-sysenter_dispatch:
-	cmpq	$(IA32_NR_syscalls-1), %rax
-	ja	1f
-	call	*ia32_sys_call_table(, %rax, 8)
-	movq	%rax, RAX(%rsp)
-1:
-	DISABLE_INTERRUPTS(CLBR_NONE)
-	TRACE_IRQS_OFF
-	testl	$_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
-	jnz	ia32_ret_from_sys_call_irqs_off
-sysexit_from_sys_call:
-	/*
-	 * NB: SYSEXIT is not obviously safe for 64-bit kernels -- an
-	 * NMI between STI and SYSEXIT has poorly specified behavior,
-	 * and and NMI followed by an IRQ with usergs is fatal. So
-	 * we just pretend we're using SYSEXIT but we really use
-	 * SYSRETL instead.
-	 *
-	 * This code path is still called 'sysexit' because it pairs
-	 * with 'sysenter' and it uses the SYSENTER calling convention.
-	 */
-	andl	$~TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
-	movl	RIP(%rsp), %ecx		/* User %eip */
-	movq	RAX(%rsp), %rax
-	movl	RSI(%rsp), %esi
-	movl	RDI(%rsp), %edi
-	xorl	%edx, %edx		/* Do not leak kernel information */
-	xorq	%r8, %r8
-	xorq	%r9, %r9
-	xorq	%r10, %r10
-	movl	EFLAGS(%rsp), %r11d	/* User eflags */
-	TRACE_IRQS_ON
-
-	/*
-	 * SYSRETL works even on Intel CPUs. Use it in preference to SYSEXIT,
-	 * since it avoids a dicey window with interrupts enabled.
-	 */
-	movl	RSP(%rsp), %esp
-
-	/*
-	 * USERGS_SYSRET32 does:
-	 *  gsbase = user's gs base
-	 *  eip = ecx
-	 *  rflags = r11
-	 *  cs = __USER32_CS
-	 *  ss = __USER_DS
-	 *
-	 * The prologue set RIP(%rsp) to VDSO32_SYSENTER_RETURN, which does:
-	 *
-	 *  pop %ebp
-	 *  pop %edx
-	 *  pop %ecx
-	 *
-	 * Therefore, we invoke SYSRETL with EDX and R8-R10 zeroed to
-	 * avoid info leaks. R11 ends up with VDSO32_SYSENTER_RETURN's
-	 * address (already known to user code), and R12-R15 are
-	 * callee-saved and therefore don't contain any interesting
-	 * kernel data.
-	 */
-	USERGS_SYSRET32
+	/* Unreachable. */
+	ud2

 sysenter_fix_flags:
 	pushq	$X86_EFLAGS_FIXED
 	popfq
 	jmp	sysenter_flags_fixed
-
-sysenter_tracesys:
-	SAVE_EXTRA_REGS
-	xorl	%eax, %eax		/* Do not leak kernel information */
-	movq	%rax, R11(%rsp)
-	movq	%rax, R10(%rsp)
-	movq	%rax, R9(%rsp)
-	movq	%rax, R8(%rsp)
-	movq	%rsp, %rdi		/* &pt_regs -> arg1 */
-	call	syscall_trace_enter
-
-	/* Reload arg registers from stack. (see sysenter_tracesys) */
-	movl	RCX(%rsp), %ecx
-	movl	RDX(%rsp), %edx
-	movl	RSI(%rsp), %esi
-	movl	RDI(%rsp), %edi
-	movl	%eax, %eax		/* zero extension */
-
-	RESTORE_EXTRA_REGS
-	jmp	sysenter_do_call
 ENDPROC(entry_SYSENTER_compat)

 /*
@@ -280,142 +174,10 @@ ENTRY(entry_SYSCALL_compat)
 	pushq	$-ENOSYS		/* pt_regs->ax */
 	sub	$(10*8), %rsp		/* pt_regs->r8-11, bp, bx, r12-15 not saved */

-	/*
-	 * No need to do an access_ok check here because r8 has been
-	 * 32-bit zero extended:
-	 */
-	ASM_STAC
-1:	movl	(%r8), %r9d
-	_ASM_EXTABLE(1b, ia32_badarg)
-	ASM_CLAC
-	orl	$TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
-	testl	$_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
-	jnz	cstar_tracesys
-
-cstar_do_call:
-	/* 32-bit syscall -> 64-bit C ABI argument conversion */
-	movl	%edi, %r8d		/* arg5 */
-	/* r9 already loaded */		/* arg6 */
-	xchg	%ecx, %esi		/* rsi:arg2, rcx:arg4 */
-	movl	%ebx, %edi		/* arg1 */
-	movl	%edx, %edx		/* arg3 (zero extension) */
-
-cstar_dispatch:
-	cmpq	$(IA32_NR_syscalls-1), %rax
-	ja	1f
-
-	call	*ia32_sys_call_table(, %rax, 8)
-	movq	%rax, RAX(%rsp)
-1:
-	DISABLE_INTERRUPTS(CLBR_NONE)
-	TRACE_IRQS_OFF
-	testl	$_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
-	jnz	ia32_ret_from_sys_call_irqs_off
-
-sysretl_from_sys_call:
-	andl	$~TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
-	movl	RDX(%rsp), %edx
-	movl	RSI(%rsp), %esi
-	movl	RDI(%rsp), %edi
-	movl	RIP(%rsp), %ecx
-	movl	EFLAGS(%rsp), %r11d
-	movq	RAX(%rsp), %rax
-	xorq	%r10, %r10
-	xorq	%r9, %r9
-	xorq	%r8, %r8
-	TRACE_IRQS_ON
-	movl	RSP(%rsp), %esp
-	/*
-	 * 64-bit->32-bit SYSRET restores eip from ecx,
-	 * eflags from r11 (but RF and VM bits are forced to 0),
-	 * cs and ss are loaded from MSRs.
-	 * (Note: 32-bit->32-bit SYSRET is different: since r11
-	 * does not exist, it merely sets eflags.IF=1).
-	 *
-	 * NB: On AMD CPUs with the X86_BUG_SYSRET_SS_ATTRS bug, the ss
-	 * descriptor is not reinitialized. This means that we must
-	 * avoid SYSRET with SS == NULL, which could happen if we schedule,
-	 * exit the kernel, and re-enter using an interrupt vector. (All
-	 * interrupt entries on x86_64 set SS to NULL.) We prevent that
-	 * from happening by reloading SS in __switch_to.
-	 */
-	USERGS_SYSRET32
-
-cstar_tracesys:
-	xchgl	%r9d, %ebp
-	SAVE_EXTRA_REGS
-	xorl	%eax, %eax		/* Do not leak kernel information */
-	movq	%rax, R11(%rsp)
-	movq	%rax, R10(%rsp)
-	movq	%r9, R9(%rsp)
-	movq	%rax, R8(%rsp)
-	movq	%rsp, %rdi		/* &pt_regs -> arg1 */
-	call	syscall_trace_enter
-	movl	R9(%rsp), %r9d
-
-	/* Reload arg registers from stack. (see sysenter_tracesys) */
-	movl	RCX(%rsp), %ecx
-	movl	RDX(%rsp), %edx
-	movl	RSI(%rsp), %esi
-	movl	RDI(%rsp), %edi
-	movl	%eax, %eax		/* zero extension */
-
-	RESTORE_EXTRA_REGS
-	xchgl	%ebp, %r9d
-	jmp	cstar_do_call
+	/* Unreachable. */
+	ud2
 END(entry_SYSCALL_compat)

-ia32_badarg:
-	/*
-	 * So far, we've entered kernel mode, set AC, turned on IRQs, and
-	 * saved C regs except r8-r11. We haven't done any of the other
-	 * standard entry work, though. We want to bail, but we shouldn't
-	 * treat this as a syscall entry since we don't even know what the
-	 * args are. Instead, treat this as a non-syscall entry, finish
-	 * the entry work, and immediately exit after setting AX = -EFAULT.
-	 *
-	 * We're really just being polite here. Killing the task outright
-	 * would be a reasonable action, too. Given that the only valid
-	 * way to have gotten here is through the vDSO, and we already know
-	 * that the stack pointer is bad, the task isn't going to survive
-	 * for long no matter what we do.
-	 */
-
-	ASM_CLAC			/* undo STAC */
-	movq	$-EFAULT, RAX(%rsp)	/* return -EFAULT if possible */
-
-	/* Fill in the rest of pt_regs */
-	xorl	%eax, %eax
-	movq	%rax, R11(%rsp)
-	movq	%rax, R10(%rsp)
-	movq	%rax, R9(%rsp)
-	movq	%rax, R8(%rsp)
-	SAVE_EXTRA_REGS
-
-	/* Turn IRQs back off. */
-	DISABLE_INTERRUPTS(CLBR_NONE)
-	TRACE_IRQS_OFF
-
-	/* Now finish entering normal kernel mode. */
-#ifdef CONFIG_CONTEXT_TRACKING
-	call	enter_from_user_mode
-#endif
-
-	/* And exit again. */
-	jmp	retint_user
-
-ia32_ret_from_sys_call_irqs_off:
-	TRACE_IRQS_ON
-	ENABLE_INTERRUPTS(CLBR_NONE)
-
-ia32_ret_from_sys_call:
-	xorl	%eax, %eax		/* Do not leak kernel information */
-	movq	%rax, R11(%rsp)
-	movq	%rax, R10(%rsp)
-	movq	%rax, R9(%rsp)
-	movq	%rax, R8(%rsp)
-	jmp	int_ret_from_sys_call
-
 /*
  * Emulated IA32 system calls via int 0x80.
  *