|
@@ -19,12 +19,21 @@
|
|
|
.section .entry.text, "ax"
|
|
|
|
|
|
/*
|
|
|
- * 32-bit SYSENTER instruction entry.
|
|
|
+ * 32-bit SYSENTER entry.
|
|
|
*
|
|
|
- * SYSENTER loads ss, rsp, cs, and rip from previously programmed MSRs.
|
|
|
- * IF and VM in rflags are cleared (IOW: interrupts are off).
|
|
|
+ * 32-bit system calls through the vDSO's __kernel_vsyscall enter here
|
|
|
+ * on 64-bit kernels running on Intel CPUs.
|
|
|
+ *
|
|
|
+ * The SYSENTER instruction, in principle, should *only* occur in the
|
|
|
+ * vDSO. In practice, a small number of Android devices were shipped
|
|
|
+ * with a copy of Bionic that inlined a SYSENTER instruction. This
|
|
|
+ * never happened in any of Google's Bionic versions -- it only happened
|
|
|
+ * in a narrow range of Intel-provided versions.
|
|
|
+ *
|
|
|
+ * SYSENTER loads SS, RSP, CS, and RIP from previously programmed MSRs.
|
|
|
+ * IF and VM in RFLAGS are cleared (IOW: interrupts are off).
|
|
|
* SYSENTER does not save anything on the stack,
|
|
|
- * and does not save old rip (!!!) and rflags.
|
|
|
+ * and does not save old RIP (!!!), RSP, or RFLAGS.
|
|
|
*
|
|
|
* Arguments:
|
|
|
* eax system call number
|
|
@@ -35,10 +44,6 @@
|
|
|
* edi arg5
|
|
|
* ebp user stack
|
|
|
* 0(%ebp) arg6
|
|
|
- *
|
|
|
- * This is purely a fast path. For anything complicated we use the int 0x80
|
|
|
- * path below. We set up a complete hardware stack frame to share code
|
|
|
- * with the int 0x80 path.
|
|
|
*/
|
|
|
ENTRY(entry_SYSENTER_compat)
|
|
|
/* Interrupts are off on entry. */
|
|
@@ -131,17 +136,38 @@ GLOBAL(__end_entry_SYSENTER_compat)
|
|
|
ENDPROC(entry_SYSENTER_compat)
|
|
|
|
|
|
/*
|
|
|
- * 32-bit SYSCALL instruction entry.
|
|
|
+ * 32-bit SYSCALL entry.
|
|
|
+ *
|
|
|
+ * 32-bit system calls through the vDSO's __kernel_vsyscall enter here
|
|
|
+ * on 64-bit kernels running on AMD CPUs.
|
|
|
+ *
|
|
|
+ * The SYSCALL instruction, in principle, should *only* occur in the
|
|
|
+ * vDSO. In practice, it appears that this really is the case.
|
|
|
+ * As evidence:
|
|
|
+ *
|
|
|
+ * - The calling convention for SYSCALL has changed several times without
|
|
|
+ * anyone noticing.
|
|
|
+ *
|
|
|
+ * - Prior to the in-kernel X86_BUG_SYSRET_SS_ATTRS fixup, any
|
|
|
+ * user task that did SYSCALL without immediately reloading SS
|
|
|
+ * would randomly crash.
|
|
|
*
|
|
|
- * 32-bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11,
|
|
|
- * then loads new ss, cs, and rip from previously programmed MSRs.
|
|
|
- * rflags gets masked by a value from another MSR (so CLD and CLAC
|
|
|
- * are not needed). SYSCALL does not save anything on the stack
|
|
|
- * and does not change rsp.
|
|
|
+ * - Most programmers do not directly target AMD CPUs, and the 32-bit
|
|
|
+ * SYSCALL instruction does not exist on Intel CPUs. Even on AMD
|
|
|
+ * CPUs, Linux disables the SYSCALL instruction on 32-bit kernels
|
|
|
+ * because the SYSCALL instruction in legacy/native 32-bit mode (as
|
|
|
+ * opposed to compat mode) is sufficiently poorly designed as to be
|
|
|
+ * essentially unusable.
|
|
|
*
|
|
|
- * Note: rflags saving+masking-with-MSR happens only in Long mode
|
|
|
+ * 32-bit SYSCALL saves RIP to RCX, clears RFLAGS.RF, then saves
|
|
|
+ * RFLAGS to R11, then loads new SS, CS, and RIP from previously
|
|
|
+ * programmed MSRs. RFLAGS gets masked by a value from another MSR
|
|
|
+ * (so CLD and CLAC are not needed). SYSCALL does not save anything on
|
|
|
+ * the stack and does not change RSP.
|
|
|
+ *
|
|
|
+ * Note: RFLAGS saving+masking-with-MSR happens only in Long mode
|
|
|
* (in legacy 32-bit mode, IF, RF and VM bits are cleared and that's it).
|
|
|
- * Don't get confused: rflags saving+masking depends on Long Mode Active bit
|
|
|
+ * Don't get confused: RFLAGS saving+masking depends on Long Mode Active bit
|
|
|
* (EFER.LMA=1), NOT on bitness of userspace where SYSCALL executes
|
|
|
* or target CS descriptor's L bit (SYSCALL does not read segment descriptors).
|
|
|
*
|
|
@@ -241,7 +267,21 @@ sysret32_from_system_call:
|
|
|
END(entry_SYSCALL_compat)
|
|
|
|
|
|
/*
|
|
|
- * Emulated IA32 system calls via int 0x80.
|
|
|
+ * 32-bit legacy system call entry.
|
|
|
+ *
|
|
|
+ * 32-bit x86 Linux system calls traditionally used the INT $0x80
|
|
|
+ * instruction. INT $0x80 lands here.
|
|
|
+ *
|
|
|
+ * This entry point can be used by 32-bit and 64-bit programs to perform
|
|
|
+ * 32-bit system calls. Instances of INT $0x80 can be found inline in
|
|
|
+ * various programs and libraries. It is also used by the vDSO's
|
|
|
+ * __kernel_vsyscall fallback for hardware that doesn't support a faster
|
|
|
+ * entry method. Restarted 32-bit system calls also fall back to INT
|
|
|
+ * $0x80 regardless of what instruction was originally used to do the
|
|
|
+ * system call.
|
|
|
+ *
|
|
|
+ * This is considered a slow path. It is not used by most libc
|
|
|
+ * implementations on modern hardware except during process startup.
|
|
|
*
|
|
|
* Arguments:
|
|
|
* eax system call number
|
|
@@ -250,17 +290,8 @@ END(entry_SYSCALL_compat)
|
|
|
* edx arg3
|
|
|
* esi arg4
|
|
|
* edi arg5
|
|
|
- * ebp arg6 (note: not saved in the stack frame, should not be touched)
|
|
|
- *
|
|
|
- * Notes:
|
|
|
- * Uses the same stack frame as the x86-64 version.
|
|
|
- * All registers except eax must be saved (but ptrace may violate that).
|
|
|
- * Arguments are zero extended. For system calls that want sign extension and
|
|
|
- * take long arguments a wrapper is needed. Most calls can just be called
|
|
|
- * directly.
|
|
|
- * Assumes it is only called from user space and entered with interrupts off.
|
|
|
+ * ebp arg6
|
|
|
*/
|
|
|
-
|
|
|
ENTRY(entry_INT80_compat)
|
|
|
/*
|
|
|
* Interrupts are off on entry.
|