@@ -142,67 +142,6 @@ END(native_usergs_sysret64)
 * with them due to bugs in both AMD and Intel CPUs.
 */

- .pushsection .entry_trampoline, "ax"
-
-/*
- * The code in here gets remapped into cpu_entry_area's trampoline. This means
- * that the assembler and linker have the wrong idea as to where this code
- * lives (and, in fact, it's mapped more than once, so it's not even at a
- * fixed address). So we can't reference any symbols outside the entry
- * trampoline and expect it to work.
- *
- * Instead, we carefully abuse %rip-relative addressing.
- * _entry_trampoline(%rip) refers to the start of the remapped) entry
- * trampoline. We can thus find cpu_entry_area with this macro:
- */
-
-#define CPU_ENTRY_AREA \
- _entry_trampoline - CPU_ENTRY_AREA_entry_trampoline(%rip)
-
-/* The top word of the SYSENTER stack is hot and is usable as scratch space. */
-#define RSP_SCRATCH CPU_ENTRY_AREA_entry_stack + \
- SIZEOF_entry_stack - 8 + CPU_ENTRY_AREA
-
-ENTRY(entry_SYSCALL_64_trampoline)
- UNWIND_HINT_EMPTY
- swapgs
-
- /* Stash the user RSP. */
- movq %rsp, RSP_SCRATCH
-
- /* Note: using %rsp as a scratch reg. */
- SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
-
- /* Load the top of the task stack into RSP */
- movq CPU_ENTRY_AREA_tss + TSS_sp1 + CPU_ENTRY_AREA, %rsp
-
- /* Start building the simulated IRET frame. */
- pushq $__USER_DS /* pt_regs->ss */
- pushq RSP_SCRATCH /* pt_regs->sp */
- pushq %r11 /* pt_regs->flags */
- pushq $__USER_CS /* pt_regs->cs */
- pushq %rcx /* pt_regs->ip */
-
- /*
- * x86 lacks a near absolute jump, and we can't jump to the real
- * entry text with a relative jump. We could push the target
- * address and then use retq, but this destroys the pipeline on
- * many CPUs (wasting over 20 cycles on Sandy Bridge). Instead,
- * spill RDI and restore it in a second-stage trampoline.
- */
- pushq %rdi
- movq $entry_SYSCALL_64_stage2, %rdi
- JMP_NOSPEC %rdi
-END(entry_SYSCALL_64_trampoline)
-
- .popsection
-
-ENTRY(entry_SYSCALL_64_stage2)
- UNWIND_HINT_EMPTY
- popq %rdi
- jmp entry_SYSCALL_64_after_hwframe
-END(entry_SYSCALL_64_stage2)
-
 ENTRY(entry_SYSCALL_64)
 UNWIND_HINT_EMPTY
 /*
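The deleted trampoline's central trick is worth spelling out: because the code runs from a remapped alias, it recovers the cpu_entry_area base by taking the %rip-relative (i.e. runtime) address of a symbol inside the area and subtracting that symbol's known offset within the area. Below is a minimal user-space C sketch of the same arithmetic; the names region, MARKER_OFFSET, and region_base are invented for this illustration and stand in for cpu_entry_area, the asm-offsets constant CPU_ENTRY_AREA_entry_trampoline, and the CPU_ENTRY_AREA macro.

#include <stdio.h>

/* Assumed offset of 'marker' inside the region; in the real code this is
 * the asm-offsets constant CPU_ENTRY_AREA_entry_trampoline. */
#define MARKER_OFFSET 0x40

static char region[0x100];                           /* stand-in for cpu_entry_area */
static char *const marker = &region[MARKER_OFFSET];  /* stand-in for _entry_trampoline */

/* base = runtime address of marker - marker's known offset in the region,
 * which is exactly what the assembly expression
 * "_entry_trampoline - CPU_ENTRY_AREA_entry_trampoline(%rip)" computed. */
static char *region_base(void)
{
        return marker - MARKER_OFFSET;
}

int main(void)
{
        printf("region=%p recovered base=%p\n",
               (void *)region, (void *)region_base());
        return 0;
}

The subtraction yields the correct base no matter which alias the code happens to execute from, which is why the trampoline could tolerate not being at a fixed address.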
@@ -212,21 +151,19 @@ ENTRY(entry_SYSCALL_64)
 */

 swapgs
- /*
- * This path is only taken when PAGE_TABLE_ISOLATION is disabled so it
- * is not required to switch CR3.
- */
- movq %rsp, PER_CPU_VAR(rsp_scratch)
+ /* tss.sp2 is scratch space. */
+ movq %rsp, PER_CPU_VAR(cpu_tss_rw + TSS_sp2)
+ SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
 movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp

 /* Construct struct pt_regs on stack */
- pushq $__USER_DS /* pt_regs->ss */
- pushq PER_CPU_VAR(rsp_scratch) /* pt_regs->sp */
- pushq %r11 /* pt_regs->flags */
- pushq $__USER_CS /* pt_regs->cs */
- pushq %rcx /* pt_regs->ip */
+ pushq $__USER_DS /* pt_regs->ss */
+ pushq PER_CPU_VAR(cpu_tss_rw + TSS_sp2) /* pt_regs->sp */
+ pushq %r11 /* pt_regs->flags */
+ pushq $__USER_CS /* pt_regs->cs */
+ pushq %rcx /* pt_regs->ip */
 GLOBAL(entry_SYSCALL_64_after_hwframe)
- pushq %rax /* pt_regs->orig_ax */
+ pushq %rax /* pt_regs->orig_ax */

 PUSH_AND_CLEAR_REGS rax=$-ENOSYS
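The five pushq instructions in the rewritten path build, by hand, the same five-word frame the CPU pushes on an interrupt, which is why they land in the tail of struct pt_regs. A simplified C illustration follows; the struct is a sketch that mirrors the field order of the real pt_regs tail, not the kernel's definition.

#include <stddef.h>
#include <stdint.h>

/* The stack grows down, so the value pushed first (__USER_DS) ends up at
 * the highest address. Reading memory upward from the final %rsp therefore
 * gives ip, cs, flags, sp, ss -- the hardware IRET frame layout. */
struct iret_frame_tail {
        uint64_t ip;    /* pushed last:  user RCX (SYSCALL return address) */
        uint64_t cs;    /* __USER_CS */
        uint64_t flags; /* user R11 (RFLAGS, saved by SYSCALL) */
        uint64_t sp;    /* user RSP, reloaded from the tss.sp2 scratch slot */
        uint64_t ss;    /* pushed first: __USER_DS */
};

/* Five 8-byte pushes: ss must sit exactly 32 bytes above ip. */
_Static_assert(offsetof(struct iret_frame_tail, ss) ==
               offsetof(struct iret_frame_tail, ip) + 4 * 8,
               "frame layout matches five 8-byte pushes");

This also shows why tss.sp2 works as scratch space here: Linux never runs anything at ring 2, so the hardware never reads that TSS slot, and the user RSP parked there survives until the pt_regs->sp push consumes it.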
@@ -900,6 +837,42 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
 */
 #define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8)

+/**
+ * idtentry - Generate an IDT entry stub
+ * @sym: Name of the generated entry point
+ * @do_sym: C function to be called
+ * @has_error_code: True if this IDT vector has an error code on the stack
+ * @paranoid: non-zero means that this vector may be invoked from
+ * kernel mode with user GSBASE and/or user CR3.
+ * 2 is special -- see below.
+ * @shift_ist: Set to an IST index if entries from kernel mode should
+ * decrement the IST stack so that nested entries get a
+ * fresh stack. (This is for #DB, which has a nasty habit
+ * of recursing.)
+ *
+ * idtentry generates an IDT stub that sets up a usable kernel context,
+ * creates struct pt_regs, and calls @do_sym. The stub has the following
+ * special behaviors:
+ *
+ * On an entry from user mode, the stub switches from the trampoline or
+ * IST stack to the normal thread stack. On an exit to user mode, the
+ * normal exit-to-usermode path is invoked.
+ *
+ * On an exit to kernel mode, if @paranoid == 0, we check for preemption,
+ * whereas we omit the preemption check if @paranoid != 0. This is purely
+ * because the implementation is simpler this way. The kernel only needs
+ * to check for asynchronous kernel preemption when IRQ handlers return.
+ *
+ * If @paranoid == 0, then the stub will handle IRET faults by pretending
+ * that the fault came from user mode. It will handle gs_change faults by
+ * pretending that the fault happened with kernel GSBASE. Since this handling
+ * is omitted for @paranoid != 0, the #GP, #SS, and #NP stubs must have
+ * @paranoid == 0. This special handling will do the wrong thing for
+ * espfix-induced #DF on IRET, so #DF must not use @paranoid == 0.
+ *
+ * @paranoid == 2 is special: the stub will never switch stacks. This is for
+ * #DF: if the thread stack is somehow unusable, we'll still get a useful OOPS.
+ */
 .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
 ENTRY(\sym)
 UNWIND_HINT_IRET_REGS offset=\has_error_code*8
|