entry_32.S

  1. /* SPDX-License-Identifier: GPL-2.0 */
  2. /*
  3. * Copyright (C) 1991,1992 Linus Torvalds
  4. *
  5. * entry_32.S contains the system-call and low-level fault and trap handling routines.
  6. *
  7. * Stack layout while running C code:
  8. * ptrace needs to have all registers on the stack.
  9. * If the order here is changed, it needs to be
  10. * updated in fork.c:copy_process(), signal.c:do_signal(),
  11. * ptrace.c and ptrace.h
  12. *
  13. * 0(%esp) - %ebx
  14. * 4(%esp) - %ecx
  15. * 8(%esp) - %edx
  16. * C(%esp) - %esi
  17. * 10(%esp) - %edi
  18. * 14(%esp) - %ebp
  19. * 18(%esp) - %eax
  20. * 1C(%esp) - %ds
  21. * 20(%esp) - %es
  22. * 24(%esp) - %fs
  23. * 28(%esp) - %gs saved iff !CONFIG_X86_32_LAZY_GS
  24. * 2C(%esp) - orig_eax
  25. * 30(%esp) - %eip
  26. * 34(%esp) - %cs
  27. * 38(%esp) - %eflags
  28. * 3C(%esp) - %oldesp
  29. * 40(%esp) - %oldss
  30. */
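/*
 * These slots are the 32-bit struct pt_regs layout; the PT_* offsets
 * used throughout this file (PT_EFLAGS, PT_OLDESP, PT_GS, ...) are the
 * asm-offsets-generated names for the same layout.
 */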
  31. #include <linux/linkage.h>
  32. #include <linux/err.h>
  33. #include <asm/thread_info.h>
  34. #include <asm/irqflags.h>
  35. #include <asm/errno.h>
  36. #include <asm/segment.h>
  37. #include <asm/smp.h>
  38. #include <asm/percpu.h>
  39. #include <asm/processor-flags.h>
  40. #include <asm/irq_vectors.h>
  41. #include <asm/cpufeatures.h>
  42. #include <asm/alternative-asm.h>
  43. #include <asm/asm.h>
  44. #include <asm/smap.h>
  45. #include <asm/frame.h>
  46. #include <asm/nospec-branch.h>
  47. .section .entry.text, "ax"
  48. /*
  49. * We use macros for low-level operations which need to be overridden
  50. * for paravirtualization. The following will never clobber any registers:
  51. * INTERRUPT_RETURN (aka. "iret")
  52. * GET_CR0_INTO_EAX (aka. "movl %cr0, %eax")
  53. * ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit").
  54. *
  55. * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must
  56. * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY).
  57. * Allowing a register to be clobbered can shrink the paravirt replacement
  58. * enough to patch inline, increasing performance.
  59. */
  60. #ifdef CONFIG_PREEMPT
  61. # define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
  62. #else
  63. # define preempt_stop(clobbers)
  64. # define resume_kernel restore_all
  65. #endif
  66. .macro TRACE_IRQS_IRET
  67. #ifdef CONFIG_TRACE_IRQFLAGS
  68. testl $X86_EFLAGS_IF, PT_EFLAGS(%esp) # interrupts off?
  69. jz 1f
  70. TRACE_IRQS_ON
  71. 1:
  72. #endif
  73. .endm
  74. /*
  75. * User gs save/restore
  76. *
  77. * %gs is used for userland TLS and kernel only uses it for stack
  78. * canary which is required to be at %gs:20 by gcc. Read the comment
  79. * at the top of stackprotector.h for more info.
  80. *
  81. * Local labels 98 and 99 are used.
  82. */
  83. #ifdef CONFIG_X86_32_LAZY_GS
  84. /* unfortunately push/pop can't be no-op */
  85. .macro PUSH_GS
  86. pushl $0
  87. .endm
  88. .macro POP_GS pop=0
  89. addl $(4 + \pop), %esp
  90. .endm
  91. .macro POP_GS_EX
  92. .endm
  93. /* all the rest are no-op */
  94. .macro PTGS_TO_GS
  95. .endm
  96. .macro PTGS_TO_GS_EX
  97. .endm
  98. .macro GS_TO_REG reg
  99. .endm
  100. .macro REG_TO_PTGS reg
  101. .endm
  102. .macro SET_KERNEL_GS reg
  103. .endm
  104. #else /* CONFIG_X86_32_LAZY_GS */
  105. .macro PUSH_GS
  106. pushl %gs
  107. .endm
  108. .macro POP_GS pop=0
  109. 98: popl %gs
  110. .if \pop <> 0
  111. add $\pop, %esp
  112. .endif
  113. .endm
  114. .macro POP_GS_EX
  115. .pushsection .fixup, "ax"
  116. 99: movl $0, (%esp)
  117. jmp 98b
  118. .popsection
  119. _ASM_EXTABLE(98b, 99b)
  120. .endm
  121. .macro PTGS_TO_GS
  122. 98: mov PT_GS(%esp), %gs
  123. .endm
  124. .macro PTGS_TO_GS_EX
  125. .pushsection .fixup, "ax"
  126. 99: movl $0, PT_GS(%esp)
  127. jmp 98b
  128. .popsection
  129. _ASM_EXTABLE(98b, 99b)
  130. .endm
  131. .macro GS_TO_REG reg
  132. movl %gs, \reg
  133. .endm
  134. .macro REG_TO_PTGS reg
  135. movl \reg, PT_GS(%esp)
  136. .endm
  137. .macro SET_KERNEL_GS reg
  138. movl $(__KERNEL_STACK_CANARY), \reg
  139. movl \reg, %gs
  140. .endm
  141. #endif /* CONFIG_X86_32_LAZY_GS */
  142. .macro SAVE_ALL pt_regs_ax=%eax
  143. cld
  144. PUSH_GS
  145. pushl %fs
  146. pushl %es
  147. pushl %ds
  148. pushl \pt_regs_ax
  149. pushl %ebp
  150. pushl %edi
  151. pushl %esi
  152. pushl %edx
  153. pushl %ecx
  154. pushl %ebx
  155. movl $(__USER_DS), %edx
  156. movl %edx, %ds
  157. movl %edx, %es
  158. movl $(__KERNEL_PERCPU), %edx
  159. movl %edx, %fs
  160. SET_KERNEL_GS %edx
  161. .endm
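/*
 * The pushes in SAVE_ALL, sitting on top of orig_eax and the hardware
 * exception/iret frame, produce exactly the pt_regs layout documented
 * at the top of this file (%ebx ends up at 0(%esp), %gs at 28(%esp), etc.).
 */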
  162. /*
  163. * This is a sneaky trick to help the unwinder find pt_regs on the stack. The
  164. * frame pointer is replaced with an encoded pointer to pt_regs. The encoding
  165. * is just clearing the MSB, which makes it an invalid stack address and is also
  166. * a signal to the unwinder that it's a pt_regs pointer in disguise.
  167. *
  168. * NOTE: This macro must be used *after* SAVE_ALL because it corrupts the
  169. * original rbp.
  170. */
  171. .macro ENCODE_FRAME_POINTER
  172. #ifdef CONFIG_FRAME_POINTER
  173. mov %esp, %ebp
  174. andl $0x7fffffff, %ebp
  175. #endif
  176. .endm
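/*
 * Worked example (hypothetical address): if pt_regs lives at 0xf7a59fb4,
 * %ebp becomes 0x77a59fb4 after the andl above. With the usual 3G/1G
 * split that value cannot be a kernel stack address, so the unwinder
 * treats it as an encoded pt_regs pointer and sets the MSB again to
 * recover 0xf7a59fb4.
 */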
  177. .macro RESTORE_INT_REGS
  178. popl %ebx
  179. popl %ecx
  180. popl %edx
  181. popl %esi
  182. popl %edi
  183. popl %ebp
  184. popl %eax
  185. .endm
  186. .macro RESTORE_REGS pop=0
  187. RESTORE_INT_REGS
  188. 1: popl %ds
  189. 2: popl %es
  190. 3: popl %fs
  191. POP_GS \pop
  192. .pushsection .fixup, "ax"
  193. 4: movl $0, (%esp)
  194. jmp 1b
  195. 5: movl $0, (%esp)
  196. jmp 2b
  197. 6: movl $0, (%esp)
  198. jmp 3b
  199. .popsection
  200. _ASM_EXTABLE(1b, 4b)
  201. _ASM_EXTABLE(2b, 5b)
  202. _ASM_EXTABLE(3b, 6b)
  203. POP_GS_EX
  204. .endm
  205. /*
  206. * %eax: prev task
  207. * %edx: next task
  208. */
  209. ENTRY(__switch_to_asm)
  210. /*
  211. * Save callee-saved registers
  212. * This must match the order in struct inactive_task_frame
  213. */
  214. pushl %ebp
  215. pushl %ebx
  216. pushl %edi
  217. pushl %esi
  218. /* switch stack */
  219. movl %esp, TASK_threadsp(%eax)
  220. movl TASK_threadsp(%edx), %esp
  221. #ifdef CONFIG_CC_STACKPROTECTOR
  222. movl TASK_stack_canary(%edx), %ebx
  223. movl %ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset
  224. #endif
  225. #ifdef CONFIG_RETPOLINE
  226. /*
  227. * When switching from a shallower to a deeper call stack
  228. * the RSB may either underflow or use entries populated
  229. * with userspace addresses. On CPUs where those concerns
  230. * exist, overwrite the RSB with entries which capture
  231. * speculative execution to prevent attack.
  232. */
  233. FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
  234. #endif
  235. /* restore callee-saved registers */
  236. popl %esi
  237. popl %edi
  238. popl %ebx
  239. popl %ebp
  240. jmp __switch_to
  241. END(__switch_to_asm)
  242. /*
  243. * The unwinder expects the last frame on the stack to always be at the same
  244. * offset from the end of the page, which allows it to validate the stack.
  245. * Calling schedule_tail() directly would break that convention because it's an
  246. * asmlinkage function so its argument has to be pushed on the stack. This
  247. * wrapper creates a proper "end of stack" frame header before the call.
  248. */
  249. ENTRY(schedule_tail_wrapper)
  250. FRAME_BEGIN
  251. pushl %eax
  252. call schedule_tail
  253. popl %eax
  254. FRAME_END
  255. ret
  256. ENDPROC(schedule_tail_wrapper)
  257. /*
  258. * A newly forked process directly context switches into this address.
  259. *
  260. * eax: prev task we switched from
  261. * ebx: kernel thread func (NULL for user thread)
  262. * edi: kernel thread arg
  263. */
  264. ENTRY(ret_from_fork)
  265. call schedule_tail_wrapper
  266. testl %ebx, %ebx
  267. jnz 1f /* kernel threads are uncommon */
  268. 2:
  269. /* When we fork, we trace the syscall return in the child, too. */
  270. movl %esp, %eax
  271. call syscall_return_slowpath
  272. jmp restore_all
  273. /* kernel thread */
  274. 1: movl %edi, %eax
  275. CALL_NOSPEC %ebx
  276. /*
  277. * A kernel thread is allowed to return here after successfully
  278. * calling do_execve(). Exit to userspace to complete the execve()
  279. * syscall.
  280. */
  281. movl $0, PT_EAX(%esp)
  282. jmp 2b
  283. END(ret_from_fork)
  284. /*
  285. * Return to user mode is not as complex as all this looks,
  286. * but we want the default path for a system call return to
  287. * go as quickly as possible which is why some of this is
  288. * less clear than it otherwise should be.
  289. */
  290. # userspace resumption stub bypassing syscall exit tracing
  291. ALIGN
  292. ret_from_exception:
  293. preempt_stop(CLBR_ANY)
  294. ret_from_intr:
  295. #ifdef CONFIG_VM86
  296. movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS
  297. movb PT_CS(%esp), %al
  298. andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax
  299. #else
  300. /*
  301. * We can be coming here from child spawned by kernel_thread().
  302. */
  303. movl PT_CS(%esp), %eax
  304. andl $SEGMENT_RPL_MASK, %eax
  305. #endif
  306. cmpl $USER_RPL, %eax
  307. jb resume_kernel # not returning to v8086 or userspace
  308. ENTRY(resume_userspace)
  309. DISABLE_INTERRUPTS(CLBR_ANY)
  310. TRACE_IRQS_OFF
  311. movl %esp, %eax
  312. call prepare_exit_to_usermode
  313. jmp restore_all
  314. END(ret_from_exception)
  315. #ifdef CONFIG_PREEMPT
  316. ENTRY(resume_kernel)
  317. DISABLE_INTERRUPTS(CLBR_ANY)
  318. .Lneed_resched:
  319. cmpl $0, PER_CPU_VAR(__preempt_count)
  320. jnz restore_all
  321. testl $X86_EFLAGS_IF, PT_EFLAGS(%esp) # interrupts off (exception path) ?
  322. jz restore_all
  323. call preempt_schedule_irq
  324. jmp .Lneed_resched
  325. END(resume_kernel)
  326. #endif
  327. GLOBAL(__begin_SYSENTER_singlestep_region)
  328. /*
  329. * All code from here through __end_SYSENTER_singlestep_region is subject
  330. * to being single-stepped if a user program sets TF and executes SYSENTER.
  331. * There is absolutely nothing that we can do to prevent this from happening
  332. * (thanks Intel!). To keep our handling of this situation as simple as
  333. * possible, we handle TF just like AC and NT, except that our #DB handler
  334. * will ignore all of the single-step traps generated in this range.
  335. */
  336. #ifdef CONFIG_XEN
  337. /*
  338. * Xen doesn't set %esp to be precisely what the normal SYSENTER
  339. * entry point expects, so fix it up before using the normal path.
  340. */
  341. ENTRY(xen_sysenter_target)
  342. addl $5*4, %esp /* remove xen-provided frame */
  343. jmp .Lsysenter_past_esp
  344. #endif
  345. /*
  346. * 32-bit SYSENTER entry.
  347. *
  348. * 32-bit system calls through the vDSO's __kernel_vsyscall enter here
  349. * if X86_FEATURE_SEP is available. This is the preferred system call
  350. * entry on 32-bit systems.
  351. *
  352. * The SYSENTER instruction, in principle, should *only* occur in the
  353. * vDSO. In practice, a small number of Android devices were shipped
  354. * with a copy of Bionic that inlined a SYSENTER instruction. This
  355. * never happened in any of Google's Bionic versions -- it only happened
  356. * in a narrow range of Intel-provided versions.
  357. *
  358. * SYSENTER loads SS, ESP, CS, and EIP from previously programmed MSRs.
  359. * IF and VM in EFLAGS are cleared (IOW: interrupts are off).
  360. * SYSENTER does not save anything on the stack,
  361. * and does not save old EIP (!!!), ESP, or EFLAGS.
  362. *
  363. * To avoid losing track of EFLAGS.VM (and thus potentially corrupting
  364. * user and/or vm86 state), we explicitly disable the SYSENTER
  365. * instruction in vm86 mode by reprogramming the MSRs.
  366. *
  367. * Arguments:
  368. * eax system call number
  369. * ebx arg1
  370. * ecx arg2
  371. * edx arg3
  372. * esi arg4
  373. * edi arg5
  374. * ebp user stack
  375. * 0(%ebp) arg6
  376. */
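/*
 * For context: the vDSO's __kernel_vsyscall (roughly) saves %ecx, %edx
 * and %ebp on the user stack and points %ebp at that saved frame before
 * executing SYSENTER, which is why %ebp doubles as the user stack
 * pointer here and why arg6 (the saved user %ebp) is found at 0(%ebp).
 */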
  377. ENTRY(entry_SYSENTER_32)
  378. movl TSS_sysenter_sp0(%esp), %esp
  379. .Lsysenter_past_esp:
  380. pushl $__USER_DS /* pt_regs->ss */
  381. pushl %ebp /* pt_regs->sp (stashed in bp) */
  382. pushfl /* pt_regs->flags (except IF = 0) */
  383. orl $X86_EFLAGS_IF, (%esp) /* Fix IF */
  384. pushl $__USER_CS /* pt_regs->cs */
  385. pushl $0 /* pt_regs->ip = 0 (placeholder) */
  386. pushl %eax /* pt_regs->orig_ax */
  387. SAVE_ALL pt_regs_ax=$-ENOSYS /* save rest */
  388. /*
  389. * SYSENTER doesn't filter flags, so we need to clear NT, AC
  390. * and TF ourselves. To save a few cycles, we can check whether
  391. * any of them was set instead of doing an unconditional popfl.
  392. * This needs to happen before enabling interrupts so that
  393. * we don't get preempted with NT set.
  394. *
  395. * If TF is set, we will single-step all the way to here -- do_debug
  396. * will ignore all the traps. (Yes, this is slow, but so is
  397. * single-stepping in general. This allows us to avoid having
  398. * more complicated code to handle the case where a user program
  399. * forces us to single-step through the SYSENTER entry code.)
  400. *
  401. * NB.: .Lsysenter_fix_flags is a label with the code under it moved
  402. * out-of-line as an optimization: NT is unlikely to be set in the
  403. * majority of the cases and instead of polluting the I$ unnecessarily,
  404. * we're keeping that code behind a branch which will predict as
  405. * not-taken and therefore its instructions won't be fetched.
  406. */
  407. testl $X86_EFLAGS_NT|X86_EFLAGS_AC|X86_EFLAGS_TF, PT_EFLAGS(%esp)
  408. jnz .Lsysenter_fix_flags
  409. .Lsysenter_flags_fixed:
  410. /*
  411. * User mode is traced as though IRQs are on, and SYSENTER
  412. * turned them off.
  413. */
  414. TRACE_IRQS_OFF
  415. movl %esp, %eax
  416. call do_fast_syscall_32
  417. /* XEN PV guests always use IRET path */
  418. ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
  419. "jmp .Lsyscall_32_done", X86_FEATURE_XENPV
  420. /* Opportunistic SYSEXIT */
  421. TRACE_IRQS_ON /* User mode traces as IRQs on. */
  422. movl PT_EIP(%esp), %edx /* pt_regs->ip */
  423. movl PT_OLDESP(%esp), %ecx /* pt_regs->sp */
  424. 1: mov PT_FS(%esp), %fs
  425. PTGS_TO_GS
  426. popl %ebx /* pt_regs->bx */
  427. addl $2*4, %esp /* skip pt_regs->cx and pt_regs->dx */
  428. popl %esi /* pt_regs->si */
  429. popl %edi /* pt_regs->di */
  430. popl %ebp /* pt_regs->bp */
  431. popl %eax /* pt_regs->ax */
  432. /*
  433. * Restore all flags except IF. (We restore IF separately because
  434. * STI gives a one-instruction window in which we won't be interrupted,
  435. * whereas POPF does not.)
  436. */
  437. addl $PT_EFLAGS-PT_DS, %esp /* point esp at pt_regs->flags */
  438. btr $X86_EFLAGS_IF_BIT, (%esp)
  439. popfl
  440. /*
  441. * Return to the vDSO, which will pop ecx and edx.
  442. * Don't bother with DS and ES (they already contain __USER_DS).
  443. */
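/*
 * (SYSEXIT resumes userspace with EIP taken from %edx and ESP from
 * %ecx, loaded from pt_regs above; CS and SS are derived from the
 * SYSENTER_CS MSR.)
 */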
  444. sti
  445. sysexit
  446. .pushsection .fixup, "ax"
  447. 2: movl $0, PT_FS(%esp)
  448. jmp 1b
  449. .popsection
  450. _ASM_EXTABLE(1b, 2b)
  451. PTGS_TO_GS_EX
  452. .Lsysenter_fix_flags:
  453. pushl $X86_EFLAGS_FIXED
  454. popfl
  455. jmp .Lsysenter_flags_fixed
  456. GLOBAL(__end_SYSENTER_singlestep_region)
  457. ENDPROC(entry_SYSENTER_32)
  458. /*
  459. * 32-bit legacy system call entry.
  460. *
  461. * 32-bit x86 Linux system calls traditionally used the INT $0x80
  462. * instruction. INT $0x80 lands here.
  463. *
  464. * This entry point can be used by any 32-bit program to perform system calls.
  465. * Instances of INT $0x80 can be found inline in various programs and
  466. * libraries. It is also used by the vDSO's __kernel_vsyscall
  467. * fallback for hardware that doesn't support a faster entry method.
  468. * Restarted 32-bit system calls also fall back to INT $0x80
  469. * regardless of what instruction was originally used to do the system
  470. * call. (64-bit programs can use INT $0x80 as well, but they can
  471. * only run on 64-bit kernels and therefore land in
  472. * entry_INT80_compat.)
  473. *
  474. * This is considered a slow path. It is not used by most libc
  475. * implementations on modern hardware except during process startup.
  476. *
  477. * Arguments:
  478. * eax system call number
  479. * ebx arg1
  480. * ecx arg2
  481. * edx arg3
  482. * esi arg4
  483. * edi arg5
  484. * ebp arg6
  485. */
  486. ENTRY(entry_INT80_32)
  487. ASM_CLAC
  488. pushl %eax /* pt_regs->orig_ax */
  489. SAVE_ALL pt_regs_ax=$-ENOSYS /* save rest */
  490. /*
  491. * User mode is traced as though IRQs are on, and the interrupt gate
  492. * turned them off.
  493. */
  494. TRACE_IRQS_OFF
  495. movl %esp, %eax
  496. call do_int80_syscall_32
  497. .Lsyscall_32_done:
  498. restore_all:
  499. TRACE_IRQS_IRET
  500. .Lrestore_all_notrace:
  501. #ifdef CONFIG_X86_ESPFIX32
  502. ALTERNATIVE "jmp .Lrestore_nocheck", "", X86_BUG_ESPFIX
  503. movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS
  504. /*
  505. * Warning: PT_OLDSS(%esp) contains the wrong/random values if we
  506. * are returning to the kernel.
  507. * See comments in process.c:copy_thread() for details.
  508. */
  509. movb PT_OLDSS(%esp), %ah
  510. movb PT_CS(%esp), %al
  511. andl $(X86_EFLAGS_VM | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
  512. cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax
  513. je .Lldt_ss # returning to user-space with LDT SS
  514. #endif
  515. .Lrestore_nocheck:
  516. RESTORE_REGS 4 # skip orig_eax/error_code
  517. .Lirq_return:
  518. /*
  519. * ARCH_HAS_MEMBARRIER_SYNC_CORE relies on IRET core serialization
  520. * when returning from an IPI handler and when returning from the
  521. * scheduler to user-space.
  522. */
  523. INTERRUPT_RETURN
  524. .section .fixup, "ax"
  525. ENTRY(iret_exc )
  526. pushl $0 # no error code
  527. pushl $do_iret_error
  528. jmp common_exception
  529. .previous
  530. _ASM_EXTABLE(.Lirq_return, iret_exc)
  531. #ifdef CONFIG_X86_ESPFIX32
  532. .Lldt_ss:
  533. /*
  534. * Setup and switch to ESPFIX stack
  535. *
  536. * We're returning to userspace with a 16 bit stack. The CPU will not
  537. * restore the high word of ESP for us on executing iret... This is an
  538. * "official" bug of all the x86-compatible CPUs, which we can work
  539. * around to make dosemu and wine happy. We do this by preloading the
  540. * high word of ESP with the high word of the userspace ESP while
  541. * compensating for the offset by changing to the ESPFIX segment with
  542. * a base address that makes up for the difference.
  543. */
  544. #define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + (GDT_ENTRY_ESPFIX_SS * 8)
  545. mov %esp, %edx /* load kernel esp */
  546. mov PT_OLDESP(%esp), %eax /* load userspace esp */
  547. mov %dx, %ax /* eax: new kernel esp */
  548. sub %eax, %edx /* offset (low word is 0) */
  549. shr $16, %edx
  550. mov %dl, GDT_ESPFIX_SS + 4 /* bits 16..23 */
  551. mov %dh, GDT_ESPFIX_SS + 7 /* bits 24..31 */
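/*
 * Worked example (made-up values): kernel %esp = 0xf60bbf54, user ESP =
 * 0x0badc0de.  Then %eax = 0x0badbf54 (user high word, kernel low word)
 * and %edx >> 16 = 0xea5e, so the ESPFIX segment base ends up as
 * 0xea5e0000 and base + %eax = 0xf60bbf54, the real kernel esp.
 */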
  552. pushl $__ESPFIX_SS
  553. pushl %eax /* new kernel esp */
  554. /*
  555. * Disable interrupts, but do not irqtrace this section: we
  556. * will soon execute iret and the tracer was already set to
  557. * the irqstate after the IRET:
  558. */
  559. DISABLE_INTERRUPTS(CLBR_ANY)
  560. lss (%esp), %esp /* switch to espfix segment */
  561. jmp .Lrestore_nocheck
  562. #endif
  563. ENDPROC(entry_INT80_32)
  564. .macro FIXUP_ESPFIX_STACK
  565. /*
  566. * Switch back from the ESPFIX stack to the normal zero-based stack
  567. *
  568. * We can't call C functions using the ESPFIX stack. This code reads
  569. * the high word of the segment base from the GDT and switches to the
  570. * normal stack and adjusts ESP with the matching offset.
  571. */
  572. #ifdef CONFIG_X86_ESPFIX32
  573. /* fixup the stack */
  574. mov GDT_ESPFIX_SS + 4, %al /* bits 16..23 */
  575. mov GDT_ESPFIX_SS + 7, %ah /* bits 24..31 */
  576. shl $16, %eax
  577. addl %esp, %eax /* the adjusted stack pointer */
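/*
 * Continuing the hypothetical ESPFIX example above: the two GDT bytes
 * give back 0xea5e, the shl turns that into the base 0xea5e0000, and
 * adding the current (segment-relative) %esp yields the real zero-based
 * kernel stack address to lss into below.
 */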
  578. pushl $__KERNEL_DS
  579. pushl %eax
  580. lss (%esp), %esp /* switch to the normal stack segment */
  581. #endif
  582. .endm
  583. .macro UNWIND_ESPFIX_STACK
  584. #ifdef CONFIG_X86_ESPFIX32
  585. movl %ss, %eax
  586. /* see if on espfix stack */
  587. cmpw $__ESPFIX_SS, %ax
  588. jne 27f
  589. movl $__KERNEL_DS, %eax
  590. movl %eax, %ds
  591. movl %eax, %es
  592. /* switch to normal stack */
  593. FIXUP_ESPFIX_STACK
  594. 27:
  595. #endif
  596. .endm
  597. /*
  598. * Build the entry stubs with some assembler magic.
  599. * We pack 1 stub into every 8-byte block.
  600. */
  601. .align 8
  602. ENTRY(irq_entries_start)
  603. vector=FIRST_EXTERNAL_VECTOR
  604. .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
  605. pushl $(~vector+0x80) /* Note: always in signed byte range */
  606. vector=vector+1
  607. jmp common_interrupt
  608. .align 8
  609. .endr
  610. END(irq_entries_start)
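/*
 * Encoding example (with FIRST_EXTERNAL_VECTOR == 0x20): the stub pushes
 * ~0x20 + 0x80 = 0x5f, which fits in a signed byte so the short push
 * imm8 form keeps each stub inside its 8-byte slot.  common_interrupt
 * below subtracts 0x80 again, leaving ~0x20 in orig_eax, and the vector
 * is recovered in C with a bitwise NOT.
 */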
  611. /*
  612. * the CPU automatically disables interrupts when executing an IRQ vector,
  613. * so IRQ-flags tracing has to follow that:
  614. */
  615. .p2align CONFIG_X86_L1_CACHE_SHIFT
  616. common_interrupt:
  617. ASM_CLAC
  618. addl $-0x80, (%esp) /* Adjust vector into the [-256, -1] range */
  619. SAVE_ALL
  620. ENCODE_FRAME_POINTER
  621. TRACE_IRQS_OFF
  622. movl %esp, %eax
  623. call do_IRQ
  624. jmp ret_from_intr
  625. ENDPROC(common_interrupt)
  626. #define BUILD_INTERRUPT3(name, nr, fn) \
  627. ENTRY(name) \
  628. ASM_CLAC; \
  629. pushl $~(nr); \
  630. SAVE_ALL; \
  631. ENCODE_FRAME_POINTER; \
  632. TRACE_IRQS_OFF \
  633. movl %esp, %eax; \
  634. call fn; \
  635. jmp ret_from_intr; \
  636. ENDPROC(name)
  637. #define BUILD_INTERRUPT(name, nr) \
  638. BUILD_INTERRUPT3(name, nr, smp_##name); \
  639. /* The include is where all of the SMP etc. interrupts come from */
  640. #include <asm/entry_arch.h>
  641. ENTRY(coprocessor_error)
  642. ASM_CLAC
  643. pushl $0
  644. pushl $do_coprocessor_error
  645. jmp common_exception
  646. END(coprocessor_error)
  647. ENTRY(simd_coprocessor_error)
  648. ASM_CLAC
  649. pushl $0
  650. #ifdef CONFIG_X86_INVD_BUG
  651. /* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */
  652. ALTERNATIVE "pushl $do_general_protection", \
  653. "pushl $do_simd_coprocessor_error", \
  654. X86_FEATURE_XMM
  655. #else
  656. pushl $do_simd_coprocessor_error
  657. #endif
  658. jmp common_exception
  659. END(simd_coprocessor_error)
  660. ENTRY(device_not_available)
  661. ASM_CLAC
  662. pushl $-1 # mark this as an int
  663. pushl $do_device_not_available
  664. jmp common_exception
  665. END(device_not_available)
  666. #ifdef CONFIG_PARAVIRT
  667. ENTRY(native_iret)
  668. iret
  669. _ASM_EXTABLE(native_iret, iret_exc)
  670. END(native_iret)
  671. #endif
  672. ENTRY(overflow)
  673. ASM_CLAC
  674. pushl $0
  675. pushl $do_overflow
  676. jmp common_exception
  677. END(overflow)
  678. ENTRY(bounds)
  679. ASM_CLAC
  680. pushl $0
  681. pushl $do_bounds
  682. jmp common_exception
  683. END(bounds)
  684. ENTRY(invalid_op)
  685. ASM_CLAC
  686. pushl $0
  687. pushl $do_invalid_op
  688. jmp common_exception
  689. END(invalid_op)
  690. ENTRY(coprocessor_segment_overrun)
  691. ASM_CLAC
  692. pushl $0
  693. pushl $do_coprocessor_segment_overrun
  694. jmp common_exception
  695. END(coprocessor_segment_overrun)
  696. ENTRY(invalid_TSS)
  697. ASM_CLAC
  698. pushl $do_invalid_TSS
  699. jmp common_exception
  700. END(invalid_TSS)
  701. ENTRY(segment_not_present)
  702. ASM_CLAC
  703. pushl $do_segment_not_present
  704. jmp common_exception
  705. END(segment_not_present)
  706. ENTRY(stack_segment)
  707. ASM_CLAC
  708. pushl $do_stack_segment
  709. jmp common_exception
  710. END(stack_segment)
  711. ENTRY(alignment_check)
  712. ASM_CLAC
  713. pushl $do_alignment_check
  714. jmp common_exception
  715. END(alignment_check)
  716. ENTRY(divide_error)
  717. ASM_CLAC
  718. pushl $0 # no error code
  719. pushl $do_divide_error
  720. jmp common_exception
  721. END(divide_error)
  722. #ifdef CONFIG_X86_MCE
  723. ENTRY(machine_check)
  724. ASM_CLAC
  725. pushl $0
  726. pushl machine_check_vector
  727. jmp common_exception
  728. END(machine_check)
  729. #endif
  730. ENTRY(spurious_interrupt_bug)
  731. ASM_CLAC
  732. pushl $0
  733. pushl $do_spurious_interrupt_bug
  734. jmp common_exception
  735. END(spurious_interrupt_bug)
  736. #ifdef CONFIG_XEN
  737. ENTRY(xen_hypervisor_callback)
  738. pushl $-1 /* orig_ax = -1 => not a system call */
  739. SAVE_ALL
  740. ENCODE_FRAME_POINTER
  741. TRACE_IRQS_OFF
  742. /*
  743. * Check to see if we got the event in the critical
  744. * region in xen_iret_direct, after we've reenabled
  745. * events and checked for pending events. This simulates
  746. * iret instruction's behaviour where it delivers a
  747. * pending interrupt when enabling interrupts:
  748. */
  749. movl PT_EIP(%esp), %eax
  750. cmpl $xen_iret_start_crit, %eax
  751. jb 1f
  752. cmpl $xen_iret_end_crit, %eax
  753. jae 1f
  754. jmp xen_iret_crit_fixup
  755. ENTRY(xen_do_upcall)
  756. 1: mov %esp, %eax
  757. call xen_evtchn_do_upcall
  758. #ifndef CONFIG_PREEMPT
  759. call xen_maybe_preempt_hcall
  760. #endif
  761. jmp ret_from_intr
  762. ENDPROC(xen_hypervisor_callback)
  763. /*
  764. * Hypervisor uses this for application faults while it executes.
  765. * We get here for two reasons:
  766. * 1. Fault while reloading DS, ES, FS or GS
  767. * 2. Fault while executing IRET
  768. * Category 1 we fix up by reattempting the load, and zeroing the segment
  769. * register if the load fails.
  770. * Category 2 we fix up by jumping to do_iret_error. We cannot use the
  771. * normal Linux return path in this case because if we use the IRET hypercall
  772. * to pop the stack frame we end up in an infinite loop of failsafe callbacks.
  773. * We distinguish between categories by maintaining a status value in EAX.
  774. */
  775. ENTRY(xen_failsafe_callback)
  776. pushl %eax
  777. movl $1, %eax
  778. 1: mov 4(%esp), %ds
  779. 2: mov 8(%esp), %es
  780. 3: mov 12(%esp), %fs
  781. 4: mov 16(%esp), %gs
  782. /* EAX == 0 => Category 1 (Bad segment)
  783. EAX != 0 => Category 2 (Bad IRET) */
  784. testl %eax, %eax
  785. popl %eax
  786. lea 16(%esp), %esp
  787. jz 5f
  788. jmp iret_exc
  789. 5: pushl $-1 /* orig_ax = -1 => not a system call */
  790. SAVE_ALL
  791. ENCODE_FRAME_POINTER
  792. jmp ret_from_exception
  793. .section .fixup, "ax"
  794. 6: xorl %eax, %eax
  795. movl %eax, 4(%esp)
  796. jmp 1b
  797. 7: xorl %eax, %eax
  798. movl %eax, 8(%esp)
  799. jmp 2b
  800. 8: xorl %eax, %eax
  801. movl %eax, 12(%esp)
  802. jmp 3b
  803. 9: xorl %eax, %eax
  804. movl %eax, 16(%esp)
  805. jmp 4b
  806. .previous
  807. _ASM_EXTABLE(1b, 6b)
  808. _ASM_EXTABLE(2b, 7b)
  809. _ASM_EXTABLE(3b, 8b)
  810. _ASM_EXTABLE(4b, 9b)
  811. ENDPROC(xen_failsafe_callback)
  812. BUILD_INTERRUPT3(xen_hvm_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
  813. xen_evtchn_do_upcall)
  814. #endif /* CONFIG_XEN */
  815. #if IS_ENABLED(CONFIG_HYPERV)
  816. BUILD_INTERRUPT3(hyperv_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
  817. hyperv_vector_handler)
  818. BUILD_INTERRUPT3(hyperv_reenlightenment_vector, HYPERV_REENLIGHTENMENT_VECTOR,
  819. hyperv_reenlightenment_intr)
  820. #endif /* CONFIG_HYPERV */
  821. ENTRY(page_fault)
  822. ASM_CLAC
  823. pushl $do_page_fault
  824. ALIGN
  825. jmp common_exception
  826. END(page_fault)
  827. common_exception:
  828. /* the function address is in %gs's slot on the stack */
  829. pushl %fs
  830. pushl %es
  831. pushl %ds
  832. pushl %eax
  833. pushl %ebp
  834. pushl %edi
  835. pushl %esi
  836. pushl %edx
  837. pushl %ecx
  838. pushl %ebx
  839. ENCODE_FRAME_POINTER
  840. cld
  841. movl $(__KERNEL_PERCPU), %ecx
  842. movl %ecx, %fs
  843. UNWIND_ESPFIX_STACK
  844. GS_TO_REG %ecx
  845. movl PT_GS(%esp), %edi # get the function address
  846. movl PT_ORIG_EAX(%esp), %edx # get the error code
  847. movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart
  848. REG_TO_PTGS %ecx
  849. SET_KERNEL_GS %ecx
  850. movl $(__USER_DS), %ecx
  851. movl %ecx, %ds
  852. movl %ecx, %es
  853. TRACE_IRQS_OFF
  854. movl %esp, %eax # pt_regs pointer
  855. CALL_NOSPEC %edi
  856. jmp ret_from_exception
  857. END(common_exception)
  858. ENTRY(debug)
  859. /*
  860. * #DB can happen at the first instruction of
  861. * entry_SYSENTER_32 or in Xen's SYSENTER prologue. If this
  862. * happens, then we will be running on a very small stack. We
  863. * need to detect this condition and switch to the thread
  864. * stack before calling any C code at all.
  865. *
  866. * If you edit this code, keep in mind that NMIs can happen in here.
  867. */
  868. ASM_CLAC
  869. pushl $-1 # mark this as an int
  870. SAVE_ALL
  871. ENCODE_FRAME_POINTER
  872. xorl %edx, %edx # error code 0
  873. movl %esp, %eax # pt_regs pointer
  874. /* Are we currently on the SYSENTER stack? */
  875. movl PER_CPU_VAR(cpu_entry_area), %ecx
  876. addl $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
  877. subl %eax, %ecx /* ecx = (end of entry_stack) - esp */
  878. cmpl $SIZEOF_entry_stack, %ecx
  879. jb .Ldebug_from_sysenter_stack
  880. TRACE_IRQS_OFF
  881. call do_debug
  882. jmp ret_from_exception
  883. .Ldebug_from_sysenter_stack:
  884. /* We're on the SYSENTER stack. Switch off. */
  885. movl %esp, %ebx
  886. movl PER_CPU_VAR(cpu_current_top_of_stack), %esp
  887. TRACE_IRQS_OFF
  888. call do_debug
  889. movl %ebx, %esp
  890. jmp ret_from_exception
  891. END(debug)
  892. /*
  893. * NMI is doubly nasty. It can happen on the first instruction of
  894. * entry_SYSENTER_32 (just like #DB), but it can also interrupt the beginning
  895. * of the #DB handler even if that #DB in turn hit before entry_SYSENTER_32
  896. * switched stacks. We handle both conditions by simply checking whether we
  897. * interrupted kernel code running on the SYSENTER stack.
  898. */
  899. ENTRY(nmi)
  900. ASM_CLAC
  901. #ifdef CONFIG_X86_ESPFIX32
  902. pushl %eax
  903. movl %ss, %eax
  904. cmpw $__ESPFIX_SS, %ax
  905. popl %eax
  906. je .Lnmi_espfix_stack
  907. #endif
  908. pushl %eax # pt_regs->orig_ax
  909. SAVE_ALL
  910. ENCODE_FRAME_POINTER
  911. xorl %edx, %edx # zero error code
  912. movl %esp, %eax # pt_regs pointer
  913. /* Are we currently on the SYSENTER stack? */
  914. movl PER_CPU_VAR(cpu_entry_area), %ecx
  915. addl $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
  916. subl %eax, %ecx /* ecx = (end of entry_stack) - esp */
  917. cmpl $SIZEOF_entry_stack, %ecx
  918. jb .Lnmi_from_sysenter_stack
  919. /* Not on SYSENTER stack. */
  920. call do_nmi
  921. jmp .Lrestore_all_notrace
  922. .Lnmi_from_sysenter_stack:
  923. /*
  924. * We're on the SYSENTER stack. Switch off. No one (not even debug)
  925. * is using the thread stack right now, so it's safe for us to use it.
  926. */
  927. movl %esp, %ebx
  928. movl PER_CPU_VAR(cpu_current_top_of_stack), %esp
  929. call do_nmi
  930. movl %ebx, %esp
  931. jmp .Lrestore_all_notrace
  932. #ifdef CONFIG_X86_ESPFIX32
  933. .Lnmi_espfix_stack:
  934. /*
  935. * create the pointer to lss back
  936. */
  937. pushl %ss
  938. pushl %esp
  939. addl $4, (%esp)
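/*
 * Net effect of the two pushes and the addl above: the top of the stack
 * now holds an { offset, %ss } far pointer whose offset is %esp as it
 * was before these two pushes.  The "lss 12+4(%esp), %esp" below uses
 * it to switch back to the ESPFIX stack after do_nmi returns.
 */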
  940. /* copy the iret frame of 12 bytes */
  941. .rept 3
  942. pushl 16(%esp)
  943. .endr
  944. pushl %eax
  945. SAVE_ALL
  946. ENCODE_FRAME_POINTER
  947. FIXUP_ESPFIX_STACK # %eax == %esp
  948. xorl %edx, %edx # zero error code
  949. call do_nmi
  950. RESTORE_REGS
  951. lss 12+4(%esp), %esp # back to espfix stack
  952. jmp .Lirq_return
  953. #endif
  954. END(nmi)
  955. ENTRY(int3)
  956. ASM_CLAC
  957. pushl $-1 # mark this as an int
  958. SAVE_ALL
  959. ENCODE_FRAME_POINTER
  960. TRACE_IRQS_OFF
  961. xorl %edx, %edx # zero error code
  962. movl %esp, %eax # pt_regs pointer
  963. call do_int3
  964. jmp ret_from_exception
  965. END(int3)
  966. ENTRY(general_protection)
  967. pushl $do_general_protection
  968. jmp common_exception
  969. END(general_protection)
  970. #ifdef CONFIG_KVM_GUEST
  971. ENTRY(async_page_fault)
  972. ASM_CLAC
  973. pushl $do_async_page_fault
  974. jmp common_exception
  975. END(async_page_fault)
  976. #endif
  977. ENTRY(rewind_stack_do_exit)
  978. /* Prevent any naive code from trying to unwind to our caller. */
  979. xorl %ebp, %ebp
  980. movl PER_CPU_VAR(cpu_current_top_of_stack), %esi
  981. leal -TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE(%esi), %esp
  982. call do_exit
  983. 1: jmp 1b
  984. END(rewind_stack_do_exit)