internal.h

/*
 * Copyright (C) 1994 Linus Torvalds
 *
 * Pentium III FXSR, SSE support
 * General FPU state handling cleanups
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 * x86-64 work by Andi Kleen 2002
 */

#ifndef _ASM_X86_FPU_INTERNAL_H
#define _ASM_X86_FPU_INTERNAL_H

#include <linux/compat.h>
#include <linux/sched.h>
#include <linux/slab.h>

#include <asm/user.h>
#include <asm/fpu/api.h>
#include <asm/fpu/xstate.h>
#include <asm/cpufeature.h>
#include <asm/trace/fpu.h>

/*
 * High level FPU state handling functions:
 */
extern void fpu__initialize(struct fpu *fpu);
extern void fpu__prepare_read(struct fpu *fpu);
extern void fpu__prepare_write(struct fpu *fpu);
extern void fpu__save(struct fpu *fpu);
extern void fpu__restore(struct fpu *fpu);
extern int fpu__restore_sig(void __user *buf, int ia32_frame);
extern void fpu__drop(struct fpu *fpu);
extern int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu);
extern void fpu__clear(struct fpu *fpu);
extern int fpu__exception_code(struct fpu *fpu, int trap_nr);
extern int dump_fpu(struct pt_regs *ptregs, struct user_i387_struct *fpstate);

/*
 * Boot time FPU initialization functions:
 */
extern void fpu__init_cpu(void);
extern void fpu__init_system_xstate(void);
extern void fpu__init_cpu_xstate(void);
extern void fpu__init_system(struct cpuinfo_x86 *c);
extern void fpu__init_check_bugs(void);
extern void fpu__resume_cpu(void);
extern u64 fpu__get_supported_xfeatures_mask(void);

/*
 * Debugging facility:
 */
#ifdef CONFIG_X86_DEBUG_FPU
# define WARN_ON_FPU(x) WARN_ON_ONCE(x)
#else
# define WARN_ON_FPU(x) ({ (void)(x); 0; })
#endif

/*
 * FPU related CPU feature flag helper routines:
 */
static __always_inline __pure bool use_xsaveopt(void)
{
        return static_cpu_has(X86_FEATURE_XSAVEOPT);
}

static __always_inline __pure bool use_xsave(void)
{
        return static_cpu_has(X86_FEATURE_XSAVE);
}

static __always_inline __pure bool use_fxsr(void)
{
        return static_cpu_has(X86_FEATURE_FXSR);
}

/*
 * fpstate handling functions:
 */

extern union fpregs_state init_fpstate;

extern void fpstate_init(union fpregs_state *state);
#ifdef CONFIG_MATH_EMULATION
extern void fpstate_init_soft(struct swregs_state *soft);
#else
static inline void fpstate_init_soft(struct swregs_state *soft) {}
#endif

static inline void fpstate_init_xstate(struct xregs_state *xsave)
{
        /*
         * XRSTORS requires these bits set in xcomp_bv, or it will
         * trigger #GP:
         */
        xsave->header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | xfeatures_mask;
}

static inline void fpstate_init_fxstate(struct fxregs_state *fx)
{
        fx->cwd = 0x37f;
        fx->mxcsr = MXCSR_DEFAULT;
}
extern void fpstate_sanitize_xstate(struct fpu *fpu);

#define user_insn(insn, output, input...)                               \
({                                                                      \
        int err;                                                        \
        asm volatile(ASM_STAC "\n"                                      \
                     "1:" #insn "\n\t"                                  \
                     "2: " ASM_CLAC "\n"                                \
                     ".section .fixup,\"ax\"\n"                         \
                     "3: movl $-1,%[err]\n"                             \
                     " jmp 2b\n"                                        \
                     ".previous\n"                                      \
                     _ASM_EXTABLE(1b, 3b)                               \
                     : [err] "=r" (err), output                         \
                     : "0"(0), input);                                  \
        err;                                                            \
})
#define kernel_insn(insn, output, input...)                             \
        asm volatile("1:" #insn "\n\t"                                  \
                     "2:\n"                                             \
                     _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_fprestore)  \
                     : output : input)

static inline int copy_fregs_to_user(struct fregs_state __user *fx)
{
        return user_insn(fnsave %[fx]; fwait, [fx] "=m" (*fx), "m" (*fx));
}

static inline int copy_fxregs_to_user(struct fxregs_state __user *fx)
{
        if (IS_ENABLED(CONFIG_X86_32))
                return user_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx));
        else if (IS_ENABLED(CONFIG_AS_FXSAVEQ))
                return user_insn(fxsaveq %[fx], [fx] "=m" (*fx), "m" (*fx));

        /* See comment in copy_fxregs_to_kernel() below. */
        return user_insn(rex64/fxsave (%[fx]), "=m" (*fx), [fx] "R" (fx));
}

static inline void copy_kernel_to_fxregs(struct fxregs_state *fx)
{
        if (IS_ENABLED(CONFIG_X86_32)) {
                kernel_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
        } else {
                if (IS_ENABLED(CONFIG_AS_FXSAVEQ)) {
                        kernel_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
                } else {
                        /* See comment in copy_fxregs_to_kernel() below. */
                        kernel_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), "m" (*fx));
                }
        }
}

static inline int copy_user_to_fxregs(struct fxregs_state __user *fx)
{
        if (IS_ENABLED(CONFIG_X86_32))
                return user_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
        else if (IS_ENABLED(CONFIG_AS_FXSAVEQ))
                return user_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));

        /* See comment in copy_fxregs_to_kernel() below. */
        return user_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx),
                         "m" (*fx));
}

static inline void copy_kernel_to_fregs(struct fregs_state *fx)
{
        kernel_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
}

static inline int copy_user_to_fregs(struct fregs_state __user *fx)
{
        return user_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
}

static inline void copy_fxregs_to_kernel(struct fpu *fpu)
{
        if (IS_ENABLED(CONFIG_X86_32))
                asm volatile( "fxsave %[fx]" : [fx] "=m" (fpu->state.fxsave));
        else if (IS_ENABLED(CONFIG_AS_FXSAVEQ))
                asm volatile("fxsaveq %[fx]" : [fx] "=m" (fpu->state.fxsave));
        else {
                /* Using "rex64; fxsave %0" is broken because, if the memory
                 * operand uses any extended registers for addressing, a second
                 * REX prefix will be generated (to the assembler, rex64
                 * followed by semicolon is a separate instruction), and hence
                 * the 64-bitness is lost.
                 *
                 * Using "fxsaveq %0" would be the ideal choice, but is only
                 * supported starting with gas 2.16.
                 *
                 * Using, as a workaround, the properly prefixed form below
                 * isn't accepted by any binutils version so far released,
                 * complaining that the same type of prefix is used twice if
                 * an extended register is needed for addressing (fix submitted
                 * to mainline 2005-11-21).
                 *
                 *  asm volatile("rex64/fxsave %0" : "=m" (fpu->state.fxsave));
                 *
                 * This, however, we can work around by forcing the compiler to
                 * select an addressing mode that doesn't require extended
                 * registers.
                 */
                asm volatile( "rex64/fxsave (%[fx])"
                             : "=m" (fpu->state.fxsave)
                             : [fx] "R" (&fpu->state.fxsave));
        }
}
/* These macros all use (%edi)/(%rdi) as the single memory argument. */
#define XSAVE           ".byte " REX_PREFIX "0x0f,0xae,0x27"
#define XSAVEOPT        ".byte " REX_PREFIX "0x0f,0xae,0x37"
#define XSAVES          ".byte " REX_PREFIX "0x0f,0xc7,0x2f"
#define XRSTOR          ".byte " REX_PREFIX "0x0f,0xae,0x2f"
#define XRSTORS         ".byte " REX_PREFIX "0x0f,0xc7,0x1f"

#define XSTATE_OP(op, st, lmask, hmask, err)                            \
        asm volatile("1:" op "\n\t"                                     \
                     "xor %[err], %[err]\n"                             \
                     "2:\n\t"                                           \
                     ".pushsection .fixup,\"ax\"\n\t"                   \
                     "3: movl $-2,%[err]\n\t"                           \
                     "jmp 2b\n\t"                                       \
                     ".popsection\n\t"                                  \
                     _ASM_EXTABLE(1b, 3b)                               \
                     : [err] "=r" (err)                                 \
                     : "D" (st), "m" (*st), "a" (lmask), "d" (hmask)    \
                     : "memory")

/*
 * If XSAVES is enabled, it replaces XSAVEOPT because it supports a compact
 * format and supervisor states in addition to modified optimization in
 * XSAVEOPT.
 *
 * Otherwise, if XSAVEOPT is enabled, XSAVEOPT replaces XSAVE because XSAVEOPT
 * supports modified optimization which is not supported by XSAVE.
 *
 * We use XSAVE as a fallback.
 *
 * The 661 label is defined in the ALTERNATIVE* macros as the address of the
 * original instruction which gets replaced. We need to use it here as the
 * address of the instruction where we might get an exception at.
 */
#define XSTATE_XSAVE(st, lmask, hmask, err)                             \
        asm volatile(ALTERNATIVE_2(XSAVE,                               \
                                   XSAVEOPT, X86_FEATURE_XSAVEOPT,      \
                                   XSAVES,   X86_FEATURE_XSAVES)        \
                     "\n"                                               \
                     "xor %[err], %[err]\n"                             \
                     "3:\n"                                             \
                     ".pushsection .fixup,\"ax\"\n"                     \
                     "4: movl $-2, %[err]\n"                            \
                     "jmp 3b\n"                                         \
                     ".popsection\n"                                    \
                     _ASM_EXTABLE(661b, 4b)                             \
                     : [err] "=r" (err)                                 \
                     : "D" (st), "m" (*st), "a" (lmask), "d" (hmask)    \
                     : "memory")

/*
 * Use XRSTORS to restore context if it is enabled. XRSTORS supports compact
 * XSAVE area format.
 */
#define XSTATE_XRESTORE(st, lmask, hmask)                               \
        asm volatile(ALTERNATIVE(XRSTOR,                                \
                                 XRSTORS, X86_FEATURE_XSAVES)           \
                     "\n"                                               \
                     "3:\n"                                             \
                     _ASM_EXTABLE_HANDLE(661b, 3b, ex_handler_fprestore)\
                     :                                                  \
                     : "D" (st), "m" (*st), "a" (lmask), "d" (hmask)    \
                     : "memory")

/*
 * This function is called only during boot time when x86 caps are not set
 * up and alternative can not be used yet.
 */
static inline void copy_xregs_to_kernel_booting(struct xregs_state *xstate)
{
        u64 mask = -1;
        u32 lmask = mask;
        u32 hmask = mask >> 32;
        int err;

        WARN_ON(system_state != SYSTEM_BOOTING);

        if (static_cpu_has(X86_FEATURE_XSAVES))
                XSTATE_OP(XSAVES, xstate, lmask, hmask, err);
        else
                XSTATE_OP(XSAVE, xstate, lmask, hmask, err);

        /* We should never fault when copying to a kernel buffer: */
        WARN_ON_FPU(err);
}

/*
 * This function is called only during boot time when x86 caps are not set
 * up and alternative can not be used yet.
 */
static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate)
{
        u64 mask = -1;
        u32 lmask = mask;
        u32 hmask = mask >> 32;
        int err;

        WARN_ON(system_state != SYSTEM_BOOTING);

        if (static_cpu_has(X86_FEATURE_XSAVES))
                XSTATE_OP(XRSTORS, xstate, lmask, hmask, err);
        else
                XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);

        /*
         * We should never fault when copying from a kernel buffer, and the FPU
         * state we set at boot time should be valid.
         */
        WARN_ON_FPU(err);
}
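
/*
 * Illustrative sketch (hypothetical call site, not defined in this header):
 * boot-time xstate setup would typically capture and later reload the init
 * state with the two _booting() helpers above, roughly along the lines of:
 *
 *	copy_xregs_to_kernel_booting(&init_fpstate.xsave);
 *	...
 *	copy_kernel_to_xregs_booting(&init_fpstate.xsave);
 */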
/*
 * Save processor xstate to xsave area.
 */
static inline void copy_xregs_to_kernel(struct xregs_state *xstate)
{
        u64 mask = -1;
        u32 lmask = mask;
        u32 hmask = mask >> 32;
        int err;

        WARN_ON_FPU(!alternatives_patched);

        XSTATE_XSAVE(xstate, lmask, hmask, err);

        /* We should never fault when copying to a kernel buffer: */
        WARN_ON_FPU(err);
}

/*
 * Restore processor xstate from xsave area.
 */
static inline void copy_kernel_to_xregs(struct xregs_state *xstate, u64 mask)
{
        u32 lmask = mask;
        u32 hmask = mask >> 32;

        XSTATE_XRESTORE(xstate, lmask, hmask);
}

/*
 * Save xstate to user space xsave area.
 *
 * We don't use modified optimization because xrstor/xrstors might track
 * a different application.
 *
 * We don't use compacted format xsave area for
 * backward compatibility for old applications which don't understand
 * compacted format of xsave area.
 */
static inline int copy_xregs_to_user(struct xregs_state __user *buf)
{
        int err;

        /*
         * Clear the xsave header first, so that reserved fields are
         * initialized to zero.
         */
        err = __clear_user(&buf->header, sizeof(buf->header));
        if (unlikely(err))
                return -EFAULT;

        stac();
        XSTATE_OP(XSAVE, buf, -1, -1, err);
        clac();

        return err;
}

/*
 * Restore xstate from user space xsave area.
 */
static inline int copy_user_to_xregs(struct xregs_state __user *buf, u64 mask)
{
        struct xregs_state *xstate = ((__force struct xregs_state *)buf);
        u32 lmask = mask;
        u32 hmask = mask >> 32;
        int err;

        stac();
        XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
        clac();

        return err;
}

/*
 * These must be called with preempt disabled. Returns
 * 'true' if the FPU state is still intact and we can
 * keep registers active.
 *
 * The legacy FNSAVE instruction cleared all FPU state
 * unconditionally, so registers are essentially destroyed.
 * Modern FPU state can be kept in registers, if there are
 * no pending FP exceptions.
 */
static inline int copy_fpregs_to_fpstate(struct fpu *fpu)
{
        if (likely(use_xsave())) {
                copy_xregs_to_kernel(&fpu->state.xsave);
                return 1;
        }

        if (likely(use_fxsr())) {
                copy_fxregs_to_kernel(fpu);
                return 1;
        }

        /*
         * Legacy FPU register saving, FNSAVE always clears FPU registers,
         * so we have to mark them inactive:
         */
        asm volatile("fnsave %[fp]; fwait" : [fp] "=m" (fpu->state.fsave));

        return 0;
}
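
/*
 * Illustrative caller pattern (a sketch only; see switch_fpu_prepare() below
 * for a real user): a return value of 0 means FNSAVE destroyed the register
 * contents, so the in-register copy must be invalidated:
 *
 *	if (!copy_fpregs_to_fpstate(fpu))
 *		fpu->last_cpu = -1;
 */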
static inline void __copy_kernel_to_fpregs(union fpregs_state *fpstate, u64 mask)
{
        if (use_xsave()) {
                copy_kernel_to_xregs(&fpstate->xsave, mask);
        } else {
                if (use_fxsr())
                        copy_kernel_to_fxregs(&fpstate->fxsave);
                else
                        copy_kernel_to_fregs(&fpstate->fsave);
        }
}

static inline void copy_kernel_to_fpregs(union fpregs_state *fpstate)
{
        /*
         * AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception is
         * pending. Clear the x87 state here by setting it to fixed values.
         * "m" is a random variable that should be in L1.
         */
        if (unlikely(static_cpu_has_bug(X86_BUG_FXSAVE_LEAK))) {
                asm volatile(
                        "fnclex\n\t"
                        "emms\n\t"
                        "fildl %P[addr]"        /* set F?P to defined value */
                        : : [addr] "m" (fpstate));
        }

        __copy_kernel_to_fpregs(fpstate, -1);
}

extern int copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size);

/*
 * FPU context switch related helper methods:
 */

DECLARE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);

/*
 * The in-register FPU state for an FPU context on a CPU is assumed to be
 * valid if the fpu->last_cpu matches the CPU, and the fpu_fpregs_owner_ctx
 * matches the FPU.
 *
 * If the FPU register state is valid, the kernel can skip restoring the
 * FPU state from memory.
 *
 * Any code that clobbers the FPU registers or updates the in-memory
 * FPU state for a task MUST let the rest of the kernel know that the
 * FPU registers are no longer valid for this task.
 *
 * Either one of these invalidation functions is enough. Invalidate
 * a resource you control: CPU if using the CPU for something else
 * (with preemption disabled), FPU for the current task, or a task that
 * is prevented from running by the current task.
 */
static inline void __cpu_invalidate_fpregs_state(void)
{
        __this_cpu_write(fpu_fpregs_owner_ctx, NULL);
}

static inline void __fpu_invalidate_fpregs_state(struct fpu *fpu)
{
        fpu->last_cpu = -1;
}

static inline int fpregs_state_valid(struct fpu *fpu, unsigned int cpu)
{
        return fpu == this_cpu_read_stable(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu;
}
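
/*
 * Example (illustrative only, following the rules above; 'stopped_task' is a
 * hypothetical name): if the current task modifies the in-memory FPU state
 * of a task it has stopped, the in-register copy becomes stale and must be
 * invalidated:
 *
 *	__fpu_invalidate_fpregs_state(&stopped_task->thread.fpu);
 */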
/*
 * These generally need preemption protection to work,
 * do try to avoid using these on their own:
 */
static inline void fpregs_deactivate(struct fpu *fpu)
{
        this_cpu_write(fpu_fpregs_owner_ctx, NULL);
        trace_x86_fpu_regs_deactivated(fpu);
}

static inline void fpregs_activate(struct fpu *fpu)
{
        this_cpu_write(fpu_fpregs_owner_ctx, fpu);
        trace_x86_fpu_regs_activated(fpu);
}

/*
 * FPU state switching for scheduling.
 *
 * This is a two-stage process:
 *
 *  - switch_fpu_prepare() saves the old state.
 *    This is done within the context of the old process.
 *
 *  - switch_fpu_finish() restores the new state as
 *    necessary.
 */
static inline void
switch_fpu_prepare(struct fpu *old_fpu, int cpu)
{
        if (old_fpu->initialized) {
                if (!copy_fpregs_to_fpstate(old_fpu))
                        old_fpu->last_cpu = -1;
                else
                        old_fpu->last_cpu = cpu;

                /* But leave fpu_fpregs_owner_ctx! */
                trace_x86_fpu_regs_deactivated(old_fpu);
        } else
                old_fpu->last_cpu = -1;
}

/*
 * Misc helper functions:
 */

/*
 * Set up the userspace FPU context for the new task, if the task
 * has used the FPU.
 */
static inline void switch_fpu_finish(struct fpu *new_fpu, int cpu)
{
        bool preload = static_cpu_has(X86_FEATURE_FPU) &&
                       new_fpu->initialized;

        if (preload) {
                if (!fpregs_state_valid(new_fpu, cpu))
                        copy_kernel_to_fpregs(&new_fpu->state);
                fpregs_activate(new_fpu);
        }
}
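
/*
 * Illustrative sketch of how the two stages pair up (the real call sites
 * live in the context switch code, not in this header):
 *
 *	switch_fpu_prepare(&prev->thread.fpu, cpu);
 *	... switch stacks and the rest of the task state ...
 *	switch_fpu_finish(&next->thread.fpu, cpu);
 */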
/*
 * Needs to be preemption-safe.
 *
 * NOTE! user_fpu_begin() must be used only immediately before restoring
 * the save state. It does not do any saving/restoring on its own. In
 * lazy FPU mode, it is just an optimization to avoid a #NM exception,
 * the task can lose the FPU right after preempt_enable().
 */
static inline void user_fpu_begin(void)
{
        struct fpu *fpu = &current->thread.fpu;

        preempt_disable();
        fpregs_activate(fpu);
        preempt_enable();
}
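
/*
 * Illustrative sketch (hypothetical caller): per the NOTE above, the
 * activation is immediately followed by a restore of the saved state:
 *
 *	user_fpu_begin();
 *	copy_kernel_to_fpregs(&current->thread.fpu.state);
 */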
/*
 * MXCSR and XCR definitions:
 */

extern unsigned int mxcsr_feature_mask;

#define XCR_XFEATURE_ENABLED_MASK       0x00000000

static inline u64 xgetbv(u32 index)
{
        u32 eax, edx;

        asm volatile(".byte 0x0f,0x01,0xd0" /* xgetbv */
                     : "=a" (eax), "=d" (edx)
                     : "c" (index));
        return eax + ((u64)edx << 32);
}

static inline void xsetbv(u32 index, u64 value)
{
        u32 eax = value;
        u32 edx = value >> 32;

        asm volatile(".byte 0x0f,0x01,0xd1" /* xsetbv */
                     : : "a" (eax), "d" (edx), "c" (index));
}
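
/*
 * Example (illustrative only): reading the set of currently enabled
 * xfeatures out of XCR0 uses the index defined above:
 *
 *	u64 xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
 */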
#endif /* _ASM_X86_FPU_INTERNAL_H */