internal.h

/*
 * Copyright (C) 1994 Linus Torvalds
 *
 * Pentium III FXSR, SSE support
 * General FPU state handling cleanups
 *      Gareth Hughes <gareth@valinux.com>, May 2000
 * x86-64 work by Andi Kleen 2002
 */
#ifndef _ASM_X86_FPU_INTERNAL_H
#define _ASM_X86_FPU_INTERNAL_H

#include <linux/compat.h>
#include <linux/sched.h>
#include <linux/slab.h>

#include <asm/user.h>
#include <asm/fpu/api.h>
#include <asm/fpu/xstate.h>
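
/*
 * Default MXCSR value at reset: all SIMD FP exceptions masked,
 * round-to-nearest, no status flags set.
 */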
#define MXCSR_DEFAULT 0x1f80

extern unsigned int mxcsr_feature_mask;
extern union thread_xstate init_fpstate;

extern void fpu__init_cpu(void);
extern void fpu__init_system_xstate(void);
extern void fpu__init_cpu_xstate(void);
extern void fpu__init_system(struct cpuinfo_x86 *c);

extern void fpu__activate_curr(struct fpu *fpu);

extern void fpstate_init(union thread_xstate *state);
#ifdef CONFIG_MATH_EMULATION
extern void fpstate_init_soft(struct i387_soft_struct *soft);
#else
static inline void fpstate_init_soft(struct i387_soft_struct *soft) {}
#endif
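
/*
 * Initialize the legacy FXSAVE area to its architectural defaults:
 * x87 control word 0x37f (all exceptions masked, extended precision,
 * round-to-nearest) and the default MXCSR.
 */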
static inline void fpstate_init_fxstate(struct i387_fxsave_struct *fx)
{
        fx->cwd = 0x37f;
        fx->mxcsr = MXCSR_DEFAULT;
}

extern int dump_fpu(struct pt_regs *, struct user_i387_struct *);
extern int fpu__exception_code(struct fpu *fpu, int trap_nr);

/*
 * High level FPU state handling functions:
 */
extern void fpu__save(struct fpu *fpu);
extern void fpu__restore(void);
extern int  fpu__restore_sig(void __user *buf, int ia32_frame);
extern void fpu__drop(struct fpu *fpu);
extern int  fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu);
extern void fpu__clear(struct fpu *fpu);

extern void fpu__init_check_bugs(void);
extern void fpu__resume_cpu(void);
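
/*
 * Tracks which task's FPU context (if any) most recently loaded its
 * state into this CPU's registers; NULL means no context owns the
 * registers here.
 */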
DECLARE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);

/*
 * Must be run with preemption disabled: this clears the fpu_fpregs_owner_ctx
 * on this CPU.
 *
 * This will disable any lazy FPU state restore of the current FPU state,
 * but if the current thread owns the FPU, its state will still be saved.
 */
static inline void __cpu_disable_lazy_restore(unsigned int cpu)
{
        per_cpu(fpu_fpregs_owner_ctx, cpu) = NULL;
}
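
/*
 * Check whether this FPU context still owns the registers on this CPU,
 * i.e. a lazy restore can be skipped because nothing has clobbered them
 * since this task last ran here.
 */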
static inline int fpu_want_lazy_restore(struct fpu *fpu, unsigned int cpu)
{
        return fpu == this_cpu_read_stable(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu;
}

#define X87_FSW_ES (1 << 7)     /* Exception Summary */

static __always_inline __pure bool use_eager_fpu(void)
{
        return static_cpu_has_safe(X86_FEATURE_EAGER_FPU);
}

static __always_inline __pure bool use_xsaveopt(void)
{
        return static_cpu_has_safe(X86_FEATURE_XSAVEOPT);
}

static __always_inline __pure bool use_xsave(void)
{
        return static_cpu_has_safe(X86_FEATURE_XSAVE);
}

static __always_inline __pure bool use_fxsr(void)
{
        return static_cpu_has_safe(X86_FEATURE_FXSR);
}

extern void fpstate_sanitize_xstate(struct fpu *fpu);
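
/*
 * user_insn() runs a single FPU instruction on a user-space buffer, with
 * user access opened via ASM_STAC/ASM_CLAC and an exception-table fixup
 * that makes a faulting access return -1 instead of oopsing. check_insn()
 * is the same wrapper, without the STAC/CLAC pair, for instructions that
 * only touch kernel memory.
 */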
#define user_insn(insn, output, input...)                       \
({                                                              \
        int err;                                                \
        asm volatile(ASM_STAC "\n"                              \
                     "1:" #insn "\n\t"                          \
                     "2: " ASM_CLAC "\n"                        \
                     ".section .fixup,\"ax\"\n"                 \
                     "3: movl $-1,%[err]\n"                     \
                     " jmp 2b\n"                                \
                     ".previous\n"                              \
                     _ASM_EXTABLE(1b, 3b)                       \
                     : [err] "=r" (err), output                 \
                     : "0"(0), input);                          \
        err;                                                    \
})

#define check_insn(insn, output, input...)                      \
({                                                              \
        int err;                                                \
        asm volatile("1:" #insn "\n\t"                          \
                     "2:\n"                                     \
                     ".section .fixup,\"ax\"\n"                 \
                     "3: movl $-1,%[err]\n"                     \
                     " jmp 2b\n"                                \
                     ".previous\n"                              \
                     _ASM_EXTABLE(1b, 3b)                       \
                     : [err] "=r" (err), output                 \
                     : "0"(0), input);                          \
        err;                                                    \
})
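
/*
 * Low-level FPU register save/restore helpers. The copy_*_to_user() and
 * copy_user_to_*() variants operate on user-space frames and return -1 on
 * a faulting access; the kernel variants operate on in-kernel fpstate
 * buffers. The 'fregs' helpers use the legacy FNSAVE/FRSTOR format, the
 * 'fxregs' helpers use FXSAVE/FXRSTOR.
 */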
static inline int copy_fregs_to_user(struct i387_fsave_struct __user *fx)
{
        return user_insn(fnsave %[fx]; fwait, [fx] "=m" (*fx), "m" (*fx));
}

static inline int copy_fxregs_to_user(struct i387_fxsave_struct __user *fx)
{
        if (config_enabled(CONFIG_X86_32))
                return user_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx));
        else if (config_enabled(CONFIG_AS_FXSAVEQ))
                return user_insn(fxsaveq %[fx], [fx] "=m" (*fx), "m" (*fx));

        /* See comment in copy_fxregs_to_kernel() below. */
        return user_insn(rex64/fxsave (%[fx]), "=m" (*fx), [fx] "R" (fx));
}

static inline int copy_kernel_to_fxregs(struct i387_fxsave_struct *fx)
{
        if (config_enabled(CONFIG_X86_32))
                return check_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
        else if (config_enabled(CONFIG_AS_FXSAVEQ))
                return check_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));

        /* See comment in copy_fxregs_to_kernel() below. */
        return check_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx),
                          "m" (*fx));
}

static inline int copy_user_to_fxregs(struct i387_fxsave_struct __user *fx)
{
        if (config_enabled(CONFIG_X86_32))
                return user_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
        else if (config_enabled(CONFIG_AS_FXSAVEQ))
                return user_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));

        /* See comment in copy_fxregs_to_kernel() below. */
        return user_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx),
                          "m" (*fx));
}

static inline int copy_kernel_to_fregs(struct i387_fsave_struct *fx)
{
        return check_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
}

static inline int copy_user_to_fregs(struct i387_fsave_struct __user *fx)
{
        return user_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
}

static inline void copy_fxregs_to_kernel(struct fpu *fpu)
{
        if (config_enabled(CONFIG_X86_32))
                asm volatile( "fxsave %[fx]" : [fx] "=m" (fpu->state.fxsave));
        else if (config_enabled(CONFIG_AS_FXSAVEQ))
                asm volatile("fxsaveq %[fx]" : [fx] "=m" (fpu->state.fxsave));
        else {
                /* Using "rex64; fxsave %0" is broken because, if the memory
                 * operand uses any extended registers for addressing, a second
                 * REX prefix will be generated (to the assembler, rex64
                 * followed by semicolon is a separate instruction), and hence
                 * the 64-bitness is lost.
                 *
                 * Using "fxsaveq %0" would be the ideal choice, but is only
                 * supported starting with gas 2.16.
                 *
                 * Using, as a workaround, the properly prefixed form below
                 * isn't accepted by any binutils version so far released,
                 * complaining that the same type of prefix is used twice if
                 * an extended register is needed for addressing (fix submitted
                 * to mainline 2005-11-21).
                 *
                 *  asm volatile("rex64/fxsave %0" : "=m" (fpu->state.fxsave));
                 *
                 * This, however, we can work around by forcing the compiler to
                 * select an addressing mode that doesn't require extended
                 * registers.
                 */
                asm volatile( "rex64/fxsave (%[fx])"
                             : "=m" (fpu->state.fxsave)
                             : [fx] "R" (&fpu->state.fxsave));
        }
}

/*
 * Must be called with preemption disabled. Returns 'true' if the
 * FPU state is still intact and we can keep registers active.
 *
 * The legacy FNSAVE instruction clears all FPU state unconditionally,
 * so registers are essentially destroyed. Modern FPU state can be
 * kept in registers, if there are no pending FP exceptions.
 */
static inline int copy_fpregs_to_fpstate(struct fpu *fpu)
{
        if (likely(use_xsave())) {
                copy_xregs_to_kernel(&fpu->state.xsave);
                return 1;
        }

        if (likely(use_fxsr())) {
                copy_fxregs_to_kernel(fpu);
                return 1;
        }

        /*
         * Legacy FPU register saving, FNSAVE always clears FPU registers,
         * so we have to mark them inactive:
         */
        asm volatile("fnsave %[fx]; fwait" : [fx] "=m" (fpu->state.fsave));

        return 0;
}

static inline int __copy_fpstate_to_fpregs(struct fpu *fpu)
{
        if (use_xsave())
                return copy_kernel_to_xregs(&fpu->state.xsave, -1);
        else if (use_fxsr())
                return copy_kernel_to_fxregs(&fpu->state.fxsave);
        else
                return copy_kernel_to_fregs(&fpu->state.fsave);
}

static inline int copy_fpstate_to_fpregs(struct fpu *fpu)
{
        /*
         * AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception is
         * pending. Clear the x87 state here by setting it to fixed values.
         * The 'addr' operand is just a random kernel variable that should
         * already be in L1 cache.
         */
        if (unlikely(static_cpu_has_bug_safe(X86_BUG_FXSAVE_LEAK))) {
                asm volatile(
                        "fnclex\n\t"
                        "emms\n\t"
                        "fildl %P[addr]"        /* set F?P to defined value */
                        : : [addr] "m" (fpu->fpregs_active));
        }

        return __copy_fpstate_to_fpregs(fpu);
}

/*
 * Wrap lazy FPU TS handling in a 'hw fpregs activation/deactivation'
 * idiom, which is then paired with the sw-flag (fpregs_active) later on:
 */
static inline void __fpregs_activate_hw(void)
{
        if (!use_eager_fpu())
                clts();
}

static inline void __fpregs_deactivate_hw(void)
{
        if (!use_eager_fpu())
                stts();
}

/* Must be paired with an 'stts' (fpregs_deactivate_hw()) after! */
static inline void __fpregs_deactivate(struct fpu *fpu)
{
        fpu->fpregs_active = 0;
        this_cpu_write(fpu_fpregs_owner_ctx, NULL);
}

/* Must be paired with a 'clts' (fpregs_activate_hw()) before! */
static inline void __fpregs_activate(struct fpu *fpu)
{
        fpu->fpregs_active = 1;
        this_cpu_write(fpu_fpregs_owner_ctx, fpu);
}

/*
 * The question "does this thread have fpu access?"
 * is slightly racy, since preemption could come in
 * and revoke it immediately after the test.
 *
 * However, even in that very unlikely scenario,
 * we can just assume we have FPU access - typically
 * to save the FP state - we'll just take a #NM
 * fault and get the FPU access back.
 */
static inline int fpregs_active(void)
{
        return current->thread.fpu.fpregs_active;
}

/*
 * Encapsulate the CR0.TS handling together with the
 * software flag.
 *
 * These generally need preemption protection to work;
 * try to avoid using them on their own.
 */
static inline void fpregs_activate(struct fpu *fpu)
{
        __fpregs_activate_hw();
        __fpregs_activate(fpu);
}

static inline void fpregs_deactivate(struct fpu *fpu)
{
        __fpregs_deactivate(fpu);
        __fpregs_deactivate_hw();
}

/*
 * Definitions for the eXtended Control Register instructions
 */
#define XCR_XFEATURE_ENABLED_MASK 0x00000000
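
/*
 * XGETBV/XSETBV are emitted as raw byte sequences so the code still
 * assembles with toolchains that don't know these mnemonics.
 */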
static inline u64 xgetbv(u32 index)
{
        u32 eax, edx;

        asm volatile(".byte 0x0f,0x01,0xd0" /* xgetbv */
                     : "=a" (eax), "=d" (edx)
                     : "c" (index));
        return eax + ((u64)edx << 32);
}

static inline void xsetbv(u32 index, u64 value)
{
        u32 eax = value;
        u32 edx = value >> 32;

        asm volatile(".byte 0x0f,0x01,0xd1" /* xsetbv */
                     : : "a" (eax), "d" (edx), "c" (index));
}

/*
 * FPU state switching for scheduling.
 *
 * This is a two-stage process:
 *
 *  - switch_fpu_prepare() saves the old state and
 *    sets the new state of the CR0.TS bit. This is
 *    done within the context of the old process.
 *
 *  - switch_fpu_finish() restores the new state as
 *    necessary.
 */
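/* 'preload' tells switch_fpu_finish() whether to load the new task's FPU state: */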
typedef struct { int preload; } fpu_switch_t;

static inline fpu_switch_t
switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu)
{
        fpu_switch_t fpu;

        /*
         * If the task has used the FPU, pre-load its state: always on
         * eager-FPU (xsave) processors, or if more than 5 consecutive
         * context switches used the FPU.
         */
        fpu.preload = new_fpu->fpstate_active &&
                      (use_eager_fpu() || new_fpu->counter > 5);

        if (old_fpu->fpregs_active) {
                if (!copy_fpregs_to_fpstate(old_fpu))
                        old_fpu->last_cpu = -1;
                else
                        old_fpu->last_cpu = cpu;

                /* But leave fpu_fpregs_owner_ctx! */
                old_fpu->fpregs_active = 0;

                /* Don't change CR0.TS if we just switch! */
                if (fpu.preload) {
                        new_fpu->counter++;
                        __fpregs_activate(new_fpu);
                        prefetch(&new_fpu->state);
                } else {
                        __fpregs_deactivate_hw();
                }
        } else {
                old_fpu->counter = 0;
                old_fpu->last_cpu = -1;
                if (fpu.preload) {
                        new_fpu->counter++;
                        if (fpu_want_lazy_restore(new_fpu, cpu))
                                fpu.preload = 0;
                        else
                                prefetch(&new_fpu->state);
                        fpregs_activate(new_fpu);
                }
        }
        return fpu;
}

/*
 * By the time this gets called, we've already cleared CR0.TS and
 * given the process the FPU if we are going to preload the FPU
 * state - all we need to do is to conditionally restore the register
 * state itself.
 */
static inline void switch_fpu_finish(struct fpu *new_fpu, fpu_switch_t fpu_switch)
{
        if (fpu_switch.preload) {
                if (unlikely(copy_fpstate_to_fpregs(new_fpu)))
                        fpu__clear(new_fpu);
        }
}

/*
 * Signal frame handlers...
 */
extern int copy_fpstate_to_sigframe(void __user *buf, void __user *fx, int size);

/*
 * Needs to be preemption-safe.
 *
 * NOTE! user_fpu_begin() must be used only immediately before restoring
 * the saved state. It does not do any saving/restoring on its own. In
 * lazy FPU mode, it is just an optimization to avoid a #NM exception;
 * the task can lose the FPU right after preempt_enable().
 */
static inline void user_fpu_begin(void)
{
        struct fpu *fpu = &current->thread.fpu;

        preempt_disable();
        if (!fpregs_active())
                fpregs_activate(fpu);
        preempt_enable();
}

#endif /* _ASM_X86_FPU_INTERNAL_H */