|
@@ -159,8 +159,44 @@ union fpregs_state {
|
|
|
|
|
|
struct fpu {
|
|
|
/*
|
|
|
+ * @state:
|
|
|
+ *
|
|
|
+ * In-memory copy of all FPU registers that we save/restore
|
|
|
+ * over context switches. If the task is using the FPU then
|
|
|
+ * the registers in the FPU are more recent than this state
|
|
|
+ * copy. If the task context-switches away then they get
|
|
|
+ * saved here and represent the FPU state.
|
|
|
+ *
|
|
|
+ * After context switches there may be a (short) time period
|
|
|
+ * during which the in-FPU hardware registers are unchanged
|
|
|
+ * and still perfectly match this state, if the tasks
|
|
|
+ * scheduled afterwards are not using the FPU.
|
|
|
+ *
|
|
|
+ * This is the 'lazy restore' window of optimization, which
|
|
|
+ * we track through 'fpu_fpregs_owner_ctx' and 'fpu->last_cpu'.
|
|
|
+ *
|
|
|
+ * We detect whether a subsequent task uses the FPU via setting
|
|
|
+ * CR0::TS to 1, which causes any FPU use to raise a #NM fault.
|
|
|
+ *
|
|
|
+ * During this window, if the task gets scheduled again, we
|
|
|
+ * might be able to skip having to do a restore from this
|
|
|
+ * memory buffer to the hardware registers - at the cost of
|
|
|
+ * incurring the overhead of #NM fault traps.
|
|
|
+ *
|
|
|
+ * Note that on modern CPUs that support the XSAVEOPT (or other
|
|
|
+ * optimized XSAVE instructions), we don't use #NM traps anymore,
|
|
|
+ * as the hardware can track whether FPU registers need saving
|
|
|
+ * or not. On such CPUs we activate the non-lazy ('eagerfpu')
|
|
|
+ * logic, which unconditionally saves/restores all FPU state
|
|
|
+ * across context switches (if FPU state exists).
|
|
|
+ */
|
|
|
+ union fpregs_state state;
|
|
|
+
|
|
|
+ /*
|
|
|
+ * @last_cpu:
|
|
|
+ *
|
|
|
* Records the last CPU on which this context was loaded into
|
|
|
- * FPU registers. (In the lazy-switching case we might be
|
|
|
+ * FPU registers. (In the lazy-restore case we might be
|
|
|
* able to reuse FPU registers across multiple context switches
|
|
|
* this way, if no intermediate task used the FPU.)
|
|
|
*
|
|
@@ -170,23 +206,49 @@ struct fpu {
|
|
|
*/
|
|
|
unsigned int last_cpu;
|
|
|
|
|
|
- unsigned int fpregs_active;
|
|
|
- union fpregs_state state;
|
|
|
/*
|
|
|
+ * @fpstate_active:
|
|
|
+ *
|
|
|
+ * This flag indicates whether this context is active: if the task
|
|
|
+ * is not running then we can restore from this context, if the task
|
|
|
+ * is running then we should save into this context.
|
|
|
+ */
|
|
|
+ unsigned char fpstate_active;
|
|
|
+
|
|
|
+ /*
|
|
|
+ * @fpregs_active:
|
|
|
+ *
|
|
|
+ * This flag determines whether a given context is actively
|
|
|
+ * loaded into the FPU's registers and whether those registers
|
|
|
+ * represent the task's current FPU state.
|
|
|
+ *
|
|
|
+ * Note the interaction with fpstate_active:
|
|
|
+ *
|
|
|
+ * # task does not use the FPU:
|
|
|
+ * fpstate_active == 0
|
|
|
+ *
|
|
|
+ * # task uses the FPU and regs are active:
|
|
|
+ * fpstate_active == 1 && fpregs_active == 1
|
|
|
+ *
|
|
|
+ * # the regs are inactive but still match fpstate:
|
|
|
+ * fpstate_active == 1 && fpregs_active == 0 && fpregs_owner == fpu
|
|
|
+ *
|
|
|
+ * The third state is what we use for the lazy restore optimization
|
|
|
+ * on lazy-switching CPUs.
|
|
|
+ */
|
|
|
+ unsigned char fpregs_active;
|
|
|
+
|
|
|
+ /*
|
|
|
+ * @counter:
|
|
|
+ *
|
|
|
* This counter contains the number of consecutive context switches
|
|
|
* during which the FPU stays used. If this is over a threshold, the
|
|
|
- * lazy fpu saving logic becomes unlazy, to save the trap overhead.
|
|
|
+ * lazy FPU restore logic becomes eager, to save the trap overhead.
|
|
|
* This is an unsigned char so that after 256 iterations the counter
|
|
|
* wraps and the context switch behavior turns lazy again; this is to
|
|
|
* deal with bursty apps that only use the FPU for a short time:
|
|
|
*/
|
|
|
unsigned char counter;
|
|
|
- /*
|
|
|
- * This flag indicates whether this context is fpstate_active: if the task is
|
|
|
- * not running then we can restore from this context, if the task
|
|
|
- * is running then we should save into this context.
|
|
|
- */
|
|
|
- unsigned char fpstate_active;
|
|
|
};
|
|
|
|
|
|
#endif /* _ASM_X86_FPU_H */
|