@@ -189,6 +189,7 @@ union fpregs_state {
 	struct fxregs_state		fxsave;
 	struct swregs_state		soft;
 	struct xregs_state		xsave;
+	u8 __padding[PAGE_SIZE];
 };
 
 /*
@@ -197,40 +198,6 @@ union fpregs_state {
  * state fields:
  */
 struct fpu {
-	/*
-	 * @state:
-	 *
-	 * In-memory copy of all FPU registers that we save/restore
-	 * over context switches. If the task is using the FPU then
-	 * the registers in the FPU are more recent than this state
-	 * copy. If the task context-switches away then they get
-	 * saved here and represent the FPU state.
-	 *
-	 * After context switches there may be a (short) time period
-	 * during which the in-FPU hardware registers are unchanged
-	 * and still perfectly match this state, if the tasks
-	 * scheduled afterwards are not using the FPU.
-	 *
-	 * This is the 'lazy restore' window of optimization, which
-	 * we track though 'fpu_fpregs_owner_ctx' and 'fpu->last_cpu'.
-	 *
-	 * We detect whether a subsequent task uses the FPU via setting
-	 * CR0::TS to 1, which causes any FPU use to raise a #NM fault.
-	 *
-	 * During this window, if the task gets scheduled again, we
-	 * might be able to skip having to do a restore from this
-	 * memory buffer to the hardware registers - at the cost of
-	 * incurring the overhead of #NM fault traps.
-	 *
-	 * Note that on modern CPUs that support the XSAVEOPT (or other
-	 * optimized XSAVE instructions), we don't use #NM traps anymore,
-	 * as the hardware can track whether FPU registers need saving
-	 * or not. On such CPUs we activate the non-lazy ('eagerfpu')
-	 * logic, which unconditionally saves/restores all FPU state
-	 * across context switches. (if FPU state exists.)
-	 */
-	union fpregs_state		state;
-
 	/*
 	 * @last_cpu:
 	 *
@@ -288,6 +255,43 @@ struct fpu {
 	 * deal with bursty apps that only use the FPU for a short time:
 	 */
 	unsigned char			counter;
+	/*
+	 * @state:
+	 *
+	 * In-memory copy of all FPU registers that we save/restore
+	 * over context switches. If the task is using the FPU then
+	 * the registers in the FPU are more recent than this state
+	 * copy. If the task context-switches away then they get
+	 * saved here and represent the FPU state.
+	 *
+	 * After context switches there may be a (short) time period
+	 * during which the in-FPU hardware registers are unchanged
+	 * and still perfectly match this state, if the tasks
+	 * scheduled afterwards are not using the FPU.
+	 *
+	 * This is the 'lazy restore' window of optimization, which
+	 * we track through 'fpu_fpregs_owner_ctx' and 'fpu->last_cpu'.
+	 *
+	 * We detect whether a subsequent task uses the FPU via setting
+	 * CR0::TS to 1, which causes any FPU use to raise a #NM fault.
+	 *
+	 * During this window, if the task gets scheduled again, we
+	 * might be able to skip having to do a restore from this
+	 * memory buffer to the hardware registers - at the cost of
+	 * incurring the overhead of #NM fault traps.
+	 *
+	 * Note that on modern CPUs that support the XSAVEOPT (or other
+	 * optimized XSAVE instructions), we don't use #NM traps anymore,
+	 * as the hardware can track whether FPU registers need saving
+	 * or not. On such CPUs we activate the non-lazy ('eagerfpu')
+	 * logic, which unconditionally saves/restores all FPU state
+	 * across context switches. (if FPU state exists.)
+	 */
+	union fpregs_state		state;
+	/*
+	 * WARNING: 'state' is dynamically-sized. Do not put
+	 * anything after it here.
+	 */
 };
 
 #endif /* _ASM_X86_FPU_H */
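
Editor's note (not part of the patch): the 'lazy restore' window described in
the @state comment boils down to one check: the restore from 'state' can be
skipped only when this task's context still owns the CPU's FPU registers and
the task is rescheduled on that same CPU. Below is a minimal user-space sketch
of that check. The names fpu_fpregs_owner_ctx and last_cpu mirror the kernel's;
everything else (NR_CPUS, the struct layout, the function shape) is a
simplified stand-in, not the kernel implementation.

#include <stdbool.h>
#include <stdio.h>

struct fpu {
	int last_cpu;		/* CPU whose FPU registers last held our state */
};

/* Per-CPU: which task's FPU context currently owns the hardware registers. */
#define NR_CPUS 4
static struct fpu *fpu_fpregs_owner_ctx[NR_CPUS];

/*
 * The restore can be skipped only if (a) no other task has touched this
 * CPU's FPU registers since we last ran, and (b) we are back on the same
 * CPU whose registers hold our state.
 */
static bool fpu_want_lazy_restore(struct fpu *fpu, int cpu)
{
	return fpu_fpregs_owner_ctx[cpu] == fpu && fpu->last_cpu == cpu;
}

int main(void)
{
	struct fpu task_fpu = { .last_cpu = 1 };

	fpu_fpregs_owner_ctx[1] = &task_fpu;	/* task last ran on CPU 1 */

	printf("rescheduled on CPU 1: skip restore? %d\n",
	       fpu_want_lazy_restore(&task_fpu, 1));	/* 1: registers still valid */
	printf("rescheduled on CPU 2: skip restore? %d\n",
	       fpu_want_lazy_restore(&task_fpu, 2));	/* 0: must reload from 'state' */
	return 0;
}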
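Editor's note (not part of the patch): the WARNING above exists because this
series sizes the containing allocation at runtime -- only as much of the
PAGE_SIZE-padded union as the CPU's XSAVE features actually need gets
allocated -- so 'state' behaves like a flexible trailing member, and any field
placed after it would be overwritten. A hypothetical user-space analogue of
that pattern, with invented names throughout:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct fpu_like {
	int		last_cpu;
	unsigned char	counter;
	unsigned char	state[];	/* must be last: sized at runtime */
};

static struct fpu_like *alloc_fpu_like(size_t xstate_size)
{
	/* Allocate the fixed header plus only the register state we need. */
	return calloc(1, sizeof(struct fpu_like) + xstate_size);
}

int main(void)
{
	size_t xstate_size = 832;	/* stand-in for a CPUID-probed size */
	struct fpu_like *f = alloc_fpu_like(xstate_size);

	if (!f)
		return 1;
	memset(f->state, 0xff, xstate_size);	/* safe: nothing lives after it */
	printf("fpu-like object: %zu header bytes + %zu state bytes\n",
	       sizeof(*f), xstate_size);
	free(f);
	return 0;
}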