@@ -3331,17 +3331,6 @@ static void __sched notrace __schedule(bool preempt)
 	rq = cpu_rq(cpu);
 	prev = rq->curr;
 
-	/*
-	 * do_exit() calls schedule() with preemption disabled as an exception;
-	 * however we must fix that up, otherwise the next task will see an
-	 * inconsistent (higher) preempt count.
-	 *
-	 * It also avoids the below schedule_debug() test from complaining
-	 * about this.
-	 */
-	if (unlikely(prev->state == TASK_DEAD))
-		preempt_enable_no_resched_notrace();
-
 	schedule_debug(prev);
 
 	if (sched_feat(HRTICK))
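With the TASK_DEAD special case gone from __schedule(), the exit path can no longer call schedule() directly with a deliberately elevated preempt count. The companion hunk in kernel/exit.c is not part of this excerpt, but judging from the body of do_task_dead() below (which matches what used to sit at the end of do_exit()), the tail of do_exit() would plausibly shrink to something like the following. This is a hypothetical reconstruction, not the verbatim patch:

 	/* in kernel/exit.c, at the very end of do_exit(): */
-	/* causes final put_task_struct in finish_task_switch(). */
-	tsk->state = TASK_DEAD;
-	tsk->flags |= PF_NOFREEZE;	/* tell freezer to ignore us */
-	schedule();
-	BUG();
-	/* Avoid "noreturn function does return". */
-	for (;;)
-		cpu_relax();	/* For when BUG is null */
+	do_task_dead();
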
@@ -3409,6 +3398,33 @@ static void __sched notrace __schedule(bool preempt)
 }
 STACK_FRAME_NON_STANDARD(__schedule); /* switch_to() */
 
+void __noreturn do_task_dead(void)
+{
+	/*
+	 * The setting of TASK_RUNNING by try_to_wake_up() may be delayed
+	 * when the following two conditions become true:
+	 *   - there is a race condition on mmap_sem (it is acquired by
+	 *     exit_mm()), and
+	 *   - an SMI occurs before the setting of TASK_RUNNING
+	 *     (or the hypervisor of a virtual machine switches to another guest).
+	 * As a result, we may become TASK_RUNNING after becoming TASK_DEAD.
+	 *
+	 * To avoid this, we have to wait for tsk->pi_lock to be released,
+	 * as it is held by try_to_wake_up().
+	 */
+	smp_mb();
+	raw_spin_unlock_wait(&current->pi_lock);
+
+	/* causes final put_task_struct in finish_task_switch(). */
+	__set_current_state(TASK_DEAD);
+	current->flags |= PF_NOFREEZE;	/* tell freezer to ignore us */
+	__schedule(false);
+	BUG();
+	/* Avoid "noreturn function does return". */
+	for (;;)
+		cpu_relax();	/* For when BUG is null */
+}
+
 static inline void sched_submit_work(struct task_struct *tsk)
 {
 	if (!tsk->state || tsk_is_pi_blocked(tsk))
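
The pi_lock wait guards against a lost-state race with a concurrent waker: try_to_wake_up() holds p->pi_lock while it inspects p->state and eventually stores TASK_RUNNING. If that store is delayed (by mmap_sem contention plus an SMI or a hypervisor switch, as the comment above describes), it could land after the dying task has already set TASK_DEAD, effectively resurrecting it. A rough skeleton of the waker side makes the window visible; this is illustrative only, the real try_to_wake_up() does considerably more:

/* Illustrative skeleton of the waker side; not the real function body. */
static int try_to_wake_up(struct task_struct *p, unsigned int state,
			  int wake_flags)
{
	unsigned long flags;
	int success = 0;

	raw_spin_lock_irqsave(&p->pi_lock, flags);
	if (!(p->state & state))
		goto out;		/* task no longer in a wakeable state */

	/*
	 * ... wakeup/queueing work ...  If this store is delayed past the
	 * dying task's __set_current_state(TASK_DEAD), the task comes back
	 * to life.  do_task_dead() closes the window by spinning until
	 * pi_lock is free, i.e. until any in-flight waker has finished.
	 */
	p->state = TASK_RUNNING;
	success = 1;
out:
	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
	return success;
}

The smp_mb() before raw_spin_unlock_wait() orders the dying task's prior stores against the lock-state read, so that once the wait returns, any waker that could still observe the old state has completed and TASK_DEAD can be set safely.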