
Merge branch 'smp/hotplug' into sched/core, to resolve conflicts

Conflicts:
	kernel/sched/core.c

Signed-off-by: Ingo Molnar <mingo@kernel.org>
Ingo Molnar, 9 years ago
commit 4eb8676517

10 files changed, 210 insertions(+), 279 deletions(-)
  1. arch/powerpc/kernel/smp.c (+1, -1)
  2. arch/s390/kernel/smp.c (+1, -1)
  3. include/linux/cpu.h (+0, -18)
  4. include/linux/cpuhotplug.h (+2, -0)
  5. include/linux/cpumask.h (+2, -4)
  6. include/linux/sched.h (+9, -0)
  7. kernel/cpu.c (+15, -17)
  8. kernel/sched/core.c (+175, -224)
  9. kernel/sched/fair.c (+1, -14)
  10. kernel/sched/sched.h (+4, -0)

arch/powerpc/kernel/smp.c (+1, -1)

@@ -565,7 +565,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
 		smp_ops->give_timebase();
 
 	/* Wait until cpu puts itself in the online & active maps */
-	while (!cpu_online(cpu) || !cpu_active(cpu))
+	while (!cpu_online(cpu))
 		cpu_relax();
 
 	return 0;

arch/s390/kernel/smp.c (+1, -1)

@@ -832,7 +832,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
 	pcpu_attach_task(pcpu, tidle);
 	pcpu_start_fn(pcpu, smp_start_secondary, NULL);
 	/* Wait until cpu puts itself in the online & active maps */
-	while (!cpu_online(cpu) || !cpu_active(cpu))
+	while (!cpu_online(cpu))
 		cpu_relax();
 	return 0;
 }

include/linux/cpu.h (+0, -18)

@@ -59,25 +59,7 @@ struct notifier_block;
  * CPU notifier priorities.
  */
 enum {
-	/*
-	 * SCHED_ACTIVE marks a cpu which is coming up active during
-	 * CPU_ONLINE and CPU_DOWN_FAILED and must be the first
-	 * notifier.  CPUSET_ACTIVE adjusts cpuset according to
-	 * cpu_active mask right after SCHED_ACTIVE.  During
-	 * CPU_DOWN_PREPARE, SCHED_INACTIVE and CPUSET_INACTIVE are
-	 * ordered in the similar way.
-	 *
-	 * This ordering guarantees consistent cpu_active mask and
-	 * migration behavior to all cpu notifiers.
-	 */
-	CPU_PRI_SCHED_ACTIVE	= INT_MAX,
-	CPU_PRI_CPUSET_ACTIVE	= INT_MAX - 1,
-	CPU_PRI_SCHED_INACTIVE	= INT_MIN + 1,
-	CPU_PRI_CPUSET_INACTIVE	= INT_MIN,
-
-	/* migration should happen before other stuff but after perf */
 	CPU_PRI_PERF		= 20,
-	CPU_PRI_MIGRATION	= 10,
 
 	/* bring up workqueues before normal notifiers and down after */
 	CPU_PRI_WORKQUEUE_UP	= 5,

include/linux/cpuhotplug.h (+2, -0)

@@ -8,6 +8,7 @@ enum cpuhp_state {
 	CPUHP_BRINGUP_CPU,
 	CPUHP_AP_IDLE_DEAD,
 	CPUHP_AP_OFFLINE,
+	CPUHP_AP_SCHED_STARTING,
 	CPUHP_AP_NOTIFY_STARTING,
 	CPUHP_AP_ONLINE,
 	CPUHP_TEARDOWN_CPU,
@@ -16,6 +17,7 @@ enum cpuhp_state {
 	CPUHP_AP_NOTIFY_ONLINE,
 	CPUHP_AP_ONLINE_DYN,
 	CPUHP_AP_ONLINE_DYN_END		= CPUHP_AP_ONLINE_DYN + 30,
+	CPUHP_AP_ACTIVE,
 	CPUHP_ONLINE,
 };
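
The two new states bracket the scheduler's part of CPU hotplug: CPUHP_AP_SCHED_STARTING runs early on the incoming CPU with interrupts disabled, and CPUHP_AP_ACTIVE is the last step before CPUHP_ONLINE, after the dynamic state range. For comparison, a subsystem wanting callbacks in that dynamic range could register them roughly as sketched below; the callbacks and the "subsys/my:online" name are made up, and the sketch assumes the cpuhp_setup_state() interface introduced by the same hotplug rework.

#include <linux/cpuhotplug.h>

/* Hypothetical callbacks, shown only to illustrate the cpuhp interface. */
static int my_cpu_online(unsigned int cpu)
{
	/* Runs on @cpu once bring-up reaches the dynamic state range. */
	return 0;
}

static int my_cpu_offline(unsigned int cpu)
{
	/* Runs on @cpu early during take-down, before the low-level states. */
	return 0;
}

static int __init my_subsys_init(void)
{
	int ret;

	/*
	 * Ordering is positional: the dynamic range sits between
	 * CPUHP_AP_ONLINE_DYN and CPUHP_AP_ACTIVE, so these callbacks run
	 * after sched_cpu_starting() and before sched_cpu_activate().
	 */
	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "subsys/my:online",
				my_cpu_online, my_cpu_offline);
	return ret < 0 ? ret : 0;
}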
 

include/linux/cpumask.h (+2, -4)

@@ -743,12 +743,10 @@ set_cpu_present(unsigned int cpu, bool present)
 static inline void
 set_cpu_online(unsigned int cpu, bool online)
 {
-	if (online) {
+	if (online)
 		cpumask_set_cpu(cpu, &__cpu_online_mask);
-		cpumask_set_cpu(cpu, &__cpu_active_mask);
-	} else {
+	else
 		cpumask_clear_cpu(cpu, &__cpu_online_mask);
-	}
 }
 
 static inline void

include/linux/sched.h (+9, -0)

@@ -374,6 +374,15 @@ extern void cpu_init (void);
 extern void trap_init(void);
 extern void update_process_times(int user);
 extern void scheduler_tick(void);
+extern int sched_cpu_starting(unsigned int cpu);
+extern int sched_cpu_activate(unsigned int cpu);
+extern int sched_cpu_deactivate(unsigned int cpu);
+
+#ifdef CONFIG_HOTPLUG_CPU
+extern int sched_cpu_dying(unsigned int cpu);
+#else
+# define sched_cpu_dying	NULL
+#endif
 
 extern void sched_show_task(struct task_struct *p);
 

kernel/cpu.c (+15, -17)

@@ -703,21 +703,6 @@ static int takedown_cpu(unsigned int cpu)
 	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
 	int err;
 
-	/*
-	 * By now we've cleared cpu_active_mask, wait for all preempt-disabled
-	 * and RCU users of this state to go away such that all new such users
-	 * will observe it.
-	 *
-	 * For CONFIG_PREEMPT we have preemptible RCU and its sync_rcu() might
-	 * not imply sync_sched(), so wait for both.
-	 *
-	 * Do sync before park smpboot threads to take care the rcu boost case.
-	 */
-	if (IS_ENABLED(CONFIG_PREEMPT))
-		synchronize_rcu_mult(call_rcu, call_rcu_sched);
-	else
-		synchronize_rcu();
-
 	/* Park the smpboot threads */
 	kthread_park(per_cpu_ptr(&cpuhp_state, cpu)->thread);
 	smpboot_park_threads(cpu);
@@ -923,8 +908,6 @@ void cpuhp_online_idle(enum cpuhp_state state)
 
 	st->state = CPUHP_AP_ONLINE_IDLE;
 
-	/* The cpu is marked online, set it active now */
-	set_cpu_active(cpu, true);
 	/* Unpark the stopper thread and the hotplug thread of this cpu */
 	stop_machine_unpark(cpu);
 	kthread_unpark(st->thread);
@@ -1236,6 +1219,12 @@ static struct cpuhp_step cpuhp_ap_states[] = {
 		.name			= "ap:offline",
 		.cant_stop		= true,
 	},
+	/* First state is scheduler control. Interrupts are disabled */
+	[CPUHP_AP_SCHED_STARTING] = {
+		.name			= "sched:starting",
+		.startup		= sched_cpu_starting,
+		.teardown		= sched_cpu_dying,
+	},
 	/*
 	 * Low level startup/teardown notifiers. Run with interrupts
 	 * disabled. Will be removed once the notifiers are converted to
@@ -1274,6 +1263,15 @@ static struct cpuhp_step cpuhp_ap_states[] = {
 	 * The dynamically registered state space is here
 	 */
 
+#ifdef CONFIG_SMP
+	/* Last state is scheduler control setting the cpu active */
+	[CPUHP_AP_ACTIVE] = {
+		.name			= "sched:active",
+		.startup		= sched_cpu_activate,
+		.teardown		= sched_cpu_deactivate,
+	},
+#endif
+
 	/* CPU is fully up and running. */
 	[CPUHP_ONLINE] = {
 		.name			= "online",
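
The two scheduler entries above replace the removed notifier priorities: ordering is now positional in the state table, with startup callbacks invoked from low to high state on bring-up and teardown callbacks from high to low on take-down. A much-simplified sketch of that walk, using a local stand-in for the private struct cpuhp_step (this is an illustration, not the kernel's implementation):

#include <linux/cpuhotplug.h>
#include <linux/errno.h>

/* Local stand-in for kernel/cpu.c's private struct cpuhp_step. */
struct sketch_step {
	const char *name;
	int (*startup)(unsigned int cpu);
	int (*teardown)(unsigned int cpu);
};

static int sketch_bringup(struct sketch_step *steps, unsigned int cpu,
			  enum cpuhp_state target)
{
	int st;

	/* Startup callbacks run from low to high state number... */
	for (st = CPUHP_OFFLINE + 1; st <= target; st++) {
		if (steps[st].startup && steps[st].startup(cpu))
			return -EIO;	/* the real code rolls back completed states */
	}
	return 0;
}

static void sketch_teardown(struct sketch_step *steps, unsigned int cpu,
			    enum cpuhp_state target)
{
	int st;

	/*
	 * ...and teardown callbacks in reverse: sched_cpu_deactivate()
	 * (CPUHP_AP_ACTIVE) goes first, sched_cpu_dying()
	 * (CPUHP_AP_SCHED_STARTING) goes late, just before the CPU is gone.
	 */
	for (st = CPUHP_ONLINE; st > target; st--) {
		if (steps[st].teardown)
			steps[st].teardown(cpu);
	}
}

This is also why the RCU synchronization removed from takedown_cpu() above now lives in sched_cpu_deactivate(): it must run right after cpu_active is cleared, at the very start of the take-down walk.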

kernel/sched/core.c (+175, -224)

@@ -314,29 +314,6 @@ void hrtick_start(struct rq *rq, u64 delay)
 	}
 }
 
-static int
-hotplug_hrtick(struct notifier_block *nfb, unsigned long action, void *hcpu)
-{
-	int cpu = (int)(long)hcpu;
-
-	switch (action) {
-	case CPU_UP_CANCELED:
-	case CPU_UP_CANCELED_FROZEN:
-	case CPU_DOWN_PREPARE:
-	case CPU_DOWN_PREPARE_FROZEN:
-	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
-		hrtick_clear(cpu_rq(cpu));
-		return NOTIFY_OK;
-	}
-
-	return NOTIFY_DONE;
-}
-
-static __init void init_hrtick(void)
-{
-	hotcpu_notifier(hotplug_hrtick, 0);
-}
 #else
 /*
  * Called to set the hrtick timer state.
@@ -353,10 +330,6 @@ void hrtick_start(struct rq *rq, u64 delay)
 	hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay),
 		      HRTIMER_MODE_REL_PINNED);
 }
-
-static inline void init_hrtick(void)
-{
-}
 #endif /* CONFIG_SMP */
 
 static void init_rq_hrtick(struct rq *rq)
@@ -380,10 +353,6 @@ static inline void hrtick_clear(struct rq *rq)
 static inline void init_rq_hrtick(struct rq *rq)
 {
 }
-
-static inline void init_hrtick(void)
-{
-}
 #endif	/* CONFIG_SCHED_HRTICK */
 
 /*
@@ -1150,6 +1119,7 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
 static int __set_cpus_allowed_ptr(struct task_struct *p,
 				  const struct cpumask *new_mask, bool check)
 {
+	const struct cpumask *cpu_valid_mask = cpu_active_mask;
 	unsigned int dest_cpu;
 	struct rq_flags rf;
 	struct rq *rq;
@@ -1157,6 +1127,13 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
 
 	rq = task_rq_lock(p, &rf);
 
+	if (p->flags & PF_KTHREAD) {
+		/*
+		 * Kernel threads are allowed on online && !active CPUs
+		 */
+		cpu_valid_mask = cpu_online_mask;
+	}
+
 	/*
 	 * Must re-check here, to close a race against __kthread_bind(),
 	 * sched_setaffinity() is not guaranteed to observe the flag.
@@ -1169,18 +1146,28 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
 	if (cpumask_equal(&p->cpus_allowed, new_mask))
 		goto out;
 
-	if (!cpumask_intersects(new_mask, cpu_active_mask)) {
+	if (!cpumask_intersects(new_mask, cpu_valid_mask)) {
 		ret = -EINVAL;
 		goto out;
 	}
 
 	do_set_cpus_allowed(p, new_mask);
 
+	if (p->flags & PF_KTHREAD) {
+		/*
+		 * For kernel threads that do indeed end up on online &&
+		 * !active we want to ensure they are strict per-cpu threads.
+		 */
+		WARN_ON(cpumask_intersects(new_mask, cpu_online_mask) &&
+			!cpumask_intersects(new_mask, cpu_active_mask) &&
+			p->nr_cpus_allowed != 1);
+	}
+
 	/* Can the task run on the task's current CPU? If so, we're done */
 	if (cpumask_test_cpu(task_cpu(p), new_mask))
 		goto out;
 
-	dest_cpu = cpumask_any_and(cpu_active_mask, new_mask);
+	dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask);
 	if (task_running(rq, p) || p->state == TASK_WAKING) {
 		struct migration_arg arg = { p, dest_cpu };
 		/* Need help from migration thread: drop lock and wait. */
@@ -1499,6 +1486,25 @@ EXPORT_SYMBOL_GPL(kick_process);
 
 /*
  * ->cpus_allowed is protected by both rq->lock and p->pi_lock
+ *
+ * A few notes on cpu_active vs cpu_online:
+ *
+ *  - cpu_active must be a subset of cpu_online
+ *
+ *  - on cpu-up we allow per-cpu kthreads on the online && !active cpu,
+ *    see __set_cpus_allowed_ptr(). At this point the newly online
+ *    cpu isn't yet part of the sched domains, and balancing will not
+ *    see it.
+ *
+ *  - on cpu-down we clear cpu_active() to mask the sched domains and
+ *    avoid the load balancer to place new tasks on the to be removed
+ *    cpu. Existing tasks will remain running there and will be taken
+ *    off.
+ *
+ * This means that fallback selection must not select !active CPUs.
+ * And can assume that any active CPU must be online. Conversely
+ * select_task_rq() below may allow selection of !active CPUs in order
+ * to satisfy the above rules.
  */
 static int select_fallback_rq(int cpu, struct task_struct *p)
 {
@@ -1517,8 +1523,6 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
 
 		/* Look for allowed, online CPU in same node. */
 		for_each_cpu(dest_cpu, nodemask) {
-			if (!cpu_online(dest_cpu))
-				continue;
 			if (!cpu_active(dest_cpu))
 				continue;
 			if (cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p)))
@@ -1529,8 +1533,6 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
 	for (;;) {
 		/* Any allowed, online CPU? */
 		for_each_cpu(dest_cpu, tsk_cpus_allowed(p)) {
-			if (!cpu_online(dest_cpu))
-				continue;
 			if (!cpu_active(dest_cpu))
 				continue;
 			goto out;
@@ -1582,6 +1584,8 @@ int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
 
 	if (p->nr_cpus_allowed > 1)
 		cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags);
+	else
+		cpu = cpumask_any(tsk_cpus_allowed(p));
 
 	/*
 	 * In order not to call set_task_cpu() on a blocking task we need
@@ -5288,6 +5292,8 @@ out:
 
 #ifdef CONFIG_SMP
 
+static bool sched_smp_initialized __read_mostly;
+
 #ifdef CONFIG_NUMA_BALANCING
 /* Migrate current task p to target_cpu */
 int migrate_task_to(struct task_struct *p, int target_cpu)
@@ -5503,127 +5509,13 @@ static void set_rq_offline(struct rq *rq)
 	}
 }
 
-/*
- * migration_call - callback that gets triggered when a CPU is added.
- * Here we can start up the necessary migration thread for the new CPU.
- */
-static int
-migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
+static void set_cpu_rq_start_time(unsigned int cpu)
 {
-	int cpu = (long)hcpu;
-	unsigned long flags;
 	struct rq *rq = cpu_rq(cpu);
 
-	switch (action & ~CPU_TASKS_FROZEN) {
-
-	case CPU_UP_PREPARE:
-		rq->calc_load_update = calc_load_update;
-		account_reset_rq(rq);
-		break;
-
-	case CPU_ONLINE:
-		/* Update our root-domain */
-		raw_spin_lock_irqsave(&rq->lock, flags);
-		if (rq->rd) {
-			BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
-
-			set_rq_online(rq);
-		}
-		raw_spin_unlock_irqrestore(&rq->lock, flags);
-		break;
-
-#ifdef CONFIG_HOTPLUG_CPU
-	case CPU_DYING:
-		sched_ttwu_pending();
-		/* Update our root-domain */
-		raw_spin_lock_irqsave(&rq->lock, flags);
-		if (rq->rd) {
-			BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
-			set_rq_offline(rq);
-		}
-		migrate_tasks(rq);
-		BUG_ON(rq->nr_running != 1); /* the migration thread */
-		raw_spin_unlock_irqrestore(&rq->lock, flags);
-		break;
-
-	case CPU_DEAD:
-		calc_load_migrate(rq);
-		break;
-#endif
-	}
-
-	update_max_interval();
-
-	return NOTIFY_OK;
-}
-
-/*
- * Register at high priority so that task migration (migrate_all_tasks)
- * happens before everything else.  This has to be lower priority than
- * the notifier in the perf_event subsystem, though.
- */
-static struct notifier_block migration_notifier = {
-	.notifier_call = migration_call,
-	.priority = CPU_PRI_MIGRATION,
-};
-
-static void set_cpu_rq_start_time(void)
-{
-	int cpu = smp_processor_id();
-	struct rq *rq = cpu_rq(cpu);
 	rq->age_stamp = sched_clock_cpu(cpu);
 }
 
-static int sched_cpu_active(struct notifier_block *nfb,
-				      unsigned long action, void *hcpu)
-{
-	int cpu = (long)hcpu;
-
-	switch (action & ~CPU_TASKS_FROZEN) {
-	case CPU_STARTING:
-		set_cpu_rq_start_time();
-		return NOTIFY_OK;
-
-	case CPU_DOWN_FAILED:
-		set_cpu_active(cpu, true);
-		return NOTIFY_OK;
-
-	default:
-		return NOTIFY_DONE;
-	}
-}
-
-static int sched_cpu_inactive(struct notifier_block *nfb,
-					unsigned long action, void *hcpu)
-{
-	switch (action & ~CPU_TASKS_FROZEN) {
-	case CPU_DOWN_PREPARE:
-		set_cpu_active((long)hcpu, false);
-		return NOTIFY_OK;
-	default:
-		return NOTIFY_DONE;
-	}
-}
-
-static int __init migration_init(void)
-{
-	void *cpu = (void *)(long)smp_processor_id();
-	int err;
-
-	/* Initialize migration for the boot CPU */
-	err = migration_call(&migration_notifier, CPU_UP_PREPARE, cpu);
-	BUG_ON(err == NOTIFY_BAD);
-	migration_call(&migration_notifier, CPU_ONLINE, cpu);
-	register_cpu_notifier(&migration_notifier);
-
-	/* Register cpu active notifiers */
-	cpu_notifier(sched_cpu_active, CPU_PRI_SCHED_ACTIVE);
-	cpu_notifier(sched_cpu_inactive, CPU_PRI_SCHED_INACTIVE);
-
-	return 0;
-}
-early_initcall(migration_init);
-
 static cpumask_var_t sched_domains_tmpmask; /* sched_domains_mutex */
 
 #ifdef CONFIG_SCHED_DEBUG
@@ -6771,10 +6663,10 @@ static void sched_init_numa(void)
 	init_numa_topology_type();
 }
 
-static void sched_domains_numa_masks_set(int cpu)
+static void sched_domains_numa_masks_set(unsigned int cpu)
 {
-	int i, j;
 	int node = cpu_to_node(cpu);
+	int i, j;
 
 	for (i = 0; i < sched_domains_numa_levels; i++) {
 		for (j = 0; j < nr_node_ids; j++) {
@@ -6784,51 +6676,20 @@ static void sched_domains_numa_masks_set(int cpu)
 	}
 }
 
-static void sched_domains_numa_masks_clear(int cpu)
+static void sched_domains_numa_masks_clear(unsigned int cpu)
 {
 	int i, j;
+
 	for (i = 0; i < sched_domains_numa_levels; i++) {
 		for (j = 0; j < nr_node_ids; j++)
 			cpumask_clear_cpu(cpu, sched_domains_numa_masks[i][j]);
 	}
 }
 
-/*
- * Update sched_domains_numa_masks[level][node] array when new cpus
- * are onlined.
- */
-static int sched_domains_numa_masks_update(struct notifier_block *nfb,
-					   unsigned long action,
-					   void *hcpu)
-{
-	int cpu = (long)hcpu;
-
-	switch (action & ~CPU_TASKS_FROZEN) {
-	case CPU_ONLINE:
-		sched_domains_numa_masks_set(cpu);
-		break;
-
-	case CPU_DEAD:
-		sched_domains_numa_masks_clear(cpu);
-		break;
-
-	default:
-		return NOTIFY_DONE;
-	}
-
-	return NOTIFY_OK;
-}
 #else
-static inline void sched_init_numa(void)
-{
-}
-
-static int sched_domains_numa_masks_update(struct notifier_block *nfb,
-					   unsigned long action,
-					   void *hcpu)
-{
-	return 0;
-}
+static inline void sched_init_numa(void) { }
+static void sched_domains_numa_masks_set(unsigned int cpu) { }
+static void sched_domains_numa_masks_clear(unsigned int cpu) { }
 #endif /* CONFIG_NUMA */
 
 static int __sdt_alloc(const struct cpumask *cpu_map)
@@ -7218,13 +7079,9 @@ static int num_cpus_frozen;	/* used to mark begin/end of suspend/resume */
  * If we come here as part of a suspend/resume, don't touch cpusets because we
  * want to restore it back to its original state upon resume anyway.
  */
-static int cpuset_cpu_active(struct notifier_block *nfb, unsigned long action,
-			     void *hcpu)
+static void cpuset_cpu_active(void)
 {
-	switch (action) {
-	case CPU_ONLINE_FROZEN:
-	case CPU_DOWN_FAILED_FROZEN:
-
+	if (cpuhp_tasks_frozen) {
 		/*
 		 * num_cpus_frozen tracks how many CPUs are involved in suspend
 		 * resume sequence. As long as this is not the last online
@@ -7234,35 +7091,25 @@ static int cpuset_cpu_active(struct notifier_block *nfb, unsigned long action,
 		num_cpus_frozen--;
 		if (likely(num_cpus_frozen)) {
 			partition_sched_domains(1, NULL, NULL);
-			break;
+			return;
 		}
-
 		/*
 		 * This is the last CPU online operation. So fall through and
 		 * restore the original sched domains by considering the
 		 * cpuset configurations.
 		 */
-
-	case CPU_ONLINE:
-		cpuset_update_active_cpus(true);
-		break;
-	default:
-		return NOTIFY_DONE;
 	}
-	return NOTIFY_OK;
+	cpuset_update_active_cpus(true);
 }
 
-static int cpuset_cpu_inactive(struct notifier_block *nfb, unsigned long action,
-			       void *hcpu)
+static int cpuset_cpu_inactive(unsigned int cpu)
 {
 	unsigned long flags;
-	long cpu = (long)hcpu;
 	struct dl_bw *dl_b;
 	bool overflow;
 	int cpus;
 
-	switch (action) {
-	case CPU_DOWN_PREPARE:
+	if (!cpuhp_tasks_frozen) {
 		rcu_read_lock_sched();
 		dl_b = dl_bw_of(cpu);
 
@@ -7274,19 +7121,120 @@ static int cpuset_cpu_inactive(struct notifier_block *nfb, unsigned long action,
 		rcu_read_unlock_sched();
 
 		if (overflow)
-			return notifier_from_errno(-EBUSY);
+			return -EBUSY;
 		cpuset_update_active_cpus(false);
-		break;
-	case CPU_DOWN_PREPARE_FROZEN:
+	} else {
 		num_cpus_frozen++;
 		partition_sched_domains(1, NULL, NULL);
-		break;
-	default:
-		return NOTIFY_DONE;
 	}
-	return NOTIFY_OK;
+	return 0;
 }
 
+int sched_cpu_activate(unsigned int cpu)
+{
+	struct rq *rq = cpu_rq(cpu);
+	unsigned long flags;
+
+	set_cpu_active(cpu, true);
+
+	if (sched_smp_initialized) {
+		sched_domains_numa_masks_set(cpu);
+		cpuset_cpu_active();
+	}
+
+	/*
+	 * Put the rq online, if not already. This happens:
+	 *
+	 * 1) In the early boot process, because we build the real domains
+	 *    after all cpus have been brought up.
+	 *
+	 * 2) At runtime, if cpuset_cpu_active() fails to rebuild the
+	 *    domains.
+	 */
+	raw_spin_lock_irqsave(&rq->lock, flags);
+	if (rq->rd) {
+		BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
+		set_rq_online(rq);
+	}
+	raw_spin_unlock_irqrestore(&rq->lock, flags);
+
+	update_max_interval();
+
+	return 0;
+}
+
+int sched_cpu_deactivate(unsigned int cpu)
+{
+	int ret;
+
+	set_cpu_active(cpu, false);
+	/*
+	 * We've cleared cpu_active_mask, wait for all preempt-disabled and RCU
+	 * users of this state to go away such that all new such users will
+	 * observe it.
+	 *
+	 * For CONFIG_PREEMPT we have preemptible RCU and its sync_rcu() might
+	 * not imply sync_sched(), so wait for both.
+	 *
+	 * Do sync before park smpboot threads to take care the rcu boost case.
+	 */
+	if (IS_ENABLED(CONFIG_PREEMPT))
+		synchronize_rcu_mult(call_rcu, call_rcu_sched);
+	else
+		synchronize_rcu();
+
+	if (!sched_smp_initialized)
+		return 0;
+
+	ret = cpuset_cpu_inactive(cpu);
+	if (ret) {
+		set_cpu_active(cpu, true);
+		return ret;
+	}
+	sched_domains_numa_masks_clear(cpu);
+	return 0;
+}
+
+static void sched_rq_cpu_starting(unsigned int cpu)
+{
+	struct rq *rq = cpu_rq(cpu);
+
+	rq->calc_load_update = calc_load_update;
+	account_reset_rq(rq);
+	update_max_interval();
+}
+
+int sched_cpu_starting(unsigned int cpu)
+{
+	set_cpu_rq_start_time(cpu);
+	sched_rq_cpu_starting(cpu);
+	return 0;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+int sched_cpu_dying(unsigned int cpu)
+{
+	struct rq *rq = cpu_rq(cpu);
+	unsigned long flags;
+
+	/* Handle pending wakeups and then migrate everything off */
+	sched_ttwu_pending();
+	raw_spin_lock_irqsave(&rq->lock, flags);
+	if (rq->rd) {
+		BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
+		set_rq_offline(rq);
+	}
+	migrate_tasks(rq);
+	BUG_ON(rq->nr_running != 1);
+	raw_spin_unlock_irqrestore(&rq->lock, flags);
+	calc_load_migrate(rq);
+	update_max_interval();
+	nohz_balance_exit_idle(cpu);
+	hrtick_clear(rq);
+	return 0;
+}
+#endif
+
 void __init sched_init_smp(void)
 {
 	cpumask_var_t non_isolated_cpus;
@@ -7308,12 +7256,6 @@ void __init sched_init_smp(void)
 		cpumask_set_cpu(smp_processor_id(), non_isolated_cpus);
 	mutex_unlock(&sched_domains_mutex);
 
-	hotcpu_notifier(sched_domains_numa_masks_update, CPU_PRI_SCHED_ACTIVE);
-	hotcpu_notifier(cpuset_cpu_active, CPU_PRI_CPUSET_ACTIVE);
-	hotcpu_notifier(cpuset_cpu_inactive, CPU_PRI_CPUSET_INACTIVE);
-
-	init_hrtick();
-
 	/* Move init over to a non-isolated CPU */
 	if (set_cpus_allowed_ptr(current, non_isolated_cpus) < 0)
 		BUG();
@@ -7322,7 +7264,16 @@ void __init sched_init_smp(void)
 
 	init_sched_rt_class();
 	init_sched_dl_class();
+	sched_smp_initialized = true;
+}
+
+static int __init migration_init(void)
+{
+	sched_rq_cpu_starting(smp_processor_id());
+	return 0;
 }
+early_initcall(migration_init);
+
 #else
 void __init sched_init_smp(void)
 {
@@ -7519,7 +7470,7 @@ void __init sched_init(void)
 	if (cpu_isolated_map == NULL)
 		zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
 	idle_thread_set_boot_cpu();
-	set_cpu_rq_start_time();
+	set_cpu_rq_start_time(smp_processor_id());
 #endif
 	init_sched_fair_class();
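
The comment block and the PF_KTHREAD handling in __set_cpus_allowed_ptr() above encode the new placement rules: active CPUs may host any task, while a CPU that is online but not (yet, or no longer) active may only host strict per-cpu kernel threads. Condensed into a small illustrative helper (this function does not exist in the kernel; it only restates those rules):

#include <linux/sched.h>
#include <linux/cpumask.h>

/* Illustrative condensation of the cpu_active vs cpu_online rules above. */
static bool cpu_may_host_task(unsigned int cpu, struct task_struct *p)
{
	/* cpu_active is a subset of cpu_online, so active implies online. */
	if (cpu_active(cpu))
		return true;

	/* online && !active: only strict per-cpu kernel threads. */
	return cpu_online(cpu) &&
	       (p->flags & PF_KTHREAD) && p->nr_cpus_allowed == 1;
}

select_fallback_rq() enforces the same rule from the other side: it only ever falls back to active CPUs, so any CPU it picks is guaranteed to be online.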
 

kernel/sched/fair.c (+1, -14)

@@ -7814,7 +7814,7 @@ static void nohz_balancer_kick(void)
 	return;
 }
 
-static inline void nohz_balance_exit_idle(int cpu)
+void nohz_balance_exit_idle(unsigned int cpu)
 {
 	if (unlikely(test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)))) {
 		/*
@@ -7887,18 +7887,6 @@ void nohz_balance_enter_idle(int cpu)
 	atomic_inc(&nohz.nr_cpus);
 	set_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu));
 }
-
-static int sched_ilb_notifier(struct notifier_block *nfb,
-					unsigned long action, void *hcpu)
-{
-	switch (action & ~CPU_TASKS_FROZEN) {
-	case CPU_DYING:
-		nohz_balance_exit_idle(smp_processor_id());
-		return NOTIFY_OK;
-	default:
-		return NOTIFY_DONE;
-	}
-}
 #endif
 
 static DEFINE_SPINLOCK(balancing);
@@ -8704,7 +8692,6 @@ __init void init_sched_fair_class(void)
 #ifdef CONFIG_NO_HZ_COMMON
 	nohz.next_balance = jiffies;
 	zalloc_cpumask_var(&nohz.idle_cpus_mask, GFP_NOWAIT);
-	cpu_notifier(sched_ilb_notifier, 0);
 #endif
 #endif /* SMP */
 

kernel/sched/sched.h (+4, -0)

@@ -1700,6 +1700,10 @@ enum rq_nohz_flag_bits {
 };
 
 #define nohz_flags(cpu)	(&cpu_rq(cpu)->nohz_flags)
+
+extern void nohz_balance_exit_idle(unsigned int cpu);
+#else
+static inline void nohz_balance_exit_idle(unsigned int cpu) { }
 #endif
 
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING