@@ -1000,7 +1000,11 @@ inline int task_curr(const struct task_struct *p)
 }
 
 /*
- * Can drop rq->lock because from sched_class::switched_from() methods drop it.
+ * switched_from, switched_to and prio_changed must _NOT_ drop rq->lock,
+ * use the balance_callback list if you want balancing.
+ *
+ * this means any call to check_class_changed() must be followed by a call to
+ * balance_callback().
  */
 static inline void check_class_changed(struct rq *rq, struct task_struct *p,
 				       const struct sched_class *prev_class,
@@ -1009,7 +1013,7 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p,
 	if (prev_class != p->sched_class) {
 		if (prev_class->switched_from)
 			prev_class->switched_from(rq, p);
-		/* Possble rq->lock 'hole'. */
+
 		p->sched_class->switched_to(rq, p);
 	} else if (oldprio != p->prio || dl_task(p))
 		p->sched_class->prio_changed(rq, p, oldprio);
@@ -1041,6 +1045,177 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
 }
 
 #ifdef CONFIG_SMP
+/*
+ * This is how migration works:
+ *
+ * 1) we invoke migration_cpu_stop() on the target CPU using
+ *    stop_one_cpu().
+ * 2) stopper starts to run (implicitly forcing the migrated thread
+ *    off the CPU)
+ * 3) it checks whether the migrated task is still in the wrong runqueue.
+ * 4) if it's in the wrong runqueue then the migration thread removes
+ *    it and puts it into the right queue.
+ * 5) stopper completes and stop_one_cpu() returns and the migration
+ *    is done.
+ */
+
+/*
+ * move_queued_task - move a queued task to new rq.
+ *
+ * Returns (locked) new rq. Old rq's lock is released.
+ */
+static struct rq *move_queued_task(struct rq *rq, struct task_struct *p, int new_cpu)
+{
+	lockdep_assert_held(&rq->lock);
+
+	dequeue_task(rq, p, 0);
+	p->on_rq = TASK_ON_RQ_MIGRATING;
+	set_task_cpu(p, new_cpu);
+	raw_spin_unlock(&rq->lock);
+
+	rq = cpu_rq(new_cpu);
+
+	raw_spin_lock(&rq->lock);
+	BUG_ON(task_cpu(p) != new_cpu);
+	p->on_rq = TASK_ON_RQ_QUEUED;
+	enqueue_task(rq, p, 0);
+	check_preempt_curr(rq, p, 0);
+
+	return rq;
+}
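Illustrative sketch (not part of the patch, and not kernel API): move_queued_task() enters with the old runqueue's lock held and returns with the new runqueue's lock held, which is an easy contract to misuse. The minimal userspace model below shows only that lock hand-off, using pthread mutexes; the queue/item/slot names are invented for illustration.

#include <assert.h>
#include <pthread.h>
#include <stdio.h>

enum { QUEUED, MIGRATING };

struct item { int state; };

struct queue {
	pthread_mutex_t lock;
	struct item *slot;	/* at most one item, to keep the model tiny */
};

/*
 * Enter with src->lock held; return with dst->lock held and src->lock
 * released -- the same contract move_queued_task() documents above.
 */
static struct queue *move_item(struct queue *src, struct queue *dst, struct item *it)
{
	it->state = MIGRATING;	/* marks the window where neither lock is held */
	src->slot = NULL;
	pthread_mutex_unlock(&src->lock);

	pthread_mutex_lock(&dst->lock);
	dst->slot = it;
	it->state = QUEUED;
	return dst;
}

int main(void)
{
	struct queue a = { PTHREAD_MUTEX_INITIALIZER, NULL };
	struct queue b = { PTHREAD_MUTEX_INITIALIZER, NULL };
	struct item it = { QUEUED };
	struct queue *locked;

	a.slot = &it;
	pthread_mutex_lock(&a.lock);
	locked = move_item(&a, &b, &it);	/* comes back holding b.lock */
	assert(locked == &b && b.slot == &it);
	pthread_mutex_unlock(&locked->lock);
	printf("moved; destination lock returned held\n");
	return 0;
}

The MIGRATING state plays the role of TASK_ON_RQ_MIGRATING above: it marks the task while neither runqueue lock is held, so concurrent code can tell the task is in transit.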
+
+struct migration_arg {
+	struct task_struct *task;
+	int dest_cpu;
+};
+
+/*
+ * Move (not current) task off this cpu, onto dest cpu. We're doing
+ * this because either it can't run here any more (set_cpus_allowed()
+ * away from this CPU, or CPU going down), or because we're
+ * attempting to rebalance this task on exec (sched_exec).
+ *
+ * So we race with normal scheduler movements, but that's OK, as long
+ * as the task is no longer on this CPU.
+ */
+static struct rq *__migrate_task(struct rq *rq, struct task_struct *p, int dest_cpu)
+{
+	if (unlikely(!cpu_active(dest_cpu)))
+		return rq;
+
+	/* Affinity changed (again). */
+	if (!cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p)))
+		return rq;
+
+	rq = move_queued_task(rq, p, dest_cpu);
+
+	return rq;
+}
+
+/*
+ * migration_cpu_stop - this will be executed by a highprio stopper thread
+ * and performs thread migration by bumping thread off CPU then
+ * 'pushing' onto another runqueue.
+ */
+static int migration_cpu_stop(void *data)
+{
+	struct migration_arg *arg = data;
+	struct task_struct *p = arg->task;
+	struct rq *rq = this_rq();
+
+	/*
+	 * The original target cpu might have gone down and we might
+	 * be on another cpu but it doesn't matter.
+	 */
+	local_irq_disable();
+	/*
+	 * We need to explicitly wake pending tasks before running
+	 * __migrate_task() such that we will not miss enforcing cpus_allowed
+	 * during wakeups, see set_cpus_allowed_ptr()'s TASK_WAKING test.
+	 */
+	sched_ttwu_pending();
+
+	raw_spin_lock(&p->pi_lock);
+	raw_spin_lock(&rq->lock);
+	/*
+	 * If task_rq(p) != rq, it cannot be migrated here, because we're
+	 * holding rq->lock, if p->on_rq == 0 it cannot get enqueued because
+	 * we're holding p->pi_lock.
+	 */
+	if (task_rq(p) == rq && task_on_rq_queued(p))
+		rq = __migrate_task(rq, p, arg->dest_cpu);
+	raw_spin_unlock(&rq->lock);
+	raw_spin_unlock(&p->pi_lock);
+
+	local_irq_enable();
+	return 0;
+}
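Illustrative sketch (not part of the patch): the "how migration works" steps in the comment above amount to handing a function to a per-CPU stopper and waiting for it to finish. Below is a toy userspace model of that submit-and-wait flow; the real stop_one_cpu() additionally preempts the target CPU with a highest-priority stopper thread, and every name here is invented for illustration.

#include <pthread.h>
#include <stdio.h>

struct stopper {
	pthread_mutex_t lock;
	pthread_cond_t cond;
	int (*fn)(void *);	/* pending work, NULL while idle */
	void *arg;
	int done;
};

static void *stopper_thread(void *data)	/* stands in for the stopper kthread */
{
	struct stopper *s = data;

	pthread_mutex_lock(&s->lock);
	while (!s->fn)
		pthread_cond_wait(&s->cond, &s->lock);
	s->fn(s->arg);			/* steps 2-4: "migration_cpu_stop()" runs here */
	s->done = 1;
	pthread_cond_broadcast(&s->cond);
	pthread_mutex_unlock(&s->lock);
	return NULL;
}

static void toy_stop_one_cpu(struct stopper *s, int (*fn)(void *), void *arg)
{
	pthread_mutex_lock(&s->lock);
	s->fn = fn;			/* step 1: queue the work */
	s->arg = arg;
	pthread_cond_broadcast(&s->cond);
	while (!s->done)		/* step 5: wait for completion */
		pthread_cond_wait(&s->cond, &s->lock);
	pthread_mutex_unlock(&s->lock);
}

static int toy_migration_stop(void *arg)
{
	printf("stopper: would re-check and move task %s here\n", (char *)arg);
	return 0;
}

int main(void)
{
	struct stopper s = { PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER };
	pthread_t thr;

	pthread_create(&thr, NULL, stopper_thread, &s);
	toy_stop_one_cpu(&s, toy_migration_stop, "p");
	pthread_join(thr, NULL);
	return 0;
}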
+
+void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
+{
+	if (p->sched_class->set_cpus_allowed)
+		p->sched_class->set_cpus_allowed(p, new_mask);
+
+	cpumask_copy(&p->cpus_allowed, new_mask);
+	p->nr_cpus_allowed = cpumask_weight(new_mask);
+}
+
+/*
+ * Change a given task's CPU affinity. Migrate the thread to a
+ * proper CPU and schedule it away if the CPU it's executing on
+ * is removed from the allowed bitmask.
+ *
+ * NOTE: the caller must have a valid reference to the task, the
+ * task must not exit() & deallocate itself prematurely. The
+ * call is not atomic; no spinlocks may be held.
+ */
+int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
+{
+	unsigned long flags;
+	struct rq *rq;
+	unsigned int dest_cpu;
+	int ret = 0;
+
+	rq = task_rq_lock(p, &flags);
+
+	if (cpumask_equal(&p->cpus_allowed, new_mask))
+		goto out;
+
+	if (!cpumask_intersects(new_mask, cpu_active_mask)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	do_set_cpus_allowed(p, new_mask);
+
+	/* Can the task run on the task's current CPU? If so, we're done */
+	if (cpumask_test_cpu(task_cpu(p), new_mask))
+		goto out;
+
+	dest_cpu = cpumask_any_and(cpu_active_mask, new_mask);
+	if (task_running(rq, p) || p->state == TASK_WAKING) {
+		struct migration_arg arg = { p, dest_cpu };
+		/* Need help from migration thread: drop lock and wait. */
+		task_rq_unlock(rq, p, &flags);
+		stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
+		tlb_migrate_finish(p->mm);
+		return 0;
+	} else if (task_on_rq_queued(p)) {
+		/*
+		 * OK, since we're going to drop the lock immediately
+		 * afterwards anyway.
+		 */
+		lockdep_unpin_lock(&rq->lock);
+		rq = move_queued_task(rq, p, dest_cpu);
+		lockdep_pin_lock(&rq->lock);
+	}
+out:
+	task_rq_unlock(rq, p, &flags);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr);
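For reference, the userspace-visible path into this function is the affinity syscall: sched_setaffinity() ends up calling set_cpus_allowed_ptr() on the target task. A minimal, runnable userspace example (standard glibc API, nothing specific to this patch):

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
	cpu_set_t mask;

	CPU_ZERO(&mask);
	CPU_SET(0, &mask);	/* allow CPU 0 only */

	/* pid 0 == calling thread; the kernel side lands in set_cpus_allowed_ptr(). */
	if (sched_setaffinity(0, sizeof(mask), &mask))
		perror("sched_setaffinity");
	else
		printf("restricted to CPU 0; the scheduler migrates us if needed\n");
	return 0;
}

Since the calling thread is running, this corresponds to the task_running() branch above: the stopper thread is asked to push it onto an allowed CPU.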
+
 void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 {
 #ifdef CONFIG_SCHED_DEBUG
@@ -1181,13 +1356,6 @@ out:
 	return ret;
 }
 
-struct migration_arg {
-	struct task_struct *task;
-	int dest_cpu;
-};
-
-static int migration_cpu_stop(void *data);
-
 /*
  * wait_task_inactive - wait for a thread to unschedule.
  *
@@ -1320,9 +1488,7 @@ void kick_process(struct task_struct *p)
 	preempt_enable();
 }
 EXPORT_SYMBOL_GPL(kick_process);
-#endif /* CONFIG_SMP */
 
-#ifdef CONFIG_SMP
 /*
  * ->cpus_allowed is protected by both rq->lock and p->pi_lock
  */
@@ -1402,6 +1568,8 @@ out:
 static inline
 int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
 {
+	lockdep_assert_held(&p->pi_lock);
+
 	if (p->nr_cpus_allowed > 1)
 		cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags);
 
@@ -1427,7 +1595,7 @@ static void update_avg(u64 *avg, u64 sample)
 	s64 diff = sample - *avg;
 	*avg += diff >> 3;
 }
-#endif
+#endif /* CONFIG_SMP */
 
 static void
 ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
@@ -1490,8 +1658,15 @@ ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags)
 
 	p->state = TASK_RUNNING;
 #ifdef CONFIG_SMP
-	if (p->sched_class->task_woken)
+	if (p->sched_class->task_woken) {
+		/*
+		 * Our task @p is fully woken up and running; so it's safe to
+		 * drop the rq->lock, hereafter rq is only used for statistics.
+		 */
+		lockdep_unpin_lock(&rq->lock);
 		p->sched_class->task_woken(rq, p);
+		lockdep_pin_lock(&rq->lock);
+	}
 
 	if (rq->idle_stamp) {
 		u64 delta = rq_clock(rq) - rq->idle_stamp;
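The lockdep_pin_lock()/lockdep_unpin_lock() pairs introduced throughout this patch assert that rq->lock is not dropped behind the scheduler's back; callbacks such as task_woken() that may legitimately drop it are bracketed by an explicit unpin/pin. A minimal sketch of the invariant being enforced (this only models the idea; the real implementation lives in the lockdep core and works on the lock's dep_map):

#include <assert.h>
#include <pthread.h>
#include <stdio.h>

struct pinned_lock {
	pthread_mutex_t mutex;
	int pins;		/* outstanding "pins" on the held lock */
};

static void pl_lock(struct pinned_lock *l)  { pthread_mutex_lock(&l->mutex); }
static void pl_pin(struct pinned_lock *l)   { l->pins++; }
static void pl_unpin(struct pinned_lock *l) { assert(l->pins > 0); l->pins--; }

static void pl_unlock(struct pinned_lock *l)
{
	assert(l->pins == 0);	/* dropping a pinned lock is the bug this catches */
	pthread_mutex_unlock(&l->mutex);
}

int main(void)
{
	struct pinned_lock rq_lock = { PTHREAD_MUTEX_INITIALIZER, 0 };

	pl_lock(&rq_lock);
	pl_pin(&rq_lock);
	/* ... work that must not lose the lock ... */
	pl_unpin(&rq_lock);	/* explicit unpin, as around task_woken() above */
	pl_unlock(&rq_lock);
	printf("ok\n");
	return 0;
}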
@@ -1510,6 +1685,8 @@ ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags)
 static void
 ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags)
 {
+	lockdep_assert_held(&rq->lock);
+
 #ifdef CONFIG_SMP
 	if (p->sched_contributes_to_load)
 		rq->nr_uninterruptible--;
@@ -1554,6 +1731,7 @@ void sched_ttwu_pending(void)
 		return;
 
 	raw_spin_lock_irqsave(&rq->lock, flags);
+	lockdep_pin_lock(&rq->lock);
 
 	while (llist) {
 		p = llist_entry(llist, struct task_struct, wake_entry);
@@ -1561,6 +1739,7 @@ void sched_ttwu_pending(void)
 		ttwu_do_activate(rq, p, 0);
 	}
 
+	lockdep_unpin_lock(&rq->lock);
 	raw_spin_unlock_irqrestore(&rq->lock, flags);
 }
 
@@ -1657,7 +1836,9 @@ static void ttwu_queue(struct task_struct *p, int cpu)
 #endif
 
 	raw_spin_lock(&rq->lock);
+	lockdep_pin_lock(&rq->lock);
 	ttwu_do_activate(rq, p, 0);
+	lockdep_unpin_lock(&rq->lock);
 	raw_spin_unlock(&rq->lock);
 }
 
@@ -1752,9 +1933,17 @@ static void try_to_wake_up_local(struct task_struct *p)
 	lockdep_assert_held(&rq->lock);
 
 	if (!raw_spin_trylock(&p->pi_lock)) {
+		/*
+		 * This is OK, because current is on_cpu, which avoids it being
+		 * picked for load-balance and preemption/IRQs are still
+		 * disabled avoiding further scheduler activity on it and we've
+		 * not yet picked a replacement task.
+		 */
+		lockdep_unpin_lock(&rq->lock);
 		raw_spin_unlock(&rq->lock);
 		raw_spin_lock(&p->pi_lock);
 		raw_spin_lock(&rq->lock);
+		lockdep_pin_lock(&rq->lock);
 	}
 
 	if (!(p->state & TASK_NORMAL))
@@ -2294,23 +2483,35 @@ static struct rq *finish_task_switch(struct task_struct *prev)
 #ifdef CONFIG_SMP
 
 /* rq->lock is NOT held, but preemption is disabled */
-static inline void post_schedule(struct rq *rq)
+static void __balance_callback(struct rq *rq)
 {
-	if (rq->post_schedule) {
-		unsigned long flags;
+	struct callback_head *head, *next;
+	void (*func)(struct rq *rq);
+	unsigned long flags;
 
-		raw_spin_lock_irqsave(&rq->lock, flags);
-		if (rq->curr->sched_class->post_schedule)
-			rq->curr->sched_class->post_schedule(rq);
-		raw_spin_unlock_irqrestore(&rq->lock, flags);
+	raw_spin_lock_irqsave(&rq->lock, flags);
+	head = rq->balance_callback;
+	rq->balance_callback = NULL;
+	while (head) {
+		func = (void (*)(struct rq *))head->func;
+		next = head->next;
+		head->next = NULL;
+		head = next;
 
-		rq->post_schedule = 0;
+		func(rq);
 	}
+	raw_spin_unlock_irqrestore(&rq->lock, flags);
+}
+
+static inline void balance_callback(struct rq *rq)
+{
+	if (unlikely(rq->balance_callback))
+		__balance_callback(rq);
 }
 
 #else
 
-static inline void post_schedule(struct rq *rq)
+static inline void balance_callback(struct rq *rq)
 {
 }
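The list drained by __balance_callback() above replaces the old rq->post_schedule flag: a sched class queues a callback_head on its runqueue while holding rq->lock, and the callback runs later via balance_callback() once it is safe to balance, per the comment at the top of this patch. A self-contained, single-threaded model of that splice-and-run pattern (locking and the struct rq plumbing omitted; names invented for illustration):

#include <stdio.h>

struct cb {
	struct cb *next;
	void (*func)(void);
};

static struct cb *pending;	/* plays the role of rq->balance_callback */

static void queue_cb(struct cb *c)	/* done while "rq->lock" is held */
{
	c->next = pending;
	pending = c;
}

static void run_callbacks(void)		/* the __balance_callback() pattern */
{
	struct cb *head = pending, *next;
	void (*func)(void);

	pending = NULL;			/* detach the whole list, then walk it */
	while (head) {
		func = head->func;
		next = head->next;
		head->next = NULL;	/* node may be re-queued by its owner */
		head = next;
		func();
	}
}

static void push_tasks(void) { printf("push/pull balancing would run here\n"); }

int main(void)
{
	struct cb work = { NULL, push_tasks };

	queue_cb(&work);	/* e.g. queued from a switched_to() method */
	run_callbacks();	/* balance_callback() after check_class_changed() */
	return 0;
}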
 
@@ -2328,7 +2529,7 @@ asmlinkage __visible void schedule_tail(struct task_struct *prev)
 	/* finish_task_switch() drops rq->lock and enables preemtion */
 	preempt_disable();
 	rq = finish_task_switch(prev);
-	post_schedule(rq);
+	balance_callback(rq);
 	preempt_enable();
 
 	if (current->set_child_tid)
@@ -2372,6 +2573,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
 	 * of the scheduler it's an obvious special-case), so we
 	 * do an early lockdep release here:
 	 */
+	lockdep_unpin_lock(&rq->lock);
 	spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
 
 	/* Here we just switch the register state and the stack. */
@@ -2794,6 +2996,7 @@ static void __sched __schedule(void)
 	 */
 	smp_mb__before_spinlock();
 	raw_spin_lock_irq(&rq->lock);
+	lockdep_pin_lock(&rq->lock);
 
 	rq->clock_skip_update <<= 1; /* promote REQ to ACT */
 
@@ -2836,10 +3039,12 @@ static void __sched __schedule(void)
 
 		rq = context_switch(rq, prev, next); /* unlocks the rq */
 		cpu = cpu_of(rq);
-	} else
+	} else {
+		lockdep_unpin_lock(&rq->lock);
 		raw_spin_unlock_irq(&rq->lock);
+	}
 
-	post_schedule(rq);
+	balance_callback(rq);
 }
 
 static inline void sched_submit_work(struct task_struct *tsk)
@@ -3103,7 +3308,11 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
 
 	check_class_changed(rq, p, prev_class, oldprio);
 out_unlock:
+	preempt_disable(); /* avoid rq from going away on us */
 	__task_rq_unlock(rq);
+
+	balance_callback(rq);
+	preempt_enable();
 }
 #endif
 
@@ -3441,7 +3650,7 @@ static bool dl_param_changed(struct task_struct *p,
 
 static int __sched_setscheduler(struct task_struct *p,
 				const struct sched_attr *attr,
-				bool user)
+				bool user, bool pi)
 {
 	int newprio = dl_policy(attr->sched_policy) ? MAX_DL_PRIO - 1 :
 		      MAX_RT_PRIO - 1 - attr->sched_priority;
@@ -3627,18 +3836,20 @@ change:
 	p->sched_reset_on_fork = reset_on_fork;
 	oldprio = p->prio;
 
-	/*
-	 * Take priority boosted tasks into account. If the new
-	 * effective priority is unchanged, we just store the new
-	 * normal parameters and do not touch the scheduler class and
-	 * the runqueue. This will be done when the task deboost
-	 * itself.
-	 */
-	new_effective_prio = rt_mutex_get_effective_prio(p, newprio);
-	if (new_effective_prio == oldprio) {
-		__setscheduler_params(p, attr);
-		task_rq_unlock(rq, p, &flags);
-		return 0;
+	if (pi) {
+		/*
+		 * Take priority boosted tasks into account. If the new
+		 * effective priority is unchanged, we just store the new
+		 * normal parameters and do not touch the scheduler class and
+		 * the runqueue. This will be done when the task deboost
+		 * itself.
+		 */
+		new_effective_prio = rt_mutex_get_effective_prio(p, newprio);
+		if (new_effective_prio == oldprio) {
+			__setscheduler_params(p, attr);
+			task_rq_unlock(rq, p, &flags);
+			return 0;
+		}
 	}
 
 	queued = task_on_rq_queued(p);
@@ -3649,7 +3860,7 @@ change:
 		put_prev_task(rq, p);
 
 	prev_class = p->sched_class;
-	__setscheduler(rq, p, attr, true);
+	__setscheduler(rq, p, attr, pi);
 
 	if (running)
 		p->sched_class->set_curr_task(rq);
@@ -3662,9 +3873,17 @@ change:
 	}
 
 	check_class_changed(rq, p, prev_class, oldprio);
+	preempt_disable(); /* avoid rq from going away on us */
 	task_rq_unlock(rq, p, &flags);
 
-	rt_mutex_adjust_pi(p);
+	if (pi)
+		rt_mutex_adjust_pi(p);
+
+	/*
+	 * Run balance callbacks after we've adjusted the PI chain.
+	 */
+	balance_callback(rq);
+	preempt_enable();
 
 	return 0;
 }
@@ -3685,7 +3904,7 @@ static int _sched_setscheduler(struct task_struct *p, int policy,
 		attr.sched_policy = policy;
 	}
 
-	return __sched_setscheduler(p, &attr, check);
+	return __sched_setscheduler(p, &attr, check, true);
 }
 /**
  * sched_setscheduler - change the scheduling policy and/or RT priority of a thread.
@@ -3706,7 +3925,7 @@ EXPORT_SYMBOL_GPL(sched_setscheduler);
 
 int sched_setattr(struct task_struct *p, const struct sched_attr *attr)
 {
-	return __sched_setscheduler(p, attr, true);
+	return __sched_setscheduler(p, attr, true, true);
 }
 EXPORT_SYMBOL_GPL(sched_setattr);
 
@@ -4754,149 +4973,6 @@ out:
 }
 
 #ifdef CONFIG_SMP
-/*
- * move_queued_task - move a queued task to new rq.
- *
- * Returns (locked) new rq. Old rq's lock is released.
- */
-static struct rq *move_queued_task(struct task_struct *p, int new_cpu)
-{
-	struct rq *rq = task_rq(p);
-
-	lockdep_assert_held(&rq->lock);
-
-	dequeue_task(rq, p, 0);
-	p->on_rq = TASK_ON_RQ_MIGRATING;
-	set_task_cpu(p, new_cpu);
-	raw_spin_unlock(&rq->lock);
-
-	rq = cpu_rq(new_cpu);
-
-	raw_spin_lock(&rq->lock);
-	BUG_ON(task_cpu(p) != new_cpu);
-	p->on_rq = TASK_ON_RQ_QUEUED;
-	enqueue_task(rq, p, 0);
-	check_preempt_curr(rq, p, 0);
-
-	return rq;
-}
-
-void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
-{
-	if (p->sched_class->set_cpus_allowed)
-		p->sched_class->set_cpus_allowed(p, new_mask);
-
-	cpumask_copy(&p->cpus_allowed, new_mask);
-	p->nr_cpus_allowed = cpumask_weight(new_mask);
-}
-
-/*
- * This is how migration works:
- *
- * 1) we invoke migration_cpu_stop() on the target CPU using
- *    stop_one_cpu().
- * 2) stopper starts to run (implicitly forcing the migrated thread
- *    off the CPU)
- * 3) it checks whether the migrated task is still in the wrong runqueue.
- * 4) if it's in the wrong runqueue then the migration thread removes
- *    it and puts it into the right queue.
- * 5) stopper completes and stop_one_cpu() returns and the migration
- *    is done.
- */
-
-/*
- * Change a given task's CPU affinity. Migrate the thread to a
- * proper CPU and schedule it away if the CPU it's executing on
- * is removed from the allowed bitmask.
- *
- * NOTE: the caller must have a valid reference to the task, the
- * task must not exit() & deallocate itself prematurely. The
- * call is not atomic; no spinlocks may be held.
- */
-int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
-{
-	unsigned long flags;
-	struct rq *rq;
-	unsigned int dest_cpu;
-	int ret = 0;
-
-	rq = task_rq_lock(p, &flags);
-
-	if (cpumask_equal(&p->cpus_allowed, new_mask))
-		goto out;
-
-	if (!cpumask_intersects(new_mask, cpu_active_mask)) {
-		ret = -EINVAL;
-		goto out;
-	}
-
-	do_set_cpus_allowed(p, new_mask);
-
-	/* Can the task run on the task's current CPU? If so, we're done */
-	if (cpumask_test_cpu(task_cpu(p), new_mask))
-		goto out;
-
-	dest_cpu = cpumask_any_and(cpu_active_mask, new_mask);
-	if (task_running(rq, p) || p->state == TASK_WAKING) {
-		struct migration_arg arg = { p, dest_cpu };
-		/* Need help from migration thread: drop lock and wait. */
-		task_rq_unlock(rq, p, &flags);
-		stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
-		tlb_migrate_finish(p->mm);
-		return 0;
-	} else if (task_on_rq_queued(p))
-		rq = move_queued_task(p, dest_cpu);
-out:
-	task_rq_unlock(rq, p, &flags);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr);
-
-/*
- * Move (not current) task off this cpu, onto dest cpu. We're doing
- * this because either it can't run here any more (set_cpus_allowed()
- * away from this CPU, or CPU going down), or because we're
- * attempting to rebalance this task on exec (sched_exec).
- *
- * So we race with normal scheduler movements, but that's OK, as long
- * as the task is no longer on this CPU.
- *
- * Returns non-zero if task was successfully migrated.
- */
-static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
-{
-	struct rq *rq;
-	int ret = 0;
-
-	if (unlikely(!cpu_active(dest_cpu)))
-		return ret;
-
-	rq = cpu_rq(src_cpu);
-
-	raw_spin_lock(&p->pi_lock);
-	raw_spin_lock(&rq->lock);
-	/* Already moved. */
-	if (task_cpu(p) != src_cpu)
-		goto done;
-
-	/* Affinity changed (again). */
-	if (!cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p)))
-		goto fail;
-
-	/*
-	 * If we're not on a rq, the next wake-up will ensure we're
-	 * placed properly.
-	 */
-	if (task_on_rq_queued(p))
-		rq = move_queued_task(p, dest_cpu);
-done:
-	ret = 1;
-fail:
-	raw_spin_unlock(&rq->lock);
-	raw_spin_unlock(&p->pi_lock);
-	return ret;
-}
 
 #ifdef CONFIG_NUMA_BALANCING
 /* Migrate current task p to target_cpu */
@@ -4944,35 +5020,9 @@ void sched_setnuma(struct task_struct *p, int nid)
 	enqueue_task(rq, p, 0);
 	task_rq_unlock(rq, p, &flags);
 }
-#endif
-
-/*
- * migration_cpu_stop - this will be executed by a highprio stopper thread
- * and performs thread migration by bumping thread off CPU then
- * 'pushing' onto another runqueue.
- */
-static int migration_cpu_stop(void *data)
-{
-	struct migration_arg *arg = data;
-
-	/*
-	 * The original target cpu might have gone down and we might
-	 * be on another cpu but it doesn't matter.
-	 */
-	local_irq_disable();
-	/*
-	 * We need to explicitly wake pending tasks before running
-	 * __migrate_task() such that we will not miss enforcing cpus_allowed
-	 * during wakeups, see set_cpus_allowed_ptr()'s TASK_WAKING test.
-	 */
-	sched_ttwu_pending();
-	__migrate_task(arg->task, raw_smp_processor_id(), arg->dest_cpu);
-	local_irq_enable();
-	return 0;
-}
+#endif /* CONFIG_NUMA_BALANCING */
 
 #ifdef CONFIG_HOTPLUG_CPU
-
 /*
  * Ensures that the idle task is using init_mm right before its cpu goes
  * offline.
@@ -5028,9 +5078,9 @@ static struct task_struct fake_task = {
  * there's no concurrency possible, we hold the required locks anyway
  * because of lock validation efforts.
  */
-static void migrate_tasks(unsigned int dead_cpu)
+static void migrate_tasks(struct rq *dead_rq)
 {
-	struct rq *rq = cpu_rq(dead_cpu);
+	struct rq *rq = dead_rq;
 	struct task_struct *next, *stop = rq->stop;
 	int dest_cpu;
 
@@ -5052,7 +5102,7 @@ static void migrate_tasks(unsigned int dead_cpu)
 	 */
 	update_rq_clock(rq);
 
-	for ( ; ; ) {
+	for (;;) {
 		/*
 		 * There's this thread running, bail when that's the only
 		 * remaining thread.
@@ -5060,22 +5110,29 @@ static void migrate_tasks(unsigned int dead_cpu)
 		if (rq->nr_running == 1)
 			break;
 
+		/*
+		 * Ensure rq->lock covers the entire task selection
+		 * until the migration.
+		 */
+		lockdep_pin_lock(&rq->lock);
 		next = pick_next_task(rq, &fake_task);
 		BUG_ON(!next);
 		next->sched_class->put_prev_task(rq, next);
 
 		/* Find suitable destination for @next, with force if needed. */
-		dest_cpu = select_fallback_rq(dead_cpu, next);
-		raw_spin_unlock(&rq->lock);
-
-		__migrate_task(next, dead_cpu, dest_cpu);
-
-		raw_spin_lock(&rq->lock);
+		dest_cpu = select_fallback_rq(dead_rq->cpu, next);
+
+		lockdep_unpin_lock(&rq->lock);
+		rq = __migrate_task(rq, next, dest_cpu);
+		if (rq != dead_rq) {
+			raw_spin_unlock(&rq->lock);
+			rq = dead_rq;
+			raw_spin_lock(&rq->lock);
+		}
 	}
 
 	rq->stop = stop;
 }
-
 #endif /* CONFIG_HOTPLUG_CPU */
 
 #if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)
@@ -5254,7 +5311,7 @@ static void register_sched_domain_sysctl(void)
 static void unregister_sched_domain_sysctl(void)
 {
 }
-#endif
+#endif /* CONFIG_SCHED_DEBUG && CONFIG_SYSCTL */
 
 static void set_rq_online(struct rq *rq)
 {
@@ -5323,7 +5380,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
 			BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
 			set_rq_offline(rq);
 		}
-		migrate_tasks(cpu);
+		migrate_tasks(rq);
 		BUG_ON(rq->nr_running != 1); /* the migration thread */
 		raw_spin_unlock_irqrestore(&rq->lock, flags);
 		break;
@@ -5401,9 +5458,6 @@ static int __init migration_init(void)
 	return 0;
 }
 early_initcall(migration_init);
-#endif
-
-#ifdef CONFIG_SMP
 
 static cpumask_var_t sched_domains_tmpmask; /* sched_domains_mutex */
 
@@ -6629,7 +6683,7 @@ static int __sdt_alloc(const struct cpumask *cpu_map)
 		struct sched_group *sg;
 		struct sched_group_capacity *sgc;
 
-		sd = kzalloc_node(sizeof(struct sched_domain) + cpumask_size(),
+		sd = kzalloc_node(sizeof(struct sched_domain) + cpumask_size(),
 				GFP_KERNEL, cpu_to_node(j));
 		if (!sd)
 			return -ENOMEM;
@@ -7235,7 +7289,7 @@ void __init sched_init(void)
 		rq->sd = NULL;
 		rq->rd = NULL;
 		rq->cpu_capacity = rq->cpu_capacity_orig = SCHED_CAPACITY_SCALE;
-		rq->post_schedule = 0;
+		rq->balance_callback = NULL;
 		rq->active_balance = 0;
 		rq->next_balance = jiffies;
 		rq->push_cpu = 0;
@@ -7365,32 +7419,12 @@ EXPORT_SYMBOL(___might_sleep);
 #endif
 
 #ifdef CONFIG_MAGIC_SYSRQ
-static void normalize_task(struct rq *rq, struct task_struct *p)
+void normalize_rt_tasks(void)
 {
-	const struct sched_class *prev_class = p->sched_class;
+	struct task_struct *g, *p;
 	struct sched_attr attr = {
 		.sched_policy = SCHED_NORMAL,
 	};
-	int old_prio = p->prio;
-	int queued;
-
-	queued = task_on_rq_queued(p);
-	if (queued)
-		dequeue_task(rq, p, 0);
-	__setscheduler(rq, p, &attr, false);
-	if (queued) {
-		enqueue_task(rq, p, 0);
-		resched_curr(rq);
-	}
-
-	check_class_changed(rq, p, prev_class, old_prio);
-}
-
-void normalize_rt_tasks(void)
-{
-	struct task_struct *g, *p;
-	unsigned long flags;
-	struct rq *rq;
 
 	read_lock(&tasklist_lock);
 	for_each_process_thread(g, p) {
@@ -7417,9 +7451,7 @@ void normalize_rt_tasks(void)
 			continue;
 		}
 
-		rq = task_rq_lock(p, &flags);
-		normalize_task(rq, p);
-		task_rq_unlock(rq, p, &flags);
+		__sched_setscheduler(p, &attr, false, false);
 	}
 	read_unlock(&tasklist_lock);
 }