@@ -231,7 +231,7 @@ static void destroy_rt_bandwidth(struct rt_bandwidth *rt_b)
#endif
/*
- * sched_domains_mutex serializes calls to arch_init_sched_domains,
+ * sched_domains_mutex serializes calls to init_sched_domains,
* detach_destroy_domains and partition_sched_domains.
*/
static DEFINE_MUTEX(sched_domains_mutex);
@@ -312,6 +312,9 @@ struct cfs_rq {
|
|
|
|
|
|
u64 exec_clock;
|
|
|
u64 min_vruntime;
|
|
|
+#ifndef CONFIG_64BIT
|
|
|
+ u64 min_vruntime_copy;
|
|
|
+#endif
|
|
|
|
|
|
struct rb_root tasks_timeline;
|
|
|
struct rb_node *rb_leftmost;
|
|
@@ -325,7 +328,9 @@ struct cfs_rq {
|
|
|
*/
|
|
|
struct sched_entity *curr, *next, *last, *skip;
|
|
|
|
|
|
+#ifdef CONFIG_SCHED_DEBUG
|
|
|
unsigned int nr_spread_over;
|
|
|
+#endif
|
|
|
|
|
|
#ifdef CONFIG_FAIR_GROUP_SCHED
|
|
|
struct rq *rq; /* cpu runqueue to which this cfs_rq is attached */
|
|
@@ -417,6 +422,7 @@ struct rt_rq {
|
|
|
*/
|
|
|
struct root_domain {
|
|
|
atomic_t refcount;
|
|
|
+ struct rcu_head rcu;
|
|
|
cpumask_var_t span;
|
|
|
cpumask_var_t online;
|
|
|
|
|
@@ -460,7 +466,7 @@ struct rq {
|
|
|
u64 nohz_stamp;
|
|
|
unsigned char nohz_balance_kick;
|
|
|
#endif
|
|
|
- unsigned int skip_clock_update;
|
|
|
+ int skip_clock_update;
|
|
|
|
|
|
/* capture load from *all* tasks on this cpu: */
|
|
|
struct load_weight load;
|
|
@@ -553,6 +559,10 @@ struct rq {
|
|
|
unsigned int ttwu_count;
|
|
|
unsigned int ttwu_local;
|
|
|
#endif
|
|
|
+
|
|
|
+#ifdef CONFIG_SMP
|
|
|
+ struct task_struct *wake_list;
|
|
|
+#endif
|
|
|
};
|
|
|
|
|
|
static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
|
|
@@ -571,7 +581,7 @@ static inline int cpu_of(struct rq *rq)
|
|
|
|
|
|
#define rcu_dereference_check_sched_domain(p) \
|
|
|
rcu_dereference_check((p), \
|
|
|
- rcu_read_lock_sched_held() || \
|
|
|
+ rcu_read_lock_held() || \
|
|
|
lockdep_is_held(&sched_domains_mutex))
|
|
|
|
|
|
/*
|
|
@@ -596,7 +606,7 @@ static inline int cpu_of(struct rq *rq)
|
|
|
* Return the group to which this tasks belongs.
|
|
|
*
|
|
|
* We use task_subsys_state_check() and extend the RCU verification
|
|
|
- * with lockdep_is_held(&task_rq(p)->lock) because cpu_cgroup_attach()
|
|
|
+ * with lockdep_is_held(&p->pi_lock) because cpu_cgroup_attach()
|
|
|
* holds that lock for each task it moves into the cgroup. Therefore
|
|
|
* by holding that lock, we pin the task to the current cgroup.
|
|
|
*/
|
|
@@ -606,7 +616,7 @@ static inline struct task_group *task_group(struct task_struct *p)
|
|
|
struct cgroup_subsys_state *css;
|
|
|
|
|
|
css = task_subsys_state_check(p, cpu_cgroup_subsys_id,
|
|
|
- lockdep_is_held(&task_rq(p)->lock));
|
|
|
+ lockdep_is_held(&p->pi_lock));
|
|
|
tg = container_of(css, struct task_group, css);
|
|
|
|
|
|
return autogroup_task_group(p, tg);
|
|
@@ -642,7 +652,7 @@ static void update_rq_clock(struct rq *rq)
|
|
|
{
|
|
|
s64 delta;
|
|
|
|
|
|
- if (rq->skip_clock_update)
|
|
|
+ if (rq->skip_clock_update > 0)
|
|
|
return;
|
|
|
|
|
|
delta = sched_clock_cpu(cpu_of(rq)) - rq->clock;
|
|
@@ -838,18 +848,39 @@ static inline int task_current(struct rq *rq, struct task_struct *p)
|
|
|
return rq->curr == p;
|
|
|
}
|
|
|
|
|
|
-#ifndef __ARCH_WANT_UNLOCKED_CTXSW
|
|
|
static inline int task_running(struct rq *rq, struct task_struct *p)
|
|
|
{
|
|
|
+#ifdef CONFIG_SMP
|
|
|
+ return p->on_cpu;
|
|
|
+#else
|
|
|
return task_current(rq, p);
|
|
|
+#endif
|
|
|
}
|
|
|
|
|
|
+#ifndef __ARCH_WANT_UNLOCKED_CTXSW
|
|
|
static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
|
|
|
{
|
|
|
+#ifdef CONFIG_SMP
|
|
|
+ /*
|
|
|
+ * We can optimise this out completely for !SMP, because the
|
|
|
+ * SMP rebalancing from interrupt is the only thing that cares
|
|
|
+ * here.
|
|
|
+ */
|
|
|
+ next->on_cpu = 1;
|
|
|
+#endif
|
|
|
}
|
|
|
|
|
|
static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
|
|
|
{
|
|
|
+#ifdef CONFIG_SMP
|
|
|
+ /*
|
|
|
+ * After ->on_cpu is cleared, the task can be moved to a different CPU.
|
|
|
+ * We must ensure this doesn't happen until the switch is completely
|
|
|
+ * finished.
|
|
|
+ */
|
|
|
+ smp_wmb();
|
|
|
+ prev->on_cpu = 0;
|
|
|
+#endif
|
|
|
#ifdef CONFIG_DEBUG_SPINLOCK
|
|
|
/* this is a valid case when another task releases the spinlock */
|
|
|
rq->lock.owner = current;
|
|
@@ -865,15 +896,6 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
|
|
|
}
|
|
|
|
|
|
#else /* __ARCH_WANT_UNLOCKED_CTXSW */
|
|
|
-static inline int task_running(struct rq *rq, struct task_struct *p)
|
|
|
-{
|
|
|
-#ifdef CONFIG_SMP
|
|
|
- return p->oncpu;
|
|
|
-#else
|
|
|
- return task_current(rq, p);
|
|
|
-#endif
|
|
|
-}
|
|
|
-
|
|
|
static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
|
|
|
{
|
|
|
#ifdef CONFIG_SMP
|
|
@@ -882,7 +904,7 @@ static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
|
|
|
* SMP rebalancing from interrupt is the only thing that cares
|
|
|
* here.
|
|
|
*/
|
|
|
- next->oncpu = 1;
|
|
|
+ next->on_cpu = 1;
|
|
|
#endif
|
|
|
#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
|
|
|
raw_spin_unlock_irq(&rq->lock);
|
|
@@ -895,12 +917,12 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
|
|
|
{
|
|
|
#ifdef CONFIG_SMP
|
|
|
/*
|
|
|
- * After ->oncpu is cleared, the task can be moved to a different CPU.
|
|
|
+ * After ->on_cpu is cleared, the task can be moved to a different CPU.
|
|
|
* We must ensure this doesn't happen until the switch is completely
|
|
|
* finished.
|
|
|
*/
|
|
|
smp_wmb();
|
|
|
- prev->oncpu = 0;
|
|
|
+ prev->on_cpu = 0;
|
|
|
#endif
|
|
|
#ifndef __ARCH_WANT_INTERRUPTS_ON_CTXSW
|
|
|
local_irq_enable();
|
|
@@ -909,23 +931,15 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
|
|
|
#endif /* __ARCH_WANT_UNLOCKED_CTXSW */
|
|
|
|
|
|
/*
|
|
|
- * Check whether the task is waking, we use this to synchronize ->cpus_allowed
|
|
|
- * against ttwu().
|
|
|
- */
|
|
|
-static inline int task_is_waking(struct task_struct *p)
|
|
|
-{
|
|
|
- return unlikely(p->state == TASK_WAKING);
|
|
|
-}
|
|
|
-
|
|
|
-/*
|
|
|
- * __task_rq_lock - lock the runqueue a given task resides on.
|
|
|
- * Must be called interrupts disabled.
|
|
|
+ * __task_rq_lock - lock the rq @p resides on.
|
|
|
*/
|
|
|
static inline struct rq *__task_rq_lock(struct task_struct *p)
|
|
|
__acquires(rq->lock)
|
|
|
{
|
|
|
struct rq *rq;
|
|
|
|
|
|
+ lockdep_assert_held(&p->pi_lock);
|
|
|
+
|
|
|
for (;;) {
|
|
|
rq = task_rq(p);
|
|
|
raw_spin_lock(&rq->lock);
|
|
@@ -936,22 +950,22 @@ static inline struct rq *__task_rq_lock(struct task_struct *p)
|
|
|
}
|
|
|
|
|
|
/*
|
|
|
- * task_rq_lock - lock the runqueue a given task resides on and disable
|
|
|
- * interrupts. Note the ordering: we can safely lookup the task_rq without
|
|
|
- * explicitly disabling preemption.
|
|
|
+ * task_rq_lock - lock p->pi_lock and lock the rq @p resides on.
|
|
|
*/
|
|
|
static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
|
|
|
+ __acquires(p->pi_lock)
|
|
|
__acquires(rq->lock)
|
|
|
{
|
|
|
struct rq *rq;
|
|
|
|
|
|
for (;;) {
|
|
|
- local_irq_save(*flags);
|
|
|
+ raw_spin_lock_irqsave(&p->pi_lock, *flags);
|
|
|
rq = task_rq(p);
|
|
|
raw_spin_lock(&rq->lock);
|
|
|
if (likely(rq == task_rq(p)))
|
|
|
return rq;
|
|
|
- raw_spin_unlock_irqrestore(&rq->lock, *flags);
|
|
|
+ raw_spin_unlock(&rq->lock);
|
|
|
+ raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
|
|
|
}
|
|
|
}
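
The reworked task_rq_lock() above nests p->pi_lock outside rq->lock and re-checks task_rq(p) after taking the inner lock, because the task may have been migrated between reading task_rq(p) and acquiring it. A minimal user-space sketch of that lock-outer, lock-inner, revalidate-or-retry pattern, with purely hypothetical queue/item names and pthread mutexes instead of raw spinlocks (and none of the interrupt disabling the kernel needs), could look like this:

/* Sketch only: the same "lock outer, lock inner, revalidate" pattern in
 * user space, with hypothetical names. */
#include <pthread.h>
#include <stdatomic.h>

struct queue {
        pthread_mutex_t lock;                   /* plays the role of rq->lock */
};

struct item {
        pthread_mutex_t pi_lock;                /* outer lock, like p->pi_lock */
        _Atomic(struct queue *) cur;            /* which queue the item is on */
};

/* Caller already holds it->pi_lock, which keeps concurrent wakeups away. */
static struct queue *item_queue_lock(struct item *it)
{
        for (;;) {
                struct queue *q = atomic_load(&it->cur);

                pthread_mutex_lock(&q->lock);
                if (q == atomic_load(&it->cur))
                        return q;               /* still there: both locks held */
                pthread_mutex_unlock(&q->lock); /* migrated meanwhile: retry */
        }
}

The outer lock only has to keep wakeups and migrations of the item serialised; the revalidation after taking the inner lock is what makes holding the pair meaningful.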
@@ -961,10 +975,13 @@ static void __task_rq_unlock(struct rq *rq)
|
|
|
raw_spin_unlock(&rq->lock);
|
|
|
}
|
|
|
|
|
|
-static inline void task_rq_unlock(struct rq *rq, unsigned long *flags)
|
|
|
+static inline void
|
|
|
+task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags)
|
|
|
__releases(rq->lock)
|
|
|
+ __releases(p->pi_lock)
|
|
|
{
|
|
|
- raw_spin_unlock_irqrestore(&rq->lock, *flags);
|
|
|
+ raw_spin_unlock(&rq->lock);
|
|
|
+ raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
|
|
|
}
|
|
|
|
|
|
/*
|
|
@@ -1193,11 +1210,17 @@ int get_nohz_timer_target(void)
|
|
|
int i;
|
|
|
struct sched_domain *sd;
|
|
|
|
|
|
+ rcu_read_lock();
|
|
|
for_each_domain(cpu, sd) {
|
|
|
- for_each_cpu(i, sched_domain_span(sd))
|
|
|
- if (!idle_cpu(i))
|
|
|
- return i;
|
|
|
+ for_each_cpu(i, sched_domain_span(sd)) {
|
|
|
+ if (!idle_cpu(i)) {
|
|
|
+ cpu = i;
|
|
|
+ goto unlock;
|
|
|
+ }
|
|
|
+ }
|
|
|
}
|
|
|
+unlock:
|
|
|
+ rcu_read_unlock();
|
|
|
return cpu;
|
|
|
}
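
get_nohz_timer_target() now traverses the domain hierarchy under rcu_read_lock(), matching the later hunks of this patch that free old sched_domain and root_domain structures through RCU callbacks. A rough user-space analogue of that reader/updater split, assuming liburcu's default flavour (urcu.h, linked with -lurcu) and invented domain/span names, might be:

/* Sketch only: liburcu, invented names; every thread involved must have
 * called rcu_register_thread() beforehand. */
#include <urcu.h>
#include <stdlib.h>

struct domain {
        struct domain *parent;
        int span;
};

static struct domain *top_domain;       /* published with rcu_assign_pointer() */

/* Reader: walk the hierarchy without taking any lock. */
static int walk_domains(void)
{
        struct domain *d;
        int last = -1;

        rcu_read_lock();
        for (d = rcu_dereference(top_domain); d; d = rcu_dereference(d->parent))
                last = d->span;
        rcu_read_unlock();

        return last;
}

/* Updater: publish a new hierarchy, wait out the readers, free the old one. */
static void replace_domains(struct domain *new_top)
{
        struct domain *old = top_domain;

        rcu_assign_pointer(top_domain, new_top);
        synchronize_rcu();              /* the patch defers this via call_rcu() */

        while (old) {
                struct domain *parent = old->parent;
                free(old);
                old = parent;
        }
}

The kernel defers the kfree() with call_rcu() so the update path does not have to wait for all readers to finish; the synchronize_rcu() above is just the simplest way to state the same guarantee.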
/*
|
|
@@ -1307,15 +1330,15 @@ calc_delta_mine(unsigned long delta_exec, unsigned long weight,
|
|
|
{
|
|
|
u64 tmp;
|
|
|
|
|
|
+ tmp = (u64)delta_exec * weight;
|
|
|
+
|
|
|
if (!lw->inv_weight) {
|
|
|
if (BITS_PER_LONG > 32 && unlikely(lw->weight >= WMULT_CONST))
|
|
|
lw->inv_weight = 1;
|
|
|
else
|
|
|
- lw->inv_weight = 1 + (WMULT_CONST-lw->weight/2)
|
|
|
- / (lw->weight+1);
|
|
|
+ lw->inv_weight = WMULT_CONST / lw->weight;
|
|
|
}
|
|
|
|
|
|
- tmp = (u64)delta_exec * weight;
|
|
|
/*
|
|
|
* Check whether we'd overflow the 64-bit multiplication:
|
|
|
*/
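
With the new initialisation, inv_weight is simply WMULT_CONST / lw->weight, so once it is cached the scaled value (delta_exec * weight * inv_weight) >> WMULT_SHIFT approximates delta_exec * weight / lw->weight using only multiplies and a shift. A stand-alone check of that arithmetic, assuming WMULT_CONST is roughly 2^32 and WMULT_SHIFT is 32 as in this version of sched.c:

/* Sketch only: stand-alone check of the fixed-point scaling above. */
#include <stdint.h>
#include <stdio.h>

#define WMULT_CONST     (1ULL << 32)    /* assumption: about 2^32 */
#define WMULT_SHIFT     32

static uint64_t scale(uint64_t delta_exec, uint64_t weight, uint64_t lw_weight)
{
        uint64_t inv_weight = WMULT_CONST / lw_weight;  /* new, simpler rounding */

        /* calc_delta_mine() additionally guards against 64-bit overflow here. */
        return (delta_exec * weight * inv_weight) >> WMULT_SHIFT;
}

int main(void)
{
        /* 1ms of runtime for an entity of weight 1024 on a queue of total
         * weight 3072: it is charged about a third, 333333 here. */
        printf("%llu\n", (unsigned long long)scale(1000000, 1024, 3072));
        return 0;
}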
@@ -1773,7 +1796,6 @@ static void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
|
|
|
update_rq_clock(rq);
|
|
|
sched_info_queued(p);
|
|
|
p->sched_class->enqueue_task(rq, p, flags);
|
|
|
- p->se.on_rq = 1;
|
|
|
}
|
|
|
|
|
|
static void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
|
|
@@ -1781,7 +1803,6 @@ static void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
|
|
|
update_rq_clock(rq);
|
|
|
sched_info_dequeued(p);
|
|
|
p->sched_class->dequeue_task(rq, p, flags);
|
|
|
- p->se.on_rq = 0;
|
|
|
}
|
|
|
|
|
|
/*
|
|
@@ -2116,7 +2137,7 @@ static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
|
|
|
* A queue event has occurred, and we're going to schedule. In
|
|
|
* this case, we can save a useless back to back clock update.
|
|
|
*/
|
|
|
- if (rq->curr->se.on_rq && test_tsk_need_resched(rq->curr))
|
|
|
+ if (rq->curr->on_rq && test_tsk_need_resched(rq->curr))
|
|
|
rq->skip_clock_update = 1;
|
|
|
}
|
|
|
|
|
@@ -2162,6 +2183,11 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
|
|
|
*/
|
|
|
WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING &&
|
|
|
!(task_thread_info(p)->preempt_count & PREEMPT_ACTIVE));
|
|
|
+
|
|
|
+#ifdef CONFIG_LOCKDEP
|
|
|
+ WARN_ON_ONCE(debug_locks && !(lockdep_is_held(&p->pi_lock) ||
|
|
|
+ lockdep_is_held(&task_rq(p)->lock)));
|
|
|
+#endif
|
|
|
#endif
|
|
|
|
|
|
trace_sched_migrate_task(p, new_cpu);
|
|
@@ -2181,19 +2207,6 @@ struct migration_arg {
|
|
|
|
|
|
static int migration_cpu_stop(void *data);
|
|
|
|
|
|
-/*
|
|
|
- * The task's runqueue lock must be held.
|
|
|
- * Returns true if you have to wait for migration thread.
|
|
|
- */
|
|
|
-static bool migrate_task(struct task_struct *p, struct rq *rq)
|
|
|
-{
|
|
|
- /*
|
|
|
- * If the task is not on a runqueue (and not running), then
|
|
|
- * the next wake-up will properly place the task.
|
|
|
- */
|
|
|
- return p->se.on_rq || task_running(rq, p);
|
|
|
-}
|
|
|
-
|
|
|
/*
|
|
|
* wait_task_inactive - wait for a thread to unschedule.
|
|
|
*
|
|
@@ -2251,11 +2264,11 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
|
|
|
rq = task_rq_lock(p, &flags);
|
|
|
trace_sched_wait_task(p);
|
|
|
running = task_running(rq, p);
|
|
|
- on_rq = p->se.on_rq;
|
|
|
+ on_rq = p->on_rq;
|
|
|
ncsw = 0;
|
|
|
if (!match_state || p->state == match_state)
|
|
|
ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
|
|
|
- task_rq_unlock(rq, &flags);
|
|
|
+ task_rq_unlock(rq, p, &flags);
|
|
|
|
|
|
/*
|
|
|
* If it changed from the expected state, bail out now.
|
|
@@ -2330,7 +2343,7 @@ EXPORT_SYMBOL_GPL(kick_process);
|
|
|
|
|
|
#ifdef CONFIG_SMP
|
|
|
/*
|
|
|
- * ->cpus_allowed is protected by either TASK_WAKING or rq->lock held.
|
|
|
+ * ->cpus_allowed is protected by both rq->lock and p->pi_lock
|
|
|
*/
|
|
|
static int select_fallback_rq(int cpu, struct task_struct *p)
|
|
|
{
|
|
@@ -2363,12 +2376,12 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
|
|
|
}
|
|
|
|
|
|
/*
|
|
|
- * The caller (fork, wakeup) owns TASK_WAKING, ->cpus_allowed is stable.
|
|
|
+ * The caller (fork, wakeup) owns p->pi_lock, ->cpus_allowed is stable.
|
|
|
*/
|
|
|
static inline
|
|
|
-int select_task_rq(struct rq *rq, struct task_struct *p, int sd_flags, int wake_flags)
|
|
|
+int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags)
|
|
|
{
|
|
|
- int cpu = p->sched_class->select_task_rq(rq, p, sd_flags, wake_flags);
|
|
|
+ int cpu = p->sched_class->select_task_rq(p, sd_flags, wake_flags);
|
|
|
|
|
|
/*
|
|
|
* In order not to call set_task_cpu() on a blocking task we need
|
|
@@ -2394,27 +2407,62 @@ static void update_avg(u64 *avg, u64 sample)
|
|
|
}
|
|
|
#endif
|
|
|
|
|
|
-static inline void ttwu_activate(struct task_struct *p, struct rq *rq,
|
|
|
- bool is_sync, bool is_migrate, bool is_local,
|
|
|
- unsigned long en_flags)
|
|
|
+static void
|
|
|
+ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
|
|
|
{
|
|
|
+#ifdef CONFIG_SCHEDSTATS
|
|
|
+ struct rq *rq = this_rq();
|
|
|
+
|
|
|
+#ifdef CONFIG_SMP
|
|
|
+ int this_cpu = smp_processor_id();
|
|
|
+
|
|
|
+ if (cpu == this_cpu) {
|
|
|
+ schedstat_inc(rq, ttwu_local);
|
|
|
+ schedstat_inc(p, se.statistics.nr_wakeups_local);
|
|
|
+ } else {
|
|
|
+ struct sched_domain *sd;
|
|
|
+
|
|
|
+ schedstat_inc(p, se.statistics.nr_wakeups_remote);
|
|
|
+ rcu_read_lock();
|
|
|
+ for_each_domain(this_cpu, sd) {
|
|
|
+ if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
|
|
|
+ schedstat_inc(sd, ttwu_wake_remote);
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ rcu_read_unlock();
|
|
|
+ }
|
|
|
+#endif /* CONFIG_SMP */
|
|
|
+
|
|
|
+ schedstat_inc(rq, ttwu_count);
|
|
|
schedstat_inc(p, se.statistics.nr_wakeups);
|
|
|
- if (is_sync)
|
|
|
+
|
|
|
+ if (wake_flags & WF_SYNC)
|
|
|
schedstat_inc(p, se.statistics.nr_wakeups_sync);
|
|
|
- if (is_migrate)
|
|
|
+
|
|
|
+ if (cpu != task_cpu(p))
|
|
|
schedstat_inc(p, se.statistics.nr_wakeups_migrate);
|
|
|
- if (is_local)
|
|
|
- schedstat_inc(p, se.statistics.nr_wakeups_local);
|
|
|
- else
|
|
|
- schedstat_inc(p, se.statistics.nr_wakeups_remote);
|
|
|
|
|
|
+#endif /* CONFIG_SCHEDSTATS */
|
|
|
+}
|
|
|
+
|
|
|
+static void ttwu_activate(struct rq *rq, struct task_struct *p, int en_flags)
|
|
|
+{
|
|
|
activate_task(rq, p, en_flags);
|
|
|
+ p->on_rq = 1;
|
|
|
+
|
|
|
+ /* if a worker is waking up, notify workqueue */
|
|
|
+ if (p->flags & PF_WQ_WORKER)
|
|
|
+ wq_worker_waking_up(p, cpu_of(rq));
|
|
|
}
|
|
|
|
|
|
-static inline void ttwu_post_activation(struct task_struct *p, struct rq *rq,
|
|
|
- int wake_flags, bool success)
|
|
|
+/*
|
|
|
+ * Mark the task runnable and perform wakeup-preemption.
|
|
|
+ */
|
|
|
+static void
|
|
|
+ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags)
|
|
|
{
|
|
|
- trace_sched_wakeup(p, success);
|
|
|
+ trace_sched_wakeup(p, true);
|
|
|
check_preempt_curr(rq, p, wake_flags);
|
|
|
|
|
|
p->state = TASK_RUNNING;
|
|
@@ -2433,9 +2481,99 @@ static inline void ttwu_post_activation(struct task_struct *p, struct rq *rq,
|
|
|
rq->idle_stamp = 0;
|
|
|
}
|
|
|
#endif
|
|
|
- /* if a worker is waking up, notify workqueue */
|
|
|
- if ((p->flags & PF_WQ_WORKER) && success)
|
|
|
- wq_worker_waking_up(p, cpu_of(rq));
|
|
|
+}
|
|
|
+
|
|
|
+static void
|
|
|
+ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags)
|
|
|
+{
|
|
|
+#ifdef CONFIG_SMP
|
|
|
+ if (p->sched_contributes_to_load)
|
|
|
+ rq->nr_uninterruptible--;
|
|
|
+#endif
|
|
|
+
|
|
|
+ ttwu_activate(rq, p, ENQUEUE_WAKEUP | ENQUEUE_WAKING);
|
|
|
+ ttwu_do_wakeup(rq, p, wake_flags);
|
|
|
+}
|
|
|
+
|
|
|
+/*
+ * Called in case the task @p isn't fully descheduled from its runqueue;
+ * in this case we must do a remote wakeup. It's a 'light' wakeup though,
+ * since all we need to do is flip p->state to TASK_RUNNING, since
+ * the task is still ->on_rq.
+ */
|
|
|
+static int ttwu_remote(struct task_struct *p, int wake_flags)
|
|
|
+{
|
|
|
+ struct rq *rq;
|
|
|
+ int ret = 0;
|
|
|
+
|
|
|
+ rq = __task_rq_lock(p);
|
|
|
+ if (p->on_rq) {
|
|
|
+ ttwu_do_wakeup(rq, p, wake_flags);
|
|
|
+ ret = 1;
|
|
|
+ }
|
|
|
+ __task_rq_unlock(rq);
|
|
|
+
|
|
|
+ return ret;
|
|
|
+}
|
|
|
+
|
|
|
+#ifdef CONFIG_SMP
|
|
|
+static void sched_ttwu_pending(void)
|
|
|
+{
|
|
|
+ struct rq *rq = this_rq();
|
|
|
+ struct task_struct *list = xchg(&rq->wake_list, NULL);
|
|
|
+
|
|
|
+ if (!list)
|
|
|
+ return;
|
|
|
+
|
|
|
+ raw_spin_lock(&rq->lock);
|
|
|
+
|
|
|
+ while (list) {
|
|
|
+ struct task_struct *p = list;
|
|
|
+ list = list->wake_entry;
|
|
|
+ ttwu_do_activate(rq, p, 0);
|
|
|
+ }
|
|
|
+
|
|
|
+ raw_spin_unlock(&rq->lock);
|
|
|
+}
|
|
|
+
|
|
|
+void scheduler_ipi(void)
|
|
|
+{
|
|
|
+ sched_ttwu_pending();
|
|
|
+}
|
|
|
+
|
|
|
+static void ttwu_queue_remote(struct task_struct *p, int cpu)
|
|
|
+{
|
|
|
+ struct rq *rq = cpu_rq(cpu);
|
|
|
+ struct task_struct *next = rq->wake_list;
|
|
|
+
|
|
|
+ for (;;) {
|
|
|
+ struct task_struct *old = next;
|
|
|
+
|
|
|
+ p->wake_entry = next;
|
|
|
+ next = cmpxchg(&rq->wake_list, old, p);
|
|
|
+ if (next == old)
|
|
|
+ break;
|
|
|
+ }
|
|
|
+
|
|
|
+ if (!next)
|
|
|
+ smp_send_reschedule(cpu);
|
|
|
+}
|
|
|
+#endif
|
|
|
+
|
|
|
+static void ttwu_queue(struct task_struct *p, int cpu)
|
|
|
+{
|
|
|
+ struct rq *rq = cpu_rq(cpu);
|
|
|
+
|
|
|
+#if defined(CONFIG_SMP) && defined(CONFIG_SCHED_TTWU_QUEUE)
|
|
|
+ if (sched_feat(TTWU_QUEUE) && cpu != smp_processor_id()) {
|
|
|
+ ttwu_queue_remote(p, cpu);
|
|
|
+ return;
|
|
|
+ }
|
|
|
+#endif
|
|
|
+
|
|
|
+ raw_spin_lock(&rq->lock);
|
|
|
+ ttwu_do_activate(rq, p, 0);
|
|
|
+ raw_spin_unlock(&rq->lock);
|
|
|
}
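
ttwu_queue_remote() and sched_ttwu_pending() above are a lock-free producer/consumer pair: wakers push the task onto rq->wake_list with a cmpxchg loop and send the IPI only when the list goes from empty to non-empty, while the target CPU takes the whole list with a single xchg. A self-contained C11 sketch of that shape, using a made-up work item type rather than task_struct, is roughly:

/* Sketch only: C11 atomics, invented work item type. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

struct work {
        struct work *next;
        void (*fn)(struct work *);
};

static _Atomic(struct work *) pending;          /* like rq->wake_list */

/* Producer: push one entry; returns true when the list was empty, i.e. the
 * one moment the consumer needs a kick (the reschedule IPI above). */
static bool push_work(struct work *w)
{
        struct work *old = atomic_load(&pending);

        do {
                w->next = old;
        } while (!atomic_compare_exchange_weak(&pending, &old, w));

        return old == NULL;
}

/* Consumer: take the whole list in one go and run it (LIFO order). */
static void drain_work(void)
{
        struct work *w = atomic_exchange(&pending, NULL);

        while (w) {
                struct work *next = w->next;

                w->fn(w);
                w = next;
        }
}

Draining with one exchange keeps the consumer's interaction with producers down to a single atomic; the drained entries come out in reverse push order, which is harmless here because each one is an independent wakeup.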
/**
|
|
@@ -2453,92 +2591,64 @@ static inline void ttwu_post_activation(struct task_struct *p, struct rq *rq,
|
|
|
* Returns %true if @p was woken up, %false if it was already running
|
|
|
* or @state didn't match @p's state.
|
|
|
*/
|
|
|
-static int try_to_wake_up(struct task_struct *p, unsigned int state,
|
|
|
- int wake_flags)
|
|
|
+static int
|
|
|
+try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
|
|
|
{
|
|
|
- int cpu, orig_cpu, this_cpu, success = 0;
|
|
|
unsigned long flags;
|
|
|
- unsigned long en_flags = ENQUEUE_WAKEUP;
|
|
|
- struct rq *rq;
|
|
|
-
|
|
|
- this_cpu = get_cpu();
|
|
|
+ int cpu, success = 0;
|
|
|
|
|
|
smp_wmb();
|
|
|
- rq = task_rq_lock(p, &flags);
|
|
|
+ raw_spin_lock_irqsave(&p->pi_lock, flags);
|
|
|
if (!(p->state & state))
|
|
|
goto out;
|
|
|
|
|
|
- if (p->se.on_rq)
|
|
|
- goto out_running;
|
|
|
-
|
|
|
+ success = 1; /* we're going to change ->state */
|
|
|
cpu = task_cpu(p);
|
|
|
- orig_cpu = cpu;
|
|
|
|
|
|
-#ifdef CONFIG_SMP
|
|
|
- if (unlikely(task_running(rq, p)))
|
|
|
- goto out_activate;
|
|
|
+ if (p->on_rq && ttwu_remote(p, wake_flags))
|
|
|
+ goto stat;
|
|
|
|
|
|
+#ifdef CONFIG_SMP
|
|
|
/*
|
|
|
- * In order to handle concurrent wakeups and release the rq->lock
|
|
|
- * we put the task in TASK_WAKING state.
|
|
|
- *
|
|
|
- * First fix up the nr_uninterruptible count:
|
|
|
+ * If the owning (remote) cpu is still in the middle of schedule() with
|
|
|
+ * this task as prev, wait until its done referencing the task.
|
|
|
*/
|
|
|
- if (task_contributes_to_load(p)) {
|
|
|
- if (likely(cpu_online(orig_cpu)))
|
|
|
- rq->nr_uninterruptible--;
|
|
|
- else
|
|
|
- this_rq()->nr_uninterruptible--;
|
|
|
- }
|
|
|
- p->state = TASK_WAKING;
|
|
|
-
|
|
|
- if (p->sched_class->task_waking) {
|
|
|
- p->sched_class->task_waking(rq, p);
|
|
|
- en_flags |= ENQUEUE_WAKING;
|
|
|
+ while (p->on_cpu) {
|
|
|
+#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
|
|
|
+ /*
|
|
|
+ * If called from interrupt context we could have landed in the
|
|
|
+ * middle of schedule(), in this case we should take care not
|
|
|
+ * to spin on ->on_cpu if p is current, since that would
|
|
|
+ * deadlock.
|
|
|
+ */
|
|
|
+ if (p == current) {
|
|
|
+ ttwu_queue(p, cpu);
|
|
|
+ goto stat;
|
|
|
+ }
|
|
|
+#endif
|
|
|
+ cpu_relax();
|
|
|
}
|
|
|
-
|
|
|
- cpu = select_task_rq(rq, p, SD_BALANCE_WAKE, wake_flags);
|
|
|
- if (cpu != orig_cpu)
|
|
|
- set_task_cpu(p, cpu);
|
|
|
- __task_rq_unlock(rq);
|
|
|
-
|
|
|
- rq = cpu_rq(cpu);
|
|
|
- raw_spin_lock(&rq->lock);
|
|
|
-
|
|
|
/*
|
|
|
- * We migrated the task without holding either rq->lock, however
|
|
|
- * since the task is not on the task list itself, nobody else
|
|
|
- * will try and migrate the task, hence the rq should match the
|
|
|
- * cpu we just moved it to.
|
|
|
+ * Pairs with the smp_wmb() in finish_lock_switch().
|
|
|
*/
|
|
|
- WARN_ON(task_cpu(p) != cpu);
|
|
|
- WARN_ON(p->state != TASK_WAKING);
|
|
|
+ smp_rmb();
|
|
|
|
|
|
-#ifdef CONFIG_SCHEDSTATS
|
|
|
- schedstat_inc(rq, ttwu_count);
|
|
|
- if (cpu == this_cpu)
|
|
|
- schedstat_inc(rq, ttwu_local);
|
|
|
- else {
|
|
|
- struct sched_domain *sd;
|
|
|
- for_each_domain(this_cpu, sd) {
|
|
|
- if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
|
|
|
- schedstat_inc(sd, ttwu_wake_remote);
|
|
|
- break;
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
-#endif /* CONFIG_SCHEDSTATS */
|
|
|
+ p->sched_contributes_to_load = !!task_contributes_to_load(p);
|
|
|
+ p->state = TASK_WAKING;
|
|
|
|
|
|
-out_activate:
|
|
|
+ if (p->sched_class->task_waking)
|
|
|
+ p->sched_class->task_waking(p);
|
|
|
+
|
|
|
+ cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
|
|
|
+ if (task_cpu(p) != cpu)
|
|
|
+ set_task_cpu(p, cpu);
|
|
|
#endif /* CONFIG_SMP */
|
|
|
- ttwu_activate(p, rq, wake_flags & WF_SYNC, orig_cpu != cpu,
|
|
|
- cpu == this_cpu, en_flags);
|
|
|
- success = 1;
|
|
|
-out_running:
|
|
|
- ttwu_post_activation(p, rq, wake_flags, success);
|
|
|
+
|
|
|
+ ttwu_queue(p, cpu);
|
|
|
+stat:
|
|
|
+ ttwu_stat(p, cpu, wake_flags);
|
|
|
out:
|
|
|
- task_rq_unlock(rq, &flags);
|
|
|
- put_cpu();
|
|
|
+ raw_spin_unlock_irqrestore(&p->pi_lock, flags);
|
|
|
|
|
|
return success;
|
|
|
}
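
The ordering that makes the new try_to_wake_up() safe is the spin on p->on_cpu followed by smp_rmb(), which pairs with the smp_wmb() issued before prev->on_cpu = 0 in finish_lock_switch(): once the waker observes on_cpu drop to zero it must also observe everything schedule() wrote while switching the task out. Expressed with C11 release/acquire on an invented task_ctx type, the contract looks something like:

/* Sketch only: C11 release/acquire on an invented task_ctx type; the kernel
 * uses plain stores plus smp_wmb()/smp_rmb(), but relies on this kind of
 * pairing. */
#include <stdatomic.h>
#include <stdbool.h>

struct task_ctx {
        int saved_state;                /* stands in for what schedule() saves */
        atomic_bool on_cpu;
};

/* Scheduler side: finish switching the task out, then let wakers proceed.
 * The release store plays the part of smp_wmb() + "prev->on_cpu = 0". */
static void deschedule(struct task_ctx *t, int state)
{
        t->saved_state = state;
        atomic_store_explicit(&t->on_cpu, false, memory_order_release);
}

/* Waker side: wait until the task is really off the CPU before touching it.
 * The acquire load plays the part of the on_cpu spin plus smp_rmb(). */
static int wait_and_claim(struct task_ctx *t)
{
        while (atomic_load_explicit(&t->on_cpu, memory_order_acquire))
                ;                       /* cpu_relax() in the kernel */

        return t->saved_state;          /* guaranteed to see deschedule()'s store */
}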
@@ -2547,31 +2657,34 @@ out:
|
|
|
* try_to_wake_up_local - try to wake up a local task with rq lock held
|
|
|
* @p: the thread to be awakened
|
|
|
*
|
|
|
- * Put @p on the run-queue if it's not already there. The caller must
|
|
|
+ * Put @p on the run-queue if it's not already there. The caller must
|
|
|
* ensure that this_rq() is locked, @p is bound to this_rq() and not
|
|
|
- * the current task. this_rq() stays locked over invocation.
|
|
|
+ * the current task.
|
|
|
*/
|
|
|
static void try_to_wake_up_local(struct task_struct *p)
|
|
|
{
|
|
|
struct rq *rq = task_rq(p);
|
|
|
- bool success = false;
|
|
|
|
|
|
BUG_ON(rq != this_rq());
|
|
|
BUG_ON(p == current);
|
|
|
lockdep_assert_held(&rq->lock);
|
|
|
|
|
|
+ if (!raw_spin_trylock(&p->pi_lock)) {
|
|
|
+ raw_spin_unlock(&rq->lock);
|
|
|
+ raw_spin_lock(&p->pi_lock);
|
|
|
+ raw_spin_lock(&rq->lock);
|
|
|
+ }
|
|
|
+
|
|
|
if (!(p->state & TASK_NORMAL))
|
|
|
- return;
|
|
|
+ goto out;
|
|
|
|
|
|
- if (!p->se.on_rq) {
|
|
|
- if (likely(!task_running(rq, p))) {
|
|
|
- schedstat_inc(rq, ttwu_count);
|
|
|
- schedstat_inc(rq, ttwu_local);
|
|
|
- }
|
|
|
- ttwu_activate(p, rq, false, false, true, ENQUEUE_WAKEUP);
|
|
|
- success = true;
|
|
|
- }
|
|
|
- ttwu_post_activation(p, rq, 0, success);
|
|
|
+ if (!p->on_rq)
|
|
|
+ ttwu_activate(rq, p, ENQUEUE_WAKEUP);
|
|
|
+
|
|
|
+ ttwu_do_wakeup(rq, p, 0);
|
|
|
+ ttwu_stat(p, smp_processor_id(), 0);
|
|
|
+out:
|
|
|
+ raw_spin_unlock(&p->pi_lock);
|
|
|
}
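
try_to_wake_up_local() runs with rq->lock already held, yet it now also needs p->pi_lock, which nests outside rq->lock everywhere else. The trylock dance above preserves that order: on failure it drops rq->lock, takes both locks in the documented order, and then has to recheck p->state because the world may have changed in between. The same idiom in plain pthreads, with hypothetical outer and inner mutexes, is:

/* Sketch only: elsewhere the locking order is always outer first, so
 * blocking on outer while holding inner could deadlock. */
#include <pthread.h>

static void take_outer_while_holding_inner(pthread_mutex_t *outer,
                                           pthread_mutex_t *inner)
{
        if (pthread_mutex_trylock(outer) != 0) {
                pthread_mutex_unlock(inner);    /* back off ...                  */
                pthread_mutex_lock(outer);      /* ... retake in the usual order */
                pthread_mutex_lock(inner);      /* caller must recheck state     */
        }
}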
/**
|
|
@@ -2604,19 +2717,21 @@ int wake_up_state(struct task_struct *p, unsigned int state)
|
|
|
*/
|
|
|
static void __sched_fork(struct task_struct *p)
|
|
|
{
|
|
|
+ p->on_rq = 0;
|
|
|
+
|
|
|
+ p->se.on_rq = 0;
|
|
|
p->se.exec_start = 0;
|
|
|
p->se.sum_exec_runtime = 0;
|
|
|
p->se.prev_sum_exec_runtime = 0;
|
|
|
p->se.nr_migrations = 0;
|
|
|
p->se.vruntime = 0;
|
|
|
+ INIT_LIST_HEAD(&p->se.group_node);
|
|
|
|
|
|
#ifdef CONFIG_SCHEDSTATS
|
|
|
memset(&p->se.statistics, 0, sizeof(p->se.statistics));
|
|
|
#endif
|
|
|
|
|
|
INIT_LIST_HEAD(&p->rt.run_list);
|
|
|
- p->se.on_rq = 0;
|
|
|
- INIT_LIST_HEAD(&p->se.group_node);
|
|
|
|
|
|
#ifdef CONFIG_PREEMPT_NOTIFIERS
|
|
|
INIT_HLIST_HEAD(&p->preempt_notifiers);
|
|
@@ -2626,8 +2741,9 @@ static void __sched_fork(struct task_struct *p)
|
|
|
/*
|
|
|
* fork()/clone()-time setup:
|
|
|
*/
|
|
|
-void sched_fork(struct task_struct *p, int clone_flags)
|
|
|
+void sched_fork(struct task_struct *p)
|
|
|
{
|
|
|
+ unsigned long flags;
|
|
|
int cpu = get_cpu();
|
|
|
|
|
|
__sched_fork(p);
|
|
@@ -2678,16 +2794,16 @@ void sched_fork(struct task_struct *p, int clone_flags)
|
|
|
*
|
|
|
* Silence PROVE_RCU.
|
|
|
*/
|
|
|
- rcu_read_lock();
|
|
|
+ raw_spin_lock_irqsave(&p->pi_lock, flags);
|
|
|
set_task_cpu(p, cpu);
|
|
|
- rcu_read_unlock();
|
|
|
+ raw_spin_unlock_irqrestore(&p->pi_lock, flags);
|
|
|
|
|
|
#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
|
|
|
if (likely(sched_info_on()))
|
|
|
memset(&p->sched_info, 0, sizeof(p->sched_info));
|
|
|
#endif
|
|
|
-#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
|
|
|
- p->oncpu = 0;
|
|
|
+#if defined(CONFIG_SMP)
|
|
|
+ p->on_cpu = 0;
|
|
|
#endif
|
|
|
#ifdef CONFIG_PREEMPT
|
|
|
/* Want to start with kernel preemption disabled. */
|
|
@@ -2707,41 +2823,31 @@ void sched_fork(struct task_struct *p, int clone_flags)
|
|
|
* that must be done for every newly created context, then puts the task
|
|
|
* on the runqueue and wakes it.
|
|
|
*/
|
|
|
-void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
|
|
|
+void wake_up_new_task(struct task_struct *p)
|
|
|
{
|
|
|
unsigned long flags;
|
|
|
struct rq *rq;
|
|
|
- int cpu __maybe_unused = get_cpu();
|
|
|
|
|
|
+ raw_spin_lock_irqsave(&p->pi_lock, flags);
|
|
|
#ifdef CONFIG_SMP
|
|
|
- rq = task_rq_lock(p, &flags);
|
|
|
- p->state = TASK_WAKING;
|
|
|
-
|
|
|
/*
|
|
|
* Fork balancing, do it here and not earlier because:
|
|
|
* - cpus_allowed can change in the fork path
|
|
|
* - any previously selected cpu might disappear through hotplug
|
|
|
- *
|
|
|
- * We set TASK_WAKING so that select_task_rq() can drop rq->lock
|
|
|
- * without people poking at ->cpus_allowed.
|
|
|
*/
|
|
|
- cpu = select_task_rq(rq, p, SD_BALANCE_FORK, 0);
|
|
|
- set_task_cpu(p, cpu);
|
|
|
-
|
|
|
- p->state = TASK_RUNNING;
|
|
|
- task_rq_unlock(rq, &flags);
|
|
|
+ set_task_cpu(p, select_task_rq(p, SD_BALANCE_FORK, 0));
|
|
|
#endif
|
|
|
|
|
|
- rq = task_rq_lock(p, &flags);
|
|
|
+ rq = __task_rq_lock(p);
|
|
|
activate_task(rq, p, 0);
|
|
|
- trace_sched_wakeup_new(p, 1);
|
|
|
+ p->on_rq = 1;
|
|
|
+ trace_sched_wakeup_new(p, true);
|
|
|
check_preempt_curr(rq, p, WF_FORK);
|
|
|
#ifdef CONFIG_SMP
|
|
|
if (p->sched_class->task_woken)
|
|
|
p->sched_class->task_woken(rq, p);
|
|
|
#endif
|
|
|
- task_rq_unlock(rq, &flags);
|
|
|
- put_cpu();
|
|
|
+ task_rq_unlock(rq, p, &flags);
|
|
|
}
|
|
|
|
|
|
#ifdef CONFIG_PREEMPT_NOTIFIERS
|
|
@@ -3450,27 +3556,22 @@ void sched_exec(void)
|
|
|
{
|
|
|
struct task_struct *p = current;
|
|
|
unsigned long flags;
|
|
|
- struct rq *rq;
|
|
|
int dest_cpu;
|
|
|
|
|
|
- rq = task_rq_lock(p, &flags);
|
|
|
- dest_cpu = p->sched_class->select_task_rq(rq, p, SD_BALANCE_EXEC, 0);
|
|
|
+ raw_spin_lock_irqsave(&p->pi_lock, flags);
|
|
|
+ dest_cpu = p->sched_class->select_task_rq(p, SD_BALANCE_EXEC, 0);
|
|
|
if (dest_cpu == smp_processor_id())
|
|
|
goto unlock;
|
|
|
|
|
|
- /*
|
|
|
- * select_task_rq() can race against ->cpus_allowed
|
|
|
- */
|
|
|
- if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed) &&
|
|
|
- likely(cpu_active(dest_cpu)) && migrate_task(p, rq)) {
|
|
|
+ if (likely(cpu_active(dest_cpu))) {
|
|
|
struct migration_arg arg = { p, dest_cpu };
|
|
|
|
|
|
- task_rq_unlock(rq, &flags);
|
|
|
- stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
|
|
|
+ raw_spin_unlock_irqrestore(&p->pi_lock, flags);
|
|
|
+ stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg);
|
|
|
return;
|
|
|
}
|
|
|
unlock:
|
|
|
- task_rq_unlock(rq, &flags);
|
|
|
+ raw_spin_unlock_irqrestore(&p->pi_lock, flags);
|
|
|
}
|
|
|
|
|
|
#endif
|
|
@@ -3507,7 +3608,7 @@ unsigned long long task_delta_exec(struct task_struct *p)
|
|
|
|
|
|
rq = task_rq_lock(p, &flags);
|
|
|
ns = do_task_delta_exec(p, rq);
|
|
|
- task_rq_unlock(rq, &flags);
|
|
|
+ task_rq_unlock(rq, p, &flags);
|
|
|
|
|
|
return ns;
|
|
|
}
|
|
@@ -3525,7 +3626,7 @@ unsigned long long task_sched_runtime(struct task_struct *p)
|
|
|
|
|
|
rq = task_rq_lock(p, &flags);
|
|
|
ns = p->se.sum_exec_runtime + do_task_delta_exec(p, rq);
|
|
|
- task_rq_unlock(rq, &flags);
|
|
|
+ task_rq_unlock(rq, p, &flags);
|
|
|
|
|
|
return ns;
|
|
|
}
|
|
@@ -3549,7 +3650,7 @@ unsigned long long thread_group_sched_runtime(struct task_struct *p)
|
|
|
rq = task_rq_lock(p, &flags);
|
|
|
thread_group_cputime(p, &totals);
|
|
|
ns = totals.sum_exec_runtime + do_task_delta_exec(p, rq);
|
|
|
- task_rq_unlock(rq, &flags);
|
|
|
+ task_rq_unlock(rq, p, &flags);
|
|
|
|
|
|
return ns;
|
|
|
}
|
|
@@ -3903,9 +4004,6 @@ void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
|
|
|
/*
|
|
|
* This function gets called by the timer code, with HZ frequency.
|
|
|
* We call it with interrupts disabled.
|
|
|
- *
|
|
|
- * It also gets called by the fork code, when changing the parent's
|
|
|
- * timeslices.
|
|
|
*/
|
|
|
void scheduler_tick(void)
|
|
|
{
|
|
@@ -4025,17 +4123,11 @@ static inline void schedule_debug(struct task_struct *prev)
|
|
|
profile_hit(SCHED_PROFILING, __builtin_return_address(0));
|
|
|
|
|
|
schedstat_inc(this_rq(), sched_count);
|
|
|
-#ifdef CONFIG_SCHEDSTATS
|
|
|
- if (unlikely(prev->lock_depth >= 0)) {
|
|
|
- schedstat_inc(this_rq(), rq_sched_info.bkl_count);
|
|
|
- schedstat_inc(prev, sched_info.bkl_count);
|
|
|
- }
|
|
|
-#endif
|
|
|
}
|
|
|
|
|
|
static void put_prev_task(struct rq *rq, struct task_struct *prev)
|
|
|
{
|
|
|
- if (prev->se.on_rq)
|
|
|
+ if (prev->on_rq || rq->skip_clock_update < 0)
|
|
|
update_rq_clock(rq);
|
|
|
prev->sched_class->put_prev_task(rq, prev);
|
|
|
}
|
|
@@ -4097,11 +4189,13 @@ need_resched:
|
|
|
if (unlikely(signal_pending_state(prev->state, prev))) {
|
|
|
prev->state = TASK_RUNNING;
|
|
|
} else {
|
|
|
+ deactivate_task(rq, prev, DEQUEUE_SLEEP);
|
|
|
+ prev->on_rq = 0;
|
|
|
+
|
|
|
/*
|
|
|
- * If a worker is going to sleep, notify and
|
|
|
- * ask workqueue whether it wants to wake up a
|
|
|
- * task to maintain concurrency. If so, wake
|
|
|
- * up the task.
|
|
|
+ * If a worker went to sleep, notify and ask workqueue
|
|
|
+ * whether it wants to wake up a task to maintain
|
|
|
+ * concurrency.
|
|
|
*/
|
|
|
if (prev->flags & PF_WQ_WORKER) {
|
|
|
struct task_struct *to_wakeup;
|
|
@@ -4110,11 +4204,10 @@ need_resched:
|
|
|
if (to_wakeup)
|
|
|
try_to_wake_up_local(to_wakeup);
|
|
|
}
|
|
|
- deactivate_task(rq, prev, DEQUEUE_SLEEP);
|
|
|
|
|
|
/*
|
|
|
- * If we are going to sleep and we have plugged IO queued, make
|
|
|
- * sure to submit it to avoid deadlocks.
|
|
|
+ * If we are going to sleep and we have plugged IO
|
|
|
+ * queued, make sure to submit it to avoid deadlocks.
|
|
|
*/
|
|
|
if (blk_needs_flush_plug(prev)) {
|
|
|
raw_spin_unlock(&rq->lock);
|
|
@@ -4161,70 +4254,53 @@ need_resched:
|
|
|
EXPORT_SYMBOL(schedule);
|
|
|
|
|
|
#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
|
|
|
-/*
|
|
|
- * Look out! "owner" is an entirely speculative pointer
|
|
|
- * access and not reliable.
|
|
|
- */
|
|
|
-int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner)
|
|
|
-{
|
|
|
- unsigned int cpu;
|
|
|
- struct rq *rq;
|
|
|
|
|
|
- if (!sched_feat(OWNER_SPIN))
|
|
|
- return 0;
|
|
|
+static inline bool owner_running(struct mutex *lock, struct task_struct *owner)
|
|
|
+{
|
|
|
+ bool ret = false;
|
|
|
|
|
|
-#ifdef CONFIG_DEBUG_PAGEALLOC
|
|
|
- /*
|
|
|
- * Need to access the cpu field knowing that
|
|
|
- * DEBUG_PAGEALLOC could have unmapped it if
|
|
|
- * the mutex owner just released it and exited.
|
|
|
- */
|
|
|
- if (probe_kernel_address(&owner->cpu, cpu))
|
|
|
- return 0;
|
|
|
-#else
|
|
|
- cpu = owner->cpu;
|
|
|
-#endif
|
|
|
+ rcu_read_lock();
|
|
|
+ if (lock->owner != owner)
|
|
|
+ goto fail;
|
|
|
|
|
|
/*
|
|
|
- * Even if the access succeeded (likely case),
|
|
|
- * the cpu field may no longer be valid.
|
|
|
+ * Ensure we emit the owner->on_cpu, dereference _after_ checking
|
|
|
+ * lock->owner still matches owner, if that fails, owner might
|
|
|
+ * point to free()d memory, if it still matches, the rcu_read_lock()
|
|
|
+ * ensures the memory stays valid.
|
|
|
*/
|
|
|
- if (cpu >= nr_cpumask_bits)
|
|
|
- return 0;
|
|
|
+ barrier();
|
|
|
|
|
|
- /*
|
|
|
- * We need to validate that we can do a
|
|
|
- * get_cpu() and that we have the percpu area.
|
|
|
- */
|
|
|
- if (!cpu_online(cpu))
|
|
|
- return 0;
|
|
|
+ ret = owner->on_cpu;
|
|
|
+fail:
|
|
|
+ rcu_read_unlock();
|
|
|
|
|
|
- rq = cpu_rq(cpu);
|
|
|
+ return ret;
|
|
|
+}
|
|
|
|
|
|
- for (;;) {
|
|
|
- /*
|
|
|
- * Owner changed, break to re-assess state.
|
|
|
- */
|
|
|
- if (lock->owner != owner) {
|
|
|
- /*
|
|
|
- * If the lock has switched to a different owner,
|
|
|
- * we likely have heavy contention. Return 0 to quit
|
|
|
- * optimistic spinning and not contend further:
|
|
|
- */
|
|
|
- if (lock->owner)
|
|
|
- return 0;
|
|
|
- break;
|
|
|
- }
|
|
|
+/*
|
|
|
+ * Look out! "owner" is an entirely speculative pointer
|
|
|
+ * access and not reliable.
|
|
|
+ */
|
|
|
+int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner)
|
|
|
+{
|
|
|
+ if (!sched_feat(OWNER_SPIN))
|
|
|
+ return 0;
|
|
|
|
|
|
- /*
|
|
|
- * Is that owner really running on that cpu?
|
|
|
- */
|
|
|
- if (task_thread_info(rq->curr) != owner || need_resched())
|
|
|
+ while (owner_running(lock, owner)) {
|
|
|
+ if (need_resched())
|
|
|
return 0;
|
|
|
|
|
|
arch_mutex_cpu_relax();
|
|
|
}
|
|
|
|
|
|
+ /*
|
|
|
+ * If the owner changed to another task there is likely
|
|
|
+ * heavy contention, stop spinning.
|
|
|
+ */
|
|
|
+ if (lock->owner)
|
|
|
+ return 0;
|
|
|
+
|
|
|
return 1;
|
|
|
}
|
|
|
#endif
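
The rewritten mutex_spin_on_owner() no longer chases owner->cpu into a runqueue; owner_running() simply rechecks lock->owner under RCU and then reads owner->on_cpu. Leaving aside the RCU lifetime question that rcu_read_lock() answers (assume owner structures are never freed) and the need_resched() bail-out, the spin policy reduces to something like this C11 sketch with an invented my_mutex type:

/* Sketch only: C11 atomics, invented types; ignores owner lifetime and
 * the need_resched() check. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

struct owner {
        atomic_bool on_cpu;
};

struct my_mutex {
        _Atomic(struct owner *) owner;          /* NULL when unlocked */
};

/* Keep spinning while the same owner holds the lock and is still on a CPU;
 * the return value says whether it is worth trying the lock again. */
static bool spin_on_owner(struct my_mutex *lock, struct owner *owner)
{
        for (;;) {
                if (atomic_load(&lock->owner) != owner)
                        break;                  /* released or handed off */
                if (!atomic_load(&owner->on_cpu))
                        return false;           /* owner went to sleep */
                /* arch_mutex_cpu_relax() in the kernel */
        }

        /* Only keep trying if the lock is now free, not merely re-owned. */
        return atomic_load(&lock->owner) == NULL;
}

Returning true only when the lock was seen free mirrors the comment above: if it merely changed hands, contention is heavy and spinning further is not worth it.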
@@ -4684,19 +4760,18 @@ EXPORT_SYMBOL(sleep_on_timeout);
|
|
|
*/
|
|
|
void rt_mutex_setprio(struct task_struct *p, int prio)
|
|
|
{
|
|
|
- unsigned long flags;
|
|
|
int oldprio, on_rq, running;
|
|
|
struct rq *rq;
|
|
|
const struct sched_class *prev_class;
|
|
|
|
|
|
BUG_ON(prio < 0 || prio > MAX_PRIO);
|
|
|
|
|
|
- rq = task_rq_lock(p, &flags);
|
|
|
+ rq = __task_rq_lock(p);
|
|
|
|
|
|
trace_sched_pi_setprio(p, prio);
|
|
|
oldprio = p->prio;
|
|
|
prev_class = p->sched_class;
|
|
|
- on_rq = p->se.on_rq;
|
|
|
+ on_rq = p->on_rq;
|
|
|
running = task_current(rq, p);
|
|
|
if (on_rq)
|
|
|
dequeue_task(rq, p, 0);
|
|
@@ -4716,7 +4791,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
|
|
|
enqueue_task(rq, p, oldprio < prio ? ENQUEUE_HEAD : 0);
|
|
|
|
|
|
check_class_changed(rq, p, prev_class, oldprio);
|
|
|
- task_rq_unlock(rq, &flags);
|
|
|
+ __task_rq_unlock(rq);
|
|
|
}
|
|
|
|
|
|
#endif
|
|
@@ -4744,7 +4819,7 @@ void set_user_nice(struct task_struct *p, long nice)
|
|
|
p->static_prio = NICE_TO_PRIO(nice);
|
|
|
goto out_unlock;
|
|
|
}
|
|
|
- on_rq = p->se.on_rq;
|
|
|
+ on_rq = p->on_rq;
|
|
|
if (on_rq)
|
|
|
dequeue_task(rq, p, 0);
|
|
|
|
|
@@ -4764,7 +4839,7 @@ void set_user_nice(struct task_struct *p, long nice)
|
|
|
resched_task(rq->curr);
|
|
|
}
|
|
|
out_unlock:
|
|
|
- task_rq_unlock(rq, &flags);
|
|
|
+ task_rq_unlock(rq, p, &flags);
|
|
|
}
|
|
|
EXPORT_SYMBOL(set_user_nice);
|
|
|
|
|
@@ -4878,8 +4953,6 @@ static struct task_struct *find_process_by_pid(pid_t pid)
|
|
|
static void
|
|
|
__setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio)
|
|
|
{
|
|
|
- BUG_ON(p->se.on_rq);
|
|
|
-
|
|
|
p->policy = policy;
|
|
|
p->rt_priority = prio;
|
|
|
p->normal_prio = normal_prio(p);
|
|
@@ -4994,20 +5067,17 @@ recheck:
|
|
|
/*
|
|
|
* make sure no PI-waiters arrive (or leave) while we are
|
|
|
* changing the priority of the task:
|
|
|
- */
|
|
|
- raw_spin_lock_irqsave(&p->pi_lock, flags);
|
|
|
- /*
|
|
|
+ *
|
|
|
* To be able to change p->policy safely, the appropriate
|
|
|
* runqueue lock must be held.
|
|
|
*/
|
|
|
- rq = __task_rq_lock(p);
|
|
|
+ rq = task_rq_lock(p, &flags);
|
|
|
|
|
|
/*
|
|
|
* Changing the policy of the stop threads its a very bad idea
|
|
|
*/
|
|
|
if (p == rq->stop) {
|
|
|
- __task_rq_unlock(rq);
|
|
|
- raw_spin_unlock_irqrestore(&p->pi_lock, flags);
|
|
|
+ task_rq_unlock(rq, p, &flags);
|
|
|
return -EINVAL;
|
|
|
}
|
|
|
|
|
@@ -5031,8 +5101,7 @@ recheck:
|
|
|
if (rt_bandwidth_enabled() && rt_policy(policy) &&
|
|
|
task_group(p)->rt_bandwidth.rt_runtime == 0 &&
|
|
|
!task_group_is_autogroup(task_group(p))) {
|
|
|
- __task_rq_unlock(rq);
|
|
|
- raw_spin_unlock_irqrestore(&p->pi_lock, flags);
|
|
|
+ task_rq_unlock(rq, p, &flags);
|
|
|
return -EPERM;
|
|
|
}
|
|
|
}
|
|
@@ -5041,11 +5110,10 @@ recheck:
|
|
|
/* recheck policy now with rq lock held */
|
|
|
if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) {
|
|
|
policy = oldpolicy = -1;
|
|
|
- __task_rq_unlock(rq);
|
|
|
- raw_spin_unlock_irqrestore(&p->pi_lock, flags);
|
|
|
+ task_rq_unlock(rq, p, &flags);
|
|
|
goto recheck;
|
|
|
}
|
|
|
- on_rq = p->se.on_rq;
|
|
|
+ on_rq = p->on_rq;
|
|
|
running = task_current(rq, p);
|
|
|
if (on_rq)
|
|
|
deactivate_task(rq, p, 0);
|
|
@@ -5064,8 +5132,7 @@ recheck:
|
|
|
activate_task(rq, p, 0);
|
|
|
|
|
|
check_class_changed(rq, p, prev_class, oldprio);
|
|
|
- __task_rq_unlock(rq);
|
|
|
- raw_spin_unlock_irqrestore(&p->pi_lock, flags);
|
|
|
+ task_rq_unlock(rq, p, &flags);
|
|
|
|
|
|
rt_mutex_adjust_pi(p);
|
|
|
|
|
@@ -5316,7 +5383,6 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask)
|
|
|
{
|
|
|
struct task_struct *p;
|
|
|
unsigned long flags;
|
|
|
- struct rq *rq;
|
|
|
int retval;
|
|
|
|
|
|
get_online_cpus();
|
|
@@ -5331,9 +5397,9 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask)
|
|
|
if (retval)
|
|
|
goto out_unlock;
|
|
|
|
|
|
- rq = task_rq_lock(p, &flags);
|
|
|
+ raw_spin_lock_irqsave(&p->pi_lock, flags);
|
|
|
cpumask_and(mask, &p->cpus_allowed, cpu_online_mask);
|
|
|
- task_rq_unlock(rq, &flags);
|
|
|
+ raw_spin_unlock_irqrestore(&p->pi_lock, flags);
|
|
|
|
|
|
out_unlock:
|
|
|
rcu_read_unlock();
|
|
@@ -5658,7 +5724,7 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
|
|
|
|
|
|
rq = task_rq_lock(p, &flags);
|
|
|
time_slice = p->sched_class->get_rr_interval(rq, p);
|
|
|
- task_rq_unlock(rq, &flags);
|
|
|
+ task_rq_unlock(rq, p, &flags);
|
|
|
|
|
|
rcu_read_unlock();
|
|
|
jiffies_to_timespec(time_slice, &t);
|
|
@@ -5776,17 +5842,14 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
|
|
|
rcu_read_unlock();
|
|
|
|
|
|
rq->curr = rq->idle = idle;
|
|
|
-#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
|
|
|
- idle->oncpu = 1;
|
|
|
+#if defined(CONFIG_SMP)
|
|
|
+ idle->on_cpu = 1;
|
|
|
#endif
|
|
|
raw_spin_unlock_irqrestore(&rq->lock, flags);
|
|
|
|
|
|
/* Set the preempt count _outside_ the spinlocks! */
|
|
|
-#if defined(CONFIG_PREEMPT)
|
|
|
- task_thread_info(idle)->preempt_count = (idle->lock_depth >= 0);
|
|
|
-#else
|
|
|
task_thread_info(idle)->preempt_count = 0;
|
|
|
-#endif
|
|
|
+
|
|
|
/*
|
|
|
* The idle tasks have their own, simple scheduling class:
|
|
|
*/
|
|
@@ -5881,26 +5944,17 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
|
|
|
unsigned int dest_cpu;
|
|
|
int ret = 0;
|
|
|
|
|
|
- /*
|
|
|
- * Serialize against TASK_WAKING so that ttwu() and wunt() can
|
|
|
- * drop the rq->lock and still rely on ->cpus_allowed.
|
|
|
- */
|
|
|
-again:
|
|
|
- while (task_is_waking(p))
|
|
|
- cpu_relax();
|
|
|
rq = task_rq_lock(p, &flags);
|
|
|
- if (task_is_waking(p)) {
|
|
|
- task_rq_unlock(rq, &flags);
|
|
|
- goto again;
|
|
|
- }
|
|
|
+
|
|
|
+ if (cpumask_equal(&p->cpus_allowed, new_mask))
|
|
|
+ goto out;
|
|
|
|
|
|
if (!cpumask_intersects(new_mask, cpu_active_mask)) {
|
|
|
ret = -EINVAL;
|
|
|
goto out;
|
|
|
}
|
|
|
|
|
|
- if (unlikely((p->flags & PF_THREAD_BOUND) && p != current &&
|
|
|
- !cpumask_equal(&p->cpus_allowed, new_mask))) {
|
|
|
+ if (unlikely((p->flags & PF_THREAD_BOUND) && p != current)) {
|
|
|
ret = -EINVAL;
|
|
|
goto out;
|
|
|
}
|
|
@@ -5917,16 +5971,16 @@ again:
|
|
|
goto out;
|
|
|
|
|
|
dest_cpu = cpumask_any_and(cpu_active_mask, new_mask);
|
|
|
- if (migrate_task(p, rq)) {
|
|
|
+ if (p->on_rq) {
|
|
|
struct migration_arg arg = { p, dest_cpu };
|
|
|
/* Need help from migration thread: drop lock and wait. */
|
|
|
- task_rq_unlock(rq, &flags);
|
|
|
+ task_rq_unlock(rq, p, &flags);
|
|
|
stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
|
|
|
tlb_migrate_finish(p->mm);
|
|
|
return 0;
|
|
|
}
|
|
|
out:
|
|
|
- task_rq_unlock(rq, &flags);
|
|
|
+ task_rq_unlock(rq, p, &flags);
|
|
|
|
|
|
return ret;
|
|
|
}
|
|
@@ -5954,6 +6008,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
|
|
|
rq_src = cpu_rq(src_cpu);
|
|
|
rq_dest = cpu_rq(dest_cpu);
|
|
|
|
|
|
+ raw_spin_lock(&p->pi_lock);
|
|
|
double_rq_lock(rq_src, rq_dest);
|
|
|
/* Already moved. */
|
|
|
if (task_cpu(p) != src_cpu)
|
|
@@ -5966,7 +6021,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
|
|
|
* If we're not on a rq, the next wake-up will ensure we're
|
|
|
* placed properly.
|
|
|
*/
|
|
|
- if (p->se.on_rq) {
|
|
|
+ if (p->on_rq) {
|
|
|
deactivate_task(rq_src, p, 0);
|
|
|
set_task_cpu(p, dest_cpu);
|
|
|
activate_task(rq_dest, p, 0);
|
|
@@ -5976,6 +6031,7 @@ done:
|
|
|
ret = 1;
|
|
|
fail:
|
|
|
double_rq_unlock(rq_src, rq_dest);
|
|
|
+ raw_spin_unlock(&p->pi_lock);
|
|
|
return ret;
|
|
|
}
|
|
|
|
|
@@ -6316,6 +6372,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
|
|
|
|
|
|
#ifdef CONFIG_HOTPLUG_CPU
|
|
|
case CPU_DYING:
|
|
|
+ sched_ttwu_pending();
|
|
|
/* Update our root-domain */
|
|
|
raw_spin_lock_irqsave(&rq->lock, flags);
|
|
|
if (rq->rd) {
|
|
@@ -6394,6 +6451,8 @@ early_initcall(migration_init);
|
|
|
|
|
|
#ifdef CONFIG_SMP
|
|
|
|
|
|
+static cpumask_var_t sched_domains_tmpmask; /* sched_domains_mutex */
|
|
|
+
|
|
|
#ifdef CONFIG_SCHED_DEBUG
|
|
|
|
|
|
static __read_mostly int sched_domain_debug_enabled;
|
|
@@ -6489,7 +6548,6 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
|
|
|
|
|
|
static void sched_domain_debug(struct sched_domain *sd, int cpu)
|
|
|
{
|
|
|
- cpumask_var_t groupmask;
|
|
|
int level = 0;
|
|
|
|
|
|
if (!sched_domain_debug_enabled)
|
|
@@ -6502,20 +6560,14 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu)
|
|
|
|
|
|
printk(KERN_DEBUG "CPU%d attaching sched-domain:\n", cpu);
|
|
|
|
|
|
- if (!alloc_cpumask_var(&groupmask, GFP_KERNEL)) {
|
|
|
- printk(KERN_DEBUG "Cannot load-balance (out of memory)\n");
|
|
|
- return;
|
|
|
- }
|
|
|
-
|
|
|
for (;;) {
|
|
|
- if (sched_domain_debug_one(sd, cpu, level, groupmask))
|
|
|
+ if (sched_domain_debug_one(sd, cpu, level, sched_domains_tmpmask))
|
|
|
break;
|
|
|
level++;
|
|
|
sd = sd->parent;
|
|
|
if (!sd)
|
|
|
break;
|
|
|
}
|
|
|
- free_cpumask_var(groupmask);
|
|
|
}
|
|
|
#else /* !CONFIG_SCHED_DEBUG */
|
|
|
# define sched_domain_debug(sd, cpu) do { } while (0)
|
|
@@ -6572,12 +6624,11 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
|
|
|
return 1;
|
|
|
}
|
|
|
|
|
|
-static void free_rootdomain(struct root_domain *rd)
|
|
|
+static void free_rootdomain(struct rcu_head *rcu)
|
|
|
{
|
|
|
- synchronize_sched();
|
|
|
+ struct root_domain *rd = container_of(rcu, struct root_domain, rcu);
|
|
|
|
|
|
cpupri_cleanup(&rd->cpupri);
|
|
|
-
|
|
|
free_cpumask_var(rd->rto_mask);
|
|
|
free_cpumask_var(rd->online);
|
|
|
free_cpumask_var(rd->span);
|
|
@@ -6618,7 +6669,7 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd)
|
|
|
raw_spin_unlock_irqrestore(&rq->lock, flags);
|
|
|
|
|
|
if (old_rd)
|
|
|
- free_rootdomain(old_rd);
|
|
|
+ call_rcu_sched(&old_rd->rcu, free_rootdomain);
|
|
|
}
|
|
|
|
|
|
static int init_rootdomain(struct root_domain *rd)
|
|
@@ -6669,6 +6720,25 @@ static struct root_domain *alloc_rootdomain(void)
|
|
|
return rd;
|
|
|
}
|
|
|
|
|
|
+static void free_sched_domain(struct rcu_head *rcu)
|
|
|
+{
|
|
|
+ struct sched_domain *sd = container_of(rcu, struct sched_domain, rcu);
|
|
|
+ if (atomic_dec_and_test(&sd->groups->ref))
|
|
|
+ kfree(sd->groups);
|
|
|
+ kfree(sd);
|
|
|
+}
|
|
|
+
|
|
|
+static void destroy_sched_domain(struct sched_domain *sd, int cpu)
|
|
|
+{
|
|
|
+ call_rcu(&sd->rcu, free_sched_domain);
|
|
|
+}
|
|
|
+
|
|
|
+static void destroy_sched_domains(struct sched_domain *sd, int cpu)
|
|
|
+{
|
|
|
+ for (; sd; sd = sd->parent)
|
|
|
+ destroy_sched_domain(sd, cpu);
|
|
|
+}
|
|
|
+
|
|
|
/*
|
|
|
* Attach the domain 'sd' to 'cpu' as its base domain. Callers must
|
|
|
* hold the hotplug lock.
|
|
@@ -6679,9 +6749,6 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
|
|
|
struct rq *rq = cpu_rq(cpu);
|
|
|
struct sched_domain *tmp;
|
|
|
|
|
|
- for (tmp = sd; tmp; tmp = tmp->parent)
|
|
|
- tmp->span_weight = cpumask_weight(sched_domain_span(tmp));
|
|
|
-
|
|
|
/* Remove the sched domains which do not contribute to scheduling. */
|
|
|
for (tmp = sd; tmp; ) {
|
|
|
struct sched_domain *parent = tmp->parent;
|
|
@@ -6692,12 +6759,15 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
|
|
|
tmp->parent = parent->parent;
|
|
|
if (parent->parent)
|
|
|
parent->parent->child = tmp;
|
|
|
+ destroy_sched_domain(parent, cpu);
|
|
|
} else
|
|
|
tmp = tmp->parent;
|
|
|
}
|
|
|
|
|
|
if (sd && sd_degenerate(sd)) {
|
|
|
+ tmp = sd;
|
|
|
sd = sd->parent;
|
|
|
+ destroy_sched_domain(tmp, cpu);
|
|
|
if (sd)
|
|
|
sd->child = NULL;
|
|
|
}
|
|
@@ -6705,7 +6775,9 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
|
|
|
sched_domain_debug(sd, cpu);
|
|
|
|
|
|
rq_attach_root(rq, rd);
|
|
|
+ tmp = rq->sd;
|
|
|
rcu_assign_pointer(rq->sd, sd);
|
|
|
+ destroy_sched_domains(tmp, cpu);
|
|
|
}
|
|
|
|
|
|
/* cpus with isolated domains */
|
|
@@ -6721,56 +6793,6 @@ static int __init isolated_cpu_setup(char *str)
|
|
|
|
|
|
__setup("isolcpus=", isolated_cpu_setup);
|
|
|
|
|
|
-/*
|
|
|
- * init_sched_build_groups takes the cpumask we wish to span, and a pointer
|
|
|
- * to a function which identifies what group(along with sched group) a CPU
|
|
|
- * belongs to. The return value of group_fn must be a >= 0 and < nr_cpu_ids
|
|
|
- * (due to the fact that we keep track of groups covered with a struct cpumask).
|
|
|
- *
|
|
|
- * init_sched_build_groups will build a circular linked list of the groups
|
|
|
- * covered by the given span, and will set each group's ->cpumask correctly,
|
|
|
- * and ->cpu_power to 0.
|
|
|
- */
|
|
|
-static void
|
|
|
-init_sched_build_groups(const struct cpumask *span,
|
|
|
- const struct cpumask *cpu_map,
|
|
|
- int (*group_fn)(int cpu, const struct cpumask *cpu_map,
|
|
|
- struct sched_group **sg,
|
|
|
- struct cpumask *tmpmask),
|
|
|
- struct cpumask *covered, struct cpumask *tmpmask)
|
|
|
-{
|
|
|
- struct sched_group *first = NULL, *last = NULL;
|
|
|
- int i;
|
|
|
-
|
|
|
- cpumask_clear(covered);
|
|
|
-
|
|
|
- for_each_cpu(i, span) {
|
|
|
- struct sched_group *sg;
|
|
|
- int group = group_fn(i, cpu_map, &sg, tmpmask);
|
|
|
- int j;
|
|
|
-
|
|
|
- if (cpumask_test_cpu(i, covered))
|
|
|
- continue;
|
|
|
-
|
|
|
- cpumask_clear(sched_group_cpus(sg));
|
|
|
- sg->cpu_power = 0;
|
|
|
-
|
|
|
- for_each_cpu(j, span) {
|
|
|
- if (group_fn(j, cpu_map, NULL, tmpmask) != group)
|
|
|
- continue;
|
|
|
-
|
|
|
- cpumask_set_cpu(j, covered);
|
|
|
- cpumask_set_cpu(j, sched_group_cpus(sg));
|
|
|
- }
|
|
|
- if (!first)
|
|
|
- first = sg;
|
|
|
- if (last)
|
|
|
- last->next = sg;
|
|
|
- last = sg;
|
|
|
- }
|
|
|
- last->next = first;
|
|
|
-}
|
|
|
-
|
|
|
#define SD_NODES_PER_DOMAIN 16
|
|
|
|
|
|
#ifdef CONFIG_NUMA
|
|
@@ -6787,7 +6809,7 @@ init_sched_build_groups(const struct cpumask *span,
|
|
|
*/
|
|
|
static int find_next_best_node(int node, nodemask_t *used_nodes)
|
|
|
{
|
|
|
- int i, n, val, min_val, best_node = 0;
|
|
|
+ int i, n, val, min_val, best_node = -1;
|
|
|
|
|
|
min_val = INT_MAX;
|
|
|
|
|
@@ -6811,7 +6833,8 @@ static int find_next_best_node(int node, nodemask_t *used_nodes)
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- node_set(best_node, *used_nodes);
|
|
|
+ if (best_node != -1)
|
|
|
+ node_set(best_node, *used_nodes);
|
|
|
return best_node;
|
|
|
}
|
|
|
|
|
@@ -6837,315 +6860,130 @@ static void sched_domain_node_span(int node, struct cpumask *span)
|
|
|
|
|
|
for (i = 1; i < SD_NODES_PER_DOMAIN; i++) {
|
|
|
int next_node = find_next_best_node(node, &used_nodes);
|
|
|
-
|
|
|
+ if (next_node < 0)
|
|
|
+ break;
|
|
|
cpumask_or(span, span, cpumask_of_node(next_node));
|
|
|
}
|
|
|
}
|
|
|
+
|
|
|
+static const struct cpumask *cpu_node_mask(int cpu)
|
|
|
+{
|
|
|
+ lockdep_assert_held(&sched_domains_mutex);
|
|
|
+
|
|
|
+ sched_domain_node_span(cpu_to_node(cpu), sched_domains_tmpmask);
|
|
|
+
|
|
|
+ return sched_domains_tmpmask;
|
|
|
+}
|
|
|
+
|
|
|
+static const struct cpumask *cpu_allnodes_mask(int cpu)
|
|
|
+{
|
|
|
+ return cpu_possible_mask;
|
|
|
+}
|
|
|
#endif /* CONFIG_NUMA */
|
|
|
|
|
|
-int sched_smt_power_savings = 0, sched_mc_power_savings = 0;
|
|
|
+static const struct cpumask *cpu_cpu_mask(int cpu)
|
|
|
+{
|
|
|
+ return cpumask_of_node(cpu_to_node(cpu));
|
|
|
+}
|
|
|
|
|
|
-/*
|
|
|
- * The cpus mask in sched_group and sched_domain hangs off the end.
|
|
|
- *
|
|
|
- * ( See the the comments in include/linux/sched.h:struct sched_group
|
|
|
- * and struct sched_domain. )
|
|
|
- */
|
|
|
-struct static_sched_group {
|
|
|
- struct sched_group sg;
|
|
|
- DECLARE_BITMAP(cpus, CONFIG_NR_CPUS);
|
|
|
-};
|
|
|
+int sched_smt_power_savings = 0, sched_mc_power_savings = 0;
|
|
|
|
|
|
-struct static_sched_domain {
|
|
|
- struct sched_domain sd;
|
|
|
- DECLARE_BITMAP(span, CONFIG_NR_CPUS);
|
|
|
+struct sd_data {
|
|
|
+ struct sched_domain **__percpu sd;
|
|
|
+ struct sched_group **__percpu sg;
|
|
|
};
|
|
|
|
|
|
struct s_data {
|
|
|
-#ifdef CONFIG_NUMA
|
|
|
- int sd_allnodes;
|
|
|
- cpumask_var_t domainspan;
|
|
|
- cpumask_var_t covered;
|
|
|
- cpumask_var_t notcovered;
|
|
|
-#endif
|
|
|
- cpumask_var_t nodemask;
|
|
|
- cpumask_var_t this_sibling_map;
|
|
|
- cpumask_var_t this_core_map;
|
|
|
- cpumask_var_t this_book_map;
|
|
|
- cpumask_var_t send_covered;
|
|
|
- cpumask_var_t tmpmask;
|
|
|
- struct sched_group **sched_group_nodes;
|
|
|
+ struct sched_domain ** __percpu sd;
|
|
|
struct root_domain *rd;
|
|
|
};
|
|
|
|
|
|
enum s_alloc {
|
|
|
- sa_sched_groups = 0,
|
|
|
sa_rootdomain,
|
|
|
- sa_tmpmask,
|
|
|
- sa_send_covered,
|
|
|
- sa_this_book_map,
|
|
|
- sa_this_core_map,
|
|
|
- sa_this_sibling_map,
|
|
|
- sa_nodemask,
|
|
|
- sa_sched_group_nodes,
|
|
|
-#ifdef CONFIG_NUMA
|
|
|
- sa_notcovered,
|
|
|
- sa_covered,
|
|
|
- sa_domainspan,
|
|
|
-#endif
|
|
|
+ sa_sd,
|
|
|
+ sa_sd_storage,
|
|
|
sa_none,
|
|
|
};
|
|
|
|
|
|
-/*
|
|
|
- * SMT sched-domains:
|
|
|
- */
|
|
|
-#ifdef CONFIG_SCHED_SMT
|
|
|
-static DEFINE_PER_CPU(struct static_sched_domain, cpu_domains);
|
|
|
-static DEFINE_PER_CPU(struct static_sched_group, sched_groups);
|
|
|
-
|
|
|
-static int
|
|
|
-cpu_to_cpu_group(int cpu, const struct cpumask *cpu_map,
|
|
|
- struct sched_group **sg, struct cpumask *unused)
|
|
|
-{
|
|
|
- if (sg)
|
|
|
- *sg = &per_cpu(sched_groups, cpu).sg;
|
|
|
- return cpu;
|
|
|
-}
|
|
|
-#endif /* CONFIG_SCHED_SMT */
|
|
|
+struct sched_domain_topology_level;
|
|
|
|
|
|
-/*
|
|
|
- * multi-core sched-domains:
|
|
|
- */
|
|
|
-#ifdef CONFIG_SCHED_MC
|
|
|
-static DEFINE_PER_CPU(struct static_sched_domain, core_domains);
|
|
|
-static DEFINE_PER_CPU(struct static_sched_group, sched_group_core);
|
|
|
+typedef struct sched_domain *(*sched_domain_init_f)(struct sched_domain_topology_level *tl, int cpu);
|
|
|
+typedef const struct cpumask *(*sched_domain_mask_f)(int cpu);
|
|
|
|
|
|
-static int
|
|
|
-cpu_to_core_group(int cpu, const struct cpumask *cpu_map,
|
|
|
- struct sched_group **sg, struct cpumask *mask)
|
|
|
-{
|
|
|
- int group;
|
|
|
-#ifdef CONFIG_SCHED_SMT
|
|
|
- cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map);
|
|
|
- group = cpumask_first(mask);
|
|
|
-#else
|
|
|
- group = cpu;
|
|
|
-#endif
|
|
|
- if (sg)
|
|
|
- *sg = &per_cpu(sched_group_core, group).sg;
|
|
|
- return group;
|
|
|
-}
|
|
|
-#endif /* CONFIG_SCHED_MC */
|
|
|
+struct sched_domain_topology_level {
|
|
|
+ sched_domain_init_f init;
|
|
|
+ sched_domain_mask_f mask;
|
|
|
+ struct sd_data data;
|
|
|
+};
|
|
|
|
|
|
/*
|
|
|
- * book sched-domains:
|
|
|
+ * Assumes the sched_domain tree is fully constructed
|
|
|
*/
|
|
|
-#ifdef CONFIG_SCHED_BOOK
|
|
|
-static DEFINE_PER_CPU(struct static_sched_domain, book_domains);
|
|
|
-static DEFINE_PER_CPU(struct static_sched_group, sched_group_book);
|
|
|
-
|
|
|
-static int
|
|
|
-cpu_to_book_group(int cpu, const struct cpumask *cpu_map,
|
|
|
- struct sched_group **sg, struct cpumask *mask)
|
|
|
+static int get_group(int cpu, struct sd_data *sdd, struct sched_group **sg)
|
|
|
{
|
|
|
- int group = cpu;
|
|
|
-#ifdef CONFIG_SCHED_MC
|
|
|
- cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map);
|
|
|
- group = cpumask_first(mask);
|
|
|
-#elif defined(CONFIG_SCHED_SMT)
|
|
|
- cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map);
|
|
|
- group = cpumask_first(mask);
|
|
|
-#endif
|
|
|
- if (sg)
|
|
|
- *sg = &per_cpu(sched_group_book, group).sg;
|
|
|
- return group;
|
|
|
-}
|
|
|
-#endif /* CONFIG_SCHED_BOOK */
|
|
|
+ struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu);
|
|
|
+ struct sched_domain *child = sd->child;
|
|
|
|
|
|
-static DEFINE_PER_CPU(struct static_sched_domain, phys_domains);
|
|
|
-static DEFINE_PER_CPU(struct static_sched_group, sched_group_phys);
|
|
|
+ if (child)
|
|
|
+ cpu = cpumask_first(sched_domain_span(child));
|
|
|
|
|
|
-static int
|
|
|
-cpu_to_phys_group(int cpu, const struct cpumask *cpu_map,
|
|
|
- struct sched_group **sg, struct cpumask *mask)
|
|
|
-{
|
|
|
- int group;
|
|
|
-#ifdef CONFIG_SCHED_BOOK
|
|
|
- cpumask_and(mask, cpu_book_mask(cpu), cpu_map);
|
|
|
- group = cpumask_first(mask);
|
|
|
-#elif defined(CONFIG_SCHED_MC)
|
|
|
- cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map);
|
|
|
- group = cpumask_first(mask);
|
|
|
-#elif defined(CONFIG_SCHED_SMT)
|
|
|
- cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map);
|
|
|
- group = cpumask_first(mask);
|
|
|
-#else
|
|
|
- group = cpu;
|
|
|
-#endif
|
|
|
if (sg)
|
|
|
- *sg = &per_cpu(sched_group_phys, group).sg;
|
|
|
- return group;
|
|
|
+ *sg = *per_cpu_ptr(sdd->sg, cpu);
|
|
|
+
|
|
|
+ return cpu;
|
|
|
}

-#ifdef CONFIG_NUMA
 /*
- * The init_sched_build_groups can't handle what we want to do with node
- * groups, so roll our own. Now each node has its own list of groups which
- * gets dynamically allocated.
+ * build_sched_groups takes the cpumask we wish to span, and a pointer
+ * to a function which identifies what group(along with sched group) a CPU
+ * belongs to. The return value of group_fn must be a >= 0 and < nr_cpu_ids
+ * (due to the fact that we keep track of groups covered with a struct cpumask).
+ *
+ * build_sched_groups will build a circular linked list of the groups
+ * covered by the given span, and will set each group's ->cpumask correctly,
+ * and ->cpu_power to 0.
 */
-static DEFINE_PER_CPU(struct static_sched_domain, node_domains);
-static struct sched_group ***sched_group_nodes_bycpu;
-
-static DEFINE_PER_CPU(struct static_sched_domain, allnodes_domains);
-static DEFINE_PER_CPU(struct static_sched_group, sched_group_allnodes);
-
-static int cpu_to_allnodes_group(int cpu, const struct cpumask *cpu_map,
- struct sched_group **sg,
- struct cpumask *nodemask)
-{
- int group;
-
- cpumask_and(nodemask, cpumask_of_node(cpu_to_node(cpu)), cpu_map);
- group = cpumask_first(nodemask);
-
- if (sg)
- *sg = &per_cpu(sched_group_allnodes, group).sg;
- return group;
-}
-
-static void init_numa_sched_groups_power(struct sched_group *group_head)
-{
- struct sched_group *sg = group_head;
- int j;
-
- if (!sg)
- return;
- do {
- for_each_cpu(j, sched_group_cpus(sg)) {
- struct sched_domain *sd;
-
- sd = &per_cpu(phys_domains, j).sd;
- if (j != group_first_cpu(sd->groups)) {
- /*
- * Only add "power" once for each
- * physical package.
- */
- continue;
- }
-
- sg->cpu_power += sd->groups->cpu_power;
- }
- sg = sg->next;
- } while (sg != group_head);
-}
-
-static int build_numa_sched_groups(struct s_data *d,
- const struct cpumask *cpu_map, int num)
+static void
+build_sched_groups(struct sched_domain *sd)
 {
- struct sched_domain *sd;
- struct sched_group *sg, *prev;
- int n, j;
-
- cpumask_clear(d->covered);
- cpumask_and(d->nodemask, cpumask_of_node(num), cpu_map);
- if (cpumask_empty(d->nodemask)) {
- d->sched_group_nodes[num] = NULL;
- goto out;
- }
-
- sched_domain_node_span(num, d->domainspan);
- cpumask_and(d->domainspan, d->domainspan, cpu_map);
-
- sg = kmalloc_node(sizeof(struct sched_group) + cpumask_size(),
- GFP_KERNEL, num);
- if (!sg) {
- printk(KERN_WARNING "Can not alloc domain group for node %d\n",
- num);
- return -ENOMEM;
- }
- d->sched_group_nodes[num] = sg;
-
- for_each_cpu(j, d->nodemask) {
- sd = &per_cpu(node_domains, j).sd;
- sd->groups = sg;
- }
-
- sg->cpu_power = 0;
- cpumask_copy(sched_group_cpus(sg), d->nodemask);
- sg->next = sg;
- cpumask_or(d->covered, d->covered, d->nodemask);
+ struct sched_group *first = NULL, *last = NULL;
+ struct sd_data *sdd = sd->private;
+ const struct cpumask *span = sched_domain_span(sd);
+ struct cpumask *covered;
+ int i;

- prev = sg;
- for (j = 0; j < nr_node_ids; j++) {
- n = (num + j) % nr_node_ids;
- cpumask_complement(d->notcovered, d->covered);
- cpumask_and(d->tmpmask, d->notcovered, cpu_map);
- cpumask_and(d->tmpmask, d->tmpmask, d->domainspan);
- if (cpumask_empty(d->tmpmask))
- break;
- cpumask_and(d->tmpmask, d->tmpmask, cpumask_of_node(n));
- if (cpumask_empty(d->tmpmask))
- continue;
- sg = kmalloc_node(sizeof(struct sched_group) + cpumask_size(),
- GFP_KERNEL, num);
- if (!sg) {
- printk(KERN_WARNING
- "Can not alloc domain group for node %d\n", j);
- return -ENOMEM;
- }
- sg->cpu_power = 0;
- cpumask_copy(sched_group_cpus(sg), d->tmpmask);
- sg->next = prev->next;
- cpumask_or(d->covered, d->covered, d->tmpmask);
- prev->next = sg;
- prev = sg;
- }
-out:
- return 0;
-}
-#endif /* CONFIG_NUMA */
+ lockdep_assert_held(&sched_domains_mutex);
+ covered = sched_domains_tmpmask;

-#ifdef CONFIG_NUMA
-/* Free memory allocated for various sched_group structures */
-static void free_sched_groups(const struct cpumask *cpu_map,
- struct cpumask *nodemask)
-{
- int cpu, i;
+ cpumask_clear(covered);

- for_each_cpu(cpu, cpu_map) {
- struct sched_group **sched_group_nodes
- = sched_group_nodes_bycpu[cpu];
+ for_each_cpu(i, span) {
+ struct sched_group *sg;
+ int group = get_group(i, sdd, &sg);
+ int j;

- if (!sched_group_nodes)
+ if (cpumask_test_cpu(i, covered))
 continue;

- for (i = 0; i < nr_node_ids; i++) {
- struct sched_group *oldsg, *sg = sched_group_nodes[i];
+ cpumask_clear(sched_group_cpus(sg));
+ sg->cpu_power = 0;

- cpumask_and(nodemask, cpumask_of_node(i), cpu_map);
- if (cpumask_empty(nodemask))
+ for_each_cpu(j, span) {
+ if (get_group(j, sdd, NULL) != group)
 continue;

- if (sg == NULL)
- continue;
- sg = sg->next;
-next_sg:
- oldsg = sg;
- sg = sg->next;
- kfree(oldsg);
- if (oldsg != sched_group_nodes[i])
- goto next_sg;
+ cpumask_set_cpu(j, covered);
+ cpumask_set_cpu(j, sched_group_cpus(sg));
 }
- kfree(sched_group_nodes);
- sched_group_nodes_bycpu[cpu] = NULL;
+
+ if (!first)
+ first = sg;
+ if (last)
+ last->next = sg;
+ last = sg;
 }
+ last->next = first;
 }
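
build_sched_groups() now walks the domain's span once: the first time a CPU of a not-yet-covered group is seen, that group's cpumask is filled with every CPU mapping to the same representative, and the groups are chained into a circular list. Stripped of the kernel types, the covering logic looks roughly like the sketch below (plain arrays instead of cpumasks; names are illustrative only):

#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS 8

struct toy_group {
	int first_cpu;
	bool cpus[NR_CPUS];
	struct toy_group *next;
};

/* group_of[cpu] is the representative CPU at this level (cf. get_group()). */
static struct toy_group *toy_build_groups(const int *group_of, struct toy_group *storage)
{
	bool covered[NR_CPUS] = { false };
	struct toy_group *first = NULL, *last = NULL;
	int i, j;

	for (i = 0; i < NR_CPUS; i++) {
		struct toy_group *sg = &storage[group_of[i]];

		if (covered[i])
			continue;

		sg->first_cpu = group_of[i];
		for (j = 0; j < NR_CPUS; j++) {
			if (group_of[j] != group_of[i])
				continue;
			covered[j] = true;
			sg->cpus[j] = true;
		}

		if (!first)
			first = sg;
		if (last)
			last->next = sg;
		last = sg;
	}
	last->next = first;	/* close the circular list */
	return first;
}

int main(void)
{
	static struct toy_group storage[NR_CPUS];
	const int group_of[NR_CPUS] = { 0, 0, 2, 2, 4, 4, 6, 6 };
	struct toy_group *start = toy_build_groups(group_of, storage);
	struct toy_group *sg = start;

	do {
		printf("group starting at cpu %d\n", sg->first_cpu);
		sg = sg->next;
	} while (sg != start);
	return 0;
}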
-#else /* !CONFIG_NUMA */
-static void free_sched_groups(const struct cpumask *cpu_map,
- struct cpumask *nodemask)
-{
-}
-#endif /* CONFIG_NUMA */

 /*
 * Initialize sched groups cpu_power.
@@ -7159,11 +6997,6 @@ static void free_sched_groups(const struct cpumask *cpu_map,
 */
 static void init_sched_groups_power(int cpu, struct sched_domain *sd)
 {
- struct sched_domain *child;
- struct sched_group *group;
- long power;
- int weight;
-
 WARN_ON(!sd || !sd->groups);

 if (cpu != group_first_cpu(sd->groups))
@@ -7171,36 +7004,7 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)

 sd->groups->group_weight = cpumask_weight(sched_group_cpus(sd->groups));

- child = sd->child;
-
- sd->groups->cpu_power = 0;
-
- if (!child) {
- power = SCHED_LOAD_SCALE;
- weight = cpumask_weight(sched_domain_span(sd));
- /*
- * SMT siblings share the power of a single core.
- * Usually multiple threads get a better yield out of
- * that one core than a single thread would have,
- * reflect that in sd->smt_gain.
- */
- if ((sd->flags & SD_SHARE_CPUPOWER) && weight > 1) {
- power *= sd->smt_gain;
- power /= weight;
- power >>= SCHED_LOAD_SHIFT;
- }
- sd->groups->cpu_power += power;
- return;
- }
-
- /*
- * Add cpu_power of each child group to this groups cpu_power.
- */
- group = child->groups;
- do {
- sd->groups->cpu_power += group->cpu_power;
- group = group->next;
- } while (group != child->groups);
+ update_group_power(sd, cpu);
 }

 /*
@@ -7214,15 +7018,15 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
 # define SD_INIT_NAME(sd, type) do { } while (0)
 #endif

-#define SD_INIT(sd, type) sd_init_##type(sd)
-
-#define SD_INIT_FUNC(type) \
-static noinline void sd_init_##type(struct sched_domain *sd) \
-{ \
- memset(sd, 0, sizeof(*sd)); \
- *sd = SD_##type##_INIT; \
- sd->level = SD_LV_##type; \
- SD_INIT_NAME(sd, type); \
+#define SD_INIT_FUNC(type) \
+static noinline struct sched_domain * \
+sd_init_##type(struct sched_domain_topology_level *tl, int cpu) \
+{ \
+ struct sched_domain *sd = *per_cpu_ptr(tl->data.sd, cpu); \
+ *sd = SD_##type##_INIT; \
+ SD_INIT_NAME(sd, type); \
+ sd->private = &tl->data; \
+ return sd; \
 }

 SD_INIT_FUNC(CPU)
@@ -7241,13 +7045,14 @@ SD_INIT_FUNC(CPU)
 #endif

 static int default_relax_domain_level = -1;
+int sched_domain_level_max;

 static int __init setup_relax_domain_level(char *str)
 {
 unsigned long val;

 val = simple_strtoul(str, NULL, 0);
- if (val < SD_LV_MAX)
+ if (val < sched_domain_level_max)
 default_relax_domain_level = val;

 return 1;
@@ -7275,37 +7080,20 @@ static void set_domain_attribute(struct sched_domain *sd,
 }
 }

+static void __sdt_free(const struct cpumask *cpu_map);
+static int __sdt_alloc(const struct cpumask *cpu_map);
+
 static void __free_domain_allocs(struct s_data *d, enum s_alloc what,
 const struct cpumask *cpu_map)
 {
 switch (what) {
- case sa_sched_groups:
- free_sched_groups(cpu_map, d->tmpmask); /* fall through */
- d->sched_group_nodes = NULL;
 case sa_rootdomain:
- free_rootdomain(d->rd); /* fall through */
- case sa_tmpmask:
- free_cpumask_var(d->tmpmask); /* fall through */
- case sa_send_covered:
- free_cpumask_var(d->send_covered); /* fall through */
- case sa_this_book_map:
- free_cpumask_var(d->this_book_map); /* fall through */
- case sa_this_core_map:
- free_cpumask_var(d->this_core_map); /* fall through */
- case sa_this_sibling_map:
- free_cpumask_var(d->this_sibling_map); /* fall through */
- case sa_nodemask:
- free_cpumask_var(d->nodemask); /* fall through */
- case sa_sched_group_nodes:
-#ifdef CONFIG_NUMA
- kfree(d->sched_group_nodes); /* fall through */
- case sa_notcovered:
- free_cpumask_var(d->notcovered); /* fall through */
- case sa_covered:
- free_cpumask_var(d->covered); /* fall through */
- case sa_domainspan:
- free_cpumask_var(d->domainspan); /* fall through */
-#endif
+ if (!atomic_read(&d->rd->refcount))
+ free_rootdomain(&d->rd->rcu); /* fall through */
+ case sa_sd:
+ free_percpu(d->sd); /* fall through */
+ case sa_sd_storage:
+ __sdt_free(cpu_map); /* fall through */
 case sa_none:
 break;
 }
@@ -7314,308 +7102,212 @@ static void __free_domain_allocs(struct s_data *d, enum s_alloc what,
 static enum s_alloc __visit_domain_allocation_hell(struct s_data *d,
 const struct cpumask *cpu_map)
 {
-#ifdef CONFIG_NUMA
- if (!alloc_cpumask_var(&d->domainspan, GFP_KERNEL))
- return sa_none;
- if (!alloc_cpumask_var(&d->covered, GFP_KERNEL))
- return sa_domainspan;
- if (!alloc_cpumask_var(&d->notcovered, GFP_KERNEL))
- return sa_covered;
- /* Allocate the per-node list of sched groups */
- d->sched_group_nodes = kcalloc(nr_node_ids,
- sizeof(struct sched_group *), GFP_KERNEL);
- if (!d->sched_group_nodes) {
- printk(KERN_WARNING "Can not alloc sched group node list\n");
- return sa_notcovered;
- }
- sched_group_nodes_bycpu[cpumask_first(cpu_map)] = d->sched_group_nodes;
-#endif
- if (!alloc_cpumask_var(&d->nodemask, GFP_KERNEL))
- return sa_sched_group_nodes;
- if (!alloc_cpumask_var(&d->this_sibling_map, GFP_KERNEL))
- return sa_nodemask;
- if (!alloc_cpumask_var(&d->this_core_map, GFP_KERNEL))
- return sa_this_sibling_map;
- if (!alloc_cpumask_var(&d->this_book_map, GFP_KERNEL))
- return sa_this_core_map;
- if (!alloc_cpumask_var(&d->send_covered, GFP_KERNEL))
- return sa_this_book_map;
- if (!alloc_cpumask_var(&d->tmpmask, GFP_KERNEL))
- return sa_send_covered;
+ memset(d, 0, sizeof(*d));
+
+ if (__sdt_alloc(cpu_map))
+ return sa_sd_storage;
+ d->sd = alloc_percpu(struct sched_domain *);
+ if (!d->sd)
+ return sa_sd_storage;
 d->rd = alloc_rootdomain();
- if (!d->rd) {
- printk(KERN_WARNING "Cannot alloc root domain\n");
- return sa_tmpmask;
- }
+ if (!d->rd)
+ return sa_sd;
 return sa_rootdomain;
 }

-static struct sched_domain *__build_numa_sched_domains(struct s_data *d,
- const struct cpumask *cpu_map, struct sched_domain_attr *attr, int i)
+/*
+ * NULL the sd_data elements we've used to build the sched_domain and
+ * sched_group structure so that the subsequent __free_domain_allocs()
+ * will not free the data we're using.
+ */
+static void claim_allocations(int cpu, struct sched_domain *sd)
 {
- struct sched_domain *sd = NULL;
-#ifdef CONFIG_NUMA
- struct sched_domain *parent;
-
- d->sd_allnodes = 0;
- if (cpumask_weight(cpu_map) >
- SD_NODES_PER_DOMAIN * cpumask_weight(d->nodemask)) {
- sd = &per_cpu(allnodes_domains, i).sd;
- SD_INIT(sd, ALLNODES);
- set_domain_attribute(sd, attr);
- cpumask_copy(sched_domain_span(sd), cpu_map);
- cpu_to_allnodes_group(i, cpu_map, &sd->groups, d->tmpmask);
- d->sd_allnodes = 1;
- }
- parent = sd;
-
- sd = &per_cpu(node_domains, i).sd;
- SD_INIT(sd, NODE);
- set_domain_attribute(sd, attr);
- sched_domain_node_span(cpu_to_node(i), sched_domain_span(sd));
- sd->parent = parent;
- if (parent)
- parent->child = sd;
- cpumask_and(sched_domain_span(sd), sched_domain_span(sd), cpu_map);
-#endif
- return sd;
-}
+ struct sd_data *sdd = sd->private;
+ struct sched_group *sg = sd->groups;

-static struct sched_domain *__build_cpu_sched_domain(struct s_data *d,
- const struct cpumask *cpu_map, struct sched_domain_attr *attr,
- struct sched_domain *parent, int i)
-{
- struct sched_domain *sd;
- sd = &per_cpu(phys_domains, i).sd;
- SD_INIT(sd, CPU);
- set_domain_attribute(sd, attr);
- cpumask_copy(sched_domain_span(sd), d->nodemask);
- sd->parent = parent;
- if (parent)
- parent->child = sd;
- cpu_to_phys_group(i, cpu_map, &sd->groups, d->tmpmask);
- return sd;
-}
+ WARN_ON_ONCE(*per_cpu_ptr(sdd->sd, cpu) != sd);
+ *per_cpu_ptr(sdd->sd, cpu) = NULL;

-static struct sched_domain *__build_book_sched_domain(struct s_data *d,
- const struct cpumask *cpu_map, struct sched_domain_attr *attr,
- struct sched_domain *parent, int i)
-{
- struct sched_domain *sd = parent;
-#ifdef CONFIG_SCHED_BOOK
- sd = &per_cpu(book_domains, i).sd;
- SD_INIT(sd, BOOK);
- set_domain_attribute(sd, attr);
- cpumask_and(sched_domain_span(sd), cpu_map, cpu_book_mask(i));
- sd->parent = parent;
- parent->child = sd;
- cpu_to_book_group(i, cpu_map, &sd->groups, d->tmpmask);
-#endif
- return sd;
+ if (cpu == cpumask_first(sched_group_cpus(sg))) {
+ WARN_ON_ONCE(*per_cpu_ptr(sdd->sg, cpu) != sg);
+ *per_cpu_ptr(sdd->sg, cpu) = NULL;
+ }
 }

-static struct sched_domain *__build_mc_sched_domain(struct s_data *d,
- const struct cpumask *cpu_map, struct sched_domain_attr *attr,
- struct sched_domain *parent, int i)
+#ifdef CONFIG_SCHED_SMT
+static const struct cpumask *cpu_smt_mask(int cpu)
 {
- struct sched_domain *sd = parent;
-#ifdef CONFIG_SCHED_MC
- sd = &per_cpu(core_domains, i).sd;
- SD_INIT(sd, MC);
- set_domain_attribute(sd, attr);
- cpumask_and(sched_domain_span(sd), cpu_map, cpu_coregroup_mask(i));
- sd->parent = parent;
- parent->child = sd;
- cpu_to_core_group(i, cpu_map, &sd->groups, d->tmpmask);
-#endif
- return sd;
+ return topology_thread_cpumask(cpu);
 }
-
-static struct sched_domain *__build_smt_sched_domain(struct s_data *d,
- const struct cpumask *cpu_map, struct sched_domain_attr *attr,
- struct sched_domain *parent, int i)
-{
- struct sched_domain *sd = parent;
-#ifdef CONFIG_SCHED_SMT
- sd = &per_cpu(cpu_domains, i).sd;
- SD_INIT(sd, SIBLING);
- set_domain_attribute(sd, attr);
- cpumask_and(sched_domain_span(sd), cpu_map, topology_thread_cpumask(i));
- sd->parent = parent;
- parent->child = sd;
- cpu_to_cpu_group(i, cpu_map, &sd->groups, d->tmpmask);
 #endif
- return sd;
-}

-static void build_sched_groups(struct s_data *d, enum sched_domain_level l,
- const struct cpumask *cpu_map, int cpu)
-{
- switch (l) {
+/*
+ * Topology list, bottom-up.
+ */
+static struct sched_domain_topology_level default_topology[] = {
 #ifdef CONFIG_SCHED_SMT
- case SD_LV_SIBLING: /* set up CPU (sibling) groups */
- cpumask_and(d->this_sibling_map, cpu_map,
- topology_thread_cpumask(cpu));
- if (cpu == cpumask_first(d->this_sibling_map))
- init_sched_build_groups(d->this_sibling_map, cpu_map,
- &cpu_to_cpu_group,
- d->send_covered, d->tmpmask);
- break;
+ { sd_init_SIBLING, cpu_smt_mask, },
 #endif
 #ifdef CONFIG_SCHED_MC
- case SD_LV_MC: /* set up multi-core groups */
- cpumask_and(d->this_core_map, cpu_map, cpu_coregroup_mask(cpu));
- if (cpu == cpumask_first(d->this_core_map))
- init_sched_build_groups(d->this_core_map, cpu_map,
- &cpu_to_core_group,
- d->send_covered, d->tmpmask);
- break;
+ { sd_init_MC, cpu_coregroup_mask, },
 #endif
 #ifdef CONFIG_SCHED_BOOK
- case SD_LV_BOOK: /* set up book groups */
- cpumask_and(d->this_book_map, cpu_map, cpu_book_mask(cpu));
- if (cpu == cpumask_first(d->this_book_map))
- init_sched_build_groups(d->this_book_map, cpu_map,
- &cpu_to_book_group,
- d->send_covered, d->tmpmask);
- break;
+ { sd_init_BOOK, cpu_book_mask, },
 #endif
- case SD_LV_CPU: /* set up physical groups */
- cpumask_and(d->nodemask, cpumask_of_node(cpu), cpu_map);
- if (!cpumask_empty(d->nodemask))
- init_sched_build_groups(d->nodemask, cpu_map,
- &cpu_to_phys_group,
- d->send_covered, d->tmpmask);
- break;
+ { sd_init_CPU, cpu_cpu_mask, },
 #ifdef CONFIG_NUMA
- case SD_LV_ALLNODES:
- init_sched_build_groups(cpu_map, cpu_map, &cpu_to_allnodes_group,
- d->send_covered, d->tmpmask);
- break;
+ { sd_init_NODE, cpu_node_mask, },
+ { sd_init_ALLNODES, cpu_allnodes_mask, },
 #endif
- default:
- break;
+ { NULL, },
+};
+
+static struct sched_domain_topology_level *sched_domain_topology = default_topology;
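
default_topology turns the old per-level #ifdef builders into data: each entry pairs an sd_init_*() constructor with the cpumask function that describes the level's span, ordered bottom-up and terminated by a NULL entry, and sched_domain_topology is the pointer the build code actually walks. The way such a table is consumed can be sketched as follows (made-up level names and types, not the kernel's):

#include <stdio.h>

/* Minimal stand-in for sched_domain_topology_level. */
struct toy_topology_level {
	const char *(*init)(void);	/* NULL init terminates the table */
	const char *span;
};

static const char *init_smt(void)  { return "SMT"; }
static const char *init_mc(void)   { return "MC"; }
static const char *init_cpu(void)  { return "CPU"; }
static const char *init_node(void) { return "NODE"; }

static struct toy_topology_level toy_topology[] = {
	{ init_smt,  "hardware threads of one core" },
	{ init_mc,   "cores sharing a cache" },
	{ init_cpu,  "CPUs of one package or node" },
	{ init_node, "CPUs of nearby nodes" },
	{ NULL, },
};

int main(void)
{
	struct toy_topology_level *tl;
	int level = 0;

	/* Same walk as build_sched_domains(): bottom-up until init is NULL. */
	for (tl = toy_topology; tl->init; tl++)
		printf("level %d: %s spans %s\n", level++, tl->init(), tl->span);
	return 0;
}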
+
+static int __sdt_alloc(const struct cpumask *cpu_map)
+{
+ struct sched_domain_topology_level *tl;
+ int j;
+
+ for (tl = sched_domain_topology; tl->init; tl++) {
+ struct sd_data *sdd = &tl->data;
+
+ sdd->sd = alloc_percpu(struct sched_domain *);
+ if (!sdd->sd)
+ return -ENOMEM;
+
+ sdd->sg = alloc_percpu(struct sched_group *);
+ if (!sdd->sg)
+ return -ENOMEM;
+
+ for_each_cpu(j, cpu_map) {
+ struct sched_domain *sd;
+ struct sched_group *sg;
+
+ sd = kzalloc_node(sizeof(struct sched_domain) + cpumask_size(),
+ GFP_KERNEL, cpu_to_node(j));
+ if (!sd)
+ return -ENOMEM;
+
+ *per_cpu_ptr(sdd->sd, j) = sd;
+
+ sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(),
+ GFP_KERNEL, cpu_to_node(j));
+ if (!sg)
+ return -ENOMEM;
+
+ *per_cpu_ptr(sdd->sg, j) = sg;
+ }
 }
+
+ return 0;
+}
+
+static void __sdt_free(const struct cpumask *cpu_map)
+{
+ struct sched_domain_topology_level *tl;
+ int j;
+
+ for (tl = sched_domain_topology; tl->init; tl++) {
+ struct sd_data *sdd = &tl->data;
+
+ for_each_cpu(j, cpu_map) {
+ kfree(*per_cpu_ptr(sdd->sd, j));
+ kfree(*per_cpu_ptr(sdd->sg, j));
+ }
+ free_percpu(sdd->sd);
+ free_percpu(sdd->sg);
+ }
+}
+
+struct sched_domain *build_sched_domain(struct sched_domain_topology_level *tl,
+ struct s_data *d, const struct cpumask *cpu_map,
+ struct sched_domain_attr *attr, struct sched_domain *child,
+ int cpu)
+{
+ struct sched_domain *sd = tl->init(tl, cpu);
+ if (!sd)
+ return child;
+
+ set_domain_attribute(sd, attr);
+ cpumask_and(sched_domain_span(sd), cpu_map, tl->mask(cpu));
+ if (child) {
+ sd->level = child->level + 1;
+ sched_domain_level_max = max(sched_domain_level_max, sd->level);
+ child->parent = sd;
+ }
+ sd->child = child;
+
+ return sd;
 }
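
build_sched_domain() stitches one topology level onto the chain built so far: the new domain spans cpu_map & tl->mask(cpu), the previously built (lower) level becomes its child, and sched_domain_level_max tracks the deepest chain seen. A toy version of just that wiring (illustrative types, no kernel API):

#include <stdio.h>
#include <stdlib.h>

struct toy_domain {
	int level;
	struct toy_domain *parent, *child;
};

static int toy_level_max;

/* Mirrors the child/parent wiring done once per topology level. */
static struct toy_domain *toy_build_domain(struct toy_domain *child)
{
	struct toy_domain *sd = calloc(1, sizeof(*sd));

	if (child) {
		sd->level = child->level + 1;
		if (sd->level > toy_level_max)
			toy_level_max = sd->level;
		child->parent = sd;
	}
	sd->child = child;
	return sd;
}

int main(void)
{
	struct toy_domain *sd = NULL;
	int i;

	for (i = 0; i < 3; i++)		/* e.g. SMT, MC, CPU */
		sd = toy_build_domain(sd);

	for (; sd; sd = sd->child)
		printf("domain at level %d\n", sd->level);
	printf("deepest level seen: %d\n", toy_level_max);
	return 0;
}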

 /*
 * Build sched domains for a given set of cpus and attach the sched domains
 * to the individual cpus
 */
-static int __build_sched_domains(const struct cpumask *cpu_map,
- struct sched_domain_attr *attr)
+static int build_sched_domains(const struct cpumask *cpu_map,
+ struct sched_domain_attr *attr)
 {
 enum s_alloc alloc_state = sa_none;
- struct s_data d;
 struct sched_domain *sd;
- int i;
-#ifdef CONFIG_NUMA
- d.sd_allnodes = 0;
-#endif
+ struct s_data d;
+ int i, ret = -ENOMEM;

 alloc_state = __visit_domain_allocation_hell(&d, cpu_map);
 if (alloc_state != sa_rootdomain)
 goto error;
- alloc_state = sa_sched_groups;
-
- /*
- * Set up domains for cpus specified by the cpu_map.
- */
- for_each_cpu(i, cpu_map) {
- cpumask_and(d.nodemask, cpumask_of_node(cpu_to_node(i)),
- cpu_map);
-
- sd = __build_numa_sched_domains(&d, cpu_map, attr, i);
- sd = __build_cpu_sched_domain(&d, cpu_map, attr, sd, i);
- sd = __build_book_sched_domain(&d, cpu_map, attr, sd, i);
- sd = __build_mc_sched_domain(&d, cpu_map, attr, sd, i);
- sd = __build_smt_sched_domain(&d, cpu_map, attr, sd, i);
- }

+ /* Set up domains for cpus specified by the cpu_map. */
 for_each_cpu(i, cpu_map) {
- build_sched_groups(&d, SD_LV_SIBLING, cpu_map, i);
- build_sched_groups(&d, SD_LV_BOOK, cpu_map, i);
- build_sched_groups(&d, SD_LV_MC, cpu_map, i);
- }
+ struct sched_domain_topology_level *tl;

- /* Set up physical groups */
- for (i = 0; i < nr_node_ids; i++)
- build_sched_groups(&d, SD_LV_CPU, cpu_map, i);
+ sd = NULL;
+ for (tl = sched_domain_topology; tl->init; tl++)
+ sd = build_sched_domain(tl, &d, cpu_map, attr, sd, i);

-#ifdef CONFIG_NUMA
- /* Set up node groups */
- if (d.sd_allnodes)
- build_sched_groups(&d, SD_LV_ALLNODES, cpu_map, 0);
+ while (sd->child)
+ sd = sd->child;

- for (i = 0; i < nr_node_ids; i++)
- if (build_numa_sched_groups(&d, cpu_map, i))
- goto error;
-#endif
-
- /* Calculate CPU power for physical packages and nodes */
-#ifdef CONFIG_SCHED_SMT
- for_each_cpu(i, cpu_map) {
- sd = &per_cpu(cpu_domains, i).sd;
- init_sched_groups_power(i, sd);
- }
-#endif
-#ifdef CONFIG_SCHED_MC
- for_each_cpu(i, cpu_map) {
- sd = &per_cpu(core_domains, i).sd;
- init_sched_groups_power(i, sd);
+ *per_cpu_ptr(d.sd, i) = sd;
 }
-#endif
-#ifdef CONFIG_SCHED_BOOK
- for_each_cpu(i, cpu_map) {
- sd = &per_cpu(book_domains, i).sd;
- init_sched_groups_power(i, sd);
- }
-#endif

+ /* Build the groups for the domains */
 for_each_cpu(i, cpu_map) {
- sd = &per_cpu(phys_domains, i).sd;
- init_sched_groups_power(i, sd);
- }
+ for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
+ sd->span_weight = cpumask_weight(sched_domain_span(sd));
+ get_group(i, sd->private, &sd->groups);
+ atomic_inc(&sd->groups->ref);

-#ifdef CONFIG_NUMA
- for (i = 0; i < nr_node_ids; i++)
- init_numa_sched_groups_power(d.sched_group_nodes[i]);
+ if (i != cpumask_first(sched_domain_span(sd)))
+ continue;

- if (d.sd_allnodes) {
- struct sched_group *sg;
+ build_sched_groups(sd);
+ }
+ }

- cpu_to_allnodes_group(cpumask_first(cpu_map), cpu_map, &sg,
- d.tmpmask);
- init_numa_sched_groups_power(sg);
+ /* Calculate CPU power for physical packages and nodes */
+ for (i = nr_cpumask_bits-1; i >= 0; i--) {
+ if (!cpumask_test_cpu(i, cpu_map))
+ continue;
+
+ for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
+ claim_allocations(i, sd);
+ init_sched_groups_power(i, sd);
+ }
 }
-#endif

 /* Attach the domains */
+ rcu_read_lock();
 for_each_cpu(i, cpu_map) {
-#ifdef CONFIG_SCHED_SMT
- sd = &per_cpu(cpu_domains, i).sd;
-#elif defined(CONFIG_SCHED_MC)
- sd = &per_cpu(core_domains, i).sd;
-#elif defined(CONFIG_SCHED_BOOK)
- sd = &per_cpu(book_domains, i).sd;
-#else
- sd = &per_cpu(phys_domains, i).sd;
-#endif
+ sd = *per_cpu_ptr(d.sd, i);
 cpu_attach_domain(sd, d.rd, i);
 }
+ rcu_read_unlock();

- d.sched_group_nodes = NULL; /* don't free this we still need it */
- __free_domain_allocs(&d, sa_tmpmask, cpu_map);
- return 0;
-
+ ret = 0;
 error:
 __free_domain_allocs(&d, alloc_state, cpu_map);
- return -ENOMEM;
-}
-
-static int build_sched_domains(const struct cpumask *cpu_map)
-{
- return __build_sched_domains(cpu_map, NULL);
+ return ret;
 }
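
With the table-driven rewrite, build_sched_domains() treats every CPU as a ->child/->parent chain: build the chain bottom-up from the topology table and remember its lowest level in d.sd, walk it upwards once to set span_weight and the groups, walk it again (iterating CPUs in reverse) to claim allocations and initialize group power, and finally attach only the base domain under rcu_read_lock(). The shared walking pattern of those later passes reduces to the sketch below (toy types; the d.sd stand-in is just an array):

#include <stdio.h>

#define NR_CPUS 2
#define NR_LEVELS 3

struct toy_domain {
	struct toy_domain *parent;
	int level;
};

int main(void)
{
	static struct toy_domain chain[NR_CPUS][NR_LEVELS];
	struct toy_domain *base[NR_CPUS];	/* stand-in for the d.sd per-cpu pointers */
	struct toy_domain *sd;
	int i, l;

	for (i = 0; i < NR_CPUS; i++) {
		for (l = 0; l < NR_LEVELS; l++) {
			chain[i][l].level = l;
			chain[i][l].parent = (l + 1 < NR_LEVELS) ? &chain[i][l + 1] : NULL;
		}
		base[i] = &chain[i][0];
	}

	/* The group-building and power passes are both walks of this shape. */
	for (i = NR_CPUS - 1; i >= 0; i--)
		for (sd = base[i]; sd; sd = sd->parent)
			printf("cpu %d: visit level %d\n", i, sd->level);
	return 0;
}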

 static cpumask_var_t *doms_cur; /* current sched domains */
@@ -7670,7 +7362,7 @@ void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms)
 * For now this just excludes isolated cpus, but could be used to
 * exclude other special cases in the future.
 */
-static int arch_init_sched_domains(const struct cpumask *cpu_map)
+static int init_sched_domains(const struct cpumask *cpu_map)
 {
 int err;

@@ -7681,32 +7373,24 @@ static int arch_init_sched_domains(const struct cpumask *cpu_map)
 doms_cur = &fallback_doms;
 cpumask_andnot(doms_cur[0], cpu_map, cpu_isolated_map);
 dattr_cur = NULL;
- err = build_sched_domains(doms_cur[0]);
+ err = build_sched_domains(doms_cur[0], NULL);
 register_sched_domain_sysctl();

 return err;
 }

-static void arch_destroy_sched_domains(const struct cpumask *cpu_map,
- struct cpumask *tmpmask)
-{
- free_sched_groups(cpu_map, tmpmask);
-}
-
 /*
 * Detach sched domains from a group of cpus specified in cpu_map
 * These cpus will now be attached to the NULL domain
 */
 static void detach_destroy_domains(const struct cpumask *cpu_map)
 {
- /* Save because hotplug lock held. */
- static DECLARE_BITMAP(tmpmask, CONFIG_NR_CPUS);
 int i;

+ rcu_read_lock();
 for_each_cpu(i, cpu_map)
 cpu_attach_domain(NULL, &def_root_domain, i);
- synchronize_sched();
- arch_destroy_sched_domains(cpu_map, to_cpumask(tmpmask));
+ rcu_read_unlock();
 }

 /* handle null as "default" */
@@ -7795,8 +7479,7 @@ match1:
 goto match2;
 }
 /* no match - add a new doms_new */
- __build_sched_domains(doms_new[i],
- dattr_new ? dattr_new + i : NULL);
+ build_sched_domains(doms_new[i], dattr_new ? dattr_new + i : NULL);
 match2:
 ;
 }
@@ -7815,7 +7498,7 @@ match2:
 }

 #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
-static void arch_reinit_sched_domains(void)
+static void reinit_sched_domains(void)
 {
 get_online_cpus();

@@ -7848,7 +7531,7 @@ static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt)
 else
 sched_mc_power_savings = level;

- arch_reinit_sched_domains();
+ reinit_sched_domains();

 return count;
 }
@@ -7967,14 +7650,9 @@ void __init sched_init_smp(void)
 alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL);
 alloc_cpumask_var(&fallback_doms, GFP_KERNEL);

-#if defined(CONFIG_NUMA)
- sched_group_nodes_bycpu = kzalloc(nr_cpu_ids * sizeof(void **),
- GFP_KERNEL);
- BUG_ON(sched_group_nodes_bycpu == NULL);
-#endif
 get_online_cpus();
 mutex_lock(&sched_domains_mutex);
- arch_init_sched_domains(cpu_active_mask);
+ init_sched_domains(cpu_active_mask);
 cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map);
 if (cpumask_empty(non_isolated_cpus))
 cpumask_set_cpu(smp_processor_id(), non_isolated_cpus);
@@ -8281,6 +7959,7 @@ void __init sched_init(void)
 /* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */
 zalloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT);
 #ifdef CONFIG_SMP
+ zalloc_cpumask_var(&sched_domains_tmpmask, GFP_NOWAIT);
 #ifdef CONFIG_NO_HZ
 zalloc_cpumask_var(&nohz.idle_cpus_mask, GFP_NOWAIT);
 alloc_cpumask_var(&nohz.grp_idle_mask, GFP_NOWAIT);
@@ -8340,7 +8019,7 @@ static void normalize_task(struct rq *rq, struct task_struct *p)
 int old_prio = p->prio;
 int on_rq;

- on_rq = p->se.on_rq;
+ on_rq = p->on_rq;
 if (on_rq)
 deactivate_task(rq, p, 0);
 __setscheduler(rq, p, SCHED_NORMAL, 0);
@@ -8553,7 +8232,6 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
 {
 struct rt_rq *rt_rq;
 struct sched_rt_entity *rt_se;
- struct rq *rq;
 int i;

 tg->rt_rq = kzalloc(sizeof(rt_rq) * nr_cpu_ids, GFP_KERNEL);
@@ -8567,8 +8245,6 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
 ktime_to_ns(def_rt_bandwidth.rt_period), 0);

 for_each_possible_cpu(i) {
- rq = cpu_rq(i);
-
 rt_rq = kzalloc_node(sizeof(struct rt_rq),
 GFP_KERNEL, cpu_to_node(i));
 if (!rt_rq)
@@ -8683,7 +8359,7 @@ void sched_move_task(struct task_struct *tsk)
 rq = task_rq_lock(tsk, &flags);

 running = task_current(rq, tsk);
- on_rq = tsk->se.on_rq;
+ on_rq = tsk->on_rq;

 if (on_rq)
 dequeue_task(rq, tsk, 0);
@@ -8702,7 +8378,7 @@ void sched_move_task(struct task_struct *tsk)
 if (on_rq)
 enqueue_task(rq, tsk, 0);

- task_rq_unlock(rq, &flags);
+ task_rq_unlock(rq, tsk, &flags);
 }
 #endif /* CONFIG_CGROUP_SCHED */