@@ -871,7 +871,7 @@ update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	    likely(wait_start > prev_wait_start))
 		wait_start -= prev_wait_start;
 
-	schedstat_set(se->statistics.wait_start, wait_start);
+	__schedstat_set(se->statistics.wait_start, wait_start);
 }
 
 static inline void
@@ -893,17 +893,17 @@ update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
 			 * time stamp can be adjusted to accumulate wait time
 			 * prior to migration.
 			 */
-			schedstat_set(se->statistics.wait_start, delta);
+			__schedstat_set(se->statistics.wait_start, delta);
 			return;
 		}
 		trace_sched_stat_wait(p, delta);
 	}
 
-	schedstat_set(se->statistics.wait_max,
+	__schedstat_set(se->statistics.wait_max,
 			max(schedstat_val(se->statistics.wait_max), delta));
-	schedstat_inc(se->statistics.wait_count);
-	schedstat_add(se->statistics.wait_sum, delta);
-	schedstat_set(se->statistics.wait_start, 0);
+	__schedstat_inc(se->statistics.wait_count);
+	__schedstat_add(se->statistics.wait_sum, delta);
+	__schedstat_set(se->statistics.wait_start, 0);
 }
 
 static inline void
@@ -928,10 +928,10 @@ update_stats_enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 			delta = 0;
 
 		if (unlikely(delta > schedstat_val(se->statistics.sleep_max)))
-			schedstat_set(se->statistics.sleep_max, delta);
+			__schedstat_set(se->statistics.sleep_max, delta);
 
-		schedstat_set(se->statistics.sleep_start, 0);
-		schedstat_add(se->statistics.sum_sleep_runtime, delta);
+		__schedstat_set(se->statistics.sleep_start, 0);
+		__schedstat_add(se->statistics.sum_sleep_runtime, delta);
 
 		if (tsk) {
 			account_scheduler_latency(tsk, delta >> 10, 1);
@@ -945,15 +945,15 @@ update_stats_enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 			delta = 0;
 
 		if (unlikely(delta > schedstat_val(se->statistics.block_max)))
-			schedstat_set(se->statistics.block_max, delta);
+			__schedstat_set(se->statistics.block_max, delta);
 
-		schedstat_set(se->statistics.block_start, 0);
-		schedstat_add(se->statistics.sum_sleep_runtime, delta);
+		__schedstat_set(se->statistics.block_start, 0);
+		__schedstat_add(se->statistics.sum_sleep_runtime, delta);
 
 		if (tsk) {
 			if (tsk->in_iowait) {
-				schedstat_add(se->statistics.iowait_sum, delta);
-				schedstat_inc(se->statistics.iowait_count);
+				__schedstat_add(se->statistics.iowait_sum, delta);
+				__schedstat_inc(se->statistics.iowait_count);
 				trace_sched_stat_iowait(tsk, delta);
 			}
 
@@ -1012,10 +1012,10 @@ update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 		struct task_struct *tsk = task_of(se);
 
 		if (tsk->state & TASK_INTERRUPTIBLE)
-			schedstat_set(se->statistics.sleep_start,
+			__schedstat_set(se->statistics.sleep_start,
 				      rq_clock(rq_of(cfs_rq)));
 		if (tsk->state & TASK_UNINTERRUPTIBLE)
-			schedstat_set(se->statistics.block_start,
+			__schedstat_set(se->statistics.block_start,
 				      rq_clock(rq_of(cfs_rq)));
 	}
 }
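
A note on the conversion above: each of the functions touched here already bails out early when schedstats are disabled, so the plain schedstat_*() wrappers would re-test schedstat_enabled() on every individual update. The double-underscore variants are the unguarded versions that rely on that single up-front check. A minimal userspace sketch of the same guarded/unguarded split — the names (stat_set, __stat_set, stats_enabled) and the struct below are illustrative stand-ins, not the kernel's definitions:

#include <stdio.h>
#include <stdbool.h>

static bool stats_enabled = true;	/* stand-in for the schedstats static key */

#define stat_enabled()		(stats_enabled)
#define __stat_set(var, val)	do { (var) = (val); } while (0)			/* unguarded */
#define stat_set(var, val)	do { if (stat_enabled()) (var) = (val); } while (0)	/* guarded */

struct wait_stats {
	unsigned long long wait_start;
	unsigned long long wait_max;
};

int main(void)
{
	struct wait_stats ws = { 0, 0 };

	if (!stat_enabled())		/* one check for the whole block ... */
		return 0;

	__stat_set(ws.wait_start, 100);	/* ... then unguarded updates, as in the hunks above */
	__stat_set(ws.wait_max, 250);

	printf("wait_start=%llu wait_max=%llu\n", ws.wait_start, ws.wait_max);
	return 0;
}
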
@@ -5692,27 +5692,31 @@ static int wake_wide(struct task_struct *p)
  *			  scheduling latency of the CPUs. This seems to work
  *			  for the overloaded case.
  */
-
-static bool
-wake_affine_idle(struct sched_domain *sd, struct task_struct *p,
-		 int this_cpu, int prev_cpu, int sync)
+static int
+wake_affine_idle(int this_cpu, int prev_cpu, int sync)
 {
 	/*
 	 * If this_cpu is idle, it implies the wakeup is from interrupt
 	 * context. Only allow the move if cache is shared. Otherwise an
 	 * interrupt intensive workload could force all tasks onto one
 	 * node depending on the IO topology or IRQ affinity settings.
+	 *
+	 * If the prev_cpu is idle and cache affine then avoid a migration.
+	 * There is no guarantee that the cache hot data from an interrupt
+	 * is more important than cache hot data on the prev_cpu and from
+	 * a cpufreq perspective, it's better to have higher utilisation
+	 * on one CPU.
 	 */
 	if (idle_cpu(this_cpu) && cpus_share_cache(this_cpu, prev_cpu))
-		return true;
+		return idle_cpu(prev_cpu) ? prev_cpu : this_cpu;
 
 	if (sync && cpu_rq(this_cpu)->nr_running == 1)
-		return true;
+		return this_cpu;
 
-	return false;
+	return nr_cpumask_bits;
 }
 
-static bool
+static int
 wake_affine_weight(struct sched_domain *sd, struct task_struct *p,
 		   int this_cpu, int prev_cpu, int sync)
 {
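
Net effect of the wake_affine_idle() change above: it now names a CPU instead of answering yes/no, and when the waking CPU and prev_cpu are cache affine and both idle it prefers prev_cpu so the task keeps its cache-hot data, per the added comment. nr_cpumask_bits works as a "no recommendation" value because it can never be a valid CPU id. A self-contained sketch of just that decision, with idle_cpu()/cpus_share_cache() replaced by toy stand-ins and NO_CPU playing the role of nr_cpumask_bits (everything below is illustrative, not kernel code):

#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS	8
#define NO_CPU	NR_CPUS			/* plays the role of nr_cpumask_bits */

/* toy topology: CPUs 0-3 share one LLC, CPUs 4-7 another */
static bool cpus_share_cache(int a, int b) { return (a / 4) == (b / 4); }

static bool cpu_idle[NR_CPUS] = { [0] = true, [2] = true };
static bool idle_cpu(int cpu) { return cpu_idle[cpu]; }

/* mirrors the rewritten wake_affine_idle(), minus the sync/nr_running case */
static int pick_affine_idle(int this_cpu, int prev_cpu)
{
	if (idle_cpu(this_cpu) && cpus_share_cache(this_cpu, prev_cpu))
		return idle_cpu(prev_cpu) ? prev_cpu : this_cpu;
	return NO_CPU;
}

int main(void)
{
	printf("%d\n", pick_affine_idle(0, 2));	/* both idle, same LLC -> 2 (prev) */
	printf("%d\n", pick_affine_idle(0, 1));	/* prev busy -> 0 (this)           */
	printf("%d\n", pick_affine_idle(0, 5));	/* different LLC -> 8 (no pick)    */
	return 0;
}
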
@@ -5726,7 +5730,7 @@ wake_affine_weight(struct sched_domain *sd, struct task_struct *p,
 		unsigned long current_load = task_h_load(current);
 
 		if (current_load > this_eff_load)
-			return true;
+			return this_cpu;
 
 		this_eff_load -= current_load;
 	}
@@ -5743,28 +5747,28 @@ wake_affine_weight(struct sched_domain *sd, struct task_struct *p,
 	prev_eff_load *= 100 + (sd->imbalance_pct - 100) / 2;
 	prev_eff_load *= capacity_of(this_cpu);
 
-	return this_eff_load <= prev_eff_load;
+	return this_eff_load <= prev_eff_load ? this_cpu : nr_cpumask_bits;
 }
 
 static int wake_affine(struct sched_domain *sd, struct task_struct *p,
 		       int prev_cpu, int sync)
 {
 	int this_cpu = smp_processor_id();
-	bool affine = false;
+	int target = nr_cpumask_bits;
 
-	if (sched_feat(WA_IDLE) && !affine)
-		affine = wake_affine_idle(sd, p, this_cpu, prev_cpu, sync);
+	if (sched_feat(WA_IDLE))
+		target = wake_affine_idle(this_cpu, prev_cpu, sync);
 
-	if (sched_feat(WA_WEIGHT) && !affine)
-		affine = wake_affine_weight(sd, p, this_cpu, prev_cpu, sync);
+	if (sched_feat(WA_WEIGHT) && target == nr_cpumask_bits)
+		target = wake_affine_weight(sd, p, this_cpu, prev_cpu, sync);
 
 	schedstat_inc(p->se.statistics.nr_wakeups_affine_attempts);
-	if (affine) {
-		schedstat_inc(sd->ttwu_move_affine);
-		schedstat_inc(p->se.statistics.nr_wakeups_affine);
-	}
+	if (target == nr_cpumask_bits)
+		return prev_cpu;
 
-	return affine;
+	schedstat_inc(sd->ttwu_move_affine);
+	schedstat_inc(p->se.statistics.nr_wakeups_affine);
+	return target;
 }
 
 static inline unsigned long task_util(struct task_struct *p);
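
With both helpers returning a CPU id, wake_affine() itself becomes a small pipeline: try the idle heuristic if WA_IDLE is set, fall through to the weight heuristic only if nothing was recommended, convert a final nr_cpumask_bits into "stay on prev_cpu", and bump the affine schedstats only when a recommendation was made. A compilable sketch of that control flow with the two heuristics stubbed out (choose_wake_cpu, pick_by_*, NO_CPU and nr_affine_moves are made-up names for illustration only):

#include <stdio.h>
#include <stdbool.h>

#define NO_CPU	64	/* stand-in for nr_cpumask_bits: never a valid CPU id */

/* trivially stubbed heuristics; the real ones are wake_affine_idle/_weight */
static int pick_by_idleness(int this_cpu, int prev_cpu) { return NO_CPU; }
static int pick_by_load(int this_cpu, int prev_cpu)     { return this_cpu; }

static unsigned long nr_affine_moves;	/* stand-in for the ttwu_move_affine count */

/* mirrors the control flow of the rewritten wake_affine() */
static int choose_wake_cpu(int this_cpu, int prev_cpu, bool wa_idle, bool wa_weight)
{
	int target = NO_CPU;

	if (wa_idle)
		target = pick_by_idleness(this_cpu, prev_cpu);
	if (wa_weight && target == NO_CPU)
		target = pick_by_load(this_cpu, prev_cpu);

	if (target == NO_CPU)		/* no recommendation: stay on prev_cpu */
		return prev_cpu;

	nr_affine_moves++;		/* counted only when a heuristic recommended a CPU */
	return target;
}

int main(void)
{
	printf("%d\n", choose_wake_cpu(0, 3, true, false));	/* 3: idle heuristic declined */
	printf("%d\n", choose_wake_cpu(0, 3, true, true));	/* 0: weight heuristic picked this_cpu */
	printf("moves=%lu\n", nr_affine_moves);			/* 1 */
	return 0;
}
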
@@ -6193,7 +6197,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
 static int select_idle_sibling(struct task_struct *p, int prev, int target)
 {
 	struct sched_domain *sd;
-	int i;
+	int i, recent_used_cpu;
 
 	if (idle_cpu(target))
 		return target;
@@ -6204,6 +6208,21 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
 	if (prev != target && cpus_share_cache(prev, target) && idle_cpu(prev))
 		return prev;
 
+	/* Check a recently used CPU as a potential idle candidate */
+	recent_used_cpu = p->recent_used_cpu;
+	if (recent_used_cpu != prev &&
+	    recent_used_cpu != target &&
+	    cpus_share_cache(recent_used_cpu, target) &&
+	    idle_cpu(recent_used_cpu) &&
+	    cpumask_test_cpu(p->recent_used_cpu, &p->cpus_allowed)) {
+		/*
+		 * Replace recent_used_cpu with prev as it is a potential
+		 * candidate for the next wake.
+		 */
+		p->recent_used_cpu = prev;
+		return recent_used_cpu;
+	}
+
 	sd = rcu_dereference(per_cpu(sd_llc, target));
 	if (!sd)
 		return target;
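
The block added above gives select_idle_sibling() a third cheap candidate to probe before falling back to the LLC scan: a recently used CPU remembered per task (seeded from the waker's CPU in the select_task_rq_fair() hunk further down). Swapping recent_used_cpu with prev on a hit keeps two cache-affine candidates rotating for tasks that ping-pong between a pair of CPUs. A toy userspace model of that candidate order and rotation — it folds prev into the task struct and ignores cpus_share_cache()/cpus_allowed for brevity, so treat it purely as an illustration:

#include <stdio.h>
#include <stdbool.h>

#define NR_CPUS	4

static bool cpu_idle[NR_CPUS];
static bool idle_cpu(int cpu) { return cpu_idle[cpu]; }

struct task {
	int prev_cpu;		/* where the task last ran */
	int recent_used_cpu;	/* extra remembered candidate */
};

/* mirrors the candidate order: target, then prev, then the remembered CPU */
static int wake_cpu(struct task *p, int target)
{
	int recent = p->recent_used_cpu;

	if (idle_cpu(target))
		return target;
	if (p->prev_cpu != target && idle_cpu(p->prev_cpu))
		return p->prev_cpu;
	if (recent != p->prev_cpu && recent != target && idle_cpu(recent)) {
		p->recent_used_cpu = p->prev_cpu;	/* rotate the candidates */
		return recent;
	}
	return target;		/* the kernel would fall back to the LLC scan here */
}

int main(void)
{
	struct task p = { .prev_cpu = 1, .recent_used_cpu = 2 };
	int cpu;

	cpu_idle[2] = true;	/* target 0 and prev 1 are busy, remembered CPU 2 is idle */
	cpu = wake_cpu(&p, 0);
	printf("woken on %d, remembered %d\n", cpu, p.recent_used_cpu);	/* woken on 2, remembered 1 */
	return 0;
}
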
@@ -6357,8 +6376,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
 		if (cpu == prev_cpu)
 			goto pick_cpu;
 
-		if (wake_affine(affine_sd, p, prev_cpu, sync))
-			new_cpu = cpu;
+		new_cpu = wake_affine(affine_sd, p, prev_cpu, sync);
 	}
 
 	if (sd && !(sd_flag & SD_BALANCE_FORK)) {
@@ -6372,9 +6390,12 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
 
 	if (!sd) {
 pick_cpu:
-		if (sd_flag & SD_BALANCE_WAKE) /* XXX always ? */
+		if (sd_flag & SD_BALANCE_WAKE) { /* XXX always ? */
 			new_cpu = select_idle_sibling(p, prev_cpu, new_cpu);
 
+			if (want_affine)
+				current->recent_used_cpu = cpu;
+		}
 	} else {
 		new_cpu = find_idlest_cpu(sd, p, cpu, prev_cpu, sd_flag);
 	}