@@ -1139,6 +1139,47 @@ static unsigned int task_scan_max(struct task_struct *p)
 	return max(smin, smax);
 }
 
+void init_numa_balancing(unsigned long clone_flags, struct task_struct *p)
+{
+	int mm_users = 0;
+	struct mm_struct *mm = p->mm;
+
+	if (mm) {
+		mm_users = atomic_read(&mm->mm_users);
+		if (mm_users == 1) {
+			mm->numa_next_scan = jiffies + msecs_to_jiffies(sysctl_numa_balancing_scan_delay);
+			mm->numa_scan_seq = 0;
+		}
+	}
+	p->node_stamp = 0;
+	p->numa_scan_seq = mm ? mm->numa_scan_seq : 0;
+	p->numa_scan_period = sysctl_numa_balancing_scan_delay;
+	p->numa_work.next = &p->numa_work;
+	p->numa_faults = NULL;
+	p->numa_group = NULL;
+	p->last_task_numa_placement = 0;
+	p->last_sum_exec_runtime = 0;
+
+	/* New address space, reset the preferred nid */
+	if (!(clone_flags & CLONE_VM)) {
+		p->numa_preferred_nid = -1;
+		return;
+	}
+
+	/*
+	 * New thread, keep existing numa_preferred_nid which should be copied
+	 * already by arch_dup_task_struct but stagger when scans start.
+	 */
+	if (mm) {
+		unsigned int delay;
+
+		delay = min_t(unsigned int, task_scan_max(current),
+			current->numa_scan_period * mm_users * NSEC_PER_MSEC);
+		delay += 2 * TICK_NSEC;
+		p->node_stamp = delay;
+	}
+}
+
 static void account_numa_enqueue(struct rq *rq, struct task_struct *p)
 {
 	rq->nr_numa_running += (p->numa_preferred_nid != -1);
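The interesting part of init_numa_balancing() is the tail: a brand-new address space simply resets numa_preferred_nid, while a new thread of an existing mm keeps the inherited preference but has its first scan staggered so that many freshly cloned threads do not all start scanning at once. Roughly, the delay grows with the number of mm users up to the task_scan_max() cap, plus two ticks of slack. A user-space sketch of that arithmetic (the constants, the units, and the helper-free structure are assumptions for illustration, not kernel code):

/*
 * Illustration only: how the first NUMA scan of a new thread is pushed
 * back as more threads share the mm.
 */
#include <stdio.h>

#define NSEC_PER_MSEC	1000000ULL
#define TICK_NSEC	1000000ULL	/* assume HZ=1000 */

int main(void)
{
	unsigned long long scan_period_ms = 1000;			/* assumed numa_scan_period */
	unsigned long long scan_max = 60000ULL * NSEC_PER_MSEC;	/* assumed task_scan_max() cap */

	for (int mm_users = 1; mm_users <= 8; mm_users++) {
		unsigned long long delay = scan_period_ms * mm_users * NSEC_PER_MSEC;

		if (delay > scan_max)
			delay = scan_max;
		delay += 2 * TICK_NSEC;		/* same +2 ticks of slack as the patch */
		printf("mm_users=%d: node_stamp delayed by ~%llums\n",
		       mm_users, delay / NSEC_PER_MSEC);
	}
	return 0;
}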
@@ -5344,6 +5385,14 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	struct cfs_rq *cfs_rq;
 	struct sched_entity *se = &p->se;
 
+	/*
+	 * The code below (indirectly) updates schedutil which looks at
+	 * the cfs_rq utilization to select a frequency.
+	 * Let's add the task's estimated utilization to the cfs_rq's
+	 * estimated utilization, before we update schedutil.
+	 */
+	util_est_enqueue(&rq->cfs, p);
+
 	/*
 	 * If in_iowait is set, the code below may not trigger any cpufreq
 	 * utilization updates, so do it here explicitly with the IOWAIT flag
@@ -5385,7 +5434,6 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	if (!se)
 		add_nr_running(rq, 1);
 
-	util_est_enqueue(&rq->cfs, p);
 	hrtick_update(rq);
 }
 
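The two hunks above are one logical change: util_est_enqueue() moves from the end of enqueue_task_fair() to the top. The enqueue path below it (indirectly) kicks a schedutil frequency update, so folding the task's estimated utilization into the root cfs_rq first lets that update see the post-enqueue estimate rather than a stale one. For reference, the helper is expected to look roughly like the sketch below (based on the original util_est series; field names may differ slightly across kernel versions):

/* Sketch of util_est_enqueue(): add @p's estimated utilization to @cfs_rq. */
static inline void util_est_enqueue(struct cfs_rq *cfs_rq,
				    struct task_struct *p)
{
	unsigned int enqueued;

	if (!sched_feat(UTIL_EST))
		return;

	/* Update root cfs_rq's estimated utilization */
	enqueued  = cfs_rq->avg.util_est.enqueued;
	enqueued += _task_util_est(p);
	WRITE_ONCE(cfs_rq->avg.util_est.enqueued, enqueued);
}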
@@ -5858,8 +5906,8 @@ wake_affine_idle(int this_cpu, int prev_cpu, int sync)
 	 * a cpufreq perspective, it's better to have higher utilisation
 	 * on one CPU.
 	 */
-	if (idle_cpu(this_cpu) && cpus_share_cache(this_cpu, prev_cpu))
-		return idle_cpu(prev_cpu) ? prev_cpu : this_cpu;
+	if (available_idle_cpu(this_cpu) && cpus_share_cache(this_cpu, prev_cpu))
+		return available_idle_cpu(prev_cpu) ? prev_cpu : this_cpu;
 
 	if (sync && cpu_rq(this_cpu)->nr_running == 1)
 		return this_cpu;
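This hunk, and the idle_cpu() conversions in the hunks that follow, switch the wakeup path over to available_idle_cpu(). The distinction matters on virtualized hosts: a vCPU can be idle from the guest's point of view while the physical CPU backing it is running someone else, which makes it a poor place to wake a task. The new helper is expected to be idle_cpu() plus a vcpu_is_preempted() check, roughly as sketched here (the real definition lives in kernel/sched/core.c and may differ in detail):

/* Sketch: an idle CPU only counts as available if its vCPU isn't preempted. */
int available_idle_cpu(int cpu)
{
	if (!idle_cpu(cpu))
		return 0;

	if (vcpu_is_preempted(cpu))
		return 0;

	return 1;
}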
@@ -6102,7 +6150,7 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
 
 	/* Traverse only the allowed CPUs */
 	for_each_cpu_and(i, sched_group_span(group), &p->cpus_allowed) {
-		if (idle_cpu(i)) {
+		if (available_idle_cpu(i)) {
 			struct rq *rq = cpu_rq(i);
 			struct cpuidle_state *idle = idle_get_state(rq);
 			if (idle && idle->exit_latency < min_exit_latency) {
@@ -6144,6 +6192,13 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p
 	if (!cpumask_intersects(sched_domain_span(sd), &p->cpus_allowed))
 		return prev_cpu;
 
+	/*
+	 * We need task's util for capacity_spare_wake, sync it up to prev_cpu's
+	 * last_update_time.
+	 */
+	if (!(sd_flag & SD_BALANCE_FORK))
+		sync_entity_load_avg(&p->se);
+
 	while (sd) {
 		struct sched_group *group;
 		struct sched_domain *tmp;
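Hoisting the sync_entity_load_avg() call into find_idlest_cpu() keeps it on the slow path only, and skips it for SD_BALANCE_FORK, where the freshly forked task has no load signal to sync yet. The consumer is capacity_spare_wake() in the find_idlest_group() loop, which needs the task's utilization decayed to prev_cpu's clock before subtracting it; its expected shape is roughly the following sketch (helper names as used by this series, shown only to motivate the sync):

/* Sketch: spare capacity of @cpu assuming @p is removed from it. */
static unsigned long capacity_spare_wake(int cpu, struct task_struct *p)
{
	return max_t(long, capacity_of(cpu) - cpu_util_wake(cpu, p), 0);
}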
@@ -6224,7 +6279,7 @@ void __update_idle_core(struct rq *rq)
 		if (cpu == core)
 			continue;
 
-		if (!idle_cpu(cpu))
+		if (!available_idle_cpu(cpu))
 			goto unlock;
 	}
 
@@ -6256,7 +6311,7 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int
 
 		for_each_cpu(cpu, cpu_smt_mask(core)) {
 			cpumask_clear_cpu(cpu, cpus);
-			if (!idle_cpu(cpu))
+			if (!available_idle_cpu(cpu))
 				idle = false;
 		}
 
@@ -6285,7 +6340,7 @@ static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int t
 	for_each_cpu(cpu, cpu_smt_mask(target)) {
 		if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
 			continue;
-		if (idle_cpu(cpu))
+		if (available_idle_cpu(cpu))
 			return cpu;
 	}
 
@@ -6348,7 +6403,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
 			return -1;
 		if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
 			continue;
-		if (idle_cpu(cpu))
+		if (available_idle_cpu(cpu))
 			break;
 	}
 
@@ -6368,13 +6423,13 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
 	struct sched_domain *sd;
 	int i, recent_used_cpu;
 
-	if (idle_cpu(target))
+	if (available_idle_cpu(target))
 		return target;
 
 	/*
 	 * If the previous CPU is cache affine and idle, don't be stupid:
 	 */
-	if (prev != target && cpus_share_cache(prev, target) && idle_cpu(prev))
+	if (prev != target && cpus_share_cache(prev, target) && available_idle_cpu(prev))
 		return prev;
 
 	/* Check a recently used CPU as a potential idle candidate: */
@@ -6382,7 +6437,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
 	if (recent_used_cpu != prev &&
 	    recent_used_cpu != target &&
 	    cpus_share_cache(recent_used_cpu, target) &&
-	    idle_cpu(recent_used_cpu) &&
+	    available_idle_cpu(recent_used_cpu) &&
 	    cpumask_test_cpu(p->recent_used_cpu, &p->cpus_allowed)) {
 		/*
 		 * Replace recent_used_cpu with prev as it is a potential
@@ -6558,7 +6613,7 @@ static int wake_cap(struct task_struct *p, int cpu, int prev_cpu)
 static int
 select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_flags)
 {
-	struct sched_domain *tmp, *affine_sd = NULL, *sd = NULL;
+	struct sched_domain *tmp, *sd = NULL;
 	int cpu = smp_processor_id();
 	int new_cpu = prev_cpu;
 	int want_affine = 0;
@@ -6581,7 +6636,10 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
 		 */
 		if (want_affine && (tmp->flags & SD_WAKE_AFFINE) &&
 		    cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) {
-			affine_sd = tmp;
+			if (cpu != prev_cpu)
+				new_cpu = wake_affine(tmp, p, cpu, prev_cpu, sync);
+
+			sd = NULL; /* Prefer wake_affine over balance flags */
 			break;
 		}
 
@@ -6591,33 +6649,16 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
 			break;
 	}
 
-	if (affine_sd) {
-		sd = NULL; /* Prefer wake_affine over balance flags */
-		if (cpu == prev_cpu)
-			goto pick_cpu;
-
-		new_cpu = wake_affine(affine_sd, p, cpu, prev_cpu, sync);
-	}
-
-	if (sd && !(sd_flag & SD_BALANCE_FORK)) {
-		/*
-		 * We're going to need the task's util for capacity_spare_wake
-		 * in find_idlest_group. Sync it up to prev_cpu's
-		 * last_update_time.
-		 */
-		sync_entity_load_avg(&p->se);
-	}
+	if (unlikely(sd)) {
+		/* Slow path */
+		new_cpu = find_idlest_cpu(sd, p, cpu, prev_cpu, sd_flag);
+	} else if (sd_flag & SD_BALANCE_WAKE) { /* XXX always ? */
+		/* Fast path */
 
-	if (!sd) {
-pick_cpu:
-		if (sd_flag & SD_BALANCE_WAKE) { /* XXX always ? */
-			new_cpu = select_idle_sibling(p, prev_cpu, new_cpu);
+		new_cpu = select_idle_sibling(p, prev_cpu, new_cpu);
 
-			if (want_affine)
-				current->recent_used_cpu = cpu;
-		}
-	} else {
-		new_cpu = find_idlest_cpu(sd, p, cpu, prev_cpu, sd_flag);
+		if (want_affine)
+			current->recent_used_cpu = cpu;
 	}
 	rcu_read_unlock();
 
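Taken together, the last three hunks drop the affine_sd bookkeeping and the pick_cpu label from select_task_rq_fair(): wake_affine() now runs inline as soon as a suitable SD_WAKE_AFFINE domain is found (clearing sd so the balance path is skipped), and the tail collapses into a plain slow-path/fast-path split. In outline, the resulting flow looks like this (a simplification of the code above, not a drop-in replacement for the function body):

	for_each_domain(cpu, tmp) {
		if (want_affine && (tmp->flags & SD_WAKE_AFFINE) &&
		    cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) {
			if (cpu != prev_cpu)
				new_cpu = wake_affine(tmp, p, cpu, prev_cpu, sync);
			sd = NULL;	/* force the fast path below */
			break;
		}
		if (tmp->flags & sd_flag)
			sd = tmp;
		else if (!want_affine)
			break;
	}

	if (unlikely(sd))			/* slow path: fork/exec balancing */
		new_cpu = find_idlest_cpu(sd, p, cpu, prev_cpu, sd_flag);
	else if (sd_flag & SD_BALANCE_WAKE)	/* fast path: wakeups */
		new_cpu = select_idle_sibling(p, prev_cpu, new_cpu);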