|
@@ -5674,11 +5674,11 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p,
|
|
|
return target;
|
|
|
}
|
|
|
|
|
|
-static unsigned long cpu_util_wake(int cpu, struct task_struct *p);
|
|
|
+static unsigned long cpu_util_without(int cpu, struct task_struct *p);
|
|
|
|
|
|
-static unsigned long capacity_spare_wake(int cpu, struct task_struct *p)
|
|
|
+static unsigned long capacity_spare_without(int cpu, struct task_struct *p)
|
|
|
{
|
|
|
- return max_t(long, capacity_of(cpu) - cpu_util_wake(cpu, p), 0);
|
|
|
+ return max_t(long, capacity_of(cpu) - cpu_util_without(cpu, p), 0);
|
|
|
}
|
|
|
|
|
|
/*
|
|
@@ -5738,7 +5738,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
|
|
|
|
|
|
avg_load += cfs_rq_load_avg(&cpu_rq(i)->cfs);
|
|
|
|
|
|
- spare_cap = capacity_spare_wake(i, p);
|
|
|
+ spare_cap = capacity_spare_without(i, p);
|
|
|
|
|
|
if (spare_cap > max_spare_cap)
|
|
|
max_spare_cap = spare_cap;
|
|
@@ -5889,8 +5889,8 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p
|
|
|
return prev_cpu;
|
|
|
|
|
|
/*
|
|
|
- * We need task's util for capacity_spare_wake, sync it up to prev_cpu's
|
|
|
- * last_update_time.
|
|
|
+ * We need task's util for capacity_spare_without, sync it up to
|
|
|
+ * prev_cpu's last_update_time.
|
|
|
*/
|
|
|
if (!(sd_flag & SD_BALANCE_FORK))
|
|
|
sync_entity_load_avg(&p->se);
|
|
@@ -6216,10 +6216,19 @@ static inline unsigned long cpu_util(int cpu)
|
|
|
}
|
|
|
|
|
|
/*
|
|
|
- * cpu_util_wake: Compute CPU utilization with any contributions from
|
|
|
- * the waking task p removed.
|
|
|
+ * cpu_util_without: compute cpu utilization without any contributions from *p
|
|
|
+ * @cpu: the CPU whose utilization is requested
|
|
|
+ * @p: the task whose utilization should be discounted
|
|
|
+ *
|
|
|
+ * The utilization of a CPU is defined by the utilization of tasks currently
|
|
|
+ * enqueued on that CPU as well as tasks which are currently sleeping after an
|
|
|
+ * execution on that CPU.
|
|
|
+ *
|
|
|
+ * This method returns the utilization of the specified CPU by discounting the
|
|
|
+ * utilization of the specified task, whenever the task is currently
|
|
|
+ * contributing to the CPU utilization.
|
|
|
*/
|
|
|
-static unsigned long cpu_util_wake(int cpu, struct task_struct *p)
|
|
|
+static unsigned long cpu_util_without(int cpu, struct task_struct *p)
|
|
|
{
|
|
|
struct cfs_rq *cfs_rq;
|
|
|
unsigned int util;
|
|
@@ -6231,7 +6240,7 @@ static unsigned long cpu_util_wake(int cpu, struct task_struct *p)
|
|
|
cfs_rq = &cpu_rq(cpu)->cfs;
|
|
|
util = READ_ONCE(cfs_rq->avg.util_avg);
|
|
|
|
|
|
- /* Discount task's blocked util from CPU's util */
|
|
|
+ /* Discount task's util from CPU's util */
|
|
|
util -= min_t(unsigned int, util, task_util(p));
|
|
|
|
|
|
/*
|
|
@@ -6240,14 +6249,14 @@ static unsigned long cpu_util_wake(int cpu, struct task_struct *p)
|
|
|
* a) if *p is the only task sleeping on this CPU, then:
|
|
|
* cpu_util (== task_util) > util_est (== 0)
|
|
|
* and thus we return:
|
|
|
- * cpu_util_wake = (cpu_util - task_util) = 0
|
|
|
+ * cpu_util_without = (cpu_util - task_util) = 0
|
|
|
*
|
|
|
* b) if other tasks are SLEEPING on this CPU, which is now exiting
|
|
|
* IDLE, then:
|
|
|
* cpu_util >= task_util
|
|
|
* cpu_util > util_est (== 0)
|
|
|
* and thus we discount *p's blocked utilization to return:
|
|
|
- * cpu_util_wake = (cpu_util - task_util) >= 0
|
|
|
+ * cpu_util_without = (cpu_util - task_util) >= 0
|
|
|
*
|
|
|
* c) if other tasks are RUNNABLE on that CPU and
|
|
|
* util_est > cpu_util
|
|
@@ -6260,8 +6269,33 @@ static unsigned long cpu_util_wake(int cpu, struct task_struct *p)
|
|
|
* covered by the following code when estimated utilization is
|
|
|
* enabled.
|
|
|
*/
|
|
|
- if (sched_feat(UTIL_EST))
|
|
|
- util = max(util, READ_ONCE(cfs_rq->avg.util_est.enqueued));
|
|
|
+ if (sched_feat(UTIL_EST)) {
|
|
|
+ unsigned int estimated =
|
|
|
+ READ_ONCE(cfs_rq->avg.util_est.enqueued);
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Despite the following checks we still have a small window
|
|
|
+ * for a possible race, when an execl's select_task_rq_fair()
|
|
|
+ * races with LB's detach_task():
|
|
|
+ *
|
|
|
+ * detach_task()
|
|
|
+ * p->on_rq = TASK_ON_RQ_MIGRATING;
|
|
|
+ * ---------------------------------- A
|
|
|
+ * deactivate_task() \
|
|
|
+ * dequeue_task() + RaceTime
|
|
|
+ * util_est_dequeue() /
|
|
|
+ * ---------------------------------- B
|
|
|
+ *
|
|
|
+ * The additional check on "current == p" is required to
|
|
|
+ * properly fix the execl regression and it helps in further
|
|
|
+ * reducing the chances for the above race.
|
|
|
+ */
|
|
|
+ if (unlikely(task_on_rq_queued(p) || current == p)) {
|
|
|
+ estimated -= min_t(unsigned int, estimated,
|
|
|
+ (_task_util_est(p) | UTIL_AVG_UNCHANGED));
|
|
|
+ }
|
|
|
+ util = max(util, estimated);
|
|
|
+ }
|
|
|
|
|
|
/*
|
|
|
* Utilization (estimated) can exceed the CPU capacity, thus let's
|