@@ -6432,11 +6432,13 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
 	return target;
 }
 
-/*
- * cpu_util returns the amount of capacity of a CPU that is used by CFS
- * tasks. The unit of the return value must be the one of capacity so we can
- * compare the utilization with the capacity of the CPU that is available for
- * CFS task (ie cpu_capacity).
+/**
+ * cpu_util - Amount of capacity of a CPU that is (estimated to be) used by CFS tasks
+ * @cpu: the CPU to get the utilization of
+ *
+ * The unit of the return value must be the one of capacity so we can compare
+ * the utilization with the capacity of the CPU that is available for CFS tasks
+ * (ie cpu_capacity).
  *
  * cfs_rq.avg.util_avg is the sum of running time of runnable tasks plus the
  * recent utilization of currently non-runnable tasks on a CPU. It represents
@@ -6447,6 +6449,14 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
  * current capacity (capacity_curr <= capacity_orig) of the CPU because it is
  * the running time on this CPU scaled by capacity_curr.
  *
+ * The estimated utilization of a CPU is defined to be the maximum between its
+ * cfs_rq.avg.util_avg and the sum of the estimated utilization of the tasks
+ * currently RUNNABLE on that CPU.
+ * This allows us to properly represent the expected utilization of a CPU
+ * which has just picked up a big task after a long sleep period. At the same
+ * time, however, it preserves the benefits of the "blocked utilization" in
+ * describing the potential for other tasks waking up on the same CPU.
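+ *
+ * For example (illustrative numbers): if util_avg has decayed to 100 while
+ * a task with an estimated utilization of 600 was sleeping, then as soon as
+ * that task is enqueued again the estimated utilization of the CPU becomes
+ * max(100, 600) = 600, long before the PELT average converges back.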
+ *
  * Nevertheless, cfs_rq.avg.util_avg can be higher than capacity_curr or even
  * higher than capacity_orig because of unfortunate rounding in
  * cfs.avg.util_avg or just after migrating tasks and new task wakeups until
@@ -6457,13 +6467,21 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
  * available capacity. We allow utilization to overshoot capacity_curr (but not
  * capacity_orig) as it useful for predicting the capacity required after task
  * migrations (scheduler-driven DVFS).
+ *
+ * Return: the (estimated) utilization for the specified CPU
  */
-static unsigned long cpu_util(int cpu)
+static inline unsigned long cpu_util(int cpu)
 {
-	unsigned long util = cpu_rq(cpu)->cfs.avg.util_avg;
-	unsigned long capacity = capacity_orig_of(cpu);
+	struct cfs_rq *cfs_rq;
+	unsigned int util;
+
+	cfs_rq = &cpu_rq(cpu)->cfs;
+	util = READ_ONCE(cfs_rq->avg.util_avg);
+
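+	/* Consider also the estimated utilization of currently enqueued tasks */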
+	if (sched_feat(UTIL_EST))
+		util = max(util, READ_ONCE(cfs_rq->avg.util_est.enqueued));
 
-	return (util >= capacity) ? capacity : util;
+	return min_t(unsigned long, util, capacity_orig_of(cpu));
 }
 
 /*
@@ -6472,16 +6490,54 @@ static unsigned long cpu_util(int cpu)
  */
 static unsigned long cpu_util_wake(int cpu, struct task_struct *p)
 {
-	unsigned long util, capacity;
+	struct cfs_rq *cfs_rq;
+	unsigned int util;
 
 	/* Task has no contribution or is new */
-	if (cpu != task_cpu(p) || !p->se.avg.last_update_time)
+	if (cpu != task_cpu(p) || !READ_ONCE(p->se.avg.last_update_time))
 		return cpu_util(cpu);
 
-	capacity = capacity_orig_of(cpu);
-	util = max_t(long, cpu_rq(cpu)->cfs.avg.util_avg - task_util(p), 0);
+	cfs_rq = &cpu_rq(cpu)->cfs;
+	util = READ_ONCE(cfs_rq->avg.util_avg);
+
+	/* Discount task's blocked util from CPU's util */
+	util -= min_t(unsigned int, util, task_util(p));
 
-	return (util >= capacity) ? capacity : util;
+	/*
+	 * Covered cases:
+	 *
+	 * a) if *p is the only task sleeping on this CPU, then:
+	 *      cpu_util (== task_util) > util_est (== 0)
+	 *    and thus we return:
+	 *      cpu_util_wake = (cpu_util - task_util) = 0
+	 *
+	 * b) if other tasks are SLEEPING on this CPU, which is now exiting
+	 *    IDLE, then:
+	 *      cpu_util >= task_util
+	 *      cpu_util > util_est (== 0)
+	 *    and thus we discount *p's blocked utilization to return:
+	 *      cpu_util_wake = (cpu_util - task_util) >= 0
+	 *
+	 * c) if other tasks are RUNNABLE on that CPU and
+	 *      util_est > cpu_util
+	 *    then we use util_est since it returns a more restrictive
+	 *    estimation of the spare capacity on that CPU, by just
+	 *    considering the expected utilization of tasks already
+	 *    runnable on that CPU.
+	 *
+	 * Cases a) and b) are covered by the above code, while case c) is
+	 * covered by the following code when estimated utilization is
+	 * enabled.
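+	 *
+	 * For example (illustrative numbers): if, after discounting *p,
+	 * util_avg == 300 while the still RUNNABLE tasks have a summed
+	 * util_est.enqueued == 500, case c) applies and we use 500, which
+	 * the return below then clamps to capacity_orig.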
+	 */
+	if (sched_feat(UTIL_EST))
+		util = max(util, READ_ONCE(cfs_rq->avg.util_est.enqueued));
+
+	/*
+	 * Utilization (estimated) can exceed the CPU capacity, thus let's
+	 * clamp to the maximum CPU capacity to ensure consistency with
+	 * the cpu_util call.
+	 */
+	return min_t(unsigned long, util, capacity_orig_of(cpu));
 }
 
 /*