@@ -3319,11 +3319,77 @@ void set_task_rq_fair(struct sched_entity *se,
         se->avg.last_update_time = n_last_update_time;
 }

-/* Take into account change of utilization of a child task group */
+
+/*
+ * When on migration a sched_entity joins/leaves the PELT hierarchy, we need to
+ * propagate its contribution. The key to this propagation is the invariant
+ * that for each group:
+ *
+ *   ge->avg == grq->avg                                                (1)
+ *
+ * _IFF_ we look at the pure running and runnable sums. Because they
+ * represent the very same entity, just at different points in the hierarchy.
+ *
+ *
+ * Per the above update_tg_cfs_util() is trivial (and still 'wrong') and
+ * simply copies the running sum over.
+ *
+ * However, update_tg_cfs_runnable() is more complex. So we have:
+ *
+ *   ge->avg.load_avg = ge->load.weight * ge->avg.runnable_avg          (2)
+ *
+ * And since, like util, the runnable part should be directly transferable,
+ * the following would _appear_ to be the straightforward approach:
+ *
+ *   grq->avg.load_avg = grq->load.weight * grq->avg.running_avg        (3)
+ *
+ * And per (1) we have:
+ *
+ *   ge->avg.running_avg == grq->avg.running_avg
+ *
+ * Which gives:
+ *
+ *                      ge->load.weight * grq->avg.load_avg
+ *   ge->avg.load_avg = -----------------------------------             (4)
+ *                               grq->load.weight
+ *
+ * Except that is wrong!
+ *
+ * Because while for entities historical weight is not important and we
+ * really only care about our future and therefore can consider a pure
+ * runnable sum, runqueues can NOT do this.
+ *
+ * We specifically want runqueues to have a load_avg that includes
+ * historical weights. Those represent the blocked load, the load we expect
+ * to (shortly) return to us. This only works by keeping the weights as an
+ * integral part of the sum. We therefore cannot decompose as per (3).
+ *
+ * OK, so what then?
+ *
+ *
+ * Another way to look at things is:
+ *
+ *   grq->avg.load_avg = \Sum se->avg.load_avg
+ *
+ * Therefore, per (2):
+ *
+ *   grq->avg.load_avg = \Sum se->load.weight * se->avg.runnable_avg
+ *
+ * And the very thing we're propagating is a change in that sum (someone
+ * joined/left). So we can easily know the runnable change, which would be, per
+ * (2) the already tracked se->load_avg divided by the corresponding
+ * se->weight.
+ *
+ * Basically (4) but in differential form:
+ *
+ *   d(runnable_avg) += se->avg.load_avg / se->load.weight
+ *                                                                      (5)
+ *   ge->avg.load_avg += ge->load.weight * d(runnable_avg)
+ */
+
 static inline void
-update_tg_cfs_util(struct cfs_rq *cfs_rq, struct sched_entity *se)
+update_tg_cfs_util(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq)
 {
-        struct cfs_rq *gcfs_rq = group_cfs_rq(se);
         long delta = gcfs_rq->avg.util_avg - se->avg.util_avg;

         /* Nothing to update */
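
To make the differential form (5) above concrete, here is a small worked example as a standalone C sketch. It is purely illustrative and not part of the patch: the variable names are invented, and LOAD_AVG_MAX is assumed to be the PELT series maximum used by kernels of this vintage (about 47742). The child's runnable sum is propagated unchanged, while the group entity rebuilds its load_avg from that sum and its own weight.

/* pelt_prop_example.c -- illustrative only, not kernel code */
#include <stdio.h>
#include <stdint.h>

#define LOAD_AVG_MAX 47742	/* assumed PELT series maximum */

int main(void)
{
	/* child entity that just joined: runnable roughly half the time */
	int64_t d_runnable_sum = LOAD_AVG_MAX / 2;

	/* group entity (ge) representing the child cfs_rq at the parent */
	long ge_weight = 512;
	long ge_load_avg = 0;
	int64_t ge_load_sum = 0;

	/* ge->avg.load_avg += ge->load.weight * d(runnable_avg), per (5) */
	int64_t load_sum = (int64_t)ge_weight * d_runnable_sum;
	long load_avg = (long)(load_sum / LOAD_AVG_MAX);

	ge_load_sum += d_runnable_sum;	/* entity sums stay weight-free */
	ge_load_avg += load_avg;	/* the average carries the weight */

	printf("d_runnable_sum=%lld -> ge load_avg delta=%ld (now %ld)\n",
	       (long long)d_runnable_sum, load_avg, ge_load_avg);
	return 0;
}

With these invented numbers the group entity ends up with load_avg 256, i.e. half of its weight, which matches a child that is runnable half the time.
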
@@ -3339,102 +3405,59 @@ update_tg_cfs_util(struct cfs_rq *cfs_rq, struct sched_entity *se)
         cfs_rq->avg.util_sum = cfs_rq->avg.util_avg * LOAD_AVG_MAX;
 }

-/* Take into account change of load of a child task group */
 static inline void
-update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se)
+update_tg_cfs_runnable(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq)
 {
-        struct cfs_rq *gcfs_rq = group_cfs_rq(se);
-        long delta, load = gcfs_rq->avg.load_avg;
+        long runnable_sum = gcfs_rq->prop_runnable_sum;
+        long load_avg;
+        s64 load_sum;

-        /*
-         * If the load of group cfs_rq is null, the load of the
-         * sched_entity will also be null so we can skip the formula
-         */
-        if (load) {
-                long tg_load;
-
-                /* Get tg's load and ensure tg_load > 0 */
-                tg_load = atomic_long_read(&gcfs_rq->tg->load_avg) + 1;
-
-                /* Ensure tg_load >= load and updated with current load*/
-                tg_load -= gcfs_rq->tg_load_avg_contrib;
-                tg_load += load;
-
-                /*
-                 * We need to compute a correction term in the case that the
-                 * task group is consuming more CPU than a task of equal
-                 * weight. A task with a weight equals to tg->shares will have
-                 * a load less or equal to scale_load_down(tg->shares).
-                 * Similarly, the sched_entities that represent the task group
-                 * at parent level, can't have a load higher than
-                 * scale_load_down(tg->shares). And the Sum of sched_entities'
-                 * load must be <= scale_load_down(tg->shares).
-                 */
-                if (tg_load > scale_load_down(gcfs_rq->tg->shares)) {
-                        /* scale gcfs_rq's load into tg's shares*/
-                        load *= scale_load_down(gcfs_rq->tg->shares);
-                        load /= tg_load;
-                }
-        }
+        if (!runnable_sum)
+                return;

-        delta = load - se->avg.load_avg;
+        gcfs_rq->prop_runnable_sum = 0;

-        /* Nothing to update */
-        if (!delta)
-                return;
+        load_sum = (s64)se_weight(se) * runnable_sum;
+        load_avg = div_s64(load_sum, LOAD_AVG_MAX);

-        /* Set new sched_entity's load */
-        se->avg.load_avg = load;
-        se->avg.load_sum = LOAD_AVG_MAX;
+        add_positive(&se->avg.load_sum, runnable_sum);
+        add_positive(&se->avg.load_avg, load_avg);

-        /* Update parent cfs_rq load */
-        add_positive(&cfs_rq->avg.load_avg, delta);
-        cfs_rq->avg.load_sum = cfs_rq->avg.load_avg * LOAD_AVG_MAX;
+        add_positive(&cfs_rq->avg.load_avg, load_avg);
+        add_positive(&cfs_rq->avg.load_sum, load_sum);

-        /*
-         * If the sched_entity is already enqueued, we also have to update the
-         * runnable load avg.
-         */
         if (se->on_rq) {
-                /* Update parent cfs_rq runnable_load_avg */
-                add_positive(&cfs_rq->runnable_load_avg, delta);
-                cfs_rq->runnable_load_sum = cfs_rq->runnable_load_avg * LOAD_AVG_MAX;
+                add_positive(&cfs_rq->runnable_load_avg, load_avg);
+                add_positive(&cfs_rq->runnable_load_sum, load_sum);
         }
 }

-static inline void set_tg_cfs_propagate(struct cfs_rq *cfs_rq)
+static inline void add_tg_cfs_propagate(struct cfs_rq *cfs_rq, long runnable_sum)
 {
-        cfs_rq->propagate_avg = 1;
-}
-
-static inline int test_and_clear_tg_cfs_propagate(struct sched_entity *se)
-{
-        struct cfs_rq *cfs_rq = group_cfs_rq(se);
-
-        if (!cfs_rq->propagate_avg)
-                return 0;
-
-        cfs_rq->propagate_avg = 0;
-        return 1;
+        cfs_rq->propagate = 1;
+        cfs_rq->prop_runnable_sum += runnable_sum;
 }

 /* Update task and its cfs_rq load average */
 static inline int propagate_entity_load_avg(struct sched_entity *se)
 {
-        struct cfs_rq *cfs_rq;
+        struct cfs_rq *cfs_rq, *gcfs_rq;

         if (entity_is_task(se))
                 return 0;

-        if (!test_and_clear_tg_cfs_propagate(se))
+        gcfs_rq = group_cfs_rq(se);
+        if (!gcfs_rq->propagate)
                 return 0;

+        gcfs_rq->propagate = 0;
+
         cfs_rq = cfs_rq_of(se);

-        set_tg_cfs_propagate(cfs_rq);
+        add_tg_cfs_propagate(cfs_rq, gcfs_rq->prop_runnable_sum);

-        update_tg_cfs_util(cfs_rq, se);
-        update_tg_cfs_load(cfs_rq, se);
+        update_tg_cfs_util(cfs_rq, se, gcfs_rq);
+        update_tg_cfs_runnable(cfs_rq, se, gcfs_rq);

         return 1;
 }
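
For reference, the arithmetic in the new update_tg_cfs_runnable() above can be modelled in a few lines of plain C. This is a rough standalone sketch under stated assumptions, not the kernel implementation: add_positive_l() below merely stands in for the kernel's add_positive() helper (apply a signed delta without letting the value go negative), the LOAD_AVG_MAX value and all figures are invented for illustration. It shows a negative prop_runnable_sum (a child left the group runqueue) being turned into load_sum/load_avg deltas via the group entity's weight.

/* runnable_prop_model.c -- rough model of update_tg_cfs_runnable(), illustrative only */
#include <stdio.h>
#include <stdint.h>

#define LOAD_AVG_MAX 47742	/* assumed PELT series maximum */

/* stand-in for the kernel's add_positive(): apply a signed delta, clamp at 0 */
static void add_positive_l(int64_t *ptr, int64_t val)
{
	int64_t res = *ptr + val;
	*ptr = res < 0 ? 0 : res;
}

int main(void)
{
	int64_t prop_runnable_sum = -20000;	/* a child entity left the group rq */
	int64_t se_weight = 512;		/* weight of the group entity */

	int64_t se_load_sum = 30000, se_load_avg = 320;
	int64_t rq_load_avg = 900, rq_load_sum = 900LL * LOAD_AVG_MAX;

	if (prop_runnable_sum) {
		int64_t load_sum = se_weight * prop_runnable_sum;
		int64_t load_avg = load_sum / LOAD_AVG_MAX;

		/* group entity: weight-free sum, weighted average */
		add_positive_l(&se_load_sum, prop_runnable_sum);
		add_positive_l(&se_load_avg, load_avg);

		/* parent runqueue: both sum and average keep the weight */
		add_positive_l(&rq_load_avg, load_avg);
		add_positive_l(&rq_load_sum, load_sum);
	}

	printf("se: sum=%lld avg=%lld | rq: avg=%lld sum=%lld\n",
	       (long long)se_load_sum, (long long)se_load_avg,
	       (long long)rq_load_avg, (long long)rq_load_sum);
	return 0;
}
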
@@ -3458,7 +3481,7 @@ static inline bool skip_blocked_update(struct sched_entity *se)
          * If there is a pending propagation, we have to update the load and
          * the utilization of the sched_entity:
          */
-        if (gcfs_rq->propagate_avg)
+        if (gcfs_rq->propagate)
                 return false;

         /*
@@ -3478,7 +3501,7 @@ static inline int propagate_entity_load_avg(struct sched_entity *se)
         return 0;
 }

-static inline void set_tg_cfs_propagate(struct cfs_rq *cfs_rq) {}
+static inline void add_tg_cfs_propagate(struct cfs_rq *cfs_rq, long runnable_sum) {}

 #endif /* CONFIG_FAIR_GROUP_SCHED */

@@ -3501,7 +3524,7 @@ static inline void set_tg_cfs_propagate(struct cfs_rq *cfs_rq) {}
 static inline int
 update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
 {
-        unsigned long removed_load = 0, removed_util = 0;
+        unsigned long removed_load = 0, removed_util = 0, removed_runnable_sum = 0;
         struct sched_avg *sa = &cfs_rq->avg;
         int decayed = 0;

@@ -3511,6 +3534,7 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
                 raw_spin_lock(&cfs_rq->removed.lock);
                 swap(cfs_rq->removed.util_avg, removed_util);
                 swap(cfs_rq->removed.load_avg, removed_load);
+                swap(cfs_rq->removed.runnable_sum, removed_runnable_sum);
                 cfs_rq->removed.nr = 0;
                 raw_spin_unlock(&cfs_rq->removed.lock);

@@ -3526,7 +3550,7 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
                 sub_positive(&sa->util_avg, r);
                 sub_positive(&sa->util_sum, r * LOAD_AVG_MAX);

-                set_tg_cfs_propagate(cfs_rq);
+                add_tg_cfs_propagate(cfs_rq, -(long)removed_runnable_sum);

                 decayed = 1;
         }
@@ -3558,7 +3582,8 @@ static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
         enqueue_load_avg(cfs_rq, se);
         cfs_rq->avg.util_avg += se->avg.util_avg;
         cfs_rq->avg.util_sum += se->avg.util_sum;
-        set_tg_cfs_propagate(cfs_rq);
+
+        add_tg_cfs_propagate(cfs_rq, se->avg.load_sum);

         cfs_rq_util_change(cfs_rq);
 }
@@ -3576,7 +3601,8 @@ static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
         dequeue_load_avg(cfs_rq, se);
         sub_positive(&cfs_rq->avg.util_avg, se->avg.util_avg);
         sub_positive(&cfs_rq->avg.util_sum, se->avg.util_sum);
-        set_tg_cfs_propagate(cfs_rq);
+
+        add_tg_cfs_propagate(cfs_rq, -se->avg.load_sum);

         cfs_rq_util_change(cfs_rq);
 }
@@ -3678,6 +3704,7 @@ void remove_entity_load_avg(struct sched_entity *se)
         ++cfs_rq->removed.nr;
         cfs_rq->removed.util_avg += se->avg.util_avg;
         cfs_rq->removed.load_avg += se->avg.load_avg;
+        cfs_rq->removed.runnable_sum += se->avg.load_sum; /* == runnable_sum */
         raw_spin_unlock_irqrestore(&cfs_rq->removed.lock, flags);
 }

@@ -9466,9 +9493,6 @@ void init_cfs_rq(struct cfs_rq *cfs_rq)
         cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime;
 #endif
 #ifdef CONFIG_SMP
-#ifdef CONFIG_FAIR_GROUP_SCHED
-        cfs_rq->propagate_avg = 0;
-#endif
         raw_spin_lock_init(&cfs_rq->removed.lock);
 #endif
 }