@@ -2904,6 +2904,23 @@ static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq)
 	}
 }
 
+/*
+ * Unsigned subtract and clamp on underflow.
+ *
+ * Explicitly do a load-store to ensure the intermediate value never hits
+ * memory. This allows lockless observations without ever seeing the negative
+ * values.
+ */
+#define sub_positive(_ptr, _val) do {				\
+	typeof(_ptr) ptr = (_ptr);				\
+	typeof(*ptr) val = (_val);				\
+	typeof(*ptr) res, var = READ_ONCE(*ptr);		\
+	res = var - val;					\
+	if (res > var)						\
+		res = 0;					\
+	WRITE_ONCE(*ptr, res);					\
+} while (0)
+
 /* Group cfs_rq's load_avg is used for task_h_load and update_cfs_share */
 static inline int
 update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq)
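The clamp above matters because the average and sum fields are unsigned: when a subtraction overshoots, plain arithmetic wraps around to a huge value rather than going negative. A minimal userspace sketch of the macro's semantics, reusing it with volatile stand-ins for the kernel's READ_ONCE()/WRITE_ONCE() (illustration only, not part of the patch):

/* sub_positive_demo.c: compile with gcc; shows the clamp on underflow. */
#include <stdio.h>

/* Userspace approximations of the kernel accessors used by the macro. */
#define READ_ONCE(x)		(*(volatile typeof(x) *)&(x))
#define WRITE_ONCE(x, val)	(*(volatile typeof(x) *)&(x) = (val))

#define sub_positive(_ptr, _val) do {				\
	typeof(_ptr) ptr = (_ptr);				\
	typeof(*ptr) val = (_val);				\
	typeof(*ptr) res, var = READ_ONCE(*ptr);		\
	res = var - val;					\
	if (res > var)						\
		res = 0;					\
	WRITE_ONCE(*ptr, res);					\
} while (0)

int main(void)
{
	unsigned long load_avg = 100;
	unsigned long naive = load_avg - 150;	/* wraps to a huge value */

	sub_positive(&load_avg, 150UL);		/* clamps to 0 instead */
	printf("naive: %lu, clamped: %lu\n", naive, load_avg);
	return 0;
}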
@@ -2913,15 +2930,15 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq)
 
 	if (atomic_long_read(&cfs_rq->removed_load_avg)) {
 		s64 r = atomic_long_xchg(&cfs_rq->removed_load_avg, 0);
-		sa->load_avg = max_t(long, sa->load_avg - r, 0);
-		sa->load_sum = max_t(s64, sa->load_sum - r * LOAD_AVG_MAX, 0);
+		sub_positive(&sa->load_avg, r);
+		sub_positive(&sa->load_sum, r * LOAD_AVG_MAX);
 		removed_load = 1;
 	}
 
 	if (atomic_long_read(&cfs_rq->removed_util_avg)) {
 		long r = atomic_long_xchg(&cfs_rq->removed_util_avg, 0);
-		sa->util_avg = max_t(long, sa->util_avg - r, 0);
-		sa->util_sum = max_t(s32, sa->util_sum - r * LOAD_AVG_MAX, 0);
+		sub_positive(&sa->util_avg, r);
+		sub_positive(&sa->util_sum, r * LOAD_AVG_MAX);
 		removed_util = 1;
 	}
 
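For context on where the removed_* counters come from: remove_entity_load_avg() accumulates the load and utilization of departing entities into removed_load_avg/removed_util_avg with atomic adds, possibly from other CPUs, and the hunk above drains them with an atomic exchange to 0 before subtracting. A simplified, single-threaded userspace model of that drain-and-clamp pattern (the struct and function are illustrative stand-ins, not kernel API):

/* drain_demo.c: model of "accumulate remotely, drain locally with xchg". */
#include <stdatomic.h>
#include <stdio.h>

struct cfs_rq_model {
	atomic_long removed_load_avg;	/* added to by remote CPUs */
	unsigned long load_avg;		/* owned by the local CPU */
};

static void drain_removed_load(struct cfs_rq_model *cfs)
{
	if (atomic_load(&cfs->removed_load_avg)) {
		/* Exchange returns the accumulated amount and resets it to 0. */
		long r = atomic_exchange(&cfs->removed_load_avg, 0);
		unsigned long res = cfs->load_avg - (unsigned long)r;

		/* Same clamp-on-underflow idea as sub_positive(). */
		cfs->load_avg = (res > cfs->load_avg) ? 0 : res;
	}
}

int main(void)
{
	static struct cfs_rq_model cfs = { .load_avg = 1024 };

	atomic_fetch_add(&cfs.removed_load_avg, 300);	/* one departing task */
	atomic_fetch_add(&cfs.removed_load_avg, 900);	/* another one */

	drain_removed_load(&cfs);
	printf("load_avg after drain: %lu\n", cfs.load_avg);	/* clamped to 0 */
	return 0;
}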
@@ -2994,10 +3011,10 @@ static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
 			  &se->avg, se->on_rq * scale_load_down(se->load.weight),
 			  cfs_rq->curr == se, NULL);
 
-	cfs_rq->avg.load_avg = max_t(long, cfs_rq->avg.load_avg - se->avg.load_avg, 0);
-	cfs_rq->avg.load_sum = max_t(s64, cfs_rq->avg.load_sum - se->avg.load_sum, 0);
-	cfs_rq->avg.util_avg = max_t(long, cfs_rq->avg.util_avg - se->avg.util_avg, 0);
-	cfs_rq->avg.util_sum = max_t(s32, cfs_rq->avg.util_sum - se->avg.util_sum, 0);
+	sub_positive(&cfs_rq->avg.load_avg, se->avg.load_avg);
+	sub_positive(&cfs_rq->avg.load_sum, se->avg.load_sum);
+	sub_positive(&cfs_rq->avg.util_avg, se->avg.util_avg);
+	sub_positive(&cfs_rq->avg.util_sum, se->avg.util_sum);
 
 	cfs_rq_util_change(cfs_rq);
 }
@@ -3246,7 +3263,7 @@ static inline void check_schedstat_required(void)
 			trace_sched_stat_iowait_enabled()  ||
 			trace_sched_stat_blocked_enabled() ||
 			trace_sched_stat_runtime_enabled())  {
-		pr_warn_once("Scheduler tracepoints stat_sleep, stat_iowait, "
+		printk_deferred_once("Scheduler tracepoints stat_sleep, stat_iowait, "
 			     "stat_blocked and stat_runtime require the "
 			     "kernel parameter schedstats=enabled or "
 			     "kernel.sched_schedstats=1\n");
@@ -4185,6 +4202,26 @@ static void check_enqueue_throttle(struct cfs_rq *cfs_rq)
 	if (!cfs_bandwidth_used())
 		return;
 
+	/* Synchronize hierarchical throttle counter: */
+	if (unlikely(!cfs_rq->throttle_uptodate)) {
+		struct rq *rq = rq_of(cfs_rq);
+		struct cfs_rq *pcfs_rq;
+		struct task_group *tg;
+
+		cfs_rq->throttle_uptodate = 1;
+
+		/* Get closest up-to-date node, because leaves go first: */
+		for (tg = cfs_rq->tg->parent; tg; tg = tg->parent) {
+			pcfs_rq = tg->cfs_rq[cpu_of(rq)];
+			if (pcfs_rq->throttle_uptodate)
+				break;
+		}
+		if (tg) {
+			cfs_rq->throttle_count = pcfs_rq->throttle_count;
+			cfs_rq->throttled_clock_task = rq_clock_task(rq);
+		}
+	}
+
 	/* an active group must be handled by the update_curr()->put() path */
 	if (!cfs_rq->runtime_enabled || cfs_rq->curr)
 		return;
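The block above initializes a group's hierarchical throttle state lazily: a newly created per-CPU cfs_rq starts with a stale throttle_count, and on first enqueue it inherits the count from the closest ancestor that is already up to date. A toy userspace model of that walk, with task_group and its per-CPU cfs_rq flattened into a single node (illustrative types only, not kernel code):

/* throttle_sync_demo.c: inherit a counter from the nearest up-to-date ancestor. */
#include <stdio.h>
#include <stddef.h>

struct node {
	struct node *parent;
	int throttle_uptodate;
	int throttle_count;
	const char *name;
};

static void sync_throttle_count(struct node *n)
{
	struct node *p;

	if (n->throttle_uptodate)
		return;
	n->throttle_uptodate = 1;

	/* Leaves are created last, so walk up to the closest up-to-date node. */
	for (p = n->parent; p; p = p->parent)
		if (p->throttle_uptodate)
			break;
	if (p)
		n->throttle_count = p->throttle_count;
}

int main(void)
{
	struct node root = { NULL,  1, 2, "root" };	/* up to date, count = 2 */
	struct node mid  = { &root, 0, 0, "mid"  };	/* stale */
	struct node leaf = { &mid,  0, 0, "leaf" };	/* freshly created */

	sync_throttle_count(&leaf);
	printf("%s inherits throttle_count=%d\n", leaf.name, leaf.throttle_count);
	return 0;
}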
@@ -4500,15 +4537,14 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 
 		/* Don't dequeue parent if it has other entities besides us */
 		if (cfs_rq->load.weight) {
+			/* Avoid re-evaluating load for this entity: */
+			se = parent_entity(se);
 			/*
 			 * Bias pick_next to pick a task from this cfs_rq, as
 			 * p is sleeping when it is within its sched_slice.
 			 */
-			if (task_sleep && parent_entity(se))
-				set_next_buddy(parent_entity(se));
-
-			/* avoid re-evaluating load for this entity */
-			se = parent_entity(se);
+			if (task_sleep && se && !throttled_hierarchy(cfs_rq))
+				set_next_buddy(se);
 			break;
 		}
 		flags |= DEQUEUE_SLEEP;
@@ -8496,8 +8532,9 @@ void free_fair_sched_group(struct task_group *tg)
 
 int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
 {
-	struct cfs_rq *cfs_rq;
 	struct sched_entity *se;
+	struct cfs_rq *cfs_rq;
+	struct rq *rq;
 	int i;
 
 	tg->cfs_rq = kzalloc(sizeof(cfs_rq) * nr_cpu_ids, GFP_KERNEL);
@@ -8512,6 +8549,8 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
 	init_cfs_bandwidth(tg_cfs_bandwidth(tg));
 
 	for_each_possible_cpu(i) {
+		rq = cpu_rq(i);
+
 		cfs_rq = kzalloc_node(sizeof(struct cfs_rq),
 				      GFP_KERNEL, cpu_to_node(i));
 		if (!cfs_rq)
@@ -8525,7 +8564,10 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
 		init_cfs_rq(cfs_rq);
 		init_tg_cfs_entry(tg, cfs_rq, se, i, parent->se[i]);
 		init_entity_runnable_average(se);
+
+		raw_spin_lock_irq(&rq->lock);
 		post_init_entity_util_avg(se);
+		raw_spin_unlock_irq(&rq->lock);
 	}
 
 	return 1;