|
@@ -2767,7 +2767,7 @@ static const u32 __accumulated_sum_N32[] = {
|
|
* Approximate:
|
|
* Approximate:
|
|
* val * y^n, where y^32 ~= 0.5 (~1 scheduling period)
|
|
* val * y^n, where y^32 ~= 0.5 (~1 scheduling period)
|
|
*/
|
|
*/
|
|
-static __always_inline u64 decay_load(u64 val, u64 n)
|
|
|
|
|
|
+static u64 decay_load(u64 val, u64 n)
|
|
{
|
|
{
|
|
unsigned int local_n;
|
|
unsigned int local_n;
|
|
|
|
|
|
@@ -2795,31 +2795,112 @@ static __always_inline u64 decay_load(u64 val, u64 n)
|
|
return val;
|
|
return val;
|
|
}
|
|
}
|
|
|
|
|
|
-/*
|
|
|
|
- * For updates fully spanning n periods, the contribution to runnable
|
|
|
|
- * average will be: \Sum 1024*y^n
|
|
|
|
- *
|
|
|
|
- * We can compute this reasonably efficiently by combining:
|
|
|
|
- * y^PERIOD = 1/2 with precomputed \Sum 1024*y^n {for n <PERIOD}
|
|
|
|
- */
|
|
|
|
-static u32 __compute_runnable_contrib(u64 n)
|
|
|
|
|
|
+static u32 __accumulate_sum(u64 periods, u32 period_contrib, u32 remainder)
|
|
{
|
|
{
|
|
- u32 contrib = 0;
|
|
|
|
|
|
+ u32 c1, c2, c3 = remainder; /* y^0 == 1 */
|
|
|
|
|
|
- if (likely(n <= LOAD_AVG_PERIOD))
|
|
|
|
- return runnable_avg_yN_sum[n];
|
|
|
|
- else if (unlikely(n >= LOAD_AVG_MAX_N))
|
|
|
|
|
|
+ if (!periods)
|
|
|
|
+ return remainder - period_contrib;
|
|
|
|
+
|
|
|
|
+ if (unlikely(periods >= LOAD_AVG_MAX_N))
|
|
return LOAD_AVG_MAX;
|
|
return LOAD_AVG_MAX;
|
|
|
|
|
|
- /* Since n < LOAD_AVG_MAX_N, n/LOAD_AVG_PERIOD < 11 */
|
|
|
|
- contrib = __accumulated_sum_N32[n/LOAD_AVG_PERIOD];
|
|
|
|
- n %= LOAD_AVG_PERIOD;
|
|
|
|
- contrib = decay_load(contrib, n);
|
|
|
|
- return contrib + runnable_avg_yN_sum[n];
|
|
|
|
|
|
+ /*
|
|
|
|
+ * c1 = d1 y^(p+1)
|
|
|
|
+ */
|
|
|
|
+ c1 = decay_load((u64)(1024 - period_contrib), periods);
|
|
|
|
+
|
|
|
|
+ periods -= 1;
|
|
|
|
+ /*
|
|
|
|
+ * For updates fully spanning n periods, the contribution to runnable
|
|
|
|
+ * average will be:
|
|
|
|
+ *
|
|
|
|
+ * c2 = 1024 \Sum y^n
|
|
|
|
+ *
|
|
|
|
+ * We can compute this reasonably efficiently by combining:
|
|
|
|
+ *
|
|
|
|
+ * y^PERIOD = 1/2 with precomputed 1024 \Sum y^n {for: n < PERIOD}
|
|
|
|
+ */
|
|
|
|
+ if (likely(periods <= LOAD_AVG_PERIOD)) {
|
|
|
|
+ c2 = runnable_avg_yN_sum[periods];
|
|
|
|
+ } else {
|
|
|
|
+ c2 = __accumulated_sum_N32[periods/LOAD_AVG_PERIOD];
|
|
|
|
+ periods %= LOAD_AVG_PERIOD;
|
|
|
|
+ c2 = decay_load(c2, periods);
|
|
|
|
+ c2 += runnable_avg_yN_sum[periods];
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ return c1 + c2 + c3;
|
|
}
|
|
}
|
|
|
|
|
|
#define cap_scale(v, s) ((v)*(s) >> SCHED_CAPACITY_SHIFT)
|
|
#define cap_scale(v, s) ((v)*(s) >> SCHED_CAPACITY_SHIFT)
|
|
|
|
|
|
|
|
+/*
|
|
|
|
+ * Accumulate the three separate parts of the sum; d1 the remainder
|
|
|
|
+ * of the last (incomplete) period, d2 the span of full periods and d3
|
|
|
|
+ * the remainder of the (incomplete) current period.
|
|
|
|
+ *
|
|
|
|
+ * d1 d2 d3
|
|
|
|
+ * ^ ^ ^
|
|
|
|
+ * | | |
|
|
|
|
+ * |<->|<----------------->|<--->|
|
|
|
|
+ * ... |---x---|------| ... |------|-----x (now)
|
|
|
|
+ *
|
|
|
|
+ * p
|
|
|
|
+ * u' = (u + d1) y^(p+1) + 1024 \Sum y^n + d3 y^0
|
|
|
|
+ * n=1
|
|
|
|
+ *
|
|
|
|
+ * = u y^(p+1) + (Step 1)
|
|
|
|
+ *
|
|
|
|
+ * p
|
|
|
|
+ * d1 y^(p+1) + 1024 \Sum y^n + d3 y^0 (Step 2)
|
|
|
|
+ * n=1
|
|
|
|
+ */
|
|
|
|
+static __always_inline u32
|
|
|
|
+accumulate_sum(u64 delta, int cpu, struct sched_avg *sa,
|
|
|
|
+ unsigned long weight, int running, struct cfs_rq *cfs_rq)
|
|
|
|
+{
|
|
|
|
+ unsigned long scale_freq, scale_cpu;
|
|
|
|
+ u64 periods;
|
|
|
|
+ u32 contrib;
|
|
|
|
+
|
|
|
|
+ scale_freq = arch_scale_freq_capacity(NULL, cpu);
|
|
|
|
+ scale_cpu = arch_scale_cpu_capacity(NULL, cpu);
|
|
|
|
+
|
|
|
|
+ delta += sa->period_contrib;
|
|
|
|
+ periods = delta / 1024; /* A period is 1024us (~1ms) */
|
|
|
|
+
|
|
|
|
+ /*
|
|
|
|
+ * Step 1: decay old *_sum if we crossed period boundaries.
|
|
|
|
+ */
|
|
|
|
+ if (periods) {
|
|
|
|
+ sa->load_sum = decay_load(sa->load_sum, periods);
|
|
|
|
+ if (cfs_rq) {
|
|
|
|
+ cfs_rq->runnable_load_sum =
|
|
|
|
+ decay_load(cfs_rq->runnable_load_sum, periods);
|
|
|
|
+ }
|
|
|
|
+ sa->util_sum = decay_load((u64)(sa->util_sum), periods);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ /*
|
|
|
|
+ * Step 2
|
|
|
|
+ */
|
|
|
|
+ delta %= 1024;
|
|
|
|
+ contrib = __accumulate_sum(periods, sa->period_contrib, delta);
|
|
|
|
+ sa->period_contrib = delta;
|
|
|
|
+
|
|
|
|
+ contrib = cap_scale(contrib, scale_freq);
|
|
|
|
+ if (weight) {
|
|
|
|
+ sa->load_sum += weight * contrib;
|
|
|
|
+ if (cfs_rq)
|
|
|
|
+ cfs_rq->runnable_load_sum += weight * contrib;
|
|
|
|
+ }
|
|
|
|
+ if (running)
|
|
|
|
+ sa->util_sum += contrib * scale_cpu;
|
|
|
|
+
|
|
|
|
+ return periods;
|
|
|
|
+}
|
|
|
|
+
|
|
/*
|
|
/*
|
|
* We can represent the historical contribution to runnable average as the
|
|
* We can represent the historical contribution to runnable average as the
|
|
* coefficients of a geometric series. To do this we sub-divide our runnable
|
|
* coefficients of a geometric series. To do this we sub-divide our runnable
|
|
@@ -2852,10 +2933,7 @@ static __always_inline int
|
|
___update_load_avg(u64 now, int cpu, struct sched_avg *sa,
|
|
___update_load_avg(u64 now, int cpu, struct sched_avg *sa,
|
|
unsigned long weight, int running, struct cfs_rq *cfs_rq)
|
|
unsigned long weight, int running, struct cfs_rq *cfs_rq)
|
|
{
|
|
{
|
|
- u64 delta, scaled_delta, periods;
|
|
|
|
- u32 contrib;
|
|
|
|
- unsigned int delta_w, scaled_delta_w, decayed = 0;
|
|
|
|
- unsigned long scale_freq, scale_cpu;
|
|
|
|
|
|
+ u64 delta;
|
|
|
|
|
|
delta = now - sa->last_update_time;
|
|
delta = now - sa->last_update_time;
|
|
/*
|
|
/*
|
|
@@ -2876,81 +2954,27 @@ ___update_load_avg(u64 now, int cpu, struct sched_avg *sa,
|
|
return 0;
|
|
return 0;
|
|
sa->last_update_time = now;
|
|
sa->last_update_time = now;
|
|
|
|
|
|
- scale_freq = arch_scale_freq_capacity(NULL, cpu);
|
|
|
|
- scale_cpu = arch_scale_cpu_capacity(NULL, cpu);
|
|
|
|
-
|
|
|
|
- /* delta_w is the amount already accumulated against our next period */
|
|
|
|
- delta_w = sa->period_contrib;
|
|
|
|
- if (delta + delta_w >= 1024) {
|
|
|
|
- decayed = 1;
|
|
|
|
-
|
|
|
|
- /* how much left for next period will start over, we don't know yet */
|
|
|
|
- sa->period_contrib = 0;
|
|
|
|
-
|
|
|
|
- /*
|
|
|
|
- * Now that we know we're crossing a period boundary, figure
|
|
|
|
- * out how much from delta we need to complete the current
|
|
|
|
- * period and accrue it.
|
|
|
|
- */
|
|
|
|
- delta_w = 1024 - delta_w;
|
|
|
|
- scaled_delta_w = cap_scale(delta_w, scale_freq);
|
|
|
|
- if (weight) {
|
|
|
|
- sa->load_sum += weight * scaled_delta_w;
|
|
|
|
- if (cfs_rq) {
|
|
|
|
- cfs_rq->runnable_load_sum +=
|
|
|
|
- weight * scaled_delta_w;
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- if (running)
|
|
|
|
- sa->util_sum += scaled_delta_w * scale_cpu;
|
|
|
|
-
|
|
|
|
- delta -= delta_w;
|
|
|
|
-
|
|
|
|
- /* Figure out how many additional periods this update spans */
|
|
|
|
- periods = delta / 1024;
|
|
|
|
- delta %= 1024;
|
|
|
|
-
|
|
|
|
- sa->load_sum = decay_load(sa->load_sum, periods + 1);
|
|
|
|
- if (cfs_rq) {
|
|
|
|
- cfs_rq->runnable_load_sum =
|
|
|
|
- decay_load(cfs_rq->runnable_load_sum, periods + 1);
|
|
|
|
- }
|
|
|
|
- sa->util_sum = decay_load((u64)(sa->util_sum), periods + 1);
|
|
|
|
-
|
|
|
|
- /* Efficiently calculate \sum (1..n_period) 1024*y^i */
|
|
|
|
- contrib = __compute_runnable_contrib(periods);
|
|
|
|
- contrib = cap_scale(contrib, scale_freq);
|
|
|
|
- if (weight) {
|
|
|
|
- sa->load_sum += weight * contrib;
|
|
|
|
- if (cfs_rq)
|
|
|
|
- cfs_rq->runnable_load_sum += weight * contrib;
|
|
|
|
- }
|
|
|
|
- if (running)
|
|
|
|
- sa->util_sum += contrib * scale_cpu;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- /* Remainder of delta accrued against u_0` */
|
|
|
|
- scaled_delta = cap_scale(delta, scale_freq);
|
|
|
|
- if (weight) {
|
|
|
|
- sa->load_sum += weight * scaled_delta;
|
|
|
|
- if (cfs_rq)
|
|
|
|
- cfs_rq->runnable_load_sum += weight * scaled_delta;
|
|
|
|
- }
|
|
|
|
- if (running)
|
|
|
|
- sa->util_sum += scaled_delta * scale_cpu;
|
|
|
|
-
|
|
|
|
- sa->period_contrib += delta;
|
|
|
|
|
|
+ /*
|
|
|
|
+ * Now we know we crossed measurement unit boundaries. The *_avg
|
|
|
|
+ * accrues by two steps:
|
|
|
|
+ *
|
|
|
|
+ * Step 1: accumulate *_sum since last_update_time. If we haven't
|
|
|
|
+ * crossed period boundaries, finish.
|
|
|
|
+ */
|
|
|
|
+ if (!accumulate_sum(delta, cpu, sa, weight, running, cfs_rq))
|
|
|
|
+ return 0;
|
|
|
|
|
|
- if (decayed) {
|
|
|
|
- sa->load_avg = div_u64(sa->load_sum, LOAD_AVG_MAX);
|
|
|
|
- if (cfs_rq) {
|
|
|
|
- cfs_rq->runnable_load_avg =
|
|
|
|
- div_u64(cfs_rq->runnable_load_sum, LOAD_AVG_MAX);
|
|
|
|
- }
|
|
|
|
- sa->util_avg = sa->util_sum / LOAD_AVG_MAX;
|
|
|
|
|
|
+ /*
|
|
|
|
+ * Step 2: update *_avg.
|
|
|
|
+ */
|
|
|
|
+ sa->load_avg = div_u64(sa->load_sum, LOAD_AVG_MAX);
|
|
|
|
+ if (cfs_rq) {
|
|
|
|
+ cfs_rq->runnable_load_avg =
|
|
|
|
+ div_u64(cfs_rq->runnable_load_sum, LOAD_AVG_MAX);
|
|
}
|
|
}
|
|
|
|
+ sa->util_avg = sa->util_sum / LOAD_AVG_MAX;
|
|
|
|
|
|
- return decayed;
|
|
|
|
|
|
+ return 1;
|
|
}
|
|
}
|
|
|
|
|
|
static int
|
|
static int
|