8 жил өмнө · a481db34b9
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2767,7 +2767,7 @@ static const u32 __accumulated_sum_N32[] = {
 
															  * Approximate:
														
 
															  *   val * y^n,    where y^32 ~= 0.5 (~1 scheduling period)
														
 
															  */
														
 
															-static __always_inline u64 decay_load(u64 val, u64 n)
														
 
															+static u64 decay_load(u64 val, u64 n)
														
 
															 {
														
 
															 	unsigned int local_n;
														
@@ -2795,31 +2795,112 @@ static __always_inline u64 decay_load(u64 val, u64 n)
 
															 	return val;
														
 
															 }
														
 
															-/*
														
 
															- * For updates fully spanning n periods, the contribution to runnable
														
 
															- * average will be: \Sum 1024*y^n
														
 
															- *
														
 
															- * We can compute this reasonably efficiently by combining:
														
 
															- *   y^PERIOD = 1/2 with precomputed \Sum 1024*y^n {for  n <PERIOD}
														
 
															- */
														
 
															-static u32 __compute_runnable_contrib(u64 n)
														
 
															+static u32 __accumulate_sum(u64 periods, u32 period_contrib, u32 remainder)
														
 
															 {
														
 
															-	u32 contrib = 0;
														
 
															+	u32 c1, c2, c3 = remainder; /* y^0 == 1 */
														
 
															-	if (likely(n <= LOAD_AVG_PERIOD))
														
 
															-		return runnable_avg_yN_sum[n];
														
 
															-	else if (unlikely(n >= LOAD_AVG_MAX_N))
														
 
															+	if (!periods)
														
 
															+		return remainder - period_contrib;
														
 
															+
														
 
															+	if (unlikely(periods >= LOAD_AVG_MAX_N))
														
 
															 		return LOAD_AVG_MAX;
														
 
															-	/* Since n < LOAD_AVG_MAX_N, n/LOAD_AVG_PERIOD < 11 */
														
 
															-	contrib = __accumulated_sum_N32[n/LOAD_AVG_PERIOD];
														
 
															-	n %= LOAD_AVG_PERIOD;
														
 
															-	contrib = decay_load(contrib, n);
														
 
															-	return contrib + runnable_avg_yN_sum[n];
														
 
															+	/*
														
 
															+	 * c1 = d1 y^(p+1)
														
 
															+	 */
														
 
															+	c1 = decay_load((u64)(1024 - period_contrib), periods);
														
 
															+
														
 
															+	periods -= 1;
														
 
															+	/*
														
 
															+	 * For updates fully spanning n periods, the contribution to runnable
														
 
															+	 * average will be:
														
 
															+	 *
														
 
															+	 *   c2 = 1024 \Sum y^n
														
 
															+	 *
														
 
															+	 * We can compute this reasonably efficiently by combining:
														
 
															+	 *
														
 
															+	 *   y^PERIOD = 1/2 with precomputed 1024 \Sum y^n {for: n < PERIOD}
														
 
															+	 */
														
 
															+	if (likely(periods <= LOAD_AVG_PERIOD)) {
														
 
															+		c2 = runnable_avg_yN_sum[periods];
														
 
															+	} else {
														
 
															+		c2 = __accumulated_sum_N32[periods/LOAD_AVG_PERIOD];
														
 
															+		periods %= LOAD_AVG_PERIOD;
														
 
															+		c2 = decay_load(c2, periods);
														
 
															+		c2 += runnable_avg_yN_sum[periods];
														
 
															+	}
														
 
															+
														
 
															+	return c1 + c2 + c3;
														
 
															 }
														
 
															 #define cap_scale(v, s) ((v)*(s) >> SCHED_CAPACITY_SHIFT)
														
 
															+/*
														
 
															+ * Accumulate the three separate parts of the sum; d1 the remainder
														
 
															+ * of the last (incomplete) period, d2 the span of full periods and d3
														
 
															+ * the remainder of the (incomplete) current period.
														
 
															+ *
														
 
															+ *           d1          d2           d3
														
 
															+ *           ^           ^            ^
														
 
															+ *           |           |            |
														
 
															+ *         |<->|<----------------->|<--->|
														
 
															+ * ... |---x---|------| ... |------|-----x (now)
														
 
															+ *
														
 
															+ *                                p
														
 
															+ * u' = (u + d1) y^(p+1) + 1024 \Sum y^n + d3 y^0
														
 
															+ *                               n=1
														
 
															+ *
														
 
															+ *    = u y^(p+1) +				(Step 1)
														
 
															+ *
														
 
															+ *                          p
														
 
															+ *      d1 y^(p+1) + 1024 \Sum y^n + d3 y^0	(Step 2)
														
 
															+ *                         n=1
														
 
															+ */
														
 
															+static __always_inline u32
														
 
															+accumulate_sum(u64 delta, int cpu, struct sched_avg *sa,
														
 
															+	       unsigned long weight, int running, struct cfs_rq *cfs_rq)
														
 
															+{
														
 
															+	unsigned long scale_freq, scale_cpu;
														
 
															+	u64 periods;
														
 
															+	u32 contrib;
														
 
															+
														
 
															+	scale_freq = arch_scale_freq_capacity(NULL, cpu);
														
 
															+	scale_cpu = arch_scale_cpu_capacity(NULL, cpu);
														
 
															+
														
 
															+	delta += sa->period_contrib;
														
 
															+	periods = delta / 1024; /* A period is 1024us (~1ms) */
														
 
															+
														
 
															+	/*
														
 
															+	 * Step 1: decay old *_sum if we crossed period boundaries.
														
 
															+	 */
														
 
															+	if (periods) {
														
 
															+		sa->load_sum = decay_load(sa->load_sum, periods);
														
 
															+		if (cfs_rq) {
														
 
															+			cfs_rq->runnable_load_sum =
														
 
															+				decay_load(cfs_rq->runnable_load_sum, periods);
														
 
															+		}
														
 
															+		sa->util_sum = decay_load((u64)(sa->util_sum), periods);
														
 
															+	}
														
 
															+
														
 
															+	/*
														
 
															+	 * Step 2
														
 
															+	 */
														
 
															+	delta %= 1024;
														
 
															+	contrib = __accumulate_sum(periods, sa->period_contrib, delta);
														
 
															+	sa->period_contrib = delta;
														
 
															+
														
 
															+	contrib = cap_scale(contrib, scale_freq);
														
 
															+	if (weight) {
														
 
															+		sa->load_sum += weight * contrib;
														
 
															+		if (cfs_rq)
														
 
															+			cfs_rq->runnable_load_sum += weight * contrib;
														
 
															+	}
														
 
															+	if (running)
														
 
															+		sa->util_sum += contrib * scale_cpu;
														
 
															+
														
 
															+	return periods;
														
 
															+}
														
 
															+
														
 
															 /*
														
 
															  * We can represent the historical contribution to runnable average as the
														
 
															  * coefficients of a geometric series.  To do this we sub-divide our runnable
														
@@ -2852,10 +2933,7 @@ static __always_inline int
 
															 ___update_load_avg(u64 now, int cpu, struct sched_avg *sa,
														
 
															 		  unsigned long weight, int running, struct cfs_rq *cfs_rq)
														
 
															 {
														
 
															-	u64 delta, scaled_delta, periods;
														
 
															-	u32 contrib;
														
 
															-	unsigned int delta_w, scaled_delta_w, decayed = 0;
														
 
															-	unsigned long scale_freq, scale_cpu;
														
 
															+	u64 delta;
														
 
															 	delta = now - sa->last_update_time;
														
 
															 	/*
														
@@ -2876,81 +2954,27 @@ ___update_load_avg(u64 now, int cpu, struct sched_avg *sa,
 
															 		return 0;
														
 
															 	sa->last_update_time = now;
														
 
															-	scale_freq = arch_scale_freq_capacity(NULL, cpu);
														
 
															-	scale_cpu = arch_scale_cpu_capacity(NULL, cpu);
														
 
															-
														
 
															-	/* delta_w is the amount already accumulated against our next period */
														
 
															-	delta_w = sa->period_contrib;
														
 
															-	if (delta + delta_w >= 1024) {
														
 
															-		decayed = 1;
														
 
															-
														
 
															-		/* how much left for next period will start over, we don't know yet */
														
 
															-		sa->period_contrib = 0;
														
 
															-
														
 
															-		/*
														
 
															-		 * Now that we know we're crossing a period boundary, figure
														
 
															-		 * out how much from delta we need to complete the current
														
 
															-		 * period and accrue it.
														
 
															-		 */
														
 
															-		delta_w = 1024 - delta_w;
														
 
															-		scaled_delta_w = cap_scale(delta_w, scale_freq);
														
 
															-		if (weight) {
														
 
															-			sa->load_sum += weight * scaled_delta_w;
														
 
															-			if (cfs_rq) {
														
 
															-				cfs_rq->runnable_load_sum +=
														
 
															-						weight * scaled_delta_w;
														
 
															-			}
														
 
															-		}
														
 
															-		if (running)
														
 
															-			sa->util_sum += scaled_delta_w * scale_cpu;
														
 
															-
														
 
															-		delta -= delta_w;
														
 
															-
														
 
															-		/* Figure out how many additional periods this update spans */
														
 
															-		periods = delta / 1024;
														
 
															-		delta %= 1024;
														
 
															-
														
 
															-		sa->load_sum = decay_load(sa->load_sum, periods + 1);
														
 
															-		if (cfs_rq) {
														
 
															-			cfs_rq->runnable_load_sum =
														
 
															-				decay_load(cfs_rq->runnable_load_sum, periods + 1);
														
 
															-		}
														
 
															-		sa->util_sum = decay_load((u64)(sa->util_sum), periods + 1);
														
 
															-
														
 
															-		/* Efficiently calculate \sum (1..n_period) 1024*y^i */
														
 
															-		contrib = __compute_runnable_contrib(periods);
														
 
															-		contrib = cap_scale(contrib, scale_freq);
														
 
															-		if (weight) {
														
 
															-			sa->load_sum += weight * contrib;
														
 
															-			if (cfs_rq)
														
 
															-				cfs_rq->runnable_load_sum += weight * contrib;
														
 
															-		}
														
 
															-		if (running)
														
 
															-			sa->util_sum += contrib * scale_cpu;
														
 
															-	}
														
 
															-
														
 
															-	/* Remainder of delta accrued against u_0` */
														
 
															-	scaled_delta = cap_scale(delta, scale_freq);
														
 
															-	if (weight) {
														
 
															-		sa->load_sum += weight * scaled_delta;
														
 
															-		if (cfs_rq)
														
 
															-			cfs_rq->runnable_load_sum += weight * scaled_delta;
														
 
															-	}
														
 
															-	if (running)
														
 
															-		sa->util_sum += scaled_delta * scale_cpu;
														
 
															-
														
 
															-	sa->period_contrib += delta;
														
 
															+	/*
														
 
															+	 * Now we know we crossed measurement unit boundaries. The *_avg
														
 
															+	 * accrues by two steps:
														
 
															+	 *
														
 
															+	 * Step 1: accumulate *_sum since last_update_time. If we haven't
														
 
															+	 * crossed period boundaries, finish.
														
 
															+	 */
														
 
															+	if (!accumulate_sum(delta, cpu, sa, weight, running, cfs_rq))
														
 
															+		return 0;
														
 
															-	if (decayed) {
														
 
															-		sa->load_avg = div_u64(sa->load_sum, LOAD_AVG_MAX);
														
 
															-		if (cfs_rq) {
														
 
															-			cfs_rq->runnable_load_avg =
														
 
															-				div_u64(cfs_rq->runnable_load_sum, LOAD_AVG_MAX);
														
 
															-		}
														
 
															-		sa->util_avg = sa->util_sum / LOAD_AVG_MAX;
														
 
															+	/*
														
 
															+	 * Step 2: update *_avg.
														
 
															+	 */
														
 
															+	sa->load_avg = div_u64(sa->load_sum, LOAD_AVG_MAX);
														
 
															+	if (cfs_rq) {
														
 
															+		cfs_rq->runnable_load_avg =
														
 
															+			div_u64(cfs_rq->runnable_load_sum, LOAD_AVG_MAX);
														
 
															 	}
														
 
															+	sa->util_avg = sa->util_sum / LOAD_AVG_MAX;
														
 
															-	return decayed;
														
 
															+	return 1;
														
 
															 }
														
 
															 static int