@@ -1,7 +1,9 @@
 /*
- * kernel/sched/proc.c
+ * kernel/sched/loadavg.c
  *
- * Kernel load calculations, forked from sched/core.c
+ * This file contains the magic bits required to compute the global loadavg
+ * figure. Its a silly number but people think its important. We go through
+ * great pains to make it work on big machines and tickless kernels.
  */
 
 #include <linux/export.h>
@@ -81,7 +83,7 @@ long calc_load_fold_active(struct rq *this_rq)
         long nr_active, delta = 0;
 
         nr_active = this_rq->nr_running;
-        nr_active += (long) this_rq->nr_uninterruptible;
+        nr_active += (long)this_rq->nr_uninterruptible;
 
         if (nr_active != this_rq->calc_load_active) {
                 delta = nr_active - this_rq->calc_load_active;
@@ -186,6 +188,7 @@ void calc_load_enter_idle(void)
         delta = calc_load_fold_active(this_rq);
         if (delta) {
                 int idx = calc_load_write_idx();
+
                 atomic_long_add(delta, &calc_load_idle[idx]);
         }
 }
@@ -241,18 +244,20 @@ fixed_power_int(unsigned long x, unsigned int frac_bits, unsigned int n)
 {
         unsigned long result = 1UL << frac_bits;
 
-        if (n) for (;;) {
-                if (n & 1) {
-                        result *= x;
-                        result += 1UL << (frac_bits - 1);
-                        result >>= frac_bits;
+        if (n) {
+                for (;;) {
+                        if (n & 1) {
+                                result *= x;
+                                result += 1UL << (frac_bits - 1);
+                                result >>= frac_bits;
+                        }
+                        n >>= 1;
+                        if (!n)
+                                break;
+                        x *= x;
+                        x += 1UL << (frac_bits - 1);
+                        x >>= frac_bits;
                 }
-                n >>= 1;
-                if (!n)
-                        break;
-                x *= x;
-                x += 1UL << (frac_bits - 1);
-                x >>= frac_bits;
         }
 
         return result;
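
The loop reshaped above is plain binary exponentiation on a fixed-point value: x carries frac_bits fractional bits, and every multiply is rounded and renormalized by shifting frac_bits back out. For reference, a minimal standalone sketch of the same arithmetic; the fixpow()/main() names and the worked example are illustrative and not part of the patch (1884 is the kernel's EXP_1 constant, roughly exp(-5s/1min) at FSHIFT = 11):

#include <stdio.h>

/* x^n for an unsigned fixed-point x with frac_bits fractional bits,
 * squaring x once per bit of n -- the same shape as fixed_power_int(). */
static unsigned long fixpow(unsigned long x, unsigned int frac_bits, unsigned int n)
{
        unsigned long result = 1UL << frac_bits;        /* 1.0 in fixed point */

        while (n) {
                if (n & 1) {
                        result *= x;
                        result += 1UL << (frac_bits - 1);       /* round */
                        result >>= frac_bits;
                }
                n >>= 1;
                if (!n)
                        break;
                x *= x;
                x += 1UL << (frac_bits - 1);
                x >>= frac_bits;
        }
        return result;
}

int main(void)
{
        /* (1884/2048)^5: the 1-minute decay factor applied across five
         * missed LOAD_FREQ periods. */
        printf("%lu\n", fixpow(1884, 11, 5));   /* prints 1349, i.e. ~0.66 */
        return 0;
}

This is exactly what calc_load_n() in the next hunk relies on: one call folds n missed update periods into a single decay factor instead of looping n times.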
@@ -285,7 +290,6 @@ static unsigned long
 calc_load_n(unsigned long load, unsigned long exp,
             unsigned long active, unsigned int n)
 {
-
         return calc_load(load, fixed_power_int(exp, FSHIFT, n), active);
 }
 
@@ -339,6 +343,8 @@ static inline void calc_global_nohz(void) { }
 /*
  * calc_load - update the avenrun load estimates 10 ticks after the
  * CPUs have updated calc_load_tasks.
+ *
+ * Called from the global timer code.
  */
 void calc_global_load(unsigned long ticks)
 {
@@ -370,10 +376,10 @@ void calc_global_load(unsigned long ticks)
 }
 
 /*
- * Called from update_cpu_load() to periodically update this CPU's
+ * Called from scheduler_tick() to periodically update this CPU's
  * active count.
  */
-static void calc_load_account_active(struct rq *this_rq)
+void calc_global_load_tick(struct rq *this_rq)
 {
         long delta;
 
@@ -386,199 +392,3 @@ static void calc_load_account_active(struct rq *this_rq)
 
         this_rq->calc_load_update += LOAD_FREQ;
 }
-
-/*
- * End of global load-average stuff
- */
-
-/*
- * The exact cpuload at various idx values, calculated at every tick would be
- * load = (2^idx - 1) / 2^idx * load + 1 / 2^idx * cur_load
- *
- * If a cpu misses updates for n-1 ticks (as it was idle) and update gets called
- * on nth tick when cpu may be busy, then we have:
- * load = ((2^idx - 1) / 2^idx)^(n-1) * load
- * load = (2^idx - 1) / 2^idx) * load + 1 / 2^idx * cur_load
- *
- * decay_load_missed() below does efficient calculation of
- * load = ((2^idx - 1) / 2^idx)^(n-1) * load
- * avoiding 0..n-1 loop doing load = ((2^idx - 1) / 2^idx) * load
- *
- * The calculation is approximated on a 128 point scale.
- * degrade_zero_ticks is the number of ticks after which load at any
- * particular idx is approximated to be zero.
- * degrade_factor is a precomputed table, a row for each load idx.
- * Each column corresponds to degradation factor for a power of two ticks,
- * based on 128 point scale.
- * Example:
- * row 2, col 3 (=12) says that the degradation at load idx 2 after
- * 8 ticks is 12/128 (which is an approximation of exact factor 3^8/4^8).
- *
- * With this power of 2 load factors, we can degrade the load n times
- * by looking at 1 bits in n and doing as many mult/shift instead of
- * n mult/shifts needed by the exact degradation.
- */
-#define DEGRADE_SHIFT 7
-static const unsigned char
-                degrade_zero_ticks[CPU_LOAD_IDX_MAX] = {0, 8, 32, 64, 128};
-static const unsigned char
-                degrade_factor[CPU_LOAD_IDX_MAX][DEGRADE_SHIFT + 1] = {
-                                {0, 0, 0, 0, 0, 0, 0, 0},
-                                {64, 32, 8, 0, 0, 0, 0, 0},
-                                {96, 72, 40, 12, 1, 0, 0},
-                                {112, 98, 75, 43, 15, 1, 0},
-                                {120, 112, 98, 76, 45, 16, 2} };
-
-/*
- * Update cpu_load for any missed ticks, due to tickless idle. The backlog
- * would be when CPU is idle and so we just decay the old load without
- * adding any new load.
- */
-static unsigned long
-decay_load_missed(unsigned long load, unsigned long missed_updates, int idx)
-{
-        int j = 0;
-
-        if (!missed_updates)
-                return load;
-
-        if (missed_updates >= degrade_zero_ticks[idx])
-                return 0;
-
-        if (idx == 1)
-                return load >> missed_updates;
-
-        while (missed_updates) {
-                if (missed_updates % 2)
-                        load = (load * degrade_factor[idx][j]) >> DEGRADE_SHIFT;
-
-                missed_updates >>= 1;
-                j++;
-        }
-        return load;
-}
-
-/*
- * Update rq->cpu_load[] statistics. This function is usually called every
- * scheduler tick (TICK_NSEC). With tickless idle this will not be called
- * every tick. We fix it up based on jiffies.
- */
-static void __update_cpu_load(struct rq *this_rq, unsigned long this_load,
-                              unsigned long pending_updates)
-{
-        int i, scale;
-
-        this_rq->nr_load_updates++;
-
-        /* Update our load: */
-        this_rq->cpu_load[0] = this_load; /* Fasttrack for idx 0 */
-        for (i = 1, scale = 2; i < CPU_LOAD_IDX_MAX; i++, scale += scale) {
-                unsigned long old_load, new_load;
-
-                /* scale is effectively 1 << i now, and >> i divides by scale */
-
-                old_load = this_rq->cpu_load[i];
-                old_load = decay_load_missed(old_load, pending_updates - 1, i);
-                new_load = this_load;
-                /*
-                 * Round up the averaging division if load is increasing. This
-                 * prevents us from getting stuck on 9 if the load is 10, for
-                 * example.
-                 */
-                if (new_load > old_load)
-                        new_load += scale - 1;
-
-                this_rq->cpu_load[i] = (old_load * (scale - 1) + new_load) >> i;
-        }
-
-        sched_avg_update(this_rq);
-}
-
-#ifdef CONFIG_SMP
-static inline unsigned long get_rq_runnable_load(struct rq *rq)
-{
-        return rq->cfs.runnable_load_avg;
-}
-#else
-static inline unsigned long get_rq_runnable_load(struct rq *rq)
-{
-        return rq->load.weight;
-}
-#endif
-
-#ifdef CONFIG_NO_HZ_COMMON
-/*
- * There is no sane way to deal with nohz on smp when using jiffies because the
- * cpu doing the jiffies update might drift wrt the cpu doing the jiffy reading
- * causing off-by-one errors in observed deltas; {0,2} instead of {1,1}.
- *
- * Therefore we cannot use the delta approach from the regular tick since that
- * would seriously skew the load calculation. However we'll make do for those
- * updates happening while idle (nohz_idle_balance) or coming out of idle
- * (tick_nohz_idle_exit).
- *
- * This means we might still be one tick off for nohz periods.
- */
-
-/*
- * Called from nohz_idle_balance() to update the load ratings before doing the
- * idle balance.
- */
-void update_idle_cpu_load(struct rq *this_rq)
-{
-        unsigned long curr_jiffies = ACCESS_ONCE(jiffies);
-        unsigned long load = get_rq_runnable_load(this_rq);
-        unsigned long pending_updates;
-
-        /*
-         * bail if there's load or we're actually up-to-date.
-         */
-        if (load || curr_jiffies == this_rq->last_load_update_tick)
-                return;
-
-        pending_updates = curr_jiffies - this_rq->last_load_update_tick;
-        this_rq->last_load_update_tick = curr_jiffies;
-
-        __update_cpu_load(this_rq, load, pending_updates);
-}
-
-/*
- * Called from tick_nohz_idle_exit() -- try and fix up the ticks we missed.
- */
-void update_cpu_load_nohz(void)
-{
-        struct rq *this_rq = this_rq();
-        unsigned long curr_jiffies = ACCESS_ONCE(jiffies);
-        unsigned long pending_updates;
-
-        if (curr_jiffies == this_rq->last_load_update_tick)
-                return;
-
-        raw_spin_lock(&this_rq->lock);
-        pending_updates = curr_jiffies - this_rq->last_load_update_tick;
-        if (pending_updates) {
-                this_rq->last_load_update_tick = curr_jiffies;
-                /*
-                 * We were idle, this means load 0, the current load might be
-                 * !0 due to remote wakeups and the sort.
-                 */
-                __update_cpu_load(this_rq, 0, pending_updates);
-        }
-        raw_spin_unlock(&this_rq->lock);
-}
-#endif /* CONFIG_NO_HZ */
-
-/*
- * Called from scheduler_tick()
- */
-void update_cpu_load_active(struct rq *this_rq)
-{
-        unsigned long load = get_rq_runnable_load(this_rq);
-        /*
-         * See the mess around update_idle_cpu_load() / update_cpu_load_nohz().
-         */
-        this_rq->last_load_update_tick = jiffies;
-        __update_cpu_load(this_rq, load, 1);
-
-        calc_load_account_active(this_rq);
-}
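
The per-CPU decay removed above is easy to sanity-check in isolation: degrade_factor[idx][j] approximates ((2^idx - 1)/2^idx)^(2^j) on a 128-point scale, and decay_load_missed() multiplies in one table entry per set bit of the missed-tick count. A compile-and-run restatement of just that piece, using the tables verbatim from the removed code; the main() example is illustrative and not part of the kernel tree:

#include <stdio.h>

#define DEGRADE_SHIFT           7
#define CPU_LOAD_IDX_MAX        5

/* Same tables as the removed kernel/sched/proc.c code above. */
static const unsigned char degrade_zero_ticks[CPU_LOAD_IDX_MAX] = {0, 8, 32, 64, 128};
static const unsigned char degrade_factor[CPU_LOAD_IDX_MAX][DEGRADE_SHIFT + 1] = {
        {0, 0, 0, 0, 0, 0, 0, 0},
        {64, 32, 8, 0, 0, 0, 0, 0},
        {96, 72, 40, 12, 1, 0, 0},
        {112, 98, 75, 43, 15, 1, 0},
        {120, 112, 98, 76, 45, 16, 2},
};

/* Decay 'load' as if it had been scaled by (2^idx - 1)/2^idx once per missed
 * tick, using one table lookup per set bit of missed_updates instead of a
 * loop over every missed tick. */
static unsigned long decay_load_missed(unsigned long load,
                                       unsigned long missed_updates, int idx)
{
        int j = 0;

        if (!missed_updates)
                return load;
        if (missed_updates >= degrade_zero_ticks[idx])
                return 0;
        if (idx == 1)
                return load >> missed_updates;

        while (missed_updates) {
                if (missed_updates % 2)
                        load = (load * degrade_factor[idx][j]) >> DEGRADE_SHIFT;
                missed_updates >>= 1;
                j++;
        }
        return load;
}

int main(void)
{
        /* idx 2 after 8 idle ticks: exact 1024 * (3/4)^8 is ~102; the table
         * gives 1024 * 12/128 = 96, the documented 128-point approximation. */
        printf("%lu\n", decay_load_missed(1024, 8, 2));
        return 0;
}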