|
@@ -9121,6 +9121,124 @@ out_unlock:
|
|
|
return 0;
|
|
|
}
|
|
|
|
|
|
+static DEFINE_SPINLOCK(balancing);
|
|
|
+
|
|
|
+/*
|
|
|
+ * Scale the max load_balance interval with the number of CPUs in the system.
|
|
|
+ * This trades load-balance latency on larger machines for less cross talk.
|
|
|
+ */
|
|
|
+void update_max_interval(void)
|
|
|
+{
|
|
|
+ max_load_balance_interval = HZ*num_online_cpus()/10;
|
|
|
+}
|
|
|
+
|
|
|
+/*
|
|
|
+ * It checks each scheduling domain to see if it is due to be balanced,
|
|
|
+ * and initiates a balancing operation if so.
|
|
|
+ *
|
|
|
+ * Balancing parameters are set up in init_sched_domains.
|
|
|
+ */
|
|
|
+static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle)
|
|
|
+{
|
|
|
+ int continue_balancing = 1;
|
|
|
+ int cpu = rq->cpu;
|
|
|
+ unsigned long interval;
|
|
|
+ struct sched_domain *sd;
|
|
|
+ /* Earliest time when we have to do rebalance again */
|
|
|
+ unsigned long next_balance = jiffies + 60*HZ;
|
|
|
+ int update_next_balance = 0;
|
|
|
+ int need_serialize, need_decay = 0;
|
|
|
+ u64 max_cost = 0;
|
|
|
+
|
|
|
+ rcu_read_lock();
|
|
|
+ for_each_domain(cpu, sd) {
|
|
|
+ /*
|
|
|
+ * Decay the newidle max times here because this is a regular
|
|
|
+ * visit to all the domains. Decay ~1% per second.
|
|
|
+ */
|
|
|
+ if (time_after(jiffies, sd->next_decay_max_lb_cost)) {
|
|
|
+ sd->max_newidle_lb_cost =
|
|
|
+ (sd->max_newidle_lb_cost * 253) / 256;
|
|
|
+ sd->next_decay_max_lb_cost = jiffies + HZ;
|
|
|
+ need_decay = 1;
|
|
|
+ }
|
|
|
+ max_cost += sd->max_newidle_lb_cost;
|
|
|
+
|
|
|
+ if (!(sd->flags & SD_LOAD_BALANCE))
|
|
|
+ continue;
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Stop the load balance at this level. There is another
|
|
|
+ * CPU in our sched group which is doing load balancing more
|
|
|
+ * actively.
|
|
|
+ */
|
|
|
+ if (!continue_balancing) {
|
|
|
+ if (need_decay)
|
|
|
+ continue;
|
|
|
+ break;
|
|
|
+ }
|
|
|
+
|
|
|
+ interval = get_sd_balance_interval(sd, idle != CPU_IDLE);
|
|
|
+
|
|
|
+ need_serialize = sd->flags & SD_SERIALIZE;
|
|
|
+ if (need_serialize) {
|
|
|
+ if (!spin_trylock(&balancing))
|
|
|
+ goto out;
|
|
|
+ }
|
|
|
+
|
|
|
+ if (time_after_eq(jiffies, sd->last_balance + interval)) {
|
|
|
+ if (load_balance(cpu, rq, sd, idle, &continue_balancing)) {
|
|
|
+ /*
|
|
|
+ * The LBF_DST_PINNED logic could have changed
|
|
|
+ * env->dst_cpu, so we can't know our idle
|
|
|
+ * state even if we migrated tasks. Update it.
|
|
|
+ */
|
|
|
+ idle = idle_cpu(cpu) ? CPU_IDLE : CPU_NOT_IDLE;
|
|
|
+ }
|
|
|
+ sd->last_balance = jiffies;
|
|
|
+ interval = get_sd_balance_interval(sd, idle != CPU_IDLE);
|
|
|
+ }
|
|
|
+ if (need_serialize)
|
|
|
+ spin_unlock(&balancing);
|
|
|
+out:
|
|
|
+ if (time_after(next_balance, sd->last_balance + interval)) {
|
|
|
+ next_balance = sd->last_balance + interval;
|
|
|
+ update_next_balance = 1;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if (need_decay) {
|
|
|
+ /*
|
|
|
+ * Ensure the rq-wide value also decays but keep it at a
|
|
|
+ * reasonable floor to avoid funnies with rq->avg_idle.
|
|
|
+ */
|
|
|
+ rq->max_idle_balance_cost =
|
|
|
+ max((u64)sysctl_sched_migration_cost, max_cost);
|
|
|
+ }
|
|
|
+ rcu_read_unlock();
|
|
|
+
|
|
|
+ /*
|
|
|
+ * next_balance will be updated only when there is a need.
|
|
|
+	 * When the CPU is attached to null domain for ex, it will not be
|
|
|
+ * updated.
|
|
|
+ */
|
|
|
+ if (likely(update_next_balance)) {
|
|
|
+ rq->next_balance = next_balance;
|
|
|
+
|
|
|
+#ifdef CONFIG_NO_HZ_COMMON
|
|
|
+ /*
|
|
|
+ * If this CPU has been elected to perform the nohz idle
|
|
|
+ * balance. Other idle CPUs have already rebalanced with
|
|
|
+ * nohz_idle_balance() and nohz.next_balance has been
|
|
|
+ * updated accordingly. This CPU is now running the idle load
|
|
|
+ * balance for itself and we need to update the
|
|
|
+ * nohz.next_balance accordingly.
|
|
|
+ */
|
|
|
+ if ((idle == CPU_IDLE) && time_after(nohz.next_balance, rq->next_balance))
|
|
|
+ nohz.next_balance = rq->next_balance;
|
|
|
+#endif
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
static inline int on_null_domain(struct rq *rq)
|
|
|
{
|
|
|
return unlikely(!rcu_dereference_sched(rq->sd));
|
|
@@ -9373,124 +9491,6 @@ out:
|
|
|
static inline void nohz_balancer_kick(struct rq *rq) { }
|
|
|
#endif
|
|
|
|
|
|
-static DEFINE_SPINLOCK(balancing);
|
|
|
-
|
|
|
-/*
|
|
|
- * Scale the max load_balance interval with the number of CPUs in the system.
|
|
|
- * This trades load-balance latency on larger machines for less cross talk.
|
|
|
- */
|
|
|
-void update_max_interval(void)
|
|
|
-{
|
|
|
- max_load_balance_interval = HZ*num_online_cpus()/10;
|
|
|
-}
|
|
|
-
|
|
|
-/*
|
|
|
- * It checks each scheduling domain to see if it is due to be balanced,
|
|
|
- * and initiates a balancing operation if so.
|
|
|
- *
|
|
|
- * Balancing parameters are set up in init_sched_domains.
|
|
|
- */
|
|
|
-static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle)
|
|
|
-{
|
|
|
- int continue_balancing = 1;
|
|
|
- int cpu = rq->cpu;
|
|
|
- unsigned long interval;
|
|
|
- struct sched_domain *sd;
|
|
|
- /* Earliest time when we have to do rebalance again */
|
|
|
- unsigned long next_balance = jiffies + 60*HZ;
|
|
|
- int update_next_balance = 0;
|
|
|
- int need_serialize, need_decay = 0;
|
|
|
- u64 max_cost = 0;
|
|
|
-
|
|
|
- rcu_read_lock();
|
|
|
- for_each_domain(cpu, sd) {
|
|
|
- /*
|
|
|
- * Decay the newidle max times here because this is a regular
|
|
|
- * visit to all the domains. Decay ~1% per second.
|
|
|
- */
|
|
|
- if (time_after(jiffies, sd->next_decay_max_lb_cost)) {
|
|
|
- sd->max_newidle_lb_cost =
|
|
|
- (sd->max_newidle_lb_cost * 253) / 256;
|
|
|
- sd->next_decay_max_lb_cost = jiffies + HZ;
|
|
|
- need_decay = 1;
|
|
|
- }
|
|
|
- max_cost += sd->max_newidle_lb_cost;
|
|
|
-
|
|
|
- if (!(sd->flags & SD_LOAD_BALANCE))
|
|
|
- continue;
|
|
|
-
|
|
|
- /*
|
|
|
- * Stop the load balance at this level. There is another
|
|
|
- * CPU in our sched group which is doing load balancing more
|
|
|
- * actively.
|
|
|
- */
|
|
|
- if (!continue_balancing) {
|
|
|
- if (need_decay)
|
|
|
- continue;
|
|
|
- break;
|
|
|
- }
|
|
|
-
|
|
|
- interval = get_sd_balance_interval(sd, idle != CPU_IDLE);
|
|
|
-
|
|
|
- need_serialize = sd->flags & SD_SERIALIZE;
|
|
|
- if (need_serialize) {
|
|
|
- if (!spin_trylock(&balancing))
|
|
|
- goto out;
|
|
|
- }
|
|
|
-
|
|
|
- if (time_after_eq(jiffies, sd->last_balance + interval)) {
|
|
|
- if (load_balance(cpu, rq, sd, idle, &continue_balancing)) {
|
|
|
- /*
|
|
|
- * The LBF_DST_PINNED logic could have changed
|
|
|
- * env->dst_cpu, so we can't know our idle
|
|
|
- * state even if we migrated tasks. Update it.
|
|
|
- */
|
|
|
- idle = idle_cpu(cpu) ? CPU_IDLE : CPU_NOT_IDLE;
|
|
|
- }
|
|
|
- sd->last_balance = jiffies;
|
|
|
- interval = get_sd_balance_interval(sd, idle != CPU_IDLE);
|
|
|
- }
|
|
|
- if (need_serialize)
|
|
|
- spin_unlock(&balancing);
|
|
|
-out:
|
|
|
- if (time_after(next_balance, sd->last_balance + interval)) {
|
|
|
- next_balance = sd->last_balance + interval;
|
|
|
- update_next_balance = 1;
|
|
|
- }
|
|
|
- }
|
|
|
- if (need_decay) {
|
|
|
- /*
|
|
|
- * Ensure the rq-wide value also decays but keep it at a
|
|
|
- * reasonable floor to avoid funnies with rq->avg_idle.
|
|
|
- */
|
|
|
- rq->max_idle_balance_cost =
|
|
|
- max((u64)sysctl_sched_migration_cost, max_cost);
|
|
|
- }
|
|
|
- rcu_read_unlock();
|
|
|
-
|
|
|
- /*
|
|
|
- * next_balance will be updated only when there is a need.
|
|
|
- * When the CPU is attached to null domain for ex, it will not be
|
|
|
- * updated.
|
|
|
- */
|
|
|
- if (likely(update_next_balance)) {
|
|
|
- rq->next_balance = next_balance;
|
|
|
-
|
|
|
-#ifdef CONFIG_NO_HZ_COMMON
|
|
|
- /*
|
|
|
- * If this CPU has been elected to perform the nohz idle
|
|
|
- * balance. Other idle CPUs have already rebalanced with
|
|
|
- * nohz_idle_balance() and nohz.next_balance has been
|
|
|
- * updated accordingly. This CPU is now running the idle load
|
|
|
- * balance for itself and we need to update the
|
|
|
- * nohz.next_balance accordingly.
|
|
|
- */
|
|
|
- if ((idle == CPU_IDLE) && time_after(nohz.next_balance, rq->next_balance))
|
|
|
- nohz.next_balance = rq->next_balance;
|
|
|
-#endif
|
|
|
- }
|
|
|
-}
|
|
|
-
|
|
|
#ifdef CONFIG_NO_HZ_COMMON
|
|
|
/*
|
|
|
* In CONFIG_NO_HZ_COMMON case, the idle balance kickee will do the
|