|
@@ -9121,6 +9121,124 @@ out_unlock:
|
|
|
return 0;
|
|
|
}
|
|
|
|
|
|
+static DEFINE_SPINLOCK(balancing);
|
|
|
+
|
|
|
+/*
|
|
|
+ * Scale the max load_balance interval with the number of CPUs in the system.
|
|
|
+ * This trades load-balance latency on larger machines for less cross talk.
|
|
|
+ */
|
|
|
+void update_max_interval(void)
|
|
|
+{
|
|
|
+ max_load_balance_interval = HZ*num_online_cpus()/10;
|
|
|
+}
|
|
|
+
|
|
|
+/*
|
|
|
+ * It checks each scheduling domain to see if it is due to be balanced,
|
|
|
+ * and initiates a balancing operation if so.
|
|
|
+ *
|
|
|
+ * Balancing parameters are set up in init_sched_domains.
|
|
|
+ */
|
|
|
+static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle)
|
|
|
+{
|
|
|
+ int continue_balancing = 1;
|
|
|
+ int cpu = rq->cpu;
|
|
|
+ unsigned long interval;
|
|
|
+ struct sched_domain *sd;
|
|
|
+ /* Earliest time when we have to do rebalance again */
|
|
|
+ unsigned long next_balance = jiffies + 60*HZ;
|
|
|
+ int update_next_balance = 0;
|
|
|
+ int need_serialize, need_decay = 0;
|
|
|
+ u64 max_cost = 0;
|
|
|
+
|
|
|
+ rcu_read_lock();
|
|
|
+ for_each_domain(cpu, sd) {
|
|
|
+ /*
|
|
|
+ * Decay the newidle max times here because this is a regular
|
|
|
+ * visit to all the domains. Decay ~1% per second.
|
|
|
+ */
|
|
|
+ if (time_after(jiffies, sd->next_decay_max_lb_cost)) {
|
|
|
+ sd->max_newidle_lb_cost =
|
|
|
+ (sd->max_newidle_lb_cost * 253) / 256;
|
|
|
+ sd->next_decay_max_lb_cost = jiffies + HZ;
|
|
|
+ need_decay = 1;
|
|
|
+ }
|
|
|
+ max_cost += sd->max_newidle_lb_cost;
|
|
|
+
|
|
|
+ if (!(sd->flags & SD_LOAD_BALANCE))
|
|
|
+ continue;
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Stop the load balance at this level. There is another
|
|
|
+ * CPU in our sched group which is doing load balancing more
|
|
|
+ * actively.
|
|
|
+ */
|
|
|
+ if (!continue_balancing) {
|
|
|
+ if (need_decay)
|
|
|
+ continue;
|
|
|
+ break;
|
|
|
+ }
|
|
|
+
|
|
|
+ interval = get_sd_balance_interval(sd, idle != CPU_IDLE);
|
|
|
+
|
|
|
+ need_serialize = sd->flags & SD_SERIALIZE;
|
|
|
+ if (need_serialize) {
|
|
|
+ if (!spin_trylock(&balancing))
|
|
|
+ goto out;
|
|
|
+ }
|
|
|
+
|
|
|
+ if (time_after_eq(jiffies, sd->last_balance + interval)) {
|
|
|
+ if (load_balance(cpu, rq, sd, idle, &continue_balancing)) {
|
|
|
+ /*
|
|
|
+ * The LBF_DST_PINNED logic could have changed
|
|
|
+ * env->dst_cpu, so we can't know our idle
|
|
|
+ * state even if we migrated tasks. Update it.
|
|
|
+ */
|
|
|
+ idle = idle_cpu(cpu) ? CPU_IDLE : CPU_NOT_IDLE;
|
|
|
+ }
|
|
|
+ sd->last_balance = jiffies;
|
|
|
+ interval = get_sd_balance_interval(sd, idle != CPU_IDLE);
|
|
|
+ }
|
|
|
+ if (need_serialize)
|
|
|
+ spin_unlock(&balancing);
|
|
|
+out:
|
|
|
+ if (time_after(next_balance, sd->last_balance + interval)) {
|
|
|
+ next_balance = sd->last_balance + interval;
|
|
|
+ update_next_balance = 1;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if (need_decay) {
|
|
|
+ /*
|
|
|
+ * Ensure the rq-wide value also decays but keep it at a
|
|
|
+ * reasonable floor to avoid funnies with rq->avg_idle.
|
|
|
+ */
|
|
|
+ rq->max_idle_balance_cost =
|
|
|
+ max((u64)sysctl_sched_migration_cost, max_cost);
|
|
|
+ }
|
|
|
+ rcu_read_unlock();
|
|
|
+
|
|
|
+ /*
|
|
|
+ * next_balance will be updated only when there is a need.
|
|
|
+	 * When the CPU is attached to null domain for ex, it will not be
|
|
|
+ * updated.
|
|
|
+ */
|
|
|
+ if (likely(update_next_balance)) {
|
|
|
+ rq->next_balance = next_balance;
|
|
|
+
|
|
|
+#ifdef CONFIG_NO_HZ_COMMON
|
|
|
+ /*
|
|
|
+ * If this CPU has been elected to perform the nohz idle
|
|
|
+ * balance. Other idle CPUs have already rebalanced with
|
|
|
+ * nohz_idle_balance() and nohz.next_balance has been
|
|
|
+ * updated accordingly. This CPU is now running the idle load
|
|
|
+ * balance for itself and we need to update the
|
|
|
+ * nohz.next_balance accordingly.
|
|
|
+ */
|
|
|
+ if ((idle == CPU_IDLE) && time_after(nohz.next_balance, rq->next_balance))
|
|
|
+ nohz.next_balance = rq->next_balance;
|
|
|
+#endif
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
static inline int on_null_domain(struct rq *rq)
|
|
|
{
|
|
|
return unlikely(!rcu_dereference_sched(rq->sd));
|
|
@@ -9373,124 +9491,6 @@ out:
|
|
|
static inline void nohz_balancer_kick(struct rq *rq) { }
|
|
|
#endif
|
|
|
|
|
|
-static DEFINE_SPINLOCK(balancing);
|
|
|
-
|
|
|
-/*
|
|
|
- * Scale the max load_balance interval with the number of CPUs in the system.
|
|
|
- * This trades load-balance latency on larger machines for less cross talk.
|
|
|
- */
|
|
|
-void update_max_interval(void)
|
|
|
-{
|
|
|
- max_load_balance_interval = HZ*num_online_cpus()/10;
|
|
|
-}
|
|
|
-
|
|
|
-/*
|
|
|
- * It checks each scheduling domain to see if it is due to be balanced,
|
|
|
- * and initiates a balancing operation if so.
|
|
|
- *
|
|
|
- * Balancing parameters are set up in init_sched_domains.
|
|
|
- */
|
|
|
-static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle)
|
|
|
-{
|
|
|
- int continue_balancing = 1;
|
|
|
- int cpu = rq->cpu;
|
|
|
- unsigned long interval;
|
|
|
- struct sched_domain *sd;
|
|
|
- /* Earliest time when we have to do rebalance again */
|
|
|
- unsigned long next_balance = jiffies + 60*HZ;
|
|
|
- int update_next_balance = 0;
|
|
|
- int need_serialize, need_decay = 0;
|
|
|
- u64 max_cost = 0;
|
|
|
-
|
|
|
- rcu_read_lock();
|
|
|
- for_each_domain(cpu, sd) {
|
|
|
- /*
|
|
|
- * Decay the newidle max times here because this is a regular
|
|
|
- * visit to all the domains. Decay ~1% per second.
|
|
|
- */
|
|
|
- if (time_after(jiffies, sd->next_decay_max_lb_cost)) {
|
|
|
- sd->max_newidle_lb_cost =
|
|
|
- (sd->max_newidle_lb_cost * 253) / 256;
|
|
|
- sd->next_decay_max_lb_cost = jiffies + HZ;
|
|
|
- need_decay = 1;
|
|
|
- }
|
|
|
- max_cost += sd->max_newidle_lb_cost;
|
|
|
-
|
|
|
- if (!(sd->flags & SD_LOAD_BALANCE))
|
|
|
- continue;
|
|
|
-
|
|
|
- /*
|
|
|
- * Stop the load balance at this level. There is another
|
|
|
- * CPU in our sched group which is doing load balancing more
|
|
|
- * actively.
|
|
|
- */
|
|
|
- if (!continue_balancing) {
|
|
|
- if (need_decay)
|
|
|
- continue;
|
|
|
- break;
|
|
|
- }
|
|
|
-
|
|
|
- interval = get_sd_balance_interval(sd, idle != CPU_IDLE);
|
|
|
-
|
|
|
- need_serialize = sd->flags & SD_SERIALIZE;
|
|
|
- if (need_serialize) {
|
|
|
- if (!spin_trylock(&balancing))
|
|
|
- goto out;
|
|
|
- }
|
|
|
-
|
|
|
- if (time_after_eq(jiffies, sd->last_balance + interval)) {
|
|
|
- if (load_balance(cpu, rq, sd, idle, &continue_balancing)) {
|
|
|
- /*
|
|
|
- * The LBF_DST_PINNED logic could have changed
|
|
|
- * env->dst_cpu, so we can't know our idle
|
|
|
- * state even if we migrated tasks. Update it.
|
|
|
- */
|
|
|
- idle = idle_cpu(cpu) ? CPU_IDLE : CPU_NOT_IDLE;
|
|
|
- }
|
|
|
- sd->last_balance = jiffies;
|
|
|
- interval = get_sd_balance_interval(sd, idle != CPU_IDLE);
|
|
|
- }
|
|
|
- if (need_serialize)
|
|
|
- spin_unlock(&balancing);
|
|
|
-out:
|
|
|
- if (time_after(next_balance, sd->last_balance + interval)) {
|
|
|
- next_balance = sd->last_balance + interval;
|
|
|
- update_next_balance = 1;
|
|
|
- }
|
|
|
- }
|
|
|
- if (need_decay) {
|
|
|
- /*
|
|
|
- * Ensure the rq-wide value also decays but keep it at a
|
|
|
- * reasonable floor to avoid funnies with rq->avg_idle.
|
|
|
- */
|
|
|
- rq->max_idle_balance_cost =
|
|
|
- max((u64)sysctl_sched_migration_cost, max_cost);
|
|
|
- }
|
|
|
- rcu_read_unlock();
|
|
|
-
|
|
|
- /*
|
|
|
- * next_balance will be updated only when there is a need.
|
|
|
- * When the CPU is attached to null domain for ex, it will not be
|
|
|
- * updated.
|
|
|
- */
|
|
|
- if (likely(update_next_balance)) {
|
|
|
- rq->next_balance = next_balance;
|
|
|
-
|
|
|
-#ifdef CONFIG_NO_HZ_COMMON
|
|
|
- /*
|
|
|
- * If this CPU has been elected to perform the nohz idle
|
|
|
- * balance. Other idle CPUs have already rebalanced with
|
|
|
- * nohz_idle_balance() and nohz.next_balance has been
|
|
|
- * updated accordingly. This CPU is now running the idle load
|
|
|
- * balance for itself and we need to update the
|
|
|
- * nohz.next_balance accordingly.
|
|
|
- */
|
|
|
- if ((idle == CPU_IDLE) && time_after(nohz.next_balance, rq->next_balance))
|
|
|
- nohz.next_balance = rq->next_balance;
|
|
|
-#endif
|
|
|
- }
|
|
|
-}
|
|
|
-
|
|
|
#ifdef CONFIG_NO_HZ_COMMON
|
|
|
/*
|
|
|
* In CONFIG_NO_HZ_COMMON case, the idle balance kickee will do the
|