8 年前 · cef27403cb
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2694,6 +2694,67 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 
				 
			
 
				 #ifdef CONFIG_FAIR_GROUP_SCHED
			
 
				 # ifdef CONFIG_SMP
			
 
				+/*
			
 
				+ * All this does is approximate the hierarchical proportion which includes that
			
 
				+ * global sum we all love to hate.
			
 
				+ *
			
 
				+ * That is, the weight of a group entity, is the proportional share of the
			
 
				+ * group weight based on the group runqueue weights. That is:
			
 
				+ *
			
 
				+ *                     tg->weight * grq->load.weight
			
 
				+ *   ge->load.weight = -----------------------------               (1)
			
 
				+ *			  \Sum grq->load.weight
			
 
				+ *
			
 
				+ * Now, because computing that sum is prohibitively expensive to compute (been
			
 
				+ * there, done that) we approximate it with this average stuff. The average
			
 
				+ * moves slower and therefore the approximation is cheaper and more stable.
			
 
				+ *
			
 
				+ * So instead of the above, we substitute:
			
 
				+ *
			
 
				+ *   grq->load.weight -> grq->avg.load_avg                         (2)
			
 
				+ *
			
 
				+ * which yields the following:
			
 
				+ *
			
 
				+ *                     tg->weight * grq->avg.load_avg
			
 
				+ *   ge->load.weight = ------------------------------              (3)
			
 
				+ *				tg->load_avg
			
 
				+ *
			
 
				+ * Where: tg->load_avg ~= \Sum grq->avg.load_avg
			
 
				+ *
			
 
				+ * That is shares_avg, and it is right (given the approximation (2)).
			
 
				+ *
			
 
				+ * The problem with it is that because the average is slow -- it was designed
			
 
				+ * to be exactly that of course -- this leads to transients in boundary
			
 
				+ * conditions. In specific, the case where the group was idle and we start the
			
 
				+ * one task. It takes time for our CPU's grq->avg.load_avg to build up,
			
 
				+ * yielding bad latency etc..
			
 
				+ *
			
 
				+ * Now, in that special case (1) reduces to:
			
 
				+ *
			
 
				+ *                     tg->weight * grq->load.weight
			
 
				+ *   ge->load.weight = ----------------------------- = tg>weight   (4)
			
 
				+ *			    grp->load.weight
			
 
				+ *
			
 
				+ * That is, the sum collapses because all other CPUs are idle; the UP scenario.
			
 
				+ *
			
 
				+ * So what we do is modify our approximation (3) to approach (4) in the (near)
			
 
				+ * UP case, like:
			
 
				+ *
			
 
				+ *   ge->load.weight =
			
 
				+ *
			
 
				+ *              tg->weight * grq->load.weight
			
 
				+ *     ---------------------------------------------------         (5)
			
 
				+ *     tg->load_avg - grq->avg.load_avg + grq->load.weight
			
 
				+ *
			
 
				+ *
			
 
				+ * And that is shares_weight and is icky. In the (near) UP case it approaches
			
 
				+ * (4) while in the normal case it approaches (3). It consistently
			
 
				+ * overestimates the ge->load.weight and therefore:
			
 
				+ *
			
 
				+ *   \Sum ge->load.weight >= tg->weight
			
 
				+ *
			
 
				+ * hence icky!
			
 
				+ */
			
 
				 static long calc_cfs_shares(struct cfs_rq *cfs_rq)
			
 
				 {
			
 
				 	long tg_weight, tg_shares, load, shares;