@@ -5202,6 +5202,14 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p,
 	return 1;
 }
 
+static inline int task_util(struct task_struct *p);
+static int cpu_util_wake(int cpu, struct task_struct *p);
+
+static unsigned long capacity_spare_wake(int cpu, struct task_struct *p)
+{
+	return capacity_orig_of(cpu) - cpu_util_wake(cpu, p);
+}
+
 /*
  * find_idlest_group finds and returns the least busy CPU group within the
  * domain.
@@ -5211,7 +5219,9 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
 		  int this_cpu, int sd_flag)
 {
 	struct sched_group *idlest = NULL, *group = sd->groups;
+	struct sched_group *most_spare_sg = NULL;
 	unsigned long min_load = ULONG_MAX, this_load = 0;
+	unsigned long most_spare = 0, this_spare = 0;
 	int load_idx = sd->forkexec_idx;
 	int imbalance = 100 + (sd->imbalance_pct-100)/2;
 
@@ -5219,7 +5229,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
 		load_idx = sd->wake_idx;
 
 	do {
-		unsigned long load, avg_load;
+		unsigned long load, avg_load, spare_cap, max_spare_cap;
 		int local_group;
 		int i;
 
@@ -5231,8 +5241,12 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
 		local_group = cpumask_test_cpu(this_cpu,
 					       sched_group_cpus(group));
 
-		/* Tally up the load of all CPUs in the group */
+		/*
+		 * Tally up the load of all CPUs in the group and find
+		 * the group containing the CPU with most spare capacity.
+		 */
 		avg_load = 0;
+		max_spare_cap = 0;
 
 		for_each_cpu(i, sched_group_cpus(group)) {
 			/* Bias balancing toward cpus of our domain */
@@ -5242,6 +5256,11 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
 				load = target_load(i, load_idx);
 
 			avg_load += load;
+
+			spare_cap = capacity_spare_wake(i, p);
+
+			if (spare_cap > max_spare_cap)
+				max_spare_cap = spare_cap;
 		}
 
 		/* Adjust by relative CPU capacity of the group */
@@ -5249,12 +5268,33 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
 
 		if (local_group) {
 			this_load = avg_load;
-		} else if (avg_load < min_load) {
-			min_load = avg_load;
-			idlest = group;
+			this_spare = max_spare_cap;
+		} else {
+			if (avg_load < min_load) {
+				min_load = avg_load;
+				idlest = group;
+			}
+
+			if (most_spare < max_spare_cap) {
+				most_spare = max_spare_cap;
+				most_spare_sg = group;
+			}
 		}
 	} while (group = group->next, group != sd->groups);
 
+	/*
+	 * The cross-over point between using spare capacity or least load
+	 * is too conservative for high utilization tasks on partially
+	 * utilized systems if we require spare_capacity > task_util(p),
+	 * so we allow for some task stuffing by using
+	 * spare_capacity > task_util(p)/2.
+	 */
+	if (this_spare > task_util(p) / 2 &&
+	    imbalance*this_spare > 100*most_spare)
+		return NULL;
+	else if (most_spare > task_util(p) / 2)
+		return most_spare_sg;
+
 	if (!idlest || 100*this_load < imbalance*min_load)
 		return NULL;
 	return idlest;
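
For illustration, below is a minimal, self-contained sketch of the crossover heuristic added at the tail of find_idlest_group(). It is not part of the patch: the helper pick_group() and all numbers in main() (task utilization, per-group spare capacity, loads, imbalance_pct) are hypothetical values chosen only to show which branch the spare_capacity > task_util(p)/2 rule takes.

#include <stdio.h>

/*
 * Hypothetical helper: reproduces the decision order at the end of
 * find_idlest_group() for one local/remote comparison. Not kernel code.
 */
static const char *pick_group(unsigned long task_util,
			      unsigned long this_spare,
			      unsigned long most_spare,
			      unsigned long this_load,
			      unsigned long min_load,
			      int imbalance_pct,
			      int have_idlest)
{
	int imbalance = 100 + (imbalance_pct - 100) / 2;

	/* Spare-capacity path: allow some task stuffing via task_util/2. */
	if (this_spare > task_util / 2 &&
	    imbalance * this_spare > 100 * most_spare)
		return "stay in local group (enough spare capacity here)";
	else if (most_spare > task_util / 2)
		return "go to the group with most spare capacity";

	/* Fall back to the original least-load comparison. */
	if (!have_idlest || 100 * this_load < imbalance * min_load)
		return "stay in local group (load-based fallback)";
	return "go to the least-loaded group";
}

int main(void)
{
	unsigned long task_util = 600;	/* ~60% of a 1024-capacity CPU */

	/* Local group has 400 spare, best remote group 350: stay local. */
	printf("%s\n", pick_group(task_util, 400, 350, 900, 1000, 125, 1));

	/* Local group nearly full (100 spare), remote has 700: migrate. */
	printf("%s\n", pick_group(task_util, 100, 700, 900, 1000, 125, 1));

	return 0;
}

With imbalance_pct = 125, imbalance is 112, so the local group wins the spare-capacity comparison as long as it has at least roughly 90% of the best remote group's spare capacity, mirroring the tolerance the existing 100*this_load < imbalance*min_load check applies to load.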