|
@@ -1580,9 +1580,8 @@ static bool load_too_imbalanced(long src_load, long dst_load,
|
|
|
* be exchanged with the source task
|
|
|
*/
|
|
|
static void task_numa_compare(struct task_numa_env *env,
|
|
|
- long taskimp, long groupimp)
|
|
|
+ long taskimp, long groupimp, bool maymove)
|
|
|
{
|
|
|
- struct rq *src_rq = cpu_rq(env->src_cpu);
|
|
|
struct rq *dst_rq = cpu_rq(env->dst_cpu);
|
|
|
struct task_struct *cur;
|
|
|
long src_load, dst_load;
|
|
@@ -1603,97 +1602,73 @@ static void task_numa_compare(struct task_numa_env *env,
|
|
|
if (cur == env->p)
|
|
|
goto unlock;
|
|
|
|
|
|
+ if (!cur) {
|
|
|
+ if (maymove || imp > env->best_imp)
|
|
|
+ goto assign;
|
|
|
+ else
|
|
|
+ goto unlock;
|
|
|
+ }
|
|
|
+
|
|
|
/*
|
|
|
* "imp" is the fault differential for the source task between the
|
|
|
* source and destination node. Calculate the total differential for
|
|
|
* the source task and potential destination task. The more negative
|
|
|
- * the value is, the more rmeote accesses that would be expected to
|
|
|
+ * the value is, the more remote accesses that would be expected to
|
|
|
* be incurred if the tasks were swapped.
|
|
|
*/
|
|
|
- if (cur) {
|
|
|
- /* Skip this swap candidate if cannot move to the source CPU: */
|
|
|
- if (!cpumask_test_cpu(env->src_cpu, &cur->cpus_allowed))
|
|
|
- goto unlock;
|
|
|
+ /* Skip this swap candidate if it cannot move to the source CPU */
|
|
|
+ if (!cpumask_test_cpu(env->src_cpu, &cur->cpus_allowed))
|
|
|
+ goto unlock;
|
|
|
|
|
|
+ /*
|
|
|
+ * If dst and source tasks are in the same NUMA group, or not
|
|
|
+ * in any group then look only at task weights.
|
|
|
+ */
|
|
|
+ if (cur->numa_group == env->p->numa_group) {
|
|
|
+ imp = taskimp + task_weight(cur, env->src_nid, dist) -
|
|
|
+ task_weight(cur, env->dst_nid, dist);
|
|
|
/*
|
|
|
- * If dst and source tasks are in the same NUMA group, or not
|
|
|
- * in any group then look only at task weights.
|
|
|
+ * Add some hysteresis to prevent swapping the
|
|
|
+ * tasks within a group over tiny differences.
|
|
|
*/
|
|
|
- if (cur->numa_group == env->p->numa_group) {
|
|
|
- imp = taskimp + task_weight(cur, env->src_nid, dist) -
|
|
|
- task_weight(cur, env->dst_nid, dist);
|
|
|
- /*
|
|
|
- * Add some hysteresis to prevent swapping the
|
|
|
- * tasks within a group over tiny differences.
|
|
|
- */
|
|
|
- if (cur->numa_group)
|
|
|
- imp -= imp/16;
|
|
|
- } else {
|
|
|
- /*
|
|
|
- * Compare the group weights. If a task is all by
|
|
|
- * itself (not part of a group), use the task weight
|
|
|
- * instead.
|
|
|
- */
|
|
|
- if (cur->numa_group)
|
|
|
- imp += group_weight(cur, env->src_nid, dist) -
|
|
|
- group_weight(cur, env->dst_nid, dist);
|
|
|
- else
|
|
|
- imp += task_weight(cur, env->src_nid, dist) -
|
|
|
- task_weight(cur, env->dst_nid, dist);
|
|
|
- }
|
|
|
+ if (cur->numa_group)
|
|
|
+ imp -= imp / 16;
|
|
|
+ } else {
|
|
|
+ /*
|
|
|
+ * Compare the group weights. If a task is all by itself
|
|
|
+ * (not part of a group), use the task weight instead.
|
|
|
+ */
|
|
|
+ if (cur->numa_group && env->p->numa_group)
|
|
|
+ imp += group_weight(cur, env->src_nid, dist) -
|
|
|
+ group_weight(cur, env->dst_nid, dist);
|
|
|
+ else
|
|
|
+ imp += task_weight(cur, env->src_nid, dist) -
|
|
|
+ task_weight(cur, env->dst_nid, dist);
|
|
|
}
|
|
|
|
|
|
- if (imp <= env->best_imp && moveimp <= env->best_imp)
|
|
|
+ if (imp <= env->best_imp)
|
|
|
goto unlock;
|
|
|
|
|
|
- if (!cur) {
|
|
|
- /* Is there capacity at our destination? */
|
|
|
- if (env->src_stats.nr_running <= env->src_stats.task_capacity &&
|
|
|
- !env->dst_stats.has_free_capacity)
|
|
|
- goto unlock;
|
|
|
-
|
|
|
- goto balance;
|
|
|
- }
|
|
|
-
|
|
|
- /* Balance doesn't matter much if we're running a task per CPU: */
|
|
|
- if (imp > env->best_imp && src_rq->nr_running == 1 &&
|
|
|
- dst_rq->nr_running == 1)
|
|
|
+ if (maymove && moveimp > imp && moveimp > env->best_imp) {
|
|
|
+ imp = moveimp - 1;
|
|
|
+ cur = NULL;
|
|
|
goto assign;
|
|
|
+ }
|
|
|
|
|
|
/*
|
|
|
* In the overloaded case, try and keep the load balanced.
|
|
|
*/
|
|
|
-balance:
|
|
|
- load = task_h_load(env->p);
|
|
|
+ load = task_h_load(env->p) - task_h_load(cur);
|
|
|
+ if (!load)
|
|
|
+ goto assign;
|
|
|
+
|
|
|
dst_load = env->dst_stats.load + load;
|
|
|
src_load = env->src_stats.load - load;
|
|
|
|
|
|
- if (moveimp > imp && moveimp > env->best_imp) {
|
|
|
- /*
|
|
|
- * If the improvement from just moving env->p direction is
|
|
|
- * better than swapping tasks around, check if a move is
|
|
|
- * possible. Store a slightly smaller score than moveimp,
|
|
|
- * so an actually idle CPU will win.
|
|
|
- */
|
|
|
- if (!load_too_imbalanced(src_load, dst_load, env)) {
|
|
|
- imp = moveimp - 1;
|
|
|
- cur = NULL;
|
|
|
- goto assign;
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- if (imp <= env->best_imp)
|
|
|
- goto unlock;
|
|
|
-
|
|
|
- if (cur) {
|
|
|
- load = task_h_load(cur);
|
|
|
- dst_load -= load;
|
|
|
- src_load += load;
|
|
|
- }
|
|
|
-
|
|
|
if (load_too_imbalanced(src_load, dst_load, env))
|
|
|
goto unlock;
|
|
|
|
|
|
+assign:
|
|
|
/*
|
|
|
* One idle CPU per node is evaluated for a task numa move.
|
|
|
* Call select_idle_sibling to maybe find a better one.
|
|
@@ -1709,7 +1684,6 @@ balance:
|
|
|
local_irq_enable();
|
|
|
}
|
|
|
|
|
|
-assign:
|
|
|
task_numa_assign(env, cur, imp);
|
|
|
unlock:
|
|
|
rcu_read_unlock();
|
|
@@ -1718,15 +1692,27 @@ unlock:
|
|
|
static void task_numa_find_cpu(struct task_numa_env *env,
|
|
|
long taskimp, long groupimp)
|
|
|
{
|
|
|
+ long src_load, dst_load, load;
|
|
|
+ bool maymove = false;
|
|
|
int cpu;
|
|
|
|
|
|
+ load = task_h_load(env->p);
|
|
|
+ dst_load = env->dst_stats.load + load;
|
|
|
+ src_load = env->src_stats.load - load;
|
|
|
+
|
|
|
+ /*
|
|
|
+ * If the improvement from just moving env->p directly is better
|
|
|
+ * than swapping tasks around, check if a move is possible.
|
|
|
+ */
|
|
|
+ maymove = !load_too_imbalanced(src_load, dst_load, env);
|
|
|
+
|
|
|
for_each_cpu(cpu, cpumask_of_node(env->dst_nid)) {
|
|
|
/* Skip this CPU if the source task cannot migrate */
|
|
|
if (!cpumask_test_cpu(cpu, &env->p->cpus_allowed))
|
|
|
continue;
|
|
|
|
|
|
env->dst_cpu = cpu;
|
|
|
- task_numa_compare(env, taskimp, groupimp);
|
|
|
+ task_numa_compare(env, taskimp, groupimp, maymove);
|
|
|
}
|
|
|
}
|
|
|
|