@@ -954,6 +954,69 @@ static inline unsigned long group_weight(struct task_struct *p, int nid)
 	return 1000 * group_faults(p, nid) / p->numa_group->total_faults;
 }
 
+bool should_numa_migrate_memory(struct task_struct *p, struct page * page,
+				int src_nid, int dst_cpu)
+{
+	struct numa_group *ng = p->numa_group;
+	int dst_nid = cpu_to_node(dst_cpu);
+	int last_cpupid, this_cpupid;
+
+	this_cpupid = cpu_pid_to_cpupid(dst_cpu, current->pid);
+
+	/*
+	 * Multi-stage node selection is used in conjunction with a periodic
+	 * migration fault to build a temporal task<->page relation. By using
+	 * a two-stage filter we remove short/unlikely relations.
+	 *
+	 * Using P(p) ~ n_p / n_t as per frequentist probability, we can equate
+	 * a task's usage of a particular page (n_p) per total usage of this
+	 * page (n_t) (in a given time-span) to a probability.
+	 *
+	 * Our periodic faults will sample this probability and getting the
+	 * same result twice in a row, given these samples are fully
+	 * independent, is then given by P(n)^2, provided our sample period
+	 * is sufficiently short compared to the usage pattern.
+	 *
+	 * This quadric squishes small probabilities, making it less likely we
+	 * act on an unlikely task<->page relation.
+	 */
+	last_cpupid = page_cpupid_xchg_last(page, this_cpupid);
+	if (!cpupid_pid_unset(last_cpupid) &&
+				cpupid_to_nid(last_cpupid) != dst_nid)
+		return false;
+
+	/* Always allow migrate on private faults */
+	if (cpupid_match_pid(p, last_cpupid))
+		return true;
+
+	/* A shared fault, but p->numa_group has not been set up yet. */
+	if (!ng)
+		return true;
+
+	/*
+	 * Do not migrate if the destination is not a node that
+	 * is actively used by this numa group.
+	 */
+	if (!node_isset(dst_nid, ng->active_nodes))
+		return false;
+
+	/*
+	 * Source is a node that is not actively used by this
+	 * numa group, while the destination is. Migrate.
+	 */
+	if (!node_isset(src_nid, ng->active_nodes))
+		return true;
+
+	/*
+	 * Both source and destination are nodes in active
+	 * use by this numa group. Maximize memory bandwidth
+	 * by migrating from more heavily used groups, to less
+	 * heavily used ones, spreading the load around.
+	 * Use a 1/4 hysteresis to avoid spurious page movement.
+	 */
+	return group_faults(p, dst_nid) < (group_faults(p, src_nid) * 3 / 4);
+}
+
 static unsigned long weighted_cpuload(const int cpu);
 static unsigned long source_load(int cpu, int type);
 static unsigned long target_load(int cpu, int type);
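
As an aside on the two-stage filter comment in the hunk above: here is a minimal userspace sketch, not part of the patch and not kernel code, with made-up access shares, showing why squaring the sampled probability squashes weak task<->page relations much harder than strong ones.

#include <stdio.h>

int main(void)
{
	/* Hypothetical share of a page's accesses coming from one task. */
	double shares[] = { 0.9, 0.5, 0.1 };

	for (int i = 0; i < 3; i++) {
		double p = shares[i];
		/* Chance that two independent fault samples both hit this task. */
		printf("P = %.2f -> P^2 = %.4f\n", p, p * p);
	}
	return 0;
}

A relation with a 90% share still passes the two-sample filter about 81% of the time, while a 10% share passes only about 1% of the time.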
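
Similarly, a small sketch of the final 1/4-hysteresis check, again illustration only: migrate_for_bandwidth() is a made-up stand-in for the last test in should_numa_migrate_memory(), taking hypothetical per-node group fault counts directly instead of calling group_faults(). It shows that the page only moves when the destination node accounts for less than 3/4 of the source node's group faults, which avoids ping-ponging pages between two nodes the group uses about equally.

#include <stdbool.h>
#include <stdio.h>

/* Stand-in for the final comparison, with fault counts passed in directly. */
static bool migrate_for_bandwidth(unsigned long dst_faults,
				  unsigned long src_faults)
{
	return dst_faults < src_faults * 3 / 4;
}

int main(void)
{
	/* 900 vs 1000 faults: within the hysteresis band, page stays put. */
	printf("%d\n", migrate_for_bandwidth(900, 1000));	/* prints 0 */

	/* 600 vs 1000 faults: well under 3/4 of the source, so migrate. */
	printf("%d\n", migrate_for_bandwidth(600, 1000));	/* prints 1 */

	return 0;
}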