@@ -7,6 +7,7 @@
  *
  * Copyright IBM Corp. 2007-2010 Mel Gorman <mel@csn.ul.ie>
  */
+#include <linux/cpu.h>
 #include <linux/swap.h>
 #include <linux/migrate.h>
 #include <linux/compaction.h>
@@ -17,6 +18,8 @@
 #include <linux/balloon_compaction.h>
 #include <linux/page-isolation.h>
 #include <linux/kasan.h>
+#include <linux/kthread.h>
+#include <linux/freezer.h>
 #include "internal.h"
 
 #ifdef CONFIG_COMPACTION
@@ -1736,4 +1739,223 @@ void compaction_unregister_node(struct node *node)
 }
 #endif /* CONFIG_SYSFS && CONFIG_NUMA */
 
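+/*
+ * Has a compaction request arrived since kcompactd last went to sleep?
+ * wakeup_kcompactd() records a non-zero order before waking us and
+ * kcompactd_do_work() resets it once the pending request has been serviced.
+ */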
+static inline bool kcompactd_work_requested(pg_data_t *pgdat)
+{
+	return pgdat->kcompactd_max_order > 0;
+}
+
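+/*
+ * Can any populated zone in this node, up to and including the requested
+ * classzone, make use of compaction for the pending order?
+ */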
+static bool kcompactd_node_suitable(pg_data_t *pgdat)
+{
+	int zoneid;
+	struct zone *zone;
+	enum zone_type classzone_idx = pgdat->kcompactd_classzone_idx;
+
+	for (zoneid = 0; zoneid <= classzone_idx; zoneid++) {
+		zone = &pgdat->node_zones[zoneid];
+
+		if (!populated_zone(zone))
+			continue;
+
+		if (compaction_suitable(zone, pgdat->kcompactd_max_order, 0,
+					classzone_idx) == COMPACT_CONTINUE)
+			return true;
+	}
+
+	return false;
+}
+
+static void kcompactd_do_work(pg_data_t *pgdat)
+{
+	/*
+	 * With no special task, compact all zones so that a page of requested
+	 * order is allocatable.
+	 */
+	int zoneid;
+	struct zone *zone;
+	struct compact_control cc = {
+		.order = pgdat->kcompactd_max_order,
+		.classzone_idx = pgdat->kcompactd_classzone_idx,
+		.mode = MIGRATE_SYNC_LIGHT,
+		.ignore_skip_hint = true,
+	};
+	bool success = false;
+
+	trace_mm_compaction_kcompactd_wake(pgdat->node_id, cc.order,
+							cc.classzone_idx);
+	count_vm_event(KCOMPACTD_WAKE);
+
+	for (zoneid = 0; zoneid <= cc.classzone_idx; zoneid++) {
+		int status;
+
+		zone = &pgdat->node_zones[zoneid];
+		if (!populated_zone(zone))
+			continue;
+
+		if (compaction_deferred(zone, cc.order))
+			continue;
+
+		if (compaction_suitable(zone, cc.order, 0, zoneid) !=
+						COMPACT_CONTINUE)
+			continue;
+
+		cc.nr_freepages = 0;
+		cc.nr_migratepages = 0;
+		cc.zone = zone;
+		INIT_LIST_HEAD(&cc.freepages);
+		INIT_LIST_HEAD(&cc.migratepages);
+
+		status = compact_zone(zone, &cc);
+
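+		/*
+		 * A page of the requested order is now allocatable from this
+		 * zone (thanks to our compaction or to parallel frees), so
+		 * record the success and stop deferring compaction for it.
+		 */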
+		if (zone_watermark_ok(zone, cc.order, low_wmark_pages(zone),
+						cc.classzone_idx, 0)) {
+			success = true;
+			compaction_defer_reset(zone, cc.order, false);
+		} else if (status == COMPACT_COMPLETE) {
+			/*
+			 * We use sync migration mode here, so we defer like
+			 * sync direct compaction does.
+			 */
+			defer_compaction(zone, cc.order);
+		}
+
+		VM_BUG_ON(!list_empty(&cc.freepages));
+		VM_BUG_ON(!list_empty(&cc.migratepages));
+	}
+
+	/*
+	 * Regardless of success, we are done until woken up next. But remember
+	 * the requested order/classzone_idx in case it was higher/tighter than
+	 * our current ones.
+	 */
+	if (pgdat->kcompactd_max_order <= cc.order)
+		pgdat->kcompactd_max_order = 0;
+	if (pgdat->kcompactd_classzone_idx >= cc.classzone_idx)
+		pgdat->kcompactd_classzone_idx = pgdat->nr_zones - 1;
+}
+
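+/*
+ * Wake up the per-node kcompactd thread to compact for @order up to
+ * @classzone_idx. Only bother if kcompactd is actually waiting and at
+ * least one zone in the node could benefit from compaction.
+ */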
+void wakeup_kcompactd(pg_data_t *pgdat, int order, int classzone_idx)
+{
+	if (!order)
+		return;
+
+	if (pgdat->kcompactd_max_order < order)
+		pgdat->kcompactd_max_order = order;
+
+	if (pgdat->kcompactd_classzone_idx > classzone_idx)
+		pgdat->kcompactd_classzone_idx = classzone_idx;
+
+	if (!waitqueue_active(&pgdat->kcompactd_wait))
+		return;
+
+	if (!kcompactd_node_suitable(pgdat))
+		return;
+
+	trace_mm_compaction_wakeup_kcompactd(pgdat->node_id, order,
+							classzone_idx);
+	wake_up_interruptible(&pgdat->kcompactd_wait);
+}
+
+/*
+ * The background compaction daemon, started as a kernel thread
+ * from the init process.
+ */
+static int kcompactd(void *p)
+{
+	pg_data_t *pgdat = (pg_data_t *)p;
+	struct task_struct *tsk = current;
+
+	const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
+
+	if (!cpumask_empty(cpumask))
+		set_cpus_allowed_ptr(tsk, cpumask);
+
+	set_freezable();
+
+	pgdat->kcompactd_max_order = 0;
+	pgdat->kcompactd_classzone_idx = pgdat->nr_zones - 1;
+
+	while (!kthread_should_stop()) {
+		trace_mm_compaction_kcompactd_sleep(pgdat->node_id);
+		wait_event_freezable(pgdat->kcompactd_wait,
+				kcompactd_work_requested(pgdat));
+
+		kcompactd_do_work(pgdat);
+	}
+
+	return 0;
+}
+
+/*
+ * This kcompactd start function will be called by init and node-hot-add.
+ * On node-hot-add, kcompactd will be moved to the proper cpus if cpus are
+ * hot-added.
+ */
+int kcompactd_run(int nid)
+{
+	pg_data_t *pgdat = NODE_DATA(nid);
+	int ret = 0;
+
+	if (pgdat->kcompactd)
+		return 0;
+
+	pgdat->kcompactd = kthread_run(kcompactd, pgdat, "kcompactd%d", nid);
+	if (IS_ERR(pgdat->kcompactd)) {
+		pr_err("Failed to start kcompactd on node %d\n", nid);
+		ret = PTR_ERR(pgdat->kcompactd);
+		pgdat->kcompactd = NULL;
+	}
+	return ret;
+}
+
+/*
+ * Called by memory hotplug when all memory in a node is offlined. Caller must
+ * hold mem_hotplug_begin/end().
+ */
+void kcompactd_stop(int nid)
+{
+	struct task_struct *kcompactd = NODE_DATA(nid)->kcompactd;
+
+	if (kcompactd) {
+		kthread_stop(kcompactd);
+		NODE_DATA(nid)->kcompactd = NULL;
+	}
+}
+
+/*
+ * It's optimal to keep kcompactd threads on the same CPUs as their node's
+ * memory, but not required for correctness. So if the last CPU in a node
+ * goes away, the thread is allowed to run anywhere; when the first CPU of
+ * that node comes back online, restore its CPU binding.
+ */
+static int cpu_callback(struct notifier_block *nfb, unsigned long action,
+			void *hcpu)
+{
+	int nid;
+
+	if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN) {
+		for_each_node_state(nid, N_MEMORY) {
+			pg_data_t *pgdat = NODE_DATA(nid);
+			const struct cpumask *mask;
+
+			mask = cpumask_of_node(pgdat->node_id);
+
+			if (cpumask_any_and(cpu_online_mask, mask) < nr_cpu_ids)
+				/* One of our CPUs online: restore mask */
+				set_cpus_allowed_ptr(pgdat->kcompactd, mask);
+		}
+	}
+	return NOTIFY_OK;
+}
+
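+/* Start kcompactd on each memory node at boot and register the CPU hotplug notifier. */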
+static int __init kcompactd_init(void)
+{
+	int nid;
+
+	for_each_node_state(nid, N_MEMORY)
+		kcompactd_run(nid);
+	hotcpu_notifier(cpu_callback, 0);
+	return 0;
+}
+subsys_initcall(kcompactd_init)
+
 #endif /* CONFIG_COMPACTION */