13 years ago · 1a687c2e9a
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1996,6 +1996,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 
				 
			
 
				 	nr_uarts=	[SERIAL] maximum number of UARTs to be registered.
			
 
				 
			
 
				+	numa_balancing=	[KNL,X86] Enable or disable automatic NUMA balancing.
			
 
				+			Allowed values are enable and disable
			
 
				+
			
 
				 	numa_zonelist_order= [KNL, BOOT] Select zonelist order for NUMA.
			
 
				 			one of ['zone', 'node', 'default'] can be specified
			
 
				 			This can be set from sysctl after boot.
			
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1563,10 +1563,14 @@ struct task_struct {
 
				 
			
 
				 #ifdef CONFIG_NUMA_BALANCING
			
 
				 extern void task_numa_fault(int node, int pages, bool migrated);
			
 
				+extern void set_numabalancing_state(bool enabled);
			
 
				 #else
			
 
				 static inline void task_numa_fault(int node, int pages, bool migrated)
			
 
				 {
			
 
				 }
			
 
				+/* Without CONFIG_NUMA_BALANCING there is no state to change: no-op stub. */
				+static inline void set_numabalancing_state(bool enabled) { }
			
 
				 #endif
			
 
				 
			
 
				 /*
			
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -720,6 +720,14 @@ config ARCH_USES_NUMA_PROT_NONE
 
				 	depends on ARCH_WANTS_PROT_NUMA_PROT_NONE
			
 
				 	depends on NUMA_BALANCING
			
 
				 
			
 
				+config NUMA_BALANCING_DEFAULT_ENABLED
			
 
				+	bool "Automatically enable NUMA aware memory/task placement"
			
 
				+	default y
			
 
				+	depends on NUMA_BALANCING
			
 
				+	help
			
 
				+	  If set, automatic NUMA balancing will be enabled if running on a NUMA
			
 
				+	  machine.
			
 
				+
			
 
				 config NUMA_BALANCING
			
 
				 	bool "Memory placement aware NUMA scheduler"
			
 
				 	default y
			
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -192,23 +192,10 @@ static void sched_feat_disable(int i) { };
 
				 static void sched_feat_enable(int i) { };
			
 
				 #endif /* HAVE_JUMP_LABEL */
			
 
				 
			
 
				-static ssize_t
			
 
				-sched_feat_write(struct file *filp, const char __user *ubuf,
			
 
				-		size_t cnt, loff_t *ppos)
			
 
				+static int sched_feat_set(char *cmp)
			
 
				 {
			
 
				-	char buf[64];
			
 
				-	char *cmp;
			
 
				-	int neg = 0;
			
 
				 	int i;
			
 
				-
			
 
				-	if (cnt > 63)
			
 
				-		cnt = 63;
			
 
				-
			
 
				-	if (copy_from_user(&buf, ubuf, cnt))
			
 
				-		return -EFAULT;
			
 
				-
			
 
				-	buf[cnt] = 0;
			
 
				-	cmp = strstrip(buf);
			
 
				+	int neg = 0;
			
 
				 
			
 
				 	if (strncmp(cmp, "NO_", 3) == 0) {
			
 
				 		neg = 1;
			
@@ -228,6 +215,27 @@ sched_feat_write(struct file *filp, const char __user *ubuf,
 
				 		}
			
 
				 	}
			
 
				 
			
 
				+	return i;
			
 
				+}
			
 
				+
			
 
				+static ssize_t
			
 
				+sched_feat_write(struct file *filp, const char __user *ubuf,
			
 
				+		size_t cnt, loff_t *ppos)
			
 
				+{
			
 
				+	char buf[64];
			
 
				+	char *cmp;
			
 
				+	int i;
			
 
				+
			
 
				+	if (cnt > 63)
			
 
				+		cnt = 63;
			
 
				+
			
 
				+	if (copy_from_user(&buf, ubuf, cnt))
			
 
				+		return -EFAULT;
			
 
				+
			
 
				+	buf[cnt] = 0;
			
 
				+	cmp = strstrip(buf);
			
 
				+
			
 
				+	i = sched_feat_set(cmp);
			
 
				 	if (i == __SCHED_FEAT_NR)
			
 
				 		return -EINVAL;
			
 
				 
			
@@ -1549,6 +1557,16 @@ static void __sched_fork(struct task_struct *p)
 
				 #endif /* CONFIG_NUMA_BALANCING */
			
 
				 }
			
 
				 
			
 
				+#ifdef CONFIG_NUMA_BALANCING
			
 
				+/*
				+ * Turn the NUMA scheduler feature on or off.
				+ *
				+ * Goes through sched_feat_set() with the same strings the sched_features
				+ * write path hands it: "NUMA" selects the feature, and the "NO_" prefix
				+ * is what sched_feat_set() treats as negation. The return value is
				+ * ignored.
				+ */
				+void set_numabalancing_state(bool enabled)
				+{
				+	sched_feat_set(enabled ? "NUMA" : "NO_NUMA");
				+}
			
 
				+#endif /* CONFIG_NUMA_BALANCING */
			
 
				+
			
 
				 /*
			
 
				  * fork()/clone()-time setup:
			
 
				  */
			
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -811,6 +811,9 @@ void task_numa_fault(int node, int pages, bool migrated)
 
				 {
			
 
				 	struct task_struct *p = current;
			
 
				 
			
 
				+	if (!sched_feat_numa(NUMA))
			
 
				+		return;
			
 
				+
			
 
				 	/* FIXME: Allocate task-specific structure for placement policy here */
			
 
				 
			
 
				 	/*
			
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -63,8 +63,10 @@ SCHED_FEAT(RT_RUNTIME_SHARE, true)
 
				 SCHED_FEAT(LB_MIN, false)
			
 
				 
			
 
				 /*
			
 
				- * Apply the automatic NUMA scheduling policy
			
 
				+ * Apply the automatic NUMA scheduling policy. Enabled automatically
			
 
				+ * at runtime if running on a NUMA machine. Can be controlled via
			
 
				+ * the numa_balancing= boot parameter.
			
 
				  */
			
 
				 #ifdef CONFIG_NUMA_BALANCING
			
 
				-SCHED_FEAT(NUMA,	true)
			
 
				+SCHED_FEAT(NUMA,	false)
			
 
				 #endif
			
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2521,6 +2521,50 @@ void mpol_free_shared_policy(struct shared_policy *p)
 
				 	mutex_unlock(&p->mutex);
			
 
				 }
			
 
				 
			
 
				+#ifdef CONFIG_NUMA_BALANCING
			
 
				+static bool __initdata numabalancing_override;
			
 
				+
			
 
				+/*
				+ * Apply the build-time default for automatic NUMA balancing.
				+ *
				+ * Nothing happens unless nr_node_ids is greater than one and
				+ * numabalancing_override is clear (setup_numabalancing() sets it when
				+ * numa_balancing= is handled on the command line). Otherwise the state
				+ * selected by CONFIG_NUMA_BALANCING_DEFAULT_ENABLED is passed to
				+ * set_numabalancing_state().
				+ */
				+static void __init check_numabalancing_enable(void)
				+{
				+	bool numabalancing_default =
				+		IS_ENABLED(CONFIG_NUMA_BALANCING_DEFAULT_ENABLED);
				+
				+	if (nr_node_ids > 1 && !numabalancing_override) {
				+		/*
				+		 * Report what is actually being done: with the Kconfig
				+		 * default off this call disables balancing. The message
				+		 * is newline-terminated so it is not merged with the
				+		 * next log record.
				+		 */
				+		printk(KERN_INFO "%s automatic NUMA balancing. "
				+			"Configure with numa_balancing= or sysctl\n",
				+			numabalancing_default ? "Enabling" : "Disabling");
				+		set_numabalancing_state(numabalancing_default);
				+	}
				+}
			
 
				+
			
 
				+/*
				+ * Boot parameter handler for "numa_balancing=".
				+ *
				+ * @str: text following "numa_balancing=", may be NULL.
				+ *
				+ * "enable" and "disable" set the balancing state through
				+ * set_numabalancing_state() and mark numabalancing_override so that
				+ * check_numabalancing_enable() leaves the choice alone. Anything else is
				+ * rejected with a warning and does NOT set the override, so a mistyped
				+ * value still gets the build-time default instead of silently
				+ * suppressing it.
				+ *
				+ * Returns 1 when the value was recognised, 0 otherwise.
				+ */
				+static int __init setup_numabalancing(char *str)
				+{
				+	bool enable;
				+
				+	if (str && !strcmp(str, "enable")) {
				+		enable = true;
				+	} else if (str && !strcmp(str, "disable")) {
				+		enable = false;
				+	} else {
				+		printk(KERN_WARNING "Unable to parse numa_balancing=\n");
				+		return 0;
				+	}
				+
				+	/* Only a successfully parsed value overrides the default. */
				+	numabalancing_override = true;
				+	set_numabalancing_state(enable);
				+
				+	return 1;
				+}
			
 
				+__setup("numa_balancing=", setup_numabalancing);
			
 
				+#else
			
 
				+/* CONFIG_NUMA_BALANCING is off: nothing to enable, keep the call site unconditional. */
				+static inline void __init check_numabalancing_enable(void) { }
			
 
				+#endif /* CONFIG_NUMA_BALANCING */
			
 
				+
			
 
				 /* assumes fs == KERNEL_DS */
			
 
				 void __init numa_policy_init(void)
			
 
				 {
			
@@ -2571,6 +2615,8 @@ void __init numa_policy_init(void)
 
				 
			
 
				 	if (do_set_mempolicy(MPOL_INTERLEAVE, 0, &interleave_nodes))
			
 
				 		printk("numa_policy_init: interleaving failed\n");
			
 
				+
			
 
				+	check_numabalancing_enable();
			
 
				 }
			
 
				 
			
 
				 /* Reset policy of current process to default */