11 years ago · c29a7baf09
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -423,6 +423,43 @@ config NODES_SHIFT
 
				 	  Specify the maximum number of NUMA nodes available on the target
			
 
				 	  system. Increases memory reserved to accommodate various tables.
			
 
				 
			
 
				+menu "Select NUMA modes"
			
 
				+	depends on NUMA
			
 
				+
			
 
				+config NUMA_EMU
			
 
				+	bool "NUMA emulation"
			
 
				+	default y
			
 
				+	help
			
 
				+	  Numa emulation mode will split the available system memory into
			
 
				+	  equal chunks which then are distributed over the configured number
			
 
				+	  of nodes in a round-robin manner.
			
 
				+
			
 
				+	  The number of fake nodes is limited by the number of available memory
			
 
				+	  chunks (i.e. memory size / fake size) and the number of supported
			
 
				+	  nodes in the kernel.
			
 
				+
			
 
				+	  The CPUs are assigned to the nodes in a way that partially respects
			
 
				+	  the original machine topology (if supported by the machine).
			
 
				+	  Fair distribution of the CPUs is not guaranteed.
			
 
				+
			
 
				+config EMU_SIZE
			
 
				+	hex "NUMA emulation memory chunk size"
			
 
				+	default 0x10000000
			
 
				+	range 0x400000 0x100000000
			
 
				+	depends on NUMA_EMU
			
 
				+	help
			
 
				+	  Select the default size by which the memory is chopped and then
			
 
				+	  assigned to emulated NUMA nodes.
			
 
				+
			
 
				+	  This can be overridden by specifying
			
 
				+
			
 
				+	  emu_size=<n>
			
 
				+
			
 
				+	  on the kernel command line where also suffixes K, M, G, and T are
			
 
				+	  supported.
			
 
				+
			
 
				+endmenu
			
 
				+
			
 
				 config SCHED_MC
			
 
				 	def_bool n
			
 
				 
			
--- a/arch/s390/include/asm/numa.h
+++ b/arch/s390/include/asm/numa.h
@@ -26,6 +26,10 @@ extern int numa_debug_enabled;
 
				 
			
 
				 static inline void numa_setup(void) { }
			
 
				 static inline void numa_update_cpu_topology(void) { }
			
 
				+static inline int numa_pfn_to_nid(unsigned long pfn)
			
 
				+{
			
 
				+	return 0;
			
 
				+}
			
 
				 
			
 
				 #endif /* CONFIG_NUMA */
			
 
				 #endif /* _ASM_S390_NUMA_H */
			
--- a/arch/s390/numa/Makefile
+++ b/arch/s390/numa/Makefile
@@ -1,2 +1,3 @@
 
				 obj-y			+= numa.o
			
 
				 obj-y			+= toptree.o
			
 
				+obj-$(CONFIG_NUMA_EMU)	+= mode_emu.o
			
--- a/arch/s390/numa/mode_emu.c
+++ b/arch/s390/numa/mode_emu.c
@@ -0,0 +1,511 @@
 
				+/*
			
 
				+ * NUMA support for s390
			
 
				+ *
			
 
				+ * NUMA emulation (aka fake NUMA) distributes the available memory to nodes
			
 
				+ * without using real topology information about the physical memory of the
			
 
				+ * machine.
			
 
				+ *
			
 
				+ * It distributes the available CPUs to nodes while respecting the original
			
 
				+ * machine topology information. This is done by trying to avoid to separate
			
 
				+ * CPUs which reside on the same book or even on the same MC.
			
 
				+ *
			
 
				+ * Because the current Linux scheduler code requires a stable cpu to node
			
 
				+ * mapping, cores are pinned to nodes when the first CPU thread is set online.
			
 
				+ *
			
 
				+ * Copyright IBM Corp. 2015
			
 
				+ */
			
 
				+
			
 
				+#define KMSG_COMPONENT "numa_emu"
			
 
				+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
			
 
				+
			
 
				+#include <linux/kernel.h>
			
 
				+#include <linux/cpumask.h>
			
 
				+#include <linux/memblock.h>
			
 
				+#include <linux/node.h>
			
 
				+#include <linux/memory.h>
			
 
				+#include <asm/smp.h>
			
 
				+#include <asm/topology.h>
			
 
				+#include "numa_mode.h"
			
 
				+#include "toptree.h"
			
 
				+
			
 
				+/* Distances between the different system components */
			
 
				+#define DIST_EMPTY	0
			
 
				+#define DIST_CORE	1
			
 
				+#define DIST_MC		2
			
 
				+#define DIST_BOOK	3
			
 
				+#define DIST_MAX	4
			
 
				+
			
 
				+/* Node distance reported to common code */
			
 
				+#define EMU_NODE_DIST	10
			
 
				+
			
 
				+/* Node ID for free (not yet pinned) cores */
			
 
				+#define NODE_ID_FREE	-1
			
 
				+
			
 
				+/* Different levels of toptree */
			
 
				+enum toptree_level {CORE, MC, BOOK, NODE, TOPOLOGY};
			
 
				+
			
 
				+/* The two toptree IDs */
			
 
				+enum {TOPTREE_ID_PHYS, TOPTREE_ID_NUMA};
			
 
				+
			
 
				+/* Number of NUMA nodes */
			
 
				+static int emu_nodes = 1;
			
 
				+/* NUMA stripe size */
			
 
				+static unsigned long emu_size;
			
 
				+/* Pinned core to node mapping */
			
 
				+static int cores_to_node_id[CONFIG_NR_CPUS];
			
 
				+/* Total number of pinned cores */
			
 
				+static int cores_total;
			
 
				+/* Number of cores per node without extra cores */
			
 
				+static int cores_per_node_target;
			
 
				+/* Number of cores pinned to node */
			
 
				+static int cores_per_node[MAX_NUMNODES];
			
 
				+
			
 
				+/*
			
 
				+ * Pin a core to a node
			
 
				+ */
			
 
				+static void pin_core_to_node(int core_id, int node_id)
			
 
				+{
			
 
				+	if (cores_to_node_id[core_id] == NODE_ID_FREE) {
			
 
				+		cores_per_node[node_id]++;
			
 
				+		cores_to_node_id[core_id] = node_id;
			
 
				+		cores_total++;
			
 
				+	} else {
			
 
				+		WARN_ON(cores_to_node_id[core_id] != node_id);
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Number of pinned cores of a node
			
 
				+ */
			
 
				+static int cores_pinned(struct toptree *node)
			
 
				+{
			
 
				+	return cores_per_node[node->id];
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * ID of the node where the core is pinned (or NODE_ID_FREE)
			
 
				+ */
			
 
				+static int core_pinned_to_node_id(struct toptree *core)
			
 
				+{
			
 
				+	return cores_to_node_id[core->id];
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Number of cores in the tree that are not yet pinned
			
 
				+ */
			
 
				+static int cores_free(struct toptree *tree)
			
 
				+{
			
 
				+	struct toptree *core;
			
 
				+	int count = 0;
			
 
				+
			
 
				+	toptree_for_each(core, tree, CORE) {
			
 
				+		if (core_pinned_to_node_id(core) == NODE_ID_FREE)
			
 
				+			count++;
			
 
				+	}
			
 
				+	return count;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Return node of core
			
 
				+ */
			
 
				+static struct toptree *core_node(struct toptree *core)
			
 
				+{
			
 
				+	return core->parent->parent->parent;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Return book of core
			
 
				+ */
			
 
				+static struct toptree *core_book(struct toptree *core)
			
 
				+{
			
 
				+	return core->parent->parent;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Return mc of core
			
 
				+ */
			
 
				+static struct toptree *core_mc(struct toptree *core)
			
 
				+{
			
 
				+	return core->parent;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Distance between two cores
			
 
				+ */
			
 
				+static int dist_core_to_core(struct toptree *core1, struct toptree *core2)
			
 
				+{
			
 
				+	if (core_book(core1)->id != core_book(core2)->id)
			
 
				+		return DIST_BOOK;
			
 
				+	if (core_mc(core1)->id != core_mc(core2)->id)
			
 
				+		return DIST_MC;
			
 
				+	/* Same core or sibling on same MC */
			
 
				+	return DIST_CORE;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Distance of a node to a core
			
 
				+ */
			
 
				+static int dist_node_to_core(struct toptree *node, struct toptree *core)
			
 
				+{
			
 
				+	struct toptree *core_node;
			
 
				+	int dist_min = DIST_MAX;
			
 
				+
			
 
				+	toptree_for_each(core_node, node, CORE)
			
 
				+		dist_min = min(dist_min, dist_core_to_core(core_node, core));
			
 
				+	return dist_min == DIST_MAX ? DIST_EMPTY : dist_min;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Unify will delete empty nodes, therefore recreate nodes.
			
 
				+ */
			
 
				+static void toptree_unify_tree(struct toptree *tree)
			
 
				+{
			
 
				+	int nid;
			
 
				+
			
 
				+	toptree_unify(tree);
			
 
				+	for (nid = 0; nid < emu_nodes; nid++)
			
 
				+		toptree_get_child(tree, nid);
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Find the best/nearest node for a given core and ensure that no node
			
 
				+ * gets more than "cores_per_node_target + extra" cores.
			
 
				+ */
			
 
				+static struct toptree *node_for_core(struct toptree *numa, struct toptree *core,
			
 
				+				     int extra)
			
 
				+{
			
 
				+	struct toptree *node, *node_best = NULL;
			
 
				+	int dist_cur, dist_best;
			
 
				+
			
 
				+	dist_best = DIST_MAX;
			
 
				+	node_best = NULL;
			
 
				+	toptree_for_each(node, numa, NODE) {
			
 
				+		/* Already pinned cores must use their nodes */
			
 
				+		if (core_pinned_to_node_id(core) == node->id) {
			
 
				+			node_best = node;
			
 
				+			break;
			
 
				+		}
			
 
				+		/* Skip nodes that already have enough cores */
			
 
				+		if (cores_pinned(node) >= cores_per_node_target + extra)
			
 
				+			continue;
			
 
				+		dist_cur = dist_node_to_core(node, core);
			
 
				+		if (dist_cur < dist_best) {
			
 
				+			dist_best = dist_cur;
			
 
				+			node_best = node;
			
 
				+		}
			
 
				+	}
			
 
				+	return node_best;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Find the best node for each core with respect to "extra" core count
			
 
				+ */
			
 
				+static void toptree_to_numa_single(struct toptree *numa, struct toptree *phys,
			
 
				+				   int extra)
			
 
				+{
			
 
				+	struct toptree *node, *core, *tmp;
			
 
				+
			
 
				+	toptree_for_each_safe(core, tmp, phys, CORE) {
			
 
				+		node = node_for_core(numa, core, extra);
			
 
				+		if (!node)
			
 
				+			return;
			
 
				+		toptree_move(core, node);
			
 
				+		pin_core_to_node(core->id, node->id);
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Move structures of given level to specified NUMA node
			
 
				+ */
			
 
				+static void move_level_to_numa_node(struct toptree *node, struct toptree *phys,
			
 
				+				    enum toptree_level level, bool perfect)
			
 
				+{
			
 
				+	struct toptree *cur, *tmp;
			
 
				+	int cores_free;
			
 
				+
			
 
				+	toptree_for_each_safe(cur, tmp, phys, level) {
			
 
				+		cores_free = cores_per_node_target - toptree_count(node, CORE);
			
 
				+		if (perfect) {
			
 
				+			if (cores_free == toptree_count(cur, CORE))
			
 
				+				toptree_move(cur, node);
			
 
				+		} else {
			
 
				+			if (cores_free >= toptree_count(cur, CORE))
			
 
				+				toptree_move(cur, node);
			
 
				+		}
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Move structures of a given level to NUMA nodes. If "perfect" is specified
			
 
				+ * move only perfectly fitting structures. Otherwise move also smaller
			
 
				+ * than needed structures.
			
 
				+ */
			
 
				+static void move_level_to_numa(struct toptree *numa, struct toptree *phys,
			
 
				+			       enum toptree_level level, bool perfect)
			
 
				+{
			
 
				+	struct toptree *node;
			
 
				+
			
 
				+	toptree_for_each(node, numa, NODE)
			
 
				+		move_level_to_numa_node(node, phys, level, perfect);
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * For the first run try to move the big structures
			
 
				+ */
			
 
				+static void toptree_to_numa_first(struct toptree *numa, struct toptree *phys)
			
 
				+{
			
 
				+	struct toptree *core;
			
 
				+
			
 
				+	/* Always try to move perfectly fitting structures first */
			
 
				+	move_level_to_numa(numa, phys, BOOK, true);
			
 
				+	move_level_to_numa(numa, phys, BOOK, false);
			
 
				+	move_level_to_numa(numa, phys, MC, true);
			
 
				+	move_level_to_numa(numa, phys, MC, false);
			
 
				+	/* Now pin all the moved cores */
			
 
				+	toptree_for_each(core, numa, CORE)
			
 
				+		pin_core_to_node(core->id, core_node(core)->id);
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Allocate new topology and create required nodes
			
 
				+ */
			
 
				+static struct toptree *toptree_new(int id, int nodes)
			
 
				+{
			
 
				+	struct toptree *tree;
			
 
				+	int nid;
			
 
				+
			
 
				+	tree = toptree_alloc(TOPOLOGY, id);
			
 
				+	if (!tree)
			
 
				+		goto fail;
			
 
				+	for (nid = 0; nid < nodes; nid++) {
			
 
				+		if (!toptree_get_child(tree, nid))
			
 
				+			goto fail;
			
 
				+	}
			
 
				+	return tree;
			
 
				+fail:
			
 
				+	panic("NUMA emulation could not allocate topology");
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Move cores from physical topology into NUMA target topology
			
 
				+ * and try to keep as much of the physical topology as possible.
			
 
				+ */
			
 
				+static struct toptree *toptree_to_numa(struct toptree *phys)
			
 
				+{
			
 
				+	static int first = 1;
			
 
				+	struct toptree *numa;
			
 
				+
			
 
				+	cores_per_node_target = (cores_total + cores_free(phys)) / emu_nodes;
			
 
				+	numa = toptree_new(TOPTREE_ID_NUMA, emu_nodes);
			
 
				+	if (first) {
			
 
				+		toptree_to_numa_first(numa, phys);
			
 
				+		first = 0;
			
 
				+	}
			
 
				+	toptree_to_numa_single(numa, phys, 0);
			
 
				+	toptree_to_numa_single(numa, phys, 1);
			
 
				+	toptree_unify_tree(numa);
			
 
				+
			
 
				+	WARN_ON(cpumask_weight(&phys->mask));
			
 
				+	return numa;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Create a toptree out of the physical topology that we got from the hypervisor
			
 
				+ */
			
 
				+static struct toptree *toptree_from_topology(void)
			
 
				+{
			
 
				+	struct toptree *phys, *node, *book, *mc, *core;
			
 
				+	struct cpu_topology_s390 *top;
			
 
				+	int cpu;
			
 
				+
			
 
				+	phys = toptree_new(TOPTREE_ID_PHYS, 1);
			
 
				+
			
 
				+	for_each_online_cpu(cpu) {
			
 
				+		top = &per_cpu(cpu_topology, cpu);
			
 
				+		node = toptree_get_child(phys, 0);
			
 
				+		book = toptree_get_child(node, top->book_id);
			
 
				+		mc = toptree_get_child(book, top->socket_id);
			
 
				+		core = toptree_get_child(mc, top->core_id);
			
 
				+		if (!book || !mc || !core)
			
 
				+			panic("NUMA emulation could not allocate memory");
			
 
				+		cpumask_set_cpu(cpu, &core->mask);
			
 
				+		toptree_update_mask(mc);
			
 
				+	}
			
 
				+	return phys;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Add toptree core to topology and create correct CPU masks
			
 
				+ */
			
 
				+static void topology_add_core(struct toptree *core)
			
 
				+{
			
 
				+	struct cpu_topology_s390 *top;
			
 
				+	int cpu;
			
 
				+
			
 
				+	for_each_cpu(cpu, &core->mask) {
			
 
				+		top = &per_cpu(cpu_topology, cpu);
			
 
				+		cpumask_copy(&top->thread_mask, &core->mask);
			
 
				+		cpumask_copy(&top->core_mask, &core_mc(core)->mask);
			
 
				+		cpumask_copy(&top->book_mask, &core_book(core)->mask);
			
 
				+		cpumask_set_cpu(cpu, node_to_cpumask_map[core_node(core)->id]);
			
 
				+		top->node_id = core_node(core)->id;
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Apply toptree to topology and create CPU masks
			
 
				+ */
			
 
				+static void toptree_to_topology(struct toptree *numa)
			
 
				+{
			
 
				+	struct toptree *core;
			
 
				+	int i;
			
 
				+
			
 
				+	/* Clear all node masks */
			
 
				+	for (i = 0; i < MAX_NUMNODES; i++)
			
 
				+		cpumask_clear(node_to_cpumask_map[i]);
			
 
				+
			
 
				+	/* Rebuild all masks */
			
 
				+	toptree_for_each(core, numa, CORE)
			
 
				+		topology_add_core(core);
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Show the node to core mapping
			
 
				+ */
			
 
				+static void print_node_to_core_map(void)
			
 
				+{
			
 
				+	int nid, cid;
			
 
				+
			
 
				+	if (!numa_debug_enabled)
			
 
				+		return;
			
 
				+	printk(KERN_DEBUG "NUMA node to core mapping\n");
			
 
				+	for (nid = 0; nid < emu_nodes; nid++) {
			
 
				+		printk(KERN_DEBUG "  node %3d: ", nid);
			
 
				+		for (cid = 0; cid < ARRAY_SIZE(cores_to_node_id); cid++) {
			
 
				+			if (cores_to_node_id[cid] == nid)
			
 
				+				printk(KERN_CONT "%d ", cid);
			
 
				+		}
			
 
				+		printk(KERN_CONT "\n");
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Transfer physical topology into a NUMA topology and modify CPU masks
			
 
				+ * according to the NUMA topology.
			
 
				+ *
			
 
				+ * This function is called under the CPU hotplug lock.
			
 
				+ */
			
 
				+static void emu_update_cpu_topology(void)
			
 
				+{
			
 
				+	struct toptree *phys, *numa;
			
 
				+
			
 
				+	phys = toptree_from_topology();
			
 
				+	numa = toptree_to_numa(phys);
			
 
				+	toptree_free(phys);
			
 
				+	toptree_to_topology(numa);
			
 
				+	toptree_free(numa);
			
 
				+	print_node_to_core_map();
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * If emu_size is not set, use CONFIG_EMU_SIZE. Then round to minimum
			
 
				+ * alignment (needed for memory hotplug).
			
 
				+ */
			
 
				+static unsigned long emu_setup_size_adjust(unsigned long size)
			
 
				+{
			
 
				+	size = size ? : CONFIG_EMU_SIZE;
			
 
				+	size = roundup(size, memory_block_size_bytes());
			
 
				+	return size;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * If we have not enough memory for the specified nodes, reduce the node count.
			
 
				+ */
			
 
				+static int emu_setup_nodes_adjust(int nodes)
			
 
				+{
			
 
				+	int nodes_max;
			
 
				+
			
 
				+	nodes_max = memblock.memory.total_size / emu_size;
			
 
				+	nodes_max = max(nodes_max, 1);
			
 
				+	if (nodes_max >= nodes)
			
 
				+		return nodes;
			
 
				+	pr_warn("Not enough memory for %d nodes, reducing node count\n", nodes);
			
 
				+	return nodes_max;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Early emu setup
			
 
				+ */
			
 
				+static void emu_setup(void)
			
 
				+{
			
 
				+	int i;
			
 
				+
			
 
				+	emu_size = emu_setup_size_adjust(emu_size);
			
 
				+	emu_nodes = emu_setup_nodes_adjust(emu_nodes);
			
 
				+	for (i = 0; i < ARRAY_SIZE(cores_to_node_id); i++)
			
 
				+		cores_to_node_id[i] = NODE_ID_FREE;
			
 
				+	pr_info("Creating %d nodes with memory stripe size %ld MB\n",
			
 
				+		emu_nodes, emu_size >> 20);
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Return node id for given page number
			
 
				+ */
			
 
				+static int emu_pfn_to_nid(unsigned long pfn)
			
 
				+{
			
 
				+	return (pfn / (emu_size >> PAGE_SHIFT)) % emu_nodes;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Return stripe size
			
 
				+ */
			
 
				+static unsigned long emu_align(void)
			
 
				+{
			
 
				+	return emu_size;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Return distance between two nodes
			
 
				+ */
			
 
				+static int emu_distance(int node1, int node2)
			
 
				+{
			
 
				+	return (node1 != node2) * EMU_NODE_DIST;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Define callbacks for generic s390 NUMA infrastructure
			
 
				+ */
			
 
				+const struct numa_mode numa_mode_emu = {
			
 
				+	.name = "emu",
			
 
				+	.setup = emu_setup,
			
 
				+	.update_cpu_topology = emu_update_cpu_topology,
			
 
				+	.__pfn_to_nid = emu_pfn_to_nid,
			
 
				+	.align = emu_align,
			
 
				+	.distance = emu_distance,
			
 
				+};
			
 
				+
			
 
				+/*
			
 
				+ * Kernel parameter: emu_nodes=<n>
			
 
				+ */
			
 
				+static int __init early_parse_emu_nodes(char *p)
			
 
				+{
			
 
				+	int count;
			
 
				+
			
 
				+	if (kstrtoint(p, 0, &count) != 0 || count <= 0)
			
 
				+		return 0;
			
 
				+	if (count <= 0)
			
 
				+		return 0;
			
 
				+	emu_nodes = min(count, MAX_NUMNODES);
			
 
				+	return 0;
			
 
				+}
			
 
				+early_param("emu_nodes", early_parse_emu_nodes);
			
 
				+
			
 
				+/*
			
 
				+ * Kernel parameter: emu_size=[<n>[k|M|G|T]]
			
 
				+ */
			
 
				+static int __init early_parse_emu_size(char *p)
			
 
				+{
			
 
				+	emu_size = memparse(p, NULL);
			
 
				+	return 0;
			
 
				+}
			
 
				+early_param("emu_size", early_parse_emu_size);
			
--- a/arch/s390/numa/numa.c
+++ b/arch/s390/numa/numa.c
@@ -175,6 +175,10 @@ static int __init parse_numa(char *parm)
 
				 {
			
 
				 	if (strcmp(parm, numa_mode_plain.name) == 0)
			
 
				 		mode = &numa_mode_plain;
			
 
				+#ifdef CONFIG_NUMA_EMU
			
 
				+	if (strcmp(parm, numa_mode_emu.name) == 0)
			
 
				+		mode = &numa_mode_emu;
			
 
				+#endif
			
 
				 	return 0;
			
 
				 }
			
 
				 early_param("numa", parse_numa);
			
--- a/arch/s390/numa/numa_mode.h
+++ b/arch/s390/numa/numa_mode.h
@@ -19,5 +19,6 @@ struct numa_mode {
 
				 };
			
 
				 
			
 
				 extern const struct numa_mode numa_mode_plain;
			
 
				+extern const struct numa_mode numa_mode_emu;
			
 
				 
			
 
				 #endif /* __S390_NUMA_MODE_H */
			
--- a/drivers/s390/char/sclp_cmd.c
+++ b/drivers/s390/char/sclp_cmd.c
@@ -25,6 +25,7 @@
 
				 #include <asm/setup.h>
			
 
				 #include <asm/page.h>
			
 
				 #include <asm/sclp.h>
			
 
				+#include <asm/numa.h>
			
 
				 
			
 
				 #include "sclp.h"
			
 
				 
			
@@ -388,11 +389,11 @@ static struct notifier_block sclp_mem_nb = {
 
				 };
			
 
				 
			
 
				 static void __init align_to_block_size(unsigned long long *start,
			
 
				-				       unsigned long long *size)
			
 
				+				       unsigned long long *size,
			
 
				+				       unsigned long long alignment)
			
 
				 {
			
 
				-	unsigned long long start_align, size_align, alignment;
			
 
				+	unsigned long long start_align, size_align;
			
 
				 
			
 
				-	alignment = memory_block_size_bytes();
			
 
				 	start_align = roundup(*start, alignment);
			
 
				 	size_align = rounddown(*start + *size, alignment) - start_align;
			
 
				 
			
@@ -404,8 +405,8 @@ static void __init align_to_block_size(unsigned long long *start,
 
				 
			
 
				 static void __init add_memory_merged(u16 rn)
			
 
				 {
			
 
				+	unsigned long long start, size, addr, block_size;
			
 
				 	static u16 first_rn, num;
			
 
				-	unsigned long long start, size;
			
 
				 
			
 
				 	if (rn && first_rn && (first_rn + num == rn)) {
			
 
				 		num++;
			
@@ -423,9 +424,12 @@ static void __init add_memory_merged(u16 rn)
 
				 		goto skip_add;
			
 
				 	if (memory_end_set && (start + size > memory_end))
			
 
				 		size = memory_end - start;
			
 
				-	align_to_block_size(&start, &size);
			
 
				-	if (size)
			
 
				-		add_memory(0, start, size);
			
 
				+	block_size = memory_block_size_bytes();
			
 
				+	align_to_block_size(&start, &size, block_size);
			
 
				+	if (!size)
			
 
				+		goto skip_add;
			
 
				+	for (addr = start; addr < start + size; addr += block_size)
			
 
				+		add_memory(numa_pfn_to_nid(PFN_DOWN(addr)), addr, block_size);
			
 
				 skip_add:
			
 
				 	first_rn = rn;
			
 
				 	num = 1;