@@ -134,28 +134,6 @@ static int __init fake_numa_create_new_node(unsigned long end_pfn,
 	return 0;
 }
 
-/*
- * get_node_active_region - Return active region containing pfn
- * Active range returned is empty if none found.
- * @pfn: The page to return the region for
- * @node_ar: Returned set to the active region containing @pfn
- */
-static void __init get_node_active_region(unsigned long pfn,
-					  struct node_active_region *node_ar)
-{
-	unsigned long start_pfn, end_pfn;
-	int i, nid;
-
-	for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
-		if (pfn >= start_pfn && pfn < end_pfn) {
-			node_ar->nid = nid;
-			node_ar->start_pfn = start_pfn;
-			node_ar->end_pfn = end_pfn;
-			break;
-		}
-	}
-}
-
 static void reset_numa_cpu_lookup_table(void)
 {
 	unsigned int cpu;
@@ -928,134 +906,48 @@ static void __init dump_numa_memory_topology(void)
 	}
 }
 
-/*
- * Allocate some memory, satisfying the memblock or bootmem allocator where
- * required. nid is the preferred node and end is the physical address of
- * the highest address in the node.
- *
- * Returns the virtual address of the memory.
- */
-static void __init *careful_zallocation(int nid, unsigned long size,
-					unsigned long align,
-					unsigned long end_pfn)
-{
-	void *ret;
-	int new_nid;
-	unsigned long ret_paddr;
-
-	ret_paddr = __memblock_alloc_base(size, align, end_pfn << PAGE_SHIFT);
-
-	/* retry over all memory */
-	if (!ret_paddr)
-		ret_paddr = __memblock_alloc_base(size, align, memblock_end_of_DRAM());
-
-	if (!ret_paddr)
-		panic("numa.c: cannot allocate %lu bytes for node %d",
-		      size, nid);
-
-	ret = __va(ret_paddr);
-
-	/*
-	 * We initialize the nodes in numeric order: 0, 1, 2...
-	 * and hand over control from the MEMBLOCK allocator to the
-	 * bootmem allocator. If this function is called for
-	 * node 5, then we know that all nodes <5 are using the
-	 * bootmem allocator instead of the MEMBLOCK allocator.
-	 *
-	 * So, check the nid from which this allocation came
-	 * and double check to see if we need to use bootmem
-	 * instead of the MEMBLOCK. We don't free the MEMBLOCK memory
-	 * since it would be useless.
-	 */
-	new_nid = early_pfn_to_nid(ret_paddr >> PAGE_SHIFT);
-	if (new_nid < nid) {
-		ret = __alloc_bootmem_node(NODE_DATA(new_nid),
-				size, align, 0);
-
-		dbg("alloc_bootmem %p %lx\n", ret, size);
-	}
-
-	memset(ret, 0, size);
-	return ret;
-}
-
 static struct notifier_block ppc64_numa_nb = {
 	.notifier_call = cpu_numa_callback,
 	.priority = 1 /* Must run before sched domains notifier. */
 };
 
-static void __init mark_reserved_regions_for_nid(int nid)
+/* Initialize NODE_DATA for a node on the local memory */
+static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn)
 {
-	struct pglist_data *node = NODE_DATA(nid);
-	struct memblock_region *reg;
-
-	for_each_memblock(reserved, reg) {
-		unsigned long physbase = reg->base;
-		unsigned long size = reg->size;
-		unsigned long start_pfn = physbase >> PAGE_SHIFT;
-		unsigned long end_pfn = PFN_UP(physbase + size);
-		struct node_active_region node_ar;
-		unsigned long node_end_pfn = pgdat_end_pfn(node);
-
-		/*
-		 * Check to make sure that this memblock.reserved area is
-		 * within the bounds of the node that we care about.
-		 * Checking the nid of the start and end points is not
-		 * sufficient because the reserved area could span the
-		 * entire node.
-		 */
-		if (end_pfn <= node->node_start_pfn ||
-		    start_pfn >= node_end_pfn)
-			continue;
-
-		get_node_active_region(start_pfn, &node_ar);
-		while (start_pfn < end_pfn &&
-			node_ar.start_pfn < node_ar.end_pfn) {
-			unsigned long reserve_size = size;
-			/*
-			 * if reserved region extends past active region
-			 * then trim size to active region
-			 */
-			if (end_pfn > node_ar.end_pfn)
-				reserve_size = (node_ar.end_pfn << PAGE_SHIFT)
-					- physbase;
-			/*
-			 * Only worry about *this* node, others may not
-			 * yet have valid NODE_DATA().
-			 */
-			if (node_ar.nid == nid) {
-				dbg("reserve_bootmem %lx %lx nid=%d\n",
-					physbase, reserve_size, node_ar.nid);
-				reserve_bootmem_node(NODE_DATA(node_ar.nid),
-						physbase, reserve_size,
-						BOOTMEM_DEFAULT);
-			}
-			/*
-			 * if reserved region is contained in the active region
-			 * then done.
-			 */
-			if (end_pfn <= node_ar.end_pfn)
-				break;
-
-			/*
-			 * reserved region extends past the active region
-			 * get next active region that contains this
-			 * reserved region
-			 */
-			start_pfn = node_ar.end_pfn;
-			physbase = start_pfn << PAGE_SHIFT;
-			size = size - reserve_size;
-			get_node_active_region(start_pfn, &node_ar);
-		}
-	}
+	u64 spanned_pages = end_pfn - start_pfn;
+	const size_t nd_size = roundup(sizeof(pg_data_t), SMP_CACHE_BYTES);
+	u64 nd_pa;
+	void *nd;
+	int tnid;
+
+	if (spanned_pages)
+		pr_info("Initmem setup node %d [mem %#010Lx-%#010Lx]\n",
+			nid, start_pfn << PAGE_SHIFT,
+			(end_pfn << PAGE_SHIFT) - 1);
+	else
+		pr_info("Initmem setup node %d\n", nid);
+
+	nd_pa = memblock_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
+	nd = __va(nd_pa);
+
+	/* report and initialize */
+	pr_info("  NODE_DATA [mem %#010Lx-%#010Lx]\n",
+		nd_pa, nd_pa + nd_size - 1);
+	tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT);
+	if (tnid != nid)
+		pr_info("    NODE_DATA(%d) on node %d\n", nid, tnid);
+
+	node_data[nid] = nd;
+	memset(NODE_DATA(nid), 0, sizeof(pg_data_t));
+	NODE_DATA(nid)->node_id = nid;
+	NODE_DATA(nid)->node_start_pfn = start_pfn;
+	NODE_DATA(nid)->node_spanned_pages = spanned_pages;
 }
 
-
-void __init do_init_bootmem(void)
+void __init initmem_init(void)
 {
 	int nid, cpu;
 
-	min_low_pfn = 0;
 	max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
 	max_pfn = max_low_pfn;
 
@@ -1064,64 +956,16 @@ void __init do_init_bootmem(void)
 	else
 		dump_numa_memory_topology();
 
+	memblock_dump_all();
+
 	for_each_online_node(nid) {
 		unsigned long start_pfn, end_pfn;
-		void *bootmem_vaddr;
-		unsigned long bootmap_pages;
 
 		get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
-
-		/*
-		 * Allocate the node structure node local if possible
-		 *
-		 * Be careful moving this around, as it relies on all
-		 * previous nodes' bootmem to be initialized and have
-		 * all reserved areas marked.
-		 */
-		NODE_DATA(nid) = careful_zallocation(nid,
-					sizeof(struct pglist_data),
-					SMP_CACHE_BYTES, end_pfn);
-
-		dbg("node %d\n", nid);
-		dbg("NODE_DATA() = %p\n", NODE_DATA(nid));
-
-		NODE_DATA(nid)->bdata = &bootmem_node_data[nid];
-		NODE_DATA(nid)->node_start_pfn = start_pfn;
-		NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;
-
-		if (NODE_DATA(nid)->node_spanned_pages == 0)
-			continue;
-
-		dbg("start_paddr = %lx\n", start_pfn << PAGE_SHIFT);
-		dbg("end_paddr = %lx\n", end_pfn << PAGE_SHIFT);
-
-		bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
-		bootmem_vaddr = careful_zallocation(nid,
-					bootmap_pages << PAGE_SHIFT,
-					PAGE_SIZE, end_pfn);
-
-		dbg("bootmap_vaddr = %p\n", bootmem_vaddr);
-
-		init_bootmem_node(NODE_DATA(nid),
-				  __pa(bootmem_vaddr) >> PAGE_SHIFT,
-				  start_pfn, end_pfn);
-
-		free_bootmem_with_active_regions(nid, end_pfn);
-		/*
-		 * Be very careful about moving this around. Future
-		 * calls to careful_zallocation() depend on this getting
-		 * done correctly.
-		 */
-		mark_reserved_regions_for_nid(nid);
+		setup_node_data(nid, start_pfn, end_pfn);
 		sparse_memory_present_with_active_regions(nid);
 	}
 
-	init_bootmem_done = 1;
-
-	/*
-	 * Now bootmem is initialised we can create the node to cpumask
-	 * lookup tables and setup the cpu callback to populate them.
-	 */
 	setup_node_to_cpumask_map();
 
 	reset_numa_cpu_lookup_table();
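
A note on the allocation pattern the new setup_node_data() relies on: memblock_alloc_try_nid() tries to place the allocation on the preferred node first and quietly falls back to any node with free early memory, which is why the added code re-derives the owning node with early_pfn_to_nid() and logs "NODE_DATA(%d) on node %d" only when the two differ. The minimal userspace sketch below illustrates just that fallback behaviour; mock_alloc_try_nid() and the node_free table are hypothetical stand-ins invented for illustration, not kernel API, and the sketch ignores alignment and physical addresses entirely.

/*
 * Userspace sketch of the "preferred node first, then anywhere" fallback
 * that memblock_alloc_try_nid() performs. All names are hypothetical.
 */
#include <stdio.h>
#include <stdint.h>

#define MAX_NODES 4

/* Free early memory (bytes) per mock node; nodes 0 and 2 are memoryless. */
static uint64_t node_free[MAX_NODES] = { 0, 4096, 0, 8192 };

/*
 * Try the preferred nid first, then scan all nodes. Returns the node the
 * allocation actually landed on, or -1 (where the kernel would panic).
 */
static int mock_alloc_try_nid(uint64_t size, int nid)
{
	if (node_free[nid] >= size) {
		node_free[nid] -= size;
		return nid;			/* node-local allocation */
	}
	for (int t = 0; t < MAX_NODES; t++) {
		if (node_free[t] >= size) {
			node_free[t] -= size;
			return t;		/* off-node fallback */
		}
	}
	return -1;
}

int main(void)
{
	for (int nid = 0; nid < MAX_NODES; nid++) {
		int tnid = mock_alloc_try_nid(1024, nid);

		if (tnid == nid)
			printf("NODE_DATA(%d) is node-local\n", nid);
		else
			printf("NODE_DATA(%d) on node %d\n", nid, tnid);
	}
	return 0;
}

Running this prints the off-node message for the memoryless nodes 0 and 2, which is the same situation the patch's tnid != nid check reports on real memoryless NUMA nodes.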