sparse-vmemmap.c 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318
  1. /*
  2. * Virtual Memory Map support
  3. *
  4. * (C) 2007 sgi. Christoph Lameter.
  5. *
  6. * Virtual memory maps allow VM primitives pfn_to_page, page_to_pfn,
  7. * virt_to_page, page_address() to be implemented as a base offset
  8. * calculation without memory access.
  9. *
  10. * However, virtual mappings need a page table and TLBs. Many Linux
  11. * architectures already map their physical space using 1-1 mappings
  12. * via TLBs. For those arches the virtual memory map is essentially
  13. * for free if we use the same page size as the 1-1 mappings. In that
  14. * case the overhead consists of a few additional pages that are
  15. * allocated to create a view of memory for vmemmap.
  16. *
  17. * The architecture is expected to provide a vmemmap_populate() function
  18. * to instantiate the mapping.
  19. */
  20. #include <linux/mm.h>
  21. #include <linux/mmzone.h>
  22. #include <linux/bootmem.h>
  23. #include <linux/memremap.h>
  24. #include <linux/highmem.h>
  25. #include <linux/slab.h>
  26. #include <linux/spinlock.h>
  27. #include <linux/vmalloc.h>
  28. #include <linux/sched.h>
  29. #include <asm/dma.h>
  30. #include <asm/pgalloc.h>
  31. #include <asm/pgtable.h>
  32. /*
  33. * Allocate a block of memory to be used to back the virtual memory map
  34. * or to back the page tables that are used to create the mapping.
  35. * Uses the main allocators if they are available, else bootmem.
  36. */
  37. static void * __ref __earlyonly_bootmem_alloc(int node,
  38. unsigned long size,
  39. unsigned long align,
  40. unsigned long goal)
  41. {
  42. return memblock_virt_alloc_try_nid(size, align, goal,
  43. BOOTMEM_ALLOC_ACCESSIBLE, node);
  44. }
  /*
   * Early-boot carve-out used by alloc_block_buf().  vmemmap_buf is the next
   * unused byte and is NULL while no buffer is set up; vmemmap_buf_end is one
   * past the last byte of the buffer.
   */
  static void *vmemmap_buf;
  static void *vmemmap_buf_end;
  47. void * __meminit vmemmap_alloc_block(unsigned long size, int node)
  48. {
  49. /* If the main allocator is up use that, fallback to bootmem. */
  50. if (slab_is_available()) {
  51. struct page *page;
  52. page = alloc_pages_node(node,
  53. GFP_KERNEL | __GFP_ZERO | __GFP_RETRY_MAYFAIL,
  54. get_order(size));
  55. if (page)
  56. return page_address(page);
  57. return NULL;
  58. } else
  59. return __earlyonly_bootmem_alloc(node, size, size,
  60. __pa(MAX_DMA_ADDRESS));
  61. }
  62. /* need to make sure size is all the same during early stage */
  63. static void * __meminit alloc_block_buf(unsigned long size, int node)
  64. {
  65. void *ptr;
  66. if (!vmemmap_buf)
  67. return vmemmap_alloc_block(size, node);
  68. /* take the from buf */
  69. ptr = (void *)ALIGN((unsigned long)vmemmap_buf, size);
  70. if (ptr + size > vmemmap_buf_end)
  71. return vmemmap_alloc_block(size, node);
  72. vmemmap_buf = ptr + size;
  73. return ptr;
  74. }
  75. static unsigned long __meminit vmem_altmap_next_pfn(struct vmem_altmap *altmap)
  76. {
  77. return altmap->base_pfn + altmap->reserve + altmap->alloc
  78. + altmap->align;
  79. }
  80. static unsigned long __meminit vmem_altmap_nr_free(struct vmem_altmap *altmap)
  81. {
  82. unsigned long allocated = altmap->alloc + altmap->align;
  83. if (altmap->free > allocated)
  84. return altmap->free - allocated;
  85. return 0;
  86. }
  87. /**
  88. * vmem_altmap_alloc - allocate pages from the vmem_altmap reservation
  89. * @altmap - reserved page pool for the allocation
  90. * @nr_pfns - size (in pages) of the allocation
  91. *
  92. * Allocations are aligned to the size of the request
  93. */
  94. static unsigned long __meminit vmem_altmap_alloc(struct vmem_altmap *altmap,
  95. unsigned long nr_pfns)
  96. {
  97. unsigned long pfn = vmem_altmap_next_pfn(altmap);
  98. unsigned long nr_align;
  99. nr_align = 1UL << find_first_bit(&nr_pfns, BITS_PER_LONG);
  100. nr_align = ALIGN(pfn, nr_align) - pfn;
  101. if (nr_pfns + nr_align > vmem_altmap_nr_free(altmap))
  102. return ULONG_MAX;
  103. altmap->alloc += nr_pfns;
  104. altmap->align += nr_align;
  105. return pfn + nr_align;
  106. }
  107. static void * __meminit altmap_alloc_block_buf(unsigned long size,
  108. struct vmem_altmap *altmap)
  109. {
  110. unsigned long pfn, nr_pfns;
  111. void *ptr;
  112. if (size & ~PAGE_MASK) {
  113. pr_warn_once("%s: allocations must be multiple of PAGE_SIZE (%ld)\n",
  114. __func__, size);
  115. return NULL;
  116. }
  117. nr_pfns = size >> PAGE_SHIFT;
  118. pfn = vmem_altmap_alloc(altmap, nr_pfns);
  119. if (pfn < ULONG_MAX)
  120. ptr = __va(__pfn_to_phys(pfn));
  121. else
  122. ptr = NULL;
  123. pr_debug("%s: pfn: %#lx alloc: %ld align: %ld nr: %#lx\n",
  124. __func__, pfn, altmap->alloc, altmap->align, nr_pfns);
  125. return ptr;
  126. }
  127. /* need to make sure size is all the same during early stage */
  128. void * __meminit __vmemmap_alloc_block_buf(unsigned long size, int node,
  129. struct vmem_altmap *altmap)
  130. {
  131. if (altmap)
  132. return altmap_alloc_block_buf(size, altmap);
  133. return alloc_block_buf(size, node);
  134. }
  135. void __meminit vmemmap_verify(pte_t *pte, int node,
  136. unsigned long start, unsigned long end)
  137. {
  138. unsigned long pfn = pte_pfn(*pte);
  139. int actual_node = early_pfn_to_nid(pfn);
  140. if (node_distance(actual_node, node) > LOCAL_DISTANCE)
  141. pr_warn("[%lx-%lx] potential offnode page_structs\n",
  142. start, end - 1);
  143. }
  144. pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node)
  145. {
  146. pte_t *pte = pte_offset_kernel(pmd, addr);
  147. if (pte_none(*pte)) {
  148. pte_t entry;
  149. void *p = alloc_block_buf(PAGE_SIZE, node);
  150. if (!p)
  151. return NULL;
  152. entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL);
  153. set_pte_at(&init_mm, addr, pte, entry);
  154. }
  155. return pte;
  156. }
  157. pmd_t * __meminit vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node)
  158. {
  159. pmd_t *pmd = pmd_offset(pud, addr);
  160. if (pmd_none(*pmd)) {
  161. void *p = vmemmap_alloc_block(PAGE_SIZE, node);
  162. if (!p)
  163. return NULL;
  164. pmd_populate_kernel(&init_mm, pmd, p);
  165. }
  166. return pmd;
  167. }
  168. pud_t * __meminit vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node)
  169. {
  170. pud_t *pud = pud_offset(p4d, addr);
  171. if (pud_none(*pud)) {
  172. void *p = vmemmap_alloc_block(PAGE_SIZE, node);
  173. if (!p)
  174. return NULL;
  175. pud_populate(&init_mm, pud, p);
  176. }
  177. return pud;
  178. }
  179. p4d_t * __meminit vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node)
  180. {
  181. p4d_t *p4d = p4d_offset(pgd, addr);
  182. if (p4d_none(*p4d)) {
  183. void *p = vmemmap_alloc_block(PAGE_SIZE, node);
  184. if (!p)
  185. return NULL;
  186. p4d_populate(&init_mm, p4d, p);
  187. }
  188. return p4d;
  189. }
  190. pgd_t * __meminit vmemmap_pgd_populate(unsigned long addr, int node)
  191. {
  192. pgd_t *pgd = pgd_offset_k(addr);
  193. if (pgd_none(*pgd)) {
  194. void *p = vmemmap_alloc_block(PAGE_SIZE, node);
  195. if (!p)
  196. return NULL;
  197. pgd_populate(&init_mm, pgd, p);
  198. }
  199. return pgd;
  200. }
  201. int __meminit vmemmap_populate_basepages(unsigned long start,
  202. unsigned long end, int node)
  203. {
  204. unsigned long addr = start;
  205. pgd_t *pgd;
  206. p4d_t *p4d;
  207. pud_t *pud;
  208. pmd_t *pmd;
  209. pte_t *pte;
  210. for (; addr < end; addr += PAGE_SIZE) {
  211. pgd = vmemmap_pgd_populate(addr, node);
  212. if (!pgd)
  213. return -ENOMEM;
  214. p4d = vmemmap_p4d_populate(pgd, addr, node);
  215. if (!p4d)
  216. return -ENOMEM;
  217. pud = vmemmap_pud_populate(p4d, addr, node);
  218. if (!pud)
  219. return -ENOMEM;
  220. pmd = vmemmap_pmd_populate(pud, addr, node);
  221. if (!pmd)
  222. return -ENOMEM;
  223. pte = vmemmap_pte_populate(pmd, addr, node);
  224. if (!pte)
  225. return -ENOMEM;
  226. vmemmap_verify(pte, node, addr, addr + PAGE_SIZE);
  227. }
  228. return 0;
  229. }
  230. struct page * __meminit sparse_mem_map_populate(unsigned long pnum, int nid)
  231. {
  232. unsigned long start;
  233. unsigned long end;
  234. struct page *map;
  235. map = pfn_to_page(pnum * PAGES_PER_SECTION);
  236. start = (unsigned long)map;
  237. end = (unsigned long)(map + PAGES_PER_SECTION);
  238. if (vmemmap_populate(start, end, nid))
  239. return NULL;
  240. return map;
  241. }
  242. void __init sparse_mem_maps_populate_node(struct page **map_map,
  243. unsigned long pnum_begin,
  244. unsigned long pnum_end,
  245. unsigned long map_count, int nodeid)
  246. {
  247. unsigned long pnum;
  248. unsigned long size = sizeof(struct page) * PAGES_PER_SECTION;
  249. void *vmemmap_buf_start;
  250. size = ALIGN(size, PMD_SIZE);
  251. vmemmap_buf_start = __earlyonly_bootmem_alloc(nodeid, size * map_count,
  252. PMD_SIZE, __pa(MAX_DMA_ADDRESS));
  253. if (vmemmap_buf_start) {
  254. vmemmap_buf = vmemmap_buf_start;
  255. vmemmap_buf_end = vmemmap_buf_start + size * map_count;
  256. }
  257. for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
  258. struct mem_section *ms;
  259. if (!present_section_nr(pnum))
  260. continue;
  261. map_map[pnum] = sparse_mem_map_populate(pnum, nodeid);
  262. if (map_map[pnum])
  263. continue;
  264. ms = __nr_to_section(pnum);
  265. pr_err("%s: sparsemem memory map backing failed some memory will not be available\n",
  266. __func__);
  267. ms->section_mem_map = 0;
  268. }
  269. if (vmemmap_buf_start) {
  270. /* need to free left buf */
  271. memblock_free_early(__pa(vmemmap_buf),
  272. vmemmap_buf_end - vmemmap_buf);
  273. vmemmap_buf = NULL;
  274. vmemmap_buf_end = NULL;
  275. }
  276. }