vmem.c

/*
 * Copyright IBM Corp. 2006
 * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
 */
#include <linux/bootmem.h>
#include <linux/pfn.h>
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/hugetlb.h>
#include <linux/slab.h>
#include <linux/memblock.h>
#include <asm/cacheflush.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/setup.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
#include <asm/set_memory.h>

static DEFINE_MUTEX(vmem_mutex);

struct memory_segment {
        struct list_head list;
        unsigned long start;
        unsigned long size;
};

static LIST_HEAD(mem_segs);
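
/*
 * Allocate pages from the buddy allocator once the slab allocator is
 * available; fall back to memblock during early boot.
 */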
static void __ref *vmem_alloc_pages(unsigned int order)
{
        unsigned long size = PAGE_SIZE << order;

        if (slab_is_available())
                return (void *)__get_free_pages(GFP_KERNEL, order);
        return (void *) memblock_alloc(size, size);
}
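
/* Allocate a region/segment table and preset all entries to @val. */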
void *vmem_crst_alloc(unsigned long val)
{
        unsigned long *table;

        table = vmem_alloc_pages(CRST_ALLOC_ORDER);
        if (table)
                crst_table_init(table, val);
        return table;
}
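
/* Allocate a page table and mark all of its entries invalid. */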
pte_t __ref *vmem_pte_alloc(void)
{
        unsigned long size = PTRS_PER_PTE * sizeof(pte_t);
        pte_t *pte;

        if (slab_is_available())
                pte = (pte_t *) page_table_alloc(&init_mm);
        else
                pte = (pte_t *) memblock_alloc(size, size);
        if (!pte)
                return NULL;
        clear_table((unsigned long *) pte, _PAGE_INVALID, size);
        return pte;
}

/*
 * Add a physical memory range to the 1:1 mapping.
 */
static int vmem_add_mem(unsigned long start, unsigned long size)
{
        unsigned long pgt_prot, sgt_prot, r3_prot;
        unsigned long pages4k, pages1m, pages2g;
        unsigned long end = start + size;
        unsigned long address = start;
        pgd_t *pg_dir;
        p4d_t *p4_dir;
        pud_t *pu_dir;
        pmd_t *pm_dir;
        pte_t *pt_dir;
        int ret = -ENOMEM;

        pgt_prot = pgprot_val(PAGE_KERNEL);
        sgt_prot = pgprot_val(SEGMENT_KERNEL);
        r3_prot = pgprot_val(REGION3_KERNEL);
        if (!MACHINE_HAS_NX) {
                pgt_prot &= ~_PAGE_NOEXEC;
                sgt_prot &= ~_SEGMENT_ENTRY_NOEXEC;
                r3_prot &= ~_REGION_ENTRY_NOEXEC;
        }
        pages4k = pages1m = pages2g = 0;
        while (address < end) {
                pg_dir = pgd_offset_k(address);
                if (pgd_none(*pg_dir)) {
                        p4_dir = vmem_crst_alloc(_REGION2_ENTRY_EMPTY);
                        if (!p4_dir)
                                goto out;
                        pgd_populate(&init_mm, pg_dir, p4_dir);
                }
                p4_dir = p4d_offset(pg_dir, address);
                if (p4d_none(*p4_dir)) {
                        pu_dir = vmem_crst_alloc(_REGION3_ENTRY_EMPTY);
                        if (!pu_dir)
                                goto out;
                        p4d_populate(&init_mm, p4_dir, pu_dir);
                }
                pu_dir = pud_offset(p4_dir, address);
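                /*
                 * Map a full, aligned 2 GB block with a single region-third
                 * table entry if EDAT2 is available and debug_pagealloc is
                 * not enabled.
                 */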
                if (MACHINE_HAS_EDAT2 && pud_none(*pu_dir) && address &&
                    !(address & ~PUD_MASK) && (address + PUD_SIZE <= end) &&
                    !debug_pagealloc_enabled()) {
                        pud_val(*pu_dir) = address | r3_prot;
                        address += PUD_SIZE;
                        pages2g++;
                        continue;
                }
                if (pud_none(*pu_dir)) {
                        pm_dir = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
                        if (!pm_dir)
                                goto out;
                        pud_populate(&init_mm, pu_dir, pm_dir);
                }
                pm_dir = pmd_offset(pu_dir, address);
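                /*
                 * Map a full, aligned 1 MB block with a single segment table
                 * entry if EDAT1 is available and debug_pagealloc is not
                 * enabled.
                 */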
                if (MACHINE_HAS_EDAT1 && pmd_none(*pm_dir) && address &&
                    !(address & ~PMD_MASK) && (address + PMD_SIZE <= end) &&
                    !debug_pagealloc_enabled()) {
                        pmd_val(*pm_dir) = address | sgt_prot;
                        address += PMD_SIZE;
                        pages1m++;
                        continue;
                }
                if (pmd_none(*pm_dir)) {
                        pt_dir = vmem_pte_alloc();
                        if (!pt_dir)
                                goto out;
                        pmd_populate(&init_mm, pm_dir, pt_dir);
                }
                pt_dir = pte_offset_kernel(pm_dir, address);
                pte_val(*pt_dir) = address | pgt_prot;
                address += PAGE_SIZE;
                pages4k++;
        }
        ret = 0;
out:
        update_page_count(PG_DIRECT_MAP_4K, pages4k);
        update_page_count(PG_DIRECT_MAP_1M, pages1m);
        update_page_count(PG_DIRECT_MAP_2G, pages2g);
        return ret;
}

/*
 * Remove a physical memory range from the 1:1 mapping.
 * Currently only invalidates page table entries.
 */
static void vmem_remove_range(unsigned long start, unsigned long size)
{
        unsigned long pages4k, pages1m, pages2g;
        unsigned long end = start + size;
        unsigned long address = start;
        pgd_t *pg_dir;
        p4d_t *p4_dir;
        pud_t *pu_dir;
        pmd_t *pm_dir;
        pte_t *pt_dir;

        pages4k = pages1m = pages2g = 0;
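        /* Clear entries at whatever level (2 GB, 1 MB or 4 KB) they were mapped. */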
        while (address < end) {
                pg_dir = pgd_offset_k(address);
                if (pgd_none(*pg_dir)) {
                        address += PGDIR_SIZE;
                        continue;
                }
                p4_dir = p4d_offset(pg_dir, address);
                if (p4d_none(*p4_dir)) {
                        address += P4D_SIZE;
                        continue;
                }
                pu_dir = pud_offset(p4_dir, address);
                if (pud_none(*pu_dir)) {
                        address += PUD_SIZE;
                        continue;
                }
                if (pud_large(*pu_dir)) {
                        pud_clear(pu_dir);
                        address += PUD_SIZE;
                        pages2g++;
                        continue;
                }
                pm_dir = pmd_offset(pu_dir, address);
                if (pmd_none(*pm_dir)) {
                        address += PMD_SIZE;
                        continue;
                }
                if (pmd_large(*pm_dir)) {
                        pmd_clear(pm_dir);
                        address += PMD_SIZE;
                        pages1m++;
                        continue;
                }
                pt_dir = pte_offset_kernel(pm_dir, address);
                pte_clear(&init_mm, address, pt_dir);
                address += PAGE_SIZE;
                pages4k++;
        }
        flush_tlb_kernel_range(start, end);
        update_page_count(PG_DIRECT_MAP_4K, -pages4k);
        update_page_count(PG_DIRECT_MAP_1M, -pages1m);
        update_page_count(PG_DIRECT_MAP_2G, -pages2g);
}

/*
 * Add a backed mem_map array to the virtual mem_map array.
 */
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
{
        unsigned long pgt_prot, sgt_prot;
        unsigned long address = start;
        pgd_t *pg_dir;
        p4d_t *p4_dir;
        pud_t *pu_dir;
        pmd_t *pm_dir;
        pte_t *pt_dir;
        int ret = -ENOMEM;

        pgt_prot = pgprot_val(PAGE_KERNEL);
        sgt_prot = pgprot_val(SEGMENT_KERNEL);
        if (!MACHINE_HAS_NX) {
                pgt_prot &= ~_PAGE_NOEXEC;
                sgt_prot &= ~_SEGMENT_ENTRY_NOEXEC;
        }
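        /* Walk the page tables for the vmemmap range, creating missing levels. */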
        for (address = start; address < end;) {
                pg_dir = pgd_offset_k(address);
                if (pgd_none(*pg_dir)) {
                        p4_dir = vmem_crst_alloc(_REGION2_ENTRY_EMPTY);
                        if (!p4_dir)
                                goto out;
                        pgd_populate(&init_mm, pg_dir, p4_dir);
                }
                p4_dir = p4d_offset(pg_dir, address);
                if (p4d_none(*p4_dir)) {
                        pu_dir = vmem_crst_alloc(_REGION3_ENTRY_EMPTY);
                        if (!pu_dir)
                                goto out;
                        p4d_populate(&init_mm, p4_dir, pu_dir);
                }
                pu_dir = pud_offset(p4_dir, address);
                if (pud_none(*pu_dir)) {
                        pm_dir = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
                        if (!pm_dir)
                                goto out;
                        pud_populate(&init_mm, pu_dir, pm_dir);
                }
                pm_dir = pmd_offset(pu_dir, address);
                if (pmd_none(*pm_dir)) {
                        /*
                         * Use 1 MB frames for vmemmap if available. We always
                         * use large frames even if they are only partially
                         * used. Otherwise we would also end up with page
                         * tables, since vmemmap_populate() gets called for
                         * each section separately.
                         */
                        if (MACHINE_HAS_EDAT1) {
                                void *new_page;

                                new_page = vmemmap_alloc_block(PMD_SIZE, node);
                                if (!new_page)
                                        goto out;
                                pmd_val(*pm_dir) = __pa(new_page) | sgt_prot;
                                address = (address + PMD_SIZE) & PMD_MASK;
                                continue;
                        }
                        pt_dir = vmem_pte_alloc();
                        if (!pt_dir)
                                goto out;
                        pmd_populate(&init_mm, pm_dir, pt_dir);
                } else if (pmd_large(*pm_dir)) {
                        address = (address + PMD_SIZE) & PMD_MASK;
                        continue;
                }
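                /* Allocate a backing page for this part of the struct page array. */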
                pt_dir = pte_offset_kernel(pm_dir, address);
                if (pte_none(*pt_dir)) {
                        void *new_page;

                        new_page = vmemmap_alloc_block(PAGE_SIZE, node);
                        if (!new_page)
                                goto out;
                        pte_val(*pt_dir) = __pa(new_page) | pgt_prot;
                }
                address += PAGE_SIZE;
        }
        ret = 0;
out:
        return ret;
}

void vmemmap_free(unsigned long start, unsigned long end)
{
}

/*
 * Add memory segment to the segment list if it doesn't overlap with
 * an already present segment.
 */
static int insert_memory_segment(struct memory_segment *seg)
{
        struct memory_segment *tmp;

        if (seg->start + seg->size > VMEM_MAX_PHYS ||
            seg->start + seg->size < seg->start)
                return -ERANGE;

        list_for_each_entry(tmp, &mem_segs, list) {
                if (seg->start >= tmp->start + tmp->size)
                        continue;
                if (seg->start + seg->size <= tmp->start)
                        continue;
                return -ENOSPC;
        }
        list_add(&seg->list, &mem_segs);
        return 0;
}

/*
 * Remove memory segment from the segment list.
 */
static void remove_memory_segment(struct memory_segment *seg)
{
        list_del(&seg->list);
}
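
/* Unlink the segment and tear down its 1:1 mapping. */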
static void __remove_shared_memory(struct memory_segment *seg)
{
        remove_memory_segment(seg);
        vmem_remove_range(seg->start, seg->size);
}
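
/*
 * Remove a mapping previously added with vmem_add_mapping(). The range
 * has to match an existing segment exactly.
 */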
int vmem_remove_mapping(unsigned long start, unsigned long size)
{
        struct memory_segment *seg;
        int ret;

        mutex_lock(&vmem_mutex);

        ret = -ENOENT;
        list_for_each_entry(seg, &mem_segs, list) {
                if (seg->start == start && seg->size == size)
                        break;
        }
        if (seg->start != start || seg->size != size)
                goto out;

        ret = 0;
        __remove_shared_memory(seg);
        kfree(seg);
out:
        mutex_unlock(&vmem_mutex);
        return ret;
}
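
/*
 * Register a new memory segment and add it to the 1:1 mapping.
 */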
int vmem_add_mapping(unsigned long start, unsigned long size)
{
        struct memory_segment *seg;
        int ret;

        mutex_lock(&vmem_mutex);
        ret = -ENOMEM;
        seg = kzalloc(sizeof(*seg), GFP_KERNEL);
        if (!seg)
                goto out;
        seg->start = start;
        seg->size = size;

        ret = insert_memory_segment(seg);
        if (ret)
                goto out_free;

        ret = vmem_add_mem(start, size);
        if (ret)
                goto out_remove;
        goto out;

out_remove:
        __remove_shared_memory(seg);
out_free:
        kfree(seg);
out:
        mutex_unlock(&vmem_mutex);
        return ret;
}

/*
 * Map the whole physical memory to virtual memory (identity mapping).
 * We reserve enough space in the vmalloc area for vmemmap to hotplug
 * additional memory segments.
 */
void __init vmem_map_init(void)
{
        struct memblock_region *reg;

        for_each_memblock(memory, reg)
                vmem_add_mem(reg->base, reg->size);
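        /* Write protect kernel text, read-only data and init text. */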
        __set_memory((unsigned long) _stext,
                     (_etext - _stext) >> PAGE_SHIFT,
                     SET_MEMORY_RO | SET_MEMORY_X);
        __set_memory((unsigned long) _etext,
                     (_eshared - _etext) >> PAGE_SHIFT,
                     SET_MEMORY_RO);
        __set_memory((unsigned long) _sinittext,
                     (_einittext - _sinittext) >> PAGE_SHIFT,
                     SET_MEMORY_RO | SET_MEMORY_X);
        pr_info("Write protected kernel read-only data: %luk\n",
                (_eshared - _stext) >> 10);
}

/*
 * Convert memblock.memory to a memory segment list so there is a single
 * list that contains all memory segments.
 */
static int __init vmem_convert_memory_chunk(void)
{
        struct memblock_region *reg;
        struct memory_segment *seg;

        mutex_lock(&vmem_mutex);
        for_each_memblock(memory, reg) {
                seg = kzalloc(sizeof(*seg), GFP_KERNEL);
                if (!seg)
                        panic("Out of memory...\n");
                seg->start = reg->base;
                seg->size = reg->size;
                insert_memory_segment(seg);
        }
        mutex_unlock(&vmem_mutex);
        return 0;
}

core_initcall(vmem_convert_memory_chunk);