pgtable-radix.c

/*
 * Page table handling routines for radix page table.
 *
 * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <linux/sched/mm.h>
#include <linux/memblock.h>
#include <linux/of_fdt.h>

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/dma.h>
#include <asm/machdep.h>
#include <asm/mmu.h>
#include <asm/firmware.h>
#include <asm/powernv.h>

#include <trace/events/thp.h>
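
/*
 * Bare-metal hook for registering the process table: point the second
 * doubleword of partition-table entry 0 at the table. PATB_GR marks the
 * process table as using radix translation. (Note: pg_sz is unused here.)
 */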
static int native_register_process_table(unsigned long base, unsigned long pg_sz,
					 unsigned long table_size)
{
	unsigned long patb1 = base | table_size | PATB_GR;

	partition_tb->patb1 = cpu_to_be64(patb1);
	return 0;
}
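
/*
 * Boot-time page-table allocator: grab a zeroed, naturally aligned
 * (align == size) chunk from memblock, for use before slab is up.
 */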
static __ref void *early_alloc_pgtable(unsigned long size)
{
	void *pt;

	pt = __va(memblock_alloc_base(size, size, MEMBLOCK_ALLOC_ANYWHERE));
	memset(pt, 0, size);

	return pt;
}
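
/*
 * Install a single kernel mapping of ea -> pa at the requested page size.
 * PUD_SIZE and PMD_SIZE requests become huge entries at that level;
 * anything else gets a normal PTE. Tables are allocated with the regular
 * allocators once slab is available, and from memblock otherwise.
 */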
int radix__map_kernel_page(unsigned long ea, unsigned long pa,
			   pgprot_t flags,
			   unsigned int map_page_size)
{
	pgd_t *pgdp;
	pud_t *pudp;
	pmd_t *pmdp;
	pte_t *ptep;
	/*
	 * Make sure task size is correct as per the max addr
	 */
	BUILD_BUG_ON(TASK_SIZE_USER64 > RADIX_PGTABLE_RANGE);
	if (slab_is_available()) {
		pgdp = pgd_offset_k(ea);
		pudp = pud_alloc(&init_mm, pgdp, ea);
		if (!pudp)
			return -ENOMEM;
		if (map_page_size == PUD_SIZE) {
			ptep = (pte_t *)pudp;
			goto set_the_pte;
		}
		pmdp = pmd_alloc(&init_mm, pudp, ea);
		if (!pmdp)
			return -ENOMEM;
		if (map_page_size == PMD_SIZE) {
			ptep = pmdp_ptep(pmdp);
			goto set_the_pte;
		}
		ptep = pte_alloc_kernel(pmdp, ea);
		if (!ptep)
			return -ENOMEM;
	} else {
		pgdp = pgd_offset_k(ea);
		if (pgd_none(*pgdp)) {
			pudp = early_alloc_pgtable(PUD_TABLE_SIZE);
			BUG_ON(pudp == NULL);
			pgd_populate(&init_mm, pgdp, pudp);
		}
		pudp = pud_offset(pgdp, ea);
		if (map_page_size == PUD_SIZE) {
			ptep = (pte_t *)pudp;
			goto set_the_pte;
		}
		if (pud_none(*pudp)) {
			pmdp = early_alloc_pgtable(PMD_TABLE_SIZE);
			BUG_ON(pmdp == NULL);
			pud_populate(&init_mm, pudp, pmdp);
		}
		pmdp = pmd_offset(pudp, ea);
		if (map_page_size == PMD_SIZE) {
			ptep = pmdp_ptep(pmdp);
			goto set_the_pte;
		}
		if (!pmd_present(*pmdp)) {
			ptep = early_alloc_pgtable(PAGE_SIZE);
			BUG_ON(ptep == NULL);
			pmd_populate_kernel(&init_mm, pmdp, ptep);
		}
		ptep = pte_offset_kernel(pmdp, ea);
	}

set_the_pte:
	set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, flags));
	smp_wmb();
	return 0;
}

static inline void __meminit print_mapping(unsigned long start,
					   unsigned long end,
					   unsigned long size)
{
	if (end <= start)
		return;

	pr_info("Mapped range 0x%lx - 0x%lx with 0x%lx\n", start, end, size);
}
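
/*
 * Map a physical range into the kernel linear mapping, greedily picking
 * the largest page size (1G, then 2M, then base pages) that the current
 * alignment, the remaining gap and the MMU-supported page sizes allow.
 * Each run of same-sized mappings is reported via print_mapping().
 */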
static int __meminit create_physical_mapping(unsigned long start,
					     unsigned long end)
{
	unsigned long addr, mapping_size = 0;

	start = _ALIGN_UP(start, PAGE_SIZE);
	for (addr = start; addr < end; addr += mapping_size) {
		unsigned long gap, previous_size;
		int rc;

		gap = end - addr;
		previous_size = mapping_size;

		if (IS_ALIGNED(addr, PUD_SIZE) && gap >= PUD_SIZE &&
		    mmu_psize_defs[MMU_PAGE_1G].shift)
			mapping_size = PUD_SIZE;
		else if (IS_ALIGNED(addr, PMD_SIZE) && gap >= PMD_SIZE &&
			 mmu_psize_defs[MMU_PAGE_2M].shift)
			mapping_size = PMD_SIZE;
		else
			mapping_size = PAGE_SIZE;

		if (mapping_size != previous_size) {
			print_mapping(start, addr, previous_size);
			start = addr;
		}

		rc = radix__map_kernel_page((unsigned long)__va(addr), addr,
					    PAGE_KERNEL_X, mapping_size);
		if (rc)
			return rc;
	}

	print_mapping(start, addr, mapping_size);
	return 0;
}
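
/*
 * Boot-time radix setup: build the linear mapping for every memblock
 * region, allocate and fill the process table (entry 0 points at the
 * kernel PGD), register it via register_process_table(), then flush
 * stale translations with a tlbie sequence.
 */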
static void __init radix_init_pgtable(void)
{
	unsigned long rts_field;
	struct memblock_region *reg;

	/* We don't support SLB for radix */
	mmu_slb_size = 0;
	/*
	 * Create the linear mapping, using standard page size for now
	 */
	for_each_memblock(memory, reg)
		WARN_ON(create_physical_mapping(reg->base,
						reg->base + reg->size));
	/*
	 * Allocate Partition table and process table for the
	 * host.
	 */
	BUILD_BUG_ON_MSG((PRTB_SIZE_SHIFT > 36), "Process table size too large.");
	process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT);
	/*
	 * Fill in the process table.
	 */
	rts_field = radix__get_tree_size();
	process_tb->prtb0 = cpu_to_be64(rts_field | __pa(init_mm.pgd) | RADIX_PGD_INDEX_SIZE);
	/*
	 * Fill in the partition table. We are supposed to use the effective
	 * address of the process table here, but our linear mapping also
	 * allows us to use the physical address directly.
	 */
	register_process_table(__pa(process_tb), 0, PRTB_SIZE_SHIFT - 12);
	pr_info("Process table %p and radix root for kernel: %p\n", process_tb, init_mm.pgd);
	asm volatile("ptesync" : : : "memory");
	asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : :
		     "r" (TLBIEL_INVAL_SET_LPID), "r" (0));
	asm volatile("eieio; tlbsync; ptesync" : : : "memory");
}

static void __init radix_init_partition_table(void)
{
	unsigned long rts_field, dw0;

	mmu_partition_table_init();
	rts_field = radix__get_tree_size();
	dw0 = rts_field | __pa(init_mm.pgd) | RADIX_PGD_INDEX_SIZE | PATB_HR;
	mmu_partition_table_set_entry(0, dw0, 0);

	pr_info("Initializing Radix MMU\n");
	pr_info("Partition table %p\n", partition_tb);
}

void __init radix_init_native(void)
{
	register_process_table = native_register_process_table;
}
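
/*
 * Translate a page-size shift reported by firmware into the kernel's
 * MMU_PAGE_* index; returns -1 for sizes radix does not use here.
 */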
static int __init get_idx_from_shift(unsigned int shift)
{
	int idx = -1;

	switch (shift) {
	case 0xc:
		idx = MMU_PAGE_4K;
		break;
	case 0x10:
		idx = MMU_PAGE_64K;
		break;
	case 0x15:
		idx = MMU_PAGE_2M;
		break;
	case 0x1e:
		idx = MMU_PAGE_1G;
		break;
	}
	return idx;
}
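
/*
 * Flattened-device-tree scanner for "ibm,processor-radix-AP-encodings"
 * on cpu nodes: each 32-bit cell carries the AP (actual page size)
 * encoding in its top 3 bits and the page-size shift in the remainder.
 */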
static int __init radix_dt_scan_page_sizes(unsigned long node,
					   const char *uname, int depth,
					   void *data)
{
	int size = 0;
	int shift, idx;
	unsigned int ap;
	const __be32 *prop;
	const char *type = of_get_flat_dt_prop(node, "device_type", NULL);

	/* We are scanning "cpu" nodes only */
	if (type == NULL || strcmp(type, "cpu") != 0)
		return 0;

	prop = of_get_flat_dt_prop(node, "ibm,processor-radix-AP-encodings", &size);
	if (!prop)
		return 0;

	pr_info("Page sizes from device-tree:\n");
	for (; size >= 4; size -= 4, ++prop) {
		struct mmu_psize_def *def;

		/* top 3 bits are the AP encoding */
		shift = be32_to_cpu(prop[0]) & ~(0xe << 28);
		ap = be32_to_cpu(prop[0]) >> 29;
		pr_info("Page size shift = %d AP=0x%x\n", shift, ap);

		idx = get_idx_from_shift(shift);
		if (idx < 0)
			continue;

		def = &mmu_psize_defs[idx];
		def->shift = shift;
		def->ap = ap;
	}

	/* needed ? */
	cur_cpu_spec->mmu_features &= ~MMU_FTR_NO_SLBIE_B;
	return 1;
}

void __init radix__early_init_devtree(void)
{
	int rc;

	/*
	 * Try to find the available page sizes in the device-tree
	 */
	rc = of_scan_flat_dt(radix_dt_scan_page_sizes, NULL);
	if (rc != 0)  /* Found */
		goto found;
	/*
	 * let's assume we have page 4k and 64k support
	 */
	mmu_psize_defs[MMU_PAGE_4K].shift = 12;
	mmu_psize_defs[MMU_PAGE_4K].ap = 0x0;

	mmu_psize_defs[MMU_PAGE_64K].shift = 16;
	mmu_psize_defs[MMU_PAGE_64K].ap = 0x5;
found:
#ifdef CONFIG_SPARSEMEM_VMEMMAP
	if (mmu_psize_defs[MMU_PAGE_2M].shift) {
		/*
		 * map vmemmap using 2M if available
		 */
		mmu_vmemmap_psize = MMU_PAGE_2M;
	}
#endif /* CONFIG_SPARSEMEM_VMEMMAP */
	return;
}
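
/*
 * POWER9 DD1 workaround: flush all translations for both partition-
 * and process-scoped entries (prs = 0 and prs = 1), then set
 * HID0_POWER9_RADIX to switch the core into radix mode and spin until
 * the bit reads back.
 */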
static void update_hid_for_radix(void)
{
	unsigned long hid0;
	unsigned long rb = 3UL << PPC_BITLSHIFT(53); /* IS = 3 */

	asm volatile("ptesync": : :"memory");
	/* prs = 0, ric = 2, rs = 0, r = 1, is = 3 */
	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(1), "i"(0), "i"(2), "r"(0) : "memory");
	/* prs = 1, ric = 2, rs = 0, r = 1, is = 3 */
	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(1), "i"(1), "i"(2), "r"(0) : "memory");
	asm volatile("eieio; tlbsync; ptesync; isync; slbia": : :"memory");
	/*
	 * now switch the HID
	 */
	hid0 = mfspr(SPRN_HID0);
	hid0 |= HID0_POWER9_RADIX;
	mtspr(SPRN_HID0, hid0);
	asm volatile("isync": : :"memory");

	/* Wait for it to happen */
	while (!(mfspr(SPRN_HID0) & HID0_POWER9_RADIX))
		cpu_relax();
}

static void radix_init_amor(void)
{
	/*
	 * In HV mode, we init AMOR (Authority Mask Override Register) so
	 * that the hypervisor and guest can set up IAMR (Instruction
	 * Authority Mask Register), enable key 0 and set it to 1.
	 *
	 * AMOR = 0b1100 .... 0000 (Mask for key 0 is 11)
	 */
	mtspr(SPRN_AMOR, (3ul << 62));
}

static void radix_init_iamr(void)
{
	unsigned long iamr;

	/*
	 * The IAMR should be set to 0 on DD1.
	 */
	if (cpu_has_feature(CPU_FTR_POWER9_DD1))
		iamr = 0;
	else
		iamr = (1ul << 62);

	/*
	 * Radix always uses key0 of the IAMR to determine if an access is
	 * allowed. We set bit 0 (IBM bit 1) of key0, to prevent instruction
	 * fetch.
	 */
	mtspr(SPRN_IAMR, iamr);
}
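
/*
 * One-time MMU setup on the boot CPU: pick the base and vmemmap page
 * sizes, publish the radix table geometry through the generic
 * __p??_index/size variables, then do the bare-metal (partition table,
 * AMOR) or LPAR (pseries) specific initialization.
 */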
void __init radix__early_init_mmu(void)
{
	unsigned long lpcr;

#ifdef CONFIG_PPC_64K_PAGES
	/* PAGE_SIZE mappings */
	mmu_virtual_psize = MMU_PAGE_64K;
#else
	mmu_virtual_psize = MMU_PAGE_4K;
#endif

#ifdef CONFIG_SPARSEMEM_VMEMMAP
	/* vmemmap mapping */
	mmu_vmemmap_psize = mmu_virtual_psize;
#endif
	/*
	 * initialize page table size
	 */
	__pte_index_size = RADIX_PTE_INDEX_SIZE;
	__pmd_index_size = RADIX_PMD_INDEX_SIZE;
	__pud_index_size = RADIX_PUD_INDEX_SIZE;
	__pgd_index_size = RADIX_PGD_INDEX_SIZE;
	__pmd_cache_index = RADIX_PMD_INDEX_SIZE;
	__pte_table_size = RADIX_PTE_TABLE_SIZE;
	__pmd_table_size = RADIX_PMD_TABLE_SIZE;
	__pud_table_size = RADIX_PUD_TABLE_SIZE;
	__pgd_table_size = RADIX_PGD_TABLE_SIZE;

	__pmd_val_bits = RADIX_PMD_VAL_BITS;
	__pud_val_bits = RADIX_PUD_VAL_BITS;
	__pgd_val_bits = RADIX_PGD_VAL_BITS;

	__kernel_virt_start = RADIX_KERN_VIRT_START;
	__kernel_virt_size = RADIX_KERN_VIRT_SIZE;
	__vmalloc_start = RADIX_VMALLOC_START;
	__vmalloc_end = RADIX_VMALLOC_END;
	vmemmap = (struct page *)RADIX_VMEMMAP_BASE;
	ioremap_bot = IOREMAP_BASE;

#ifdef CONFIG_PCI
	pci_io_base = ISA_IO_BASE;
#endif

	/*
	 * For now radix also uses the same frag size
	 */
	__pte_frag_nr = H_PTE_FRAG_NR;
	__pte_frag_size_shift = H_PTE_FRAG_SIZE_SHIFT;

	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
		radix_init_native();
		if (cpu_has_feature(CPU_FTR_POWER9_DD1))
			update_hid_for_radix();
		lpcr = mfspr(SPRN_LPCR);
		mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);
		radix_init_partition_table();
		radix_init_amor();
	} else {
		radix_init_pseries();
	}

	memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);

	radix_init_iamr();
	radix_init_pgtable();
}

void radix__early_init_mmu_secondary(void)
{
	unsigned long lpcr;
	/*
	 * update partition table control register and UPRT
	 */
	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
		if (cpu_has_feature(CPU_FTR_POWER9_DD1))
			update_hid_for_radix();

		lpcr = mfspr(SPRN_LPCR);
		mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);

		mtspr(SPRN_PTCR,
		      __pa(partition_tb) | (PATB_SIZE_SHIFT - 12));
		radix_init_amor();
	}
	radix_init_iamr();
}

void radix__mmu_cleanup_all(void)
{
	unsigned long lpcr;

	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
		lpcr = mfspr(SPRN_LPCR);
		mtspr(SPRN_LPCR, lpcr & ~LPCR_UPRT);
		mtspr(SPRN_PTCR, 0);
		powernv_set_nmmu_ptcr(0);
		radix__flush_tlb_all();
	}
}

void radix__setup_initial_memory_limit(phys_addr_t first_memblock_base,
				       phys_addr_t first_memblock_size)
{
	/* We don't currently support the first MEMBLOCK not mapping 0
	 * physical on those processors
	 */
	BUG_ON(first_memblock_base != 0);
	/*
	 * We limit the allocations that depend on ppc64_rma_size
	 * to first_memblock_size. We also clamp it to 1GB to
	 * avoid some funky things such as RTAS bugs.
	 *
	 * On radix config we really don't have a limitation
	 * on real mode access. But keeping it as above works
	 * well enough.
	 */
	ppc64_rma_size = min_t(u64, first_memblock_size, 0x40000000);
	/*
	 * Finally limit subsequent allocations. We really don't want
	 * to limit the memblock allocations to rma_size. FIXME!! should
	 * we even limit at all ?
	 */
	memblock_set_current_limit(first_memblock_base + first_memblock_size);
}

#ifdef CONFIG_MEMORY_HOTPLUG
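/*
 * Hotplug teardown helpers: free a PTE (or PMD) page only when every
 * entry in it is none, then clear the upper-level entry pointing to it.
 */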
static void free_pte_table(pte_t *pte_start, pmd_t *pmd)
{
	pte_t *pte;
	int i;

	for (i = 0; i < PTRS_PER_PTE; i++) {
		pte = pte_start + i;
		if (!pte_none(*pte))
			return;
	}

	pte_free_kernel(&init_mm, pte_start);
	pmd_clear(pmd);
}

static void free_pmd_table(pmd_t *pmd_start, pud_t *pud)
{
	pmd_t *pmd;
	int i;

	for (i = 0; i < PTRS_PER_PMD; i++) {
		pmd = pmd_start + i;
		if (!pmd_none(*pmd))
			return;
	}

	pmd_free(&init_mm, pmd_start);
	pud_clear(pud);
}

static void remove_pte_table(pte_t *pte_start, unsigned long addr,
			     unsigned long end)
{
	unsigned long next;
	pte_t *pte;

	pte = pte_start + pte_index(addr);
	for (; addr < end; addr = next, pte++) {
		next = (addr + PAGE_SIZE) & PAGE_MASK;
		if (next > end)
			next = end;

		if (!pte_present(*pte))
			continue;

		if (!PAGE_ALIGNED(addr) || !PAGE_ALIGNED(next)) {
			/*
			 * The vmemmap_free() and remove_section_mapping()
			 * codepaths call us with aligned addresses.
			 */
			WARN_ONCE(1, "%s: unaligned range\n", __func__);
			continue;
		}

		pte_clear(&init_mm, addr, pte);
	}
}

static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
			     unsigned long end)
{
	unsigned long next;
	pte_t *pte_base;
	pmd_t *pmd;

	pmd = pmd_start + pmd_index(addr);
	for (; addr < end; addr = next, pmd++) {
		next = pmd_addr_end(addr, end);

		if (!pmd_present(*pmd))
			continue;

		if (pmd_huge(*pmd)) {
			if (!IS_ALIGNED(addr, PMD_SIZE) ||
			    !IS_ALIGNED(next, PMD_SIZE)) {
				WARN_ONCE(1, "%s: unaligned range\n", __func__);
				continue;
			}

			pte_clear(&init_mm, addr, (pte_t *)pmd);
			continue;
		}

		pte_base = (pte_t *)pmd_page_vaddr(*pmd);
		remove_pte_table(pte_base, addr, next);
		free_pte_table(pte_base, pmd);
	}
}

static void remove_pud_table(pud_t *pud_start, unsigned long addr,
			     unsigned long end)
{
	unsigned long next;
	pmd_t *pmd_base;
	pud_t *pud;

	pud = pud_start + pud_index(addr);
	for (; addr < end; addr = next, pud++) {
		next = pud_addr_end(addr, end);

		if (!pud_present(*pud))
			continue;

		if (pud_huge(*pud)) {
			if (!IS_ALIGNED(addr, PUD_SIZE) ||
			    !IS_ALIGNED(next, PUD_SIZE)) {
				WARN_ONCE(1, "%s: unaligned range\n", __func__);
				continue;
			}

			pte_clear(&init_mm, addr, (pte_t *)pud);
			continue;
		}

		pmd_base = (pmd_t *)pud_page_vaddr(*pud);
		remove_pmd_table(pmd_base, addr, next);
		free_pmd_table(pmd_base, pud);
	}
}
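
/*
 * Top-level teardown walk for [start, end): descend the kernel page
 * tables, clearing huge entries at the level they are mapped and
 * freeing now-empty lower tables, then flush the kernel TLB range.
 */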
static void remove_pagetable(unsigned long start, unsigned long end)
{
	unsigned long addr, next;
	pud_t *pud_base;
	pgd_t *pgd;

	spin_lock(&init_mm.page_table_lock);

	for (addr = start; addr < end; addr = next) {
		next = pgd_addr_end(addr, end);

		pgd = pgd_offset_k(addr);
		if (!pgd_present(*pgd))
			continue;

		if (pgd_huge(*pgd)) {
			if (!IS_ALIGNED(addr, PGDIR_SIZE) ||
			    !IS_ALIGNED(next, PGDIR_SIZE)) {
				WARN_ONCE(1, "%s: unaligned range\n", __func__);
				continue;
			}

			pte_clear(&init_mm, addr, (pte_t *)pgd);
			continue;
		}

		pud_base = (pud_t *)pgd_page_vaddr(*pgd);
		remove_pud_table(pud_base, addr, next);
	}

	spin_unlock(&init_mm.page_table_lock);
	radix__flush_tlb_kernel_range(start, end);
}

int __ref radix__create_section_mapping(unsigned long start, unsigned long end)
{
	return create_physical_mapping(start, end);
}

int radix__remove_section_mapping(unsigned long start, unsigned long end)
{
	remove_pagetable(start, end);
	return 0;
}
#endif /* CONFIG_MEMORY_HOTPLUG */

#ifdef CONFIG_SPARSEMEM_VMEMMAP
int __meminit radix__vmemmap_create_mapping(unsigned long start,
					    unsigned long page_size,
					    unsigned long phys)
{
	/* Create a PTE encoding */
	unsigned long flags = _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_KERNEL_RW;

	BUG_ON(radix__map_kernel_page(start, phys, __pgprot(flags), page_size));
	return 0;
}

#ifdef CONFIG_MEMORY_HOTPLUG
void radix__vmemmap_remove_mapping(unsigned long start, unsigned long page_size)
{
	remove_pagetable(start, start + page_size);
}
#endif
#endif

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
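/*
 * Clear and/or set bits in a huge PMD by reusing radix__pte_update()
 * with huge = 1; the old PMD value is returned and the transition is
 * traced via trace_hugepage_update().
 */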
unsigned long radix__pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
					 pmd_t *pmdp, unsigned long clr,
					 unsigned long set)
{
	unsigned long old;

#ifdef CONFIG_DEBUG_VM
	WARN_ON(!radix__pmd_trans_huge(*pmdp));
	assert_spin_locked(&mm->page_table_lock);
#endif

	old = radix__pte_update(mm, addr, (pte_t *)pmdp, clr, set, 1);
	trace_hugepage_update(addr, old, clr, set);

	return old;
}

pmd_t radix__pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address,
				 pmd_t *pmdp)
{
	pmd_t pmd;

	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
	VM_BUG_ON(radix__pmd_trans_huge(*pmdp));
	/*
	 * khugepaged calls this for normal pmd
	 */
	pmd = *pmdp;
	pmd_clear(pmdp);
	/*FIXME!! Verify whether we need this kick below */
	kick_all_cpus_sync();
	flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
	return pmd;
}

/*
 * For us pgtable_t is pte_t *. In order to save the deposited
 * page table, we consider the allocated page table as a list
 * head. On withdraw we need to make sure we zero out the used
 * list_head memory area.
 */
void radix__pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
				       pgtable_t pgtable)
{
	struct list_head *lh = (struct list_head *) pgtable;

	assert_spin_locked(pmd_lockptr(mm, pmdp));

	/* FIFO */
	if (!pmd_huge_pte(mm, pmdp))
		INIT_LIST_HEAD(lh);
	else
		list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
	pmd_huge_pte(mm, pmdp) = pgtable;
}

pgtable_t radix__pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
{
	pte_t *ptep;
	pgtable_t pgtable;
	struct list_head *lh;

	assert_spin_locked(pmd_lockptr(mm, pmdp));

	/* FIFO */
	pgtable = pmd_huge_pte(mm, pmdp);
	lh = (struct list_head *) pgtable;
	if (list_empty(lh))
		pmd_huge_pte(mm, pmdp) = NULL;
	else {
		pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
		list_del(lh);
	}
	ptep = (pte_t *) pgtable;
	*ptep = __pte(0);
	ptep++;
	*ptep = __pte(0);
	return pgtable;
}

pmd_t radix__pmdp_huge_get_and_clear(struct mm_struct *mm,
				     unsigned long addr, pmd_t *pmdp)
{
	pmd_t old_pmd;
	unsigned long old;

	old = radix__pmd_hugepage_update(mm, addr, pmdp, ~0UL, 0);
	old_pmd = __pmd(old);
	/*
	 * Serialize against find_linux_pte_or_hugepte which does lock-less
	 * lookup in page tables with local interrupts disabled. For huge pages
	 * it casts pmd_t to pte_t. Since the format of pte_t is different from
	 * pmd_t we want to prevent a transition from a pmd pointing to a page
	 * table to a pmd pointing to a huge page (and back) while interrupts
	 * are disabled. We clear the pmd to possibly replace it with a page
	 * table pointer in different code paths. So make sure we wait for the
	 * parallel find_linux_pte_or_hugepte to finish.
	 */
	kick_all_cpus_sync();
	return old_pmd;
}

int radix__has_transparent_hugepage(void)
{
	/* For radix 2M at PMD level means thp */
	if (mmu_psize_defs[MMU_PAGE_2M].shift == PMD_SHIFT)
		return 1;
	return 0;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */