pgtable-radix.c

/*
 * Page table handling routines for radix page table.
 *
 * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#define pr_fmt(fmt) "radix-mmu: " fmt

#include <linux/kernel.h>
#include <linux/sched/mm.h>
#include <linux/memblock.h>
#include <linux/of_fdt.h>
#include <linux/mm.h>
#include <linux/string_helpers.h>

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/dma.h>
#include <asm/machdep.h>
#include <asm/mmu.h>
#include <asm/firmware.h>
#include <asm/powernv.h>
#include <asm/sections.h>
#include <asm/trace.h>

#include <trace/events/thp.h>

unsigned int mmu_pid_bits;
unsigned int mmu_base_pid;
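
/*
 * Install the host process table into partition-table entry 0: the
 * second doubleword (patb1) gets the table base, the size encoding
 * and the PATB_GR bit, while the existing first doubleword (patb0)
 * is preserved.
 */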
static int native_register_process_table(unsigned long base, unsigned long pg_sz,
                                         unsigned long table_size)
{
        unsigned long patb0, patb1;

        patb0 = be64_to_cpu(partition_tb[0].patb0);
        patb1 = base | table_size | PATB_GR;
        mmu_partition_table_set_entry(0, patb0, patb1);

        return 0;
}
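
/*
 * Allocate zeroed, naturally aligned memory from memblock; used for
 * page tables before the slab allocator is available.
 */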
static __ref void *early_alloc_pgtable(unsigned long size)
{
        void *pt;

        pt = __va(memblock_alloc_base(size, size, MEMBLOCK_ALLOC_ANYWHERE));
        memset(pt, 0, size);

        return pt;
}
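
/*
 * Map a single kernel page at effective address 'ea' to physical
 * address 'pa' with the given protection, at PUD, PMD or PTE level
 * depending on map_page_size. Page tables come from the normal
 * allocators once slab is up, and from memblock before that.
 */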
int radix__map_kernel_page(unsigned long ea, unsigned long pa,
                           pgprot_t flags,
                           unsigned int map_page_size)
{
        pgd_t *pgdp;
        pud_t *pudp;
        pmd_t *pmdp;
        pte_t *ptep;
        /*
         * Make sure task size is correct as per the max addr
         */
        BUILD_BUG_ON(TASK_SIZE_USER64 > RADIX_PGTABLE_RANGE);
        if (slab_is_available()) {
                pgdp = pgd_offset_k(ea);
                pudp = pud_alloc(&init_mm, pgdp, ea);
                if (!pudp)
                        return -ENOMEM;
                if (map_page_size == PUD_SIZE) {
                        ptep = (pte_t *)pudp;
                        goto set_the_pte;
                }
                pmdp = pmd_alloc(&init_mm, pudp, ea);
                if (!pmdp)
                        return -ENOMEM;
                if (map_page_size == PMD_SIZE) {
                        ptep = pmdp_ptep(pmdp);
                        goto set_the_pte;
                }
                ptep = pte_alloc_kernel(pmdp, ea);
                if (!ptep)
                        return -ENOMEM;
        } else {
                pgdp = pgd_offset_k(ea);
                if (pgd_none(*pgdp)) {
                        pudp = early_alloc_pgtable(PUD_TABLE_SIZE);
                        BUG_ON(pudp == NULL);
                        pgd_populate(&init_mm, pgdp, pudp);
                }
                pudp = pud_offset(pgdp, ea);
                if (map_page_size == PUD_SIZE) {
                        ptep = (pte_t *)pudp;
                        goto set_the_pte;
                }
                if (pud_none(*pudp)) {
                        pmdp = early_alloc_pgtable(PMD_TABLE_SIZE);
                        BUG_ON(pmdp == NULL);
                        pud_populate(&init_mm, pudp, pmdp);
                }
                pmdp = pmd_offset(pudp, ea);
                if (map_page_size == PMD_SIZE) {
                        ptep = pmdp_ptep(pmdp);
                        goto set_the_pte;
                }
                if (!pmd_present(*pmdp)) {
                        ptep = early_alloc_pgtable(PAGE_SIZE);
                        BUG_ON(ptep == NULL);
                        pmd_populate_kernel(&init_mm, pmdp, ptep);
                }
                ptep = pte_offset_kernel(pmdp, ea);
        }

set_the_pte:
        set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, flags));
        smp_wmb();
        return 0;
}

#ifdef CONFIG_STRICT_KERNEL_RWX
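/*
 * Walk the kernel mapping for [start, end) and clear the given PTE
 * flag bits (e.g. _PAGE_WRITE or _PAGE_EXEC), then flush the TLB for
 * the whole range.
 */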
void radix__change_memory_range(unsigned long start, unsigned long end,
                                unsigned long clear)
{
        unsigned long idx;
        pgd_t *pgdp;
        pud_t *pudp;
        pmd_t *pmdp;
        pte_t *ptep;

        start = ALIGN_DOWN(start, PAGE_SIZE);
        end = PAGE_ALIGN(end); // aligns up

        pr_debug("Changing flags on range %lx-%lx removing 0x%lx\n",
                 start, end, clear);

        for (idx = start; idx < end; idx += PAGE_SIZE) {
                pgdp = pgd_offset_k(idx);
                pudp = pud_alloc(&init_mm, pgdp, idx);
                if (!pudp)
                        continue;
                if (pud_huge(*pudp)) {
                        ptep = (pte_t *)pudp;
                        goto update_the_pte;
                }
                pmdp = pmd_alloc(&init_mm, pudp, idx);
                if (!pmdp)
                        continue;
                if (pmd_huge(*pmdp)) {
                        ptep = pmdp_ptep(pmdp);
                        goto update_the_pte;
                }
                ptep = pte_alloc_kernel(pmdp, idx);
                if (!ptep)
                        continue;
update_the_pte:
                radix__pte_update(&init_mm, idx, ptep, clear, 0, 0);
        }

        radix__flush_tlb_kernel_range(start, end);
}
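
/* Remove write permission from the kernel text/rodata (_stext.._init_begin). */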
void radix__mark_rodata_ro(void)
{
        unsigned long start, end;

        /*
         * mark_rodata_ro() will mark itself as !writable at some point.
         * Due to the DD1 workaround in radix__pte_update(), we'll end up
         * with an invalid pte and the system will crash quite severely.
         */
        if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
                pr_warn("Warning: Unable to mark rodata read only on P9 DD1\n");
                return;
        }

        start = (unsigned long)_stext;
        end = (unsigned long)__init_begin;

        radix__change_memory_range(start, end, _PAGE_WRITE);
}

void radix__mark_initmem_nx(void)
{
        unsigned long start = (unsigned long)__init_begin;
        unsigned long end = (unsigned long)__init_end;

        radix__change_memory_range(start, end, _PAGE_EXEC);
}
#endif /* CONFIG_STRICT_KERNEL_RWX */

static inline void __meminit print_mapping(unsigned long start,
                                           unsigned long end,
                                           unsigned long size)
{
        char buf[10];

        if (end <= start)
                return;

        string_get_size(size, 1, STRING_UNITS_2, buf, sizeof(buf));

        pr_info("Mapped 0x%016lx-0x%016lx with %s pages\n", start, end, buf);
}
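
/*
 * Populate the kernel linear mapping for [start, end) of physical
 * memory, preferring 1G, then 2M, then base-size pages. With
 * STRICT_KERNEL_RWX, huge mappings that would straddle the kernel
 * text boundary are split so the text can later be marked read-only.
 */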
static int __meminit create_physical_mapping(unsigned long start,
                                             unsigned long end)
{
        unsigned long vaddr, addr, mapping_size = 0;
        pgprot_t prot;
        unsigned long max_mapping_size;
#ifdef CONFIG_STRICT_KERNEL_RWX
        int split_text_mapping = 1;
#else
        int split_text_mapping = 0;
#endif

        start = _ALIGN_UP(start, PAGE_SIZE);
        for (addr = start; addr < end; addr += mapping_size) {
                unsigned long gap, previous_size;
                int rc;

                gap = end - addr;
                previous_size = mapping_size;
                max_mapping_size = PUD_SIZE;

retry:
                if (IS_ALIGNED(addr, PUD_SIZE) && gap >= PUD_SIZE &&
                    mmu_psize_defs[MMU_PAGE_1G].shift &&
                    PUD_SIZE <= max_mapping_size)
                        mapping_size = PUD_SIZE;
                else if (IS_ALIGNED(addr, PMD_SIZE) && gap >= PMD_SIZE &&
                         mmu_psize_defs[MMU_PAGE_2M].shift)
                        mapping_size = PMD_SIZE;
                else
                        mapping_size = PAGE_SIZE;

                if (split_text_mapping && (mapping_size == PUD_SIZE) &&
                    (addr <= __pa_symbol(__init_begin)) &&
                    (addr + mapping_size) >= __pa_symbol(_stext)) {
                        max_mapping_size = PMD_SIZE;
                        goto retry;
                }

                if (split_text_mapping && (mapping_size == PMD_SIZE) &&
                    (addr <= __pa_symbol(__init_begin)) &&
                    (addr + mapping_size) >= __pa_symbol(_stext))
                        mapping_size = PAGE_SIZE;

                if (mapping_size != previous_size) {
                        print_mapping(start, addr, previous_size);
                        start = addr;
                }

                vaddr = (unsigned long)__va(addr);

                if (overlaps_kernel_text(vaddr, vaddr + mapping_size) ||
                    overlaps_interrupt_vector_text(vaddr, vaddr + mapping_size))
                        prot = PAGE_KERNEL_X;
                else
                        prot = PAGE_KERNEL;

                rc = radix__map_kernel_page(vaddr, addr, prot, mapping_size);
                if (rc)
                        return rc;
        }

        print_mapping(start, addr, mapping_size);
        return 0;
}
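
/*
 * Boot-time radix setup: create the linear mapping for every memblock
 * region, size the PID space, allocate and fill the process table,
 * and flush stale translations.
 */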
static void __init radix_init_pgtable(void)
{
        unsigned long rts_field;
        struct memblock_region *reg;

        /* We don't support SLB for radix */
        mmu_slb_size = 0;
        /*
         * Create the linear mapping, using standard page size for now
         */
        for_each_memblock(memory, reg)
                WARN_ON(create_physical_mapping(reg->base,
                                                reg->base + reg->size));

        /* Find out how many PID bits are supported */
        if (cpu_has_feature(CPU_FTR_HVMODE)) {
                if (!mmu_pid_bits)
                        mmu_pid_bits = 20;
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
                /*
                 * When KVM is possible, we only use the top half of the
                 * PID space to avoid collisions between host and guest PIDs
                 * which can cause problems due to prefetch when exiting the
                 * guest with AIL=3
                 */
                mmu_base_pid = 1 << (mmu_pid_bits - 1);
#else
                mmu_base_pid = 1;
#endif
        } else {
                /* The guest uses the bottom half of the PID space */
                if (!mmu_pid_bits)
                        mmu_pid_bits = 19;
                mmu_base_pid = 1;
        }

        /*
         * Allocate the partition table and process table for the
         * host.
         */
        BUG_ON(PRTB_SIZE_SHIFT > 36);
        process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT);
        /*
         * Fill in the process table.
         */
        rts_field = radix__get_tree_size();
        process_tb->prtb0 = cpu_to_be64(rts_field | __pa(init_mm.pgd) | RADIX_PGD_INDEX_SIZE);
        /*
         * Fill in the partition table. We are supposed to use the effective
         * address of the process table here, but our linear mapping also
         * enables us to use the physical address.
         */
        register_process_table(__pa(process_tb), 0, PRTB_SIZE_SHIFT - 12);
        pr_info("Process table %p and radix root for kernel: %p\n", process_tb, init_mm.pgd);
        asm volatile("ptesync" : : : "memory");
        asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : :
                     "r" (TLBIEL_INVAL_SET_LPID), "r" (0));
        asm volatile("eieio; tlbsync; ptesync" : : : "memory");
        trace_tlbie(0, 0, TLBIEL_INVAL_SET_LPID, 0, 2, 1, 1);
}

static void __init radix_init_partition_table(void)
{
        unsigned long rts_field, dw0;

        mmu_partition_table_init();
        rts_field = radix__get_tree_size();
        dw0 = rts_field | __pa(init_mm.pgd) | RADIX_PGD_INDEX_SIZE | PATB_HR;
        mmu_partition_table_set_entry(0, dw0, 0);

        pr_info("Initializing Radix MMU\n");
        pr_info("Partition table %p\n", partition_tb);
}

void __init radix_init_native(void)
{
        register_process_table = native_register_process_table;
}
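
/*
 * Translate a page-size shift from the device tree into an
 * MMU_PAGE_* index; returns -1 for unsupported sizes.
 */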
static int __init get_idx_from_shift(unsigned int shift)
{
        int idx = -1;

        switch (shift) {
        case 0xc:
                idx = MMU_PAGE_4K;
                break;
        case 0x10:
                idx = MMU_PAGE_64K;
                break;
        case 0x15:
                idx = MMU_PAGE_2M;
                break;
        case 0x1e:
                idx = MMU_PAGE_1G;
                break;
        }
        return idx;
}
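
/*
 * Flat device-tree scan callback: on "cpu" nodes, read the
 * ibm,mmu-pid-bits and ibm,processor-radix-AP-encodings properties
 * and fill in mmu_psize_defs accordingly.
 */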
static int __init radix_dt_scan_page_sizes(unsigned long node,
                                           const char *uname, int depth,
                                           void *data)
{
        int size = 0;
        int shift, idx;
        unsigned int ap;
        const __be32 *prop;
        const char *type = of_get_flat_dt_prop(node, "device_type", NULL);

        /* We are scanning "cpu" nodes only */
        if (type == NULL || strcmp(type, "cpu") != 0)
                return 0;

        /* Find MMU PID size */
        prop = of_get_flat_dt_prop(node, "ibm,mmu-pid-bits", &size);
        if (prop && size == 4)
                mmu_pid_bits = be32_to_cpup(prop);

        /* Grab page size encodings */
        prop = of_get_flat_dt_prop(node, "ibm,processor-radix-AP-encodings", &size);
        if (!prop)
                return 0;

        pr_info("Page sizes from device-tree:\n");
        for (; size >= 4; size -= 4, ++prop) {
                struct mmu_psize_def *def;

                /* top 3 bits are the AP encoding */
                shift = be32_to_cpu(prop[0]) & ~(0xe << 28);
                ap = be32_to_cpu(prop[0]) >> 29;
                pr_info("Page size shift = %d AP=0x%x\n", shift, ap);

                idx = get_idx_from_shift(shift);
                if (idx < 0)
                        continue;

                def = &mmu_psize_defs[idx];
                def->shift = shift;
                def->ap = ap;
        }

        /* needed ? */
        cur_cpu_spec->mmu_features &= ~MMU_FTR_NO_SLBIE_B;
        return 1;
}

void __init radix__early_init_devtree(void)
{
        int rc;

        /*
         * Try to find the available page sizes in the device-tree
         */
        rc = of_scan_flat_dt(radix_dt_scan_page_sizes, NULL);
        if (rc != 0) /* Found */
                goto found;
        /*
         * Let's assume we have 4K and 64K page support
         */
        mmu_psize_defs[MMU_PAGE_4K].shift = 12;
        mmu_psize_defs[MMU_PAGE_4K].ap = 0x0;

        mmu_psize_defs[MMU_PAGE_64K].shift = 16;
        mmu_psize_defs[MMU_PAGE_64K].ap = 0x5;

found:
#ifdef CONFIG_SPARSEMEM_VMEMMAP
        if (mmu_psize_defs[MMU_PAGE_2M].shift) {
                /*
                 * map vmemmap using 2M if available
                 */
                mmu_vmemmap_psize = MMU_PAGE_2M;
        }
#endif /* CONFIG_SPARSEMEM_VMEMMAP */
        return;
}
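
/*
 * POWER9 DD1 workaround: flush all translations, set HID0_POWER9_RADIX,
 * and wait until the mode switch has taken effect.
 */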
static void update_hid_for_radix(void)
{
        unsigned long hid0;
        unsigned long rb = 3UL << PPC_BITLSHIFT(53); /* IS = 3 */

        asm volatile("ptesync": : :"memory");
        /* prs = 0, ric = 2, rs = 0, r = 1 is = 3 */
        asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
                     : : "r"(rb), "i"(1), "i"(0), "i"(2), "r"(0) : "memory");
        /* prs = 1, ric = 2, rs = 0, r = 1 is = 3 */
        asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
                     : : "r"(rb), "i"(1), "i"(1), "i"(2), "r"(0) : "memory");
        asm volatile("eieio; tlbsync; ptesync; isync; slbia": : :"memory");
        trace_tlbie(0, 0, rb, 0, 2, 0, 1);
        trace_tlbie(0, 0, rb, 0, 2, 1, 1);
        /*
         * now switch the HID
         */
        hid0 = mfspr(SPRN_HID0);
        hid0 |= HID0_POWER9_RADIX;
        mtspr(SPRN_HID0, hid0);
        asm volatile("isync": : :"memory");

        /* Wait for it to happen */
        while (!(mfspr(SPRN_HID0) & HID0_POWER9_RADIX))
                cpu_relax();
}

static void radix_init_amor(void)
{
        /*
         * In HV mode, we init AMOR (Authority Mask Override Register) so
         * that the hypervisor and guest can set up IAMR (Instruction
         * Authority Mask Register), enable key 0 and set it to 1.
         *
         * AMOR = 0b1100 .... 0000 (Mask for key 0 is 11)
         */
        mtspr(SPRN_AMOR, (3ul << 62));
}

static void radix_init_iamr(void)
{
        unsigned long iamr;

        /*
         * The IAMR should be set to 0 on DD1.
         */
        if (cpu_has_feature(CPU_FTR_POWER9_DD1))
                iamr = 0;
        else
                iamr = (1ul << 62);

        /*
         * Radix always uses key0 of the IAMR to determine if an access is
         * allowed. We set bit 0 (IBM bit 1) of key0, to prevent instruction
         * fetch.
         */
        mtspr(SPRN_IAMR, iamr);
}

void __init radix__early_init_mmu(void)
{
        unsigned long lpcr;

#ifdef CONFIG_PPC_64K_PAGES
        /* PAGE_SIZE mappings */
        mmu_virtual_psize = MMU_PAGE_64K;
#else
        mmu_virtual_psize = MMU_PAGE_4K;
#endif

#ifdef CONFIG_SPARSEMEM_VMEMMAP
        /* vmemmap mapping */
        mmu_vmemmap_psize = mmu_virtual_psize;
#endif
        /*
         * initialize page table size
         */
        __pte_index_size = RADIX_PTE_INDEX_SIZE;
        __pmd_index_size = RADIX_PMD_INDEX_SIZE;
        __pud_index_size = RADIX_PUD_INDEX_SIZE;
        __pgd_index_size = RADIX_PGD_INDEX_SIZE;
        __pmd_cache_index = RADIX_PMD_INDEX_SIZE;
        __pte_table_size = RADIX_PTE_TABLE_SIZE;
        __pmd_table_size = RADIX_PMD_TABLE_SIZE;
        __pud_table_size = RADIX_PUD_TABLE_SIZE;
        __pgd_table_size = RADIX_PGD_TABLE_SIZE;

        __pmd_val_bits = RADIX_PMD_VAL_BITS;
        __pud_val_bits = RADIX_PUD_VAL_BITS;
        __pgd_val_bits = RADIX_PGD_VAL_BITS;

        __kernel_virt_start = RADIX_KERN_VIRT_START;
        __kernel_virt_size = RADIX_KERN_VIRT_SIZE;
        __vmalloc_start = RADIX_VMALLOC_START;
        __vmalloc_end = RADIX_VMALLOC_END;
        __kernel_io_start = RADIX_KERN_IO_START;
        vmemmap = (struct page *)RADIX_VMEMMAP_BASE;
        ioremap_bot = IOREMAP_BASE;

#ifdef CONFIG_PCI
        pci_io_base = ISA_IO_BASE;
#endif

        /*
         * For now radix also uses the same frag size
         */
        __pte_frag_nr = H_PTE_FRAG_NR;
        __pte_frag_size_shift = H_PTE_FRAG_SIZE_SHIFT;

        if (!firmware_has_feature(FW_FEATURE_LPAR)) {
                radix_init_native();
                if (cpu_has_feature(CPU_FTR_POWER9_DD1))
                        update_hid_for_radix();
                lpcr = mfspr(SPRN_LPCR);
                mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);
                radix_init_partition_table();
                radix_init_amor();
        } else {
                radix_init_pseries();
        }

        memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);

        radix_init_iamr();
        radix_init_pgtable();

        if (cpu_has_feature(CPU_FTR_HVMODE))
                tlbiel_all();
}

void radix__early_init_mmu_secondary(void)
{
        unsigned long lpcr;
        /*
         * update partition table control register and UPRT
         */
        if (!firmware_has_feature(FW_FEATURE_LPAR)) {

                if (cpu_has_feature(CPU_FTR_POWER9_DD1))
                        update_hid_for_radix();

                lpcr = mfspr(SPRN_LPCR);
                mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);

                mtspr(SPRN_PTCR,
                      __pa(partition_tb) | (PATB_SIZE_SHIFT - 12));
                radix_init_amor();
        }
        radix_init_iamr();

        if (cpu_has_feature(CPU_FTR_HVMODE))
                tlbiel_all();
}

void radix__mmu_cleanup_all(void)
{
        unsigned long lpcr;

        if (!firmware_has_feature(FW_FEATURE_LPAR)) {
                lpcr = mfspr(SPRN_LPCR);
                mtspr(SPRN_LPCR, lpcr & ~LPCR_UPRT);
                mtspr(SPRN_PTCR, 0);
                powernv_set_nmmu_ptcr(0);
                radix__flush_tlb_all();
        }
}

void radix__setup_initial_memory_limit(phys_addr_t first_memblock_base,
                                       phys_addr_t first_memblock_size)
{
        /*
         * We don't currently support the first MEMBLOCK not mapping 0
         * physical on those processors
         */
        BUG_ON(first_memblock_base != 0);

        /*
         * Radix mode is not limited by RMA / VRMA addressing.
         */
        ppc64_rma_size = ULONG_MAX;
}

#ifdef CONFIG_MEMORY_HOTPLUG
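/*
 * free_pte_table()/free_pmd_table(): free a page-table page once every
 * entry in it is empty, and clear the upstream entry that pointed to it.
 */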
static void free_pte_table(pte_t *pte_start, pmd_t *pmd)
{
        pte_t *pte;
        int i;

        for (i = 0; i < PTRS_PER_PTE; i++) {
                pte = pte_start + i;
                if (!pte_none(*pte))
                        return;
        }

        pte_free_kernel(&init_mm, pte_start);
        pmd_clear(pmd);
}

static void free_pmd_table(pmd_t *pmd_start, pud_t *pud)
{
        pmd_t *pmd;
        int i;

        for (i = 0; i < PTRS_PER_PMD; i++) {
                pmd = pmd_start + i;
                if (!pmd_none(*pmd))
                        return;
        }

        pmd_free(&init_mm, pmd_start);
        pud_clear(pud);
}

static void remove_pte_table(pte_t *pte_start, unsigned long addr,
                             unsigned long end)
{
        unsigned long next;
        pte_t *pte;

        pte = pte_start + pte_index(addr);
        for (; addr < end; addr = next, pte++) {
                next = (addr + PAGE_SIZE) & PAGE_MASK;
                if (next > end)
                        next = end;

                if (!pte_present(*pte))
                        continue;

                if (!PAGE_ALIGNED(addr) || !PAGE_ALIGNED(next)) {
                        /*
                         * The vmemmap_free() and remove_section_mapping()
                         * codepaths call us with aligned addresses.
                         */
                        WARN_ONCE(1, "%s: unaligned range\n", __func__);
                        continue;
                }

                pte_clear(&init_mm, addr, pte);
        }
}

static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
                             unsigned long end)
{
        unsigned long next;
        pte_t *pte_base;
        pmd_t *pmd;

        pmd = pmd_start + pmd_index(addr);
        for (; addr < end; addr = next, pmd++) {
                next = pmd_addr_end(addr, end);

                if (!pmd_present(*pmd))
                        continue;

                if (pmd_huge(*pmd)) {
                        if (!IS_ALIGNED(addr, PMD_SIZE) ||
                            !IS_ALIGNED(next, PMD_SIZE)) {
                                WARN_ONCE(1, "%s: unaligned range\n", __func__);
                                continue;
                        }

                        pte_clear(&init_mm, addr, (pte_t *)pmd);
                        continue;
                }

                pte_base = (pte_t *)pmd_page_vaddr(*pmd);
                remove_pte_table(pte_base, addr, next);
                free_pte_table(pte_base, pmd);
        }
}

static void remove_pud_table(pud_t *pud_start, unsigned long addr,
                             unsigned long end)
{
        unsigned long next;
        pmd_t *pmd_base;
        pud_t *pud;

        pud = pud_start + pud_index(addr);
        for (; addr < end; addr = next, pud++) {
                next = pud_addr_end(addr, end);

                if (!pud_present(*pud))
                        continue;

                if (pud_huge(*pud)) {
                        if (!IS_ALIGNED(addr, PUD_SIZE) ||
                            !IS_ALIGNED(next, PUD_SIZE)) {
                                WARN_ONCE(1, "%s: unaligned range\n", __func__);
                                continue;
                        }

                        pte_clear(&init_mm, addr, (pte_t *)pud);
                        continue;
                }

                pmd_base = (pmd_t *)pud_page_vaddr(*pud);
                remove_pmd_table(pmd_base, addr, next);
                free_pmd_table(pmd_base, pud);
        }
}
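
/*
 * Tear down the kernel mapping for [start, end): walk the page tables
 * top-down, clear huge and small entries, free now-empty table pages,
 * then flush the TLB for the range.
 */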
static void remove_pagetable(unsigned long start, unsigned long end)
{
        unsigned long addr, next;
        pud_t *pud_base;
        pgd_t *pgd;

        spin_lock(&init_mm.page_table_lock);

        for (addr = start; addr < end; addr = next) {
                next = pgd_addr_end(addr, end);

                pgd = pgd_offset_k(addr);
                if (!pgd_present(*pgd))
                        continue;

                if (pgd_huge(*pgd)) {
                        if (!IS_ALIGNED(addr, PGDIR_SIZE) ||
                            !IS_ALIGNED(next, PGDIR_SIZE)) {
                                WARN_ONCE(1, "%s: unaligned range\n", __func__);
                                continue;
                        }

                        pte_clear(&init_mm, addr, (pte_t *)pgd);
                        continue;
                }

                pud_base = (pud_t *)pgd_page_vaddr(*pgd);
                remove_pud_table(pud_base, addr, next);
        }

        spin_unlock(&init_mm.page_table_lock);
        radix__flush_tlb_kernel_range(start, end);
}

int __ref radix__create_section_mapping(unsigned long start, unsigned long end)
{
        return create_physical_mapping(start, end);
}

int radix__remove_section_mapping(unsigned long start, unsigned long end)
{
        remove_pagetable(start, end);
        return 0;
}
#endif /* CONFIG_MEMORY_HOTPLUG */

#ifdef CONFIG_SPARSEMEM_VMEMMAP
int __meminit radix__vmemmap_create_mapping(unsigned long start,
                                            unsigned long page_size,
                                            unsigned long phys)
{
        /* Create a PTE encoding */
        unsigned long flags = _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_KERNEL_RW;

        BUG_ON(radix__map_kernel_page(start, phys, __pgprot(flags), page_size));
        return 0;
}

#ifdef CONFIG_MEMORY_HOTPLUG
void radix__vmemmap_remove_mapping(unsigned long start, unsigned long page_size)
{
        remove_pagetable(start, start + page_size);
}
#endif
#endif

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
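/*
 * Update a huge-page PMD in place: clear 'clr' bits and set 'set' bits,
 * returning the old PMD value for the caller and for tracing.
 */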
unsigned long radix__pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
                                         pmd_t *pmdp, unsigned long clr,
                                         unsigned long set)
{
        unsigned long old;

#ifdef CONFIG_DEBUG_VM
        WARN_ON(!radix__pmd_trans_huge(*pmdp) && !pmd_devmap(*pmdp));
        assert_spin_locked(&mm->page_table_lock);
#endif

        old = radix__pte_update(mm, addr, (pte_t *)pmdp, clr, set, 1);
        trace_hugepage_update(addr, old, clr, set);

        return old;
}

pmd_t radix__pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address,
                                 pmd_t *pmdp)
{
        pmd_t pmd;

        VM_BUG_ON(address & ~HPAGE_PMD_MASK);
        VM_BUG_ON(radix__pmd_trans_huge(*pmdp));
        VM_BUG_ON(pmd_devmap(*pmdp));
        /*
         * khugepaged calls this for a normal pmd
         */
        pmd = *pmdp;
        pmd_clear(pmdp);

        /* FIXME!! Verify whether we need this kick below */
        serialize_against_pte_lookup(vma->vm_mm);

        radix__flush_tlb_collapsed_pmd(vma->vm_mm, address);

        return pmd;
}

/*
 * For us, pgtable_t is pte_t *. In order to save the deposited
 * page table, we consider the allocated page table as a list
 * head. On withdraw, we need to make sure we zero out the used
 * list_head memory area.
 */
void radix__pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
                                       pgtable_t pgtable)
{
        struct list_head *lh = (struct list_head *) pgtable;

        assert_spin_locked(pmd_lockptr(mm, pmdp));

        /* FIFO */
        if (!pmd_huge_pte(mm, pmdp))
                INIT_LIST_HEAD(lh);
        else
                list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
        pmd_huge_pte(mm, pmdp) = pgtable;
}

pgtable_t radix__pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
{
        pte_t *ptep;
        pgtable_t pgtable;
        struct list_head *lh;

        assert_spin_locked(pmd_lockptr(mm, pmdp));

        /* FIFO */
        pgtable = pmd_huge_pte(mm, pmdp);
        lh = (struct list_head *) pgtable;
        if (list_empty(lh))
                pmd_huge_pte(mm, pmdp) = NULL;
        else {
                pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
                list_del(lh);
        }
        /* Zero out the list_head area we used as the list link */
        ptep = (pte_t *) pgtable;
        *ptep = __pte(0);
        ptep++;
        *ptep = __pte(0);
        return pgtable;
}
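
/*
 * Atomically clear a huge PMD and return its old value, then serialize
 * against lockless page-table walkers (see the comment in the body).
 */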
pmd_t radix__pmdp_huge_get_and_clear(struct mm_struct *mm,
                                     unsigned long addr, pmd_t *pmdp)
{
        pmd_t old_pmd;
        unsigned long old;

        old = radix__pmd_hugepage_update(mm, addr, pmdp, ~0UL, 0);
        old_pmd = __pmd(old);
        /*
         * Serialize against find_current_mm_pte which does lock-less
         * lookup in page tables with local interrupts disabled. For huge
         * pages it casts pmd_t to pte_t. Since the format of pte_t differs
         * from pmd_t, we want to prevent the pmd from transitioning between
         * pointing to a page table and pointing to a huge page (and back)
         * while interrupts are disabled. We clear the pmd to possibly
         * replace it with a page table pointer in different code paths, so
         * make sure we wait for the parallel find_current_mm_pte to finish.
         */
        serialize_against_pte_lookup(mm);
        return old_pmd;
}

int radix__has_transparent_hugepage(void)
{
        /* For radix, 2M at the PMD level means THP */
        if (mmu_psize_defs[MMU_PAGE_2M].shift == PMD_SHIFT)
                return 1;
        return 0;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */