pgtable_64.c
/*
 * This file contains ioremap and related functions for 64-bit machines.
 *
 * Derived from arch/ppc64/mm/init.c
 *   Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *
 * Modifications by Paul Mackerras (PowerMac) (paulus@samba.org)
 * and Cort Dougan (PReP) (cort@cs.nmt.edu)
 *   Copyright (C) 1996 Paul Mackerras
 *
 * Derived from "arch/i386/mm/init.c"
 *   Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
 *
 * Dave Engebretsen <engebret@us.ibm.com>
 *   Rework for PPC64 port.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/export.h>
#include <linux/types.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/stddef.h>
#include <linux/vmalloc.h>
#include <linux/memblock.h>
#include <linux/slab.h>
#include <linux/hugetlb.h>

#include <asm/pgalloc.h>
#include <asm/page.h>
#include <asm/prom.h>
#include <asm/io.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
#include <asm/mmu.h>
#include <asm/smp.h>
#include <asm/machdep.h>
#include <asm/tlb.h>
#include <asm/trace.h>
#include <asm/processor.h>
#include <asm/cputable.h>
#include <asm/sections.h>
#include <asm/firmware.h>
#include <asm/dma.h>
#include <asm/powernv.h>

#include "mmu_decl.h"
#ifdef CONFIG_PPC_STD_MMU_64
#if TASK_SIZE_USER64 > (1UL << (ESID_BITS + SID_SHIFT))
#error TASK_SIZE_USER64 exceeds user VSID range
#endif
#endif
#ifdef CONFIG_PPC_BOOK3S_64
/*
 * Partition table and process table for ISA 3.0
 */
struct prtb_entry *process_tb;
struct patb_entry *partition_tb;
/*
 * Page table sizes
 */
unsigned long __pte_index_size;
EXPORT_SYMBOL(__pte_index_size);
unsigned long __pmd_index_size;
EXPORT_SYMBOL(__pmd_index_size);
unsigned long __pud_index_size;
EXPORT_SYMBOL(__pud_index_size);
unsigned long __pgd_index_size;
EXPORT_SYMBOL(__pgd_index_size);
unsigned long __pmd_cache_index;
EXPORT_SYMBOL(__pmd_cache_index);
unsigned long __pte_table_size;
EXPORT_SYMBOL(__pte_table_size);
unsigned long __pmd_table_size;
EXPORT_SYMBOL(__pmd_table_size);
unsigned long __pud_table_size;
EXPORT_SYMBOL(__pud_table_size);
unsigned long __pgd_table_size;
EXPORT_SYMBOL(__pgd_table_size);
unsigned long __pmd_val_bits;
EXPORT_SYMBOL(__pmd_val_bits);
unsigned long __pud_val_bits;
EXPORT_SYMBOL(__pud_val_bits);
unsigned long __pgd_val_bits;
EXPORT_SYMBOL(__pgd_val_bits);
unsigned long __kernel_virt_start;
EXPORT_SYMBOL(__kernel_virt_start);
unsigned long __kernel_virt_size;
EXPORT_SYMBOL(__kernel_virt_size);
unsigned long __vmalloc_start;
EXPORT_SYMBOL(__vmalloc_start);
unsigned long __vmalloc_end;
EXPORT_SYMBOL(__vmalloc_end);
struct page *vmemmap;
EXPORT_SYMBOL(vmemmap);
unsigned long __pte_frag_nr;
EXPORT_SYMBOL(__pte_frag_nr);
unsigned long __pte_frag_size_shift;
EXPORT_SYMBOL(__pte_frag_size_shift);
unsigned long ioremap_bot;
#else /* !CONFIG_PPC_BOOK3S_64 */
unsigned long ioremap_bot = IOREMAP_BASE;
#endif
/**
 * __ioremap_at - Low level function to establish the page tables
 *                for an IO mapping
 */
void __iomem *__ioremap_at(phys_addr_t pa, void *ea, unsigned long size,
			   unsigned long flags)
{
	unsigned long i;

	/* Make sure we have the base flags */
	if ((flags & _PAGE_PRESENT) == 0)
		flags |= pgprot_val(PAGE_KERNEL);

	/* We don't support the 4K PFN hack with ioremap */
	if (flags & H_PAGE_4K_PFN)
		return NULL;

	WARN_ON(pa & ~PAGE_MASK);
	WARN_ON(((unsigned long)ea) & ~PAGE_MASK);
	WARN_ON(size & ~PAGE_MASK);

	for (i = 0; i < size; i += PAGE_SIZE)
		if (map_kernel_page((unsigned long)ea + i, pa + i, flags))
			return NULL;

	return (void __iomem *)ea;
}
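/*
 * Example (illustrative sketch, not part of the original file): a caller
 * that has already reserved a virtual range -- e.g. the PCI code mapping
 * ISA space at a fixed kernel address -- would use __ioremap_at() like
 * this. "isa_phys_base" is a stand-in for whatever physical base the
 * platform provides:
 *
 *	void __iomem *va;
 *
 *	va = __ioremap_at(isa_phys_base, (void *)ISA_IO_BASE, 0x10000,
 *			  pgprot_val(pgprot_noncached(__pgprot(0))));
 *	if (!va)
 *		// bail out: the page tables could not be populated
 */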
/**
 * __iounmap_at - Low level function to tear down the page tables
 *                for an IO mapping. This is used for mappings that
 *                are manipulated manually, like partial unmapping of
 *                PCI IOs or ISA space.
 */
void __iounmap_at(void *ea, unsigned long size)
{
	WARN_ON(((unsigned long)ea) & ~PAGE_MASK);
	WARN_ON(size & ~PAGE_MASK);

	unmap_kernel_range((unsigned long)ea, size);
}
void __iomem *__ioremap_caller(phys_addr_t addr, unsigned long size,
			       unsigned long flags, void *caller)
{
	phys_addr_t paligned;
	void __iomem *ret;

	/*
	 * Choose an address to map it to. Once the slab allocator is up,
	 * we use the vmalloc machinery to place mappings between
	 * ioremap_bot and IOREMAP_END. Before that, we bolt mappings at
	 * addresses going up from ioremap_bot.
	 */
	paligned = addr & PAGE_MASK;
	size = PAGE_ALIGN(addr + size) - paligned;

	if ((size == 0) || (paligned == 0))
		return NULL;

	if (slab_is_available()) {
		struct vm_struct *area;

		area = __get_vm_area_caller(size, VM_IOREMAP,
					    ioremap_bot, IOREMAP_END,
					    caller);
		if (area == NULL)
			return NULL;

		area->phys_addr = paligned;
		ret = __ioremap_at(paligned, area->addr, size, flags);
		if (!ret)
			vunmap(area->addr);
	} else {
		ret = __ioremap_at(paligned, (void *)ioremap_bot, size, flags);
		if (ret)
			ioremap_bot += size;
	}

	if (ret)
		ret += addr & ~PAGE_MASK;
	return ret;
}
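/*
 * Worked example of the alignment arithmetic above (illustrative, with
 * 4K pages): for addr = 0x10000042 and size = 0x100, paligned becomes
 * 0x10000000 and size is rounded to PAGE_ALIGN(0x10000142) - 0x10000000
 * = 0x1000, i.e. one full page. The returned cookie is the mapped page
 * plus the original sub-page offset (0x42), so callers may pass
 * addresses that are not page aligned.
 */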
void __iomem *__ioremap(phys_addr_t addr, unsigned long size,
			unsigned long flags)
{
	return __ioremap_caller(addr, size, flags, __builtin_return_address(0));
}
void __iomem *ioremap(phys_addr_t addr, unsigned long size)
{
	unsigned long flags = pgprot_val(pgprot_noncached(__pgprot(0)));
	void *caller = __builtin_return_address(0);

	if (ppc_md.ioremap)
		return ppc_md.ioremap(addr, size, flags, caller);
	return __ioremap_caller(addr, size, flags, caller);
}
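/*
 * Example (sketch of a typical caller, not from this file): a PCI driver
 * mapping BAR 0 and poking a register. "priv" and REG_CTRL are
 * hypothetical names for the sketch:
 *
 *	priv->regs = ioremap(pci_resource_start(pdev, 0),
 *			     pci_resource_len(pdev, 0));
 *	if (!priv->regs)
 *		return -ENOMEM;
 *	writel(0x1, priv->regs + REG_CTRL);
 *	...
 *	iounmap(priv->regs);
 */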
void __iomem *ioremap_wc(phys_addr_t addr, unsigned long size)
{
	unsigned long flags = pgprot_val(pgprot_noncached_wc(__pgprot(0)));
	void *caller = __builtin_return_address(0);

	if (ppc_md.ioremap)
		return ppc_md.ioremap(addr, size, flags, caller);
	return __ioremap_caller(addr, size, flags, caller);
}
void __iomem *ioremap_prot(phys_addr_t addr, unsigned long size,
			   unsigned long flags)
{
	void *caller = __builtin_return_address(0);

	/* writable implies dirty for kernel addresses */
	if (flags & _PAGE_WRITE)
		flags |= _PAGE_DIRTY;

	/* we don't want to let _PAGE_EXEC leak out */
	flags &= ~_PAGE_EXEC;

	/*
	 * Force kernel mapping.
	 */
#if defined(CONFIG_PPC_BOOK3S_64)
	flags |= _PAGE_PRIVILEGED;
#else
	flags &= ~_PAGE_USER;
#endif

#ifdef _PAGE_BAP_SR
	/*
	 * _PAGE_USER contains _PAGE_BAP_SR on BookE using the new PTE
	 * format, which means clearing _PAGE_USER above also cleared
	 * supervisor access. Restore it here.
	 */
	flags |= _PAGE_BAP_SR;
#endif

	if (ppc_md.ioremap)
		return ppc_md.ioremap(addr, size, flags, caller);
	return __ioremap_caller(addr, size, flags, caller);
}
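/*
 * Example (illustrative assumption, not from this file): mapping a
 * region as cacheable via ioremap_prot(), which callers reach when they
 * need protection bits beyond the cache-inhibited default, e.g. the
 * /dev/mem mmap path:
 *
 *	void __iomem *va;
 *
 *	va = ioremap_prot(phys, size,
 *			  pgprot_val(pgprot_cached(__pgprot(0))));
 */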
/*
 * Unmap an IO region and remove it from the vmalloc'd list.
 * Access to IO memory should be serialized by the driver.
 */
void __iounmap(volatile void __iomem *token)
{
	void *addr;

	if (!slab_is_available())
		return;

	addr = (void *) ((unsigned long __force)
			 PCI_FIX_ADDR(token) & PAGE_MASK);
	if ((unsigned long)addr < ioremap_bot) {
		printk(KERN_WARNING "Attempt to iounmap early bolted mapping"
		       " at 0x%p\n", addr);
		return;
	}
	vunmap(addr);
}

void iounmap(volatile void __iomem *token)
{
	if (ppc_md.iounmap)
		ppc_md.iounmap(token);
	else
		__iounmap(token);
}

EXPORT_SYMBOL(ioremap);
EXPORT_SYMBOL(ioremap_wc);
EXPORT_SYMBOL(ioremap_prot);
EXPORT_SYMBOL(__ioremap);
EXPORT_SYMBOL(__ioremap_at);
EXPORT_SYMBOL(iounmap);
EXPORT_SYMBOL(__iounmap);
EXPORT_SYMBOL(__iounmap_at);
#ifndef __PAGETABLE_PUD_FOLDED
/* 4 level page table */
struct page *pgd_page(pgd_t pgd)
{
	if (pgd_huge(pgd))
		return pte_page(pgd_pte(pgd));
	return virt_to_page(pgd_page_vaddr(pgd));
}
#endif

struct page *pud_page(pud_t pud)
{
	if (pud_huge(pud))
		return pte_page(pud_pte(pud));
	return virt_to_page(pud_page_vaddr(pud));
}
/*
 * For a hugepage the pmd holds the pfn directly, with the low
 * PTE_RPN_SHIFT bits used for flags. For a PTE page, the pmd instead
 * holds a PTE_FRAG_SIZE (4K) aligned virtual address.
 */
struct page *pmd_page(pmd_t pmd)
{
	if (pmd_trans_huge(pmd) || pmd_huge(pmd) || pmd_devmap(pmd))
		return pte_page(pmd_pte(pmd));
	return virt_to_page(pmd_page_vaddr(pmd));
}
#ifdef CONFIG_PPC_64K_PAGES
static pte_t *get_from_cache(struct mm_struct *mm)
{
	void *pte_frag, *ret;

	spin_lock(&mm->page_table_lock);
	ret = mm->context.pte_frag;
	if (ret) {
		pte_frag = ret + PTE_FRAG_SIZE;
		/*
		 * If we have used up all the fragments, mark the
		 * cached PTE page pointer NULL.
		 */
		if (((unsigned long)pte_frag & ~PAGE_MASK) == 0)
			pte_frag = NULL;
		mm->context.pte_frag = pte_frag;
	}
	spin_unlock(&mm->page_table_lock);
	return (pte_t *)ret;
}
static pte_t *__alloc_for_cache(struct mm_struct *mm, int kernel)
{
	void *ret = NULL;
	struct page *page;

	if (!kernel) {
		page = alloc_page(PGALLOC_GFP | __GFP_ACCOUNT);
		if (!page)
			return NULL;
		if (!pgtable_page_ctor(page)) {
			__free_page(page);
			return NULL;
		}
	} else {
		page = alloc_page(PGALLOC_GFP);
		if (!page)
			return NULL;
	}

	ret = page_address(page);
	spin_lock(&mm->page_table_lock);
	/*
	 * If another thread populated mm->context.pte_frag while we
	 * were allocating, leave the page's reference count at one
	 * (a single fragment) and return it whole; otherwise split it
	 * into PTE_FRAG_NR fragments and stash the remainder.
	 */
	if (likely(!mm->context.pte_frag)) {
		set_page_count(page, PTE_FRAG_NR);
		mm->context.pte_frag = ret + PTE_FRAG_SIZE;
	}
	spin_unlock(&mm->page_table_lock);
	return (pte_t *)ret;
}
pte_t *pte_fragment_alloc(struct mm_struct *mm, unsigned long vmaddr, int kernel)
{
	pte_t *pte;

	pte = get_from_cache(mm);
	if (pte)
		return pte;

	return __alloc_for_cache(mm, kernel);
}
#endif /* CONFIG_PPC_64K_PAGES */
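/*
 * Worked example of the fragment scheme above (illustrative): with 64K
 * pages and a 4K PTE_FRAG_SIZE, PTE_FRAG_NR is 16, so one backing page
 * serves sixteen pte_fragment_alloc() calls. The first call allocates
 * the page with a reference count of 16 and stores base + 4K in
 * mm->context.pte_frag; each later call hands out the cached pointer
 * and advances it by 4K until it wraps to a page boundary, at which
 * point the cache is marked empty and the next call allocates afresh.
 */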
void pte_fragment_free(unsigned long *table, int kernel)
{
	struct page *page = virt_to_page(table);

	if (put_page_testzero(page)) {
		if (!kernel)
			pgtable_page_dtor(page);
		free_hot_cold_page(page, 0);
	}
}
#ifdef CONFIG_SMP
void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
{
	unsigned long pgf = (unsigned long)table;

	BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
	pgf |= shift;
	tlb_remove_table(tlb, (void *)pgf);
}

void __tlb_remove_table(void *_table)
{
	void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE);
	unsigned int shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE;

	if (!shift)
		/* PTE page needs special handling */
		pte_fragment_free(table, 0);
	else {
		BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
		kmem_cache_free(PGT_CACHE(shift), table);
	}
}
#else
void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
{
	if (!shift) {
		/* PTE page needs special handling */
		pte_fragment_free(table, 0);
	} else {
		BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
		kmem_cache_free(PGT_CACHE(shift), table);
	}
}
#endif
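/*
 * Illustration of the pointer encoding used above (the concrete values
 * are assumptions for the example): page table pages are aligned well
 * beyond MAX_PGTABLE_INDEX_SIZE, so the low bits of the table address
 * are free and pgtable_free_tlb() ORs the index-size "shift" into them.
 * With MAX_PGTABLE_INDEX_SIZE = 0xf, a table at 0x...c000 freed with
 * shift = 9 travels through tlb_remove_table() as 0x...c009, and
 * __tlb_remove_table() recovers:
 *
 *	table = 0x...c009 & ~0xfUL = 0x...c000
 *	shift = 0x...c009 &  0xfUL = 9
 */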
#ifdef CONFIG_PPC_BOOK3S_64
void __init mmu_partition_table_init(void)
{
	unsigned long patb_size = 1UL << PATB_SIZE_SHIFT;
	unsigned long ptcr;

	BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 36), "Partition table size too large.");
	partition_tb = __va(memblock_alloc_base(patb_size, patb_size,
						MEMBLOCK_ALLOC_ANYWHERE));

	/* Initialize the Partition Table with no entries */
	memset((void *)partition_tb, 0, patb_size);

	/*
	 * Update the partition table control register with the base
	 * address and the size encoding (PATB_SIZE_SHIFT - 12, i.e. a
	 * 64K table).
	 */
	ptcr = __pa(partition_tb) | (PATB_SIZE_SHIFT - 12);
	mtspr(SPRN_PTCR, ptcr);
	powernv_set_nmmu_ptcr(ptcr);
}
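/*
 * Worked example of the PTCR encoding (assuming PATB_SIZE_SHIFT = 16,
 * per the "64K" note above): the low bits of the PTCR hold the table
 * size as a power of two in units of 4K, so a 2^16-byte table is
 * encoded as 16 - 12 = 4, and PTCR = table physical base | 4. The base
 * must be aligned to the table size, which the memblock_alloc_base()
 * call above guarantees by passing patb_size as the alignment.
 */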
void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
				   unsigned long dw1)
{
	unsigned long old = be64_to_cpu(partition_tb[lpid].patb0);

	partition_tb[lpid].patb0 = cpu_to_be64(dw0);
	partition_tb[lpid].patb1 = cpu_to_be64(dw1);

	/*
	 * Global flush of TLBs and partition table caches for this lpid.
	 * The type of flush (hash or radix) depends on what the previous
	 * use of this partition ID was, not the new use.
	 */
	asm volatile("ptesync" : : : "memory");
	if (old & PATB_HR) {
		asm volatile(PPC_TLBIE_5(%0,%1,2,0,1) : :
			     "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
		trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 1);
	} else {
		asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : :
			     "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
		trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 0);
	}
	asm volatile("eieio; tlbsync; ptesync" : : : "memory");
}
EXPORT_SYMBOL_GPL(mmu_partition_table_set_entry);
#endif /* CONFIG_PPC_BOOK3S_64 */