pgtable_64.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481
  1. /*
  2. * This file contains ioremap and related functions for 64-bit machines.
  3. *
  4. * Derived from arch/ppc64/mm/init.c
  5. * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
  6. *
  7. * Modifications by Paul Mackerras (PowerMac) (paulus@samba.org)
  8. * and Cort Dougan (PReP) (cort@cs.nmt.edu)
  9. * Copyright (C) 1996 Paul Mackerras
  10. *
  11. * Derived from "arch/i386/mm/init.c"
  12. * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
  13. *
  14. * Dave Engebretsen <engebret@us.ibm.com>
  15. * Rework for PPC64 port.
  16. *
  17. * This program is free software; you can redistribute it and/or
  18. * modify it under the terms of the GNU General Public License
  19. * as published by the Free Software Foundation; either version
  20. * 2 of the License, or (at your option) any later version.
  21. *
  22. */
  23. #include <linux/signal.h>
  24. #include <linux/sched.h>
  25. #include <linux/kernel.h>
  26. #include <linux/errno.h>
  27. #include <linux/string.h>
  28. #include <linux/export.h>
  29. #include <linux/types.h>
  30. #include <linux/mman.h>
  31. #include <linux/mm.h>
  32. #include <linux/swap.h>
  33. #include <linux/stddef.h>
  34. #include <linux/vmalloc.h>
  35. #include <linux/memblock.h>
  36. #include <linux/slab.h>
  37. #include <linux/hugetlb.h>
  38. #include <asm/pgalloc.h>
  39. #include <asm/page.h>
  40. #include <asm/prom.h>
  41. #include <asm/io.h>
  42. #include <asm/mmu_context.h>
  43. #include <asm/pgtable.h>
  44. #include <asm/mmu.h>
  45. #include <asm/smp.h>
  46. #include <asm/machdep.h>
  47. #include <asm/tlb.h>
  48. #include <asm/processor.h>
  49. #include <asm/cputable.h>
  50. #include <asm/sections.h>
  51. #include <asm/firmware.h>
  52. #include <asm/dma.h>
  53. #include <asm/powernv.h>
  54. #include "mmu_decl.h"
  /*
   * Build-time sanity check, only evaluated when CONFIG_PPC_STD_MMU_64 is
   * set: the 64-bit user address space limit must fit within
   * 1UL << (ESID_BITS + SID_SHIFT), otherwise the build is aborted.
   */
  #ifdef CONFIG_PPC_STD_MMU_64
  #if TASK_SIZE_USER64 > (1UL << (ESID_BITS + SID_SHIFT))
  #error TASK_SIZE_USER64 exceeds user VSID range
  #endif
  #endif /* CONFIG_PPC_STD_MMU_64 */
  60. #ifdef CONFIG_PPC_BOOK3S_64
  61. /*
  62. * partition table and process table for ISA 3.0
  63. */
  64. struct prtb_entry *process_tb;
  65. struct patb_entry *partition_tb;
  66. /*
  67. * page table size
  68. */
  69. unsigned long __pte_index_size;
  70. EXPORT_SYMBOL(__pte_index_size);
  71. unsigned long __pmd_index_size;
  72. EXPORT_SYMBOL(__pmd_index_size);
  73. unsigned long __pud_index_size;
  74. EXPORT_SYMBOL(__pud_index_size);
  75. unsigned long __pgd_index_size;
  76. EXPORT_SYMBOL(__pgd_index_size);
  77. unsigned long __pmd_cache_index;
  78. EXPORT_SYMBOL(__pmd_cache_index);
  79. unsigned long __pte_table_size;
  80. EXPORT_SYMBOL(__pte_table_size);
  81. unsigned long __pmd_table_size;
  82. EXPORT_SYMBOL(__pmd_table_size);
  83. unsigned long __pud_table_size;
  84. EXPORT_SYMBOL(__pud_table_size);
  85. unsigned long __pgd_table_size;
  86. EXPORT_SYMBOL(__pgd_table_size);
  87. unsigned long __pmd_val_bits;
  88. EXPORT_SYMBOL(__pmd_val_bits);
  89. unsigned long __pud_val_bits;
  90. EXPORT_SYMBOL(__pud_val_bits);
  91. unsigned long __pgd_val_bits;
  92. EXPORT_SYMBOL(__pgd_val_bits);
  93. unsigned long __kernel_virt_start;
  94. EXPORT_SYMBOL(__kernel_virt_start);
  95. unsigned long __kernel_virt_size;
  96. EXPORT_SYMBOL(__kernel_virt_size);
  97. unsigned long __vmalloc_start;
  98. EXPORT_SYMBOL(__vmalloc_start);
  99. unsigned long __vmalloc_end;
  100. EXPORT_SYMBOL(__vmalloc_end);
  101. struct page *vmemmap;
  102. EXPORT_SYMBOL(vmemmap);
  103. unsigned long __pte_frag_nr;
  104. EXPORT_SYMBOL(__pte_frag_nr);
  105. unsigned long __pte_frag_size_shift;
  106. EXPORT_SYMBOL(__pte_frag_size_shift);
  107. unsigned long ioremap_bot;
  108. #else /* !CONFIG_PPC_BOOK3S_64 */
  109. unsigned long ioremap_bot = IOREMAP_BASE;
  110. #endif
  111. /**
  112. * __ioremap_at - Low level function to establish the page tables
  113. * for an IO mapping
  114. */
  115. void __iomem * __ioremap_at(phys_addr_t pa, void *ea, unsigned long size,
  116. unsigned long flags)
  117. {
  118. unsigned long i;
  119. /* Make sure we have the base flags */
  120. if ((flags & _PAGE_PRESENT) == 0)
  121. flags |= pgprot_val(PAGE_KERNEL);
  122. /* We don't support the 4K PFN hack with ioremap */
  123. if (flags & H_PAGE_4K_PFN)
  124. return NULL;
  125. WARN_ON(pa & ~PAGE_MASK);
  126. WARN_ON(((unsigned long)ea) & ~PAGE_MASK);
  127. WARN_ON(size & ~PAGE_MASK);
  128. for (i = 0; i < size; i += PAGE_SIZE)
  129. if (map_kernel_page((unsigned long)ea+i, pa+i, flags))
  130. return NULL;
  131. return (void __iomem *)ea;
  132. }
  133. /**
  134. * __iounmap_from - Low level function to tear down the page tables
  135. * for an IO mapping. This is used for mappings that
  136. * are manipulated manually, like partial unmapping of
  137. * PCI IOs or ISA space.
  138. */
  139. void __iounmap_at(void *ea, unsigned long size)
  140. {
  141. WARN_ON(((unsigned long)ea) & ~PAGE_MASK);
  142. WARN_ON(size & ~PAGE_MASK);
  143. unmap_kernel_range((unsigned long)ea, size);
  144. }
  145. void __iomem * __ioremap_caller(phys_addr_t addr, unsigned long size,
  146. unsigned long flags, void *caller)
  147. {
  148. phys_addr_t paligned;
  149. void __iomem *ret;
  150. /*
  151. * Choose an address to map it to.
  152. * Once the imalloc system is running, we use it.
  153. * Before that, we map using addresses going
  154. * up from ioremap_bot. imalloc will use
  155. * the addresses from ioremap_bot through
  156. * IMALLOC_END
  157. *
  158. */
  159. paligned = addr & PAGE_MASK;
  160. size = PAGE_ALIGN(addr + size) - paligned;
  161. if ((size == 0) || (paligned == 0))
  162. return NULL;
  163. if (slab_is_available()) {
  164. struct vm_struct *area;
  165. area = __get_vm_area_caller(size, VM_IOREMAP,
  166. ioremap_bot, IOREMAP_END,
  167. caller);
  168. if (area == NULL)
  169. return NULL;
  170. area->phys_addr = paligned;
  171. ret = __ioremap_at(paligned, area->addr, size, flags);
  172. if (!ret)
  173. vunmap(area->addr);
  174. } else {
  175. ret = __ioremap_at(paligned, (void *)ioremap_bot, size, flags);
  176. if (ret)
  177. ioremap_bot += size;
  178. }
  179. if (ret)
  180. ret += addr & ~PAGE_MASK;
  181. return ret;
  182. }
  183. void __iomem * __ioremap(phys_addr_t addr, unsigned long size,
  184. unsigned long flags)
  185. {
  186. return __ioremap_caller(addr, size, flags, __builtin_return_address(0));
  187. }
  188. void __iomem * ioremap(phys_addr_t addr, unsigned long size)
  189. {
  190. unsigned long flags = pgprot_val(pgprot_noncached(__pgprot(0)));
  191. void *caller = __builtin_return_address(0);
  192. if (ppc_md.ioremap)
  193. return ppc_md.ioremap(addr, size, flags, caller);
  194. return __ioremap_caller(addr, size, flags, caller);
  195. }
  196. void __iomem * ioremap_wc(phys_addr_t addr, unsigned long size)
  197. {
  198. unsigned long flags = pgprot_val(pgprot_noncached_wc(__pgprot(0)));
  199. void *caller = __builtin_return_address(0);
  200. if (ppc_md.ioremap)
  201. return ppc_md.ioremap(addr, size, flags, caller);
  202. return __ioremap_caller(addr, size, flags, caller);
  203. }
  204. void __iomem * ioremap_prot(phys_addr_t addr, unsigned long size,
  205. unsigned long flags)
  206. {
  207. void *caller = __builtin_return_address(0);
  208. /* writeable implies dirty for kernel addresses */
  209. if (flags & _PAGE_WRITE)
  210. flags |= _PAGE_DIRTY;
  211. /* we don't want to let _PAGE_EXEC leak out */
  212. flags &= ~_PAGE_EXEC;
  213. /*
  214. * Force kernel mapping.
  215. */
  216. #if defined(CONFIG_PPC_BOOK3S_64)
  217. flags |= _PAGE_PRIVILEGED;
  218. #else
  219. flags &= ~_PAGE_USER;
  220. #endif
  221. #ifdef _PAGE_BAP_SR
  222. /* _PAGE_USER contains _PAGE_BAP_SR on BookE using the new PTE format
  223. * which means that we just cleared supervisor access... oops ;-) This
  224. * restores it
  225. */
  226. flags |= _PAGE_BAP_SR;
  227. #endif
  228. if (ppc_md.ioremap)
  229. return ppc_md.ioremap(addr, size, flags, caller);
  230. return __ioremap_caller(addr, size, flags, caller);
  231. }
  232. /*
  233. * Unmap an IO region and remove it from imalloc'd list.
  234. * Access to IO memory should be serialized by driver.
  235. */
  236. void __iounmap(volatile void __iomem *token)
  237. {
  238. void *addr;
  239. if (!slab_is_available())
  240. return;
  241. addr = (void *) ((unsigned long __force)
  242. PCI_FIX_ADDR(token) & PAGE_MASK);
  243. if ((unsigned long)addr < ioremap_bot) {
  244. printk(KERN_WARNING "Attempt to iounmap early bolted mapping"
  245. " at 0x%p\n", addr);
  246. return;
  247. }
  248. vunmap(addr);
  249. }
  250. void iounmap(volatile void __iomem *token)
  251. {
  252. if (ppc_md.iounmap)
  253. ppc_md.iounmap(token);
  254. else
  255. __iounmap(token);
  256. }
  /* IO mapping entry points made available to modules. */
  EXPORT_SYMBOL(ioremap);
  EXPORT_SYMBOL(ioremap_wc);
  EXPORT_SYMBOL(ioremap_prot);
  EXPORT_SYMBOL(__ioremap);
  EXPORT_SYMBOL(__ioremap_at);

  /* IO unmapping entry points made available to modules. */
  EXPORT_SYMBOL(iounmap);
  EXPORT_SYMBOL(__iounmap);
  EXPORT_SYMBOL(__iounmap_at);
  265. #ifndef __PAGETABLE_PUD_FOLDED
  266. /* 4 level page table */
  267. struct page *pgd_page(pgd_t pgd)
  268. {
  269. if (pgd_huge(pgd))
  270. return pte_page(pgd_pte(pgd));
  271. return virt_to_page(pgd_page_vaddr(pgd));
  272. }
  273. #endif
  274. struct page *pud_page(pud_t pud)
  275. {
  276. if (pud_huge(pud))
  277. return pte_page(pud_pte(pud));
  278. return virt_to_page(pud_page_vaddr(pud));
  279. }
  280. /*
  281. * For hugepage we have pfn in the pmd, we use PTE_RPN_SHIFT bits for flags
  282. * For PTE page, we have a PTE_FRAG_SIZE (4K) aligned virtual address.
  283. */
  284. struct page *pmd_page(pmd_t pmd)
  285. {
  286. if (pmd_trans_huge(pmd) || pmd_huge(pmd))
  287. return pte_page(pmd_pte(pmd));
  288. return virt_to_page(pmd_page_vaddr(pmd));
  289. }
  290. #ifdef CONFIG_PPC_64K_PAGES
  291. static pte_t *get_from_cache(struct mm_struct *mm)
  292. {
  293. void *pte_frag, *ret;
  294. spin_lock(&mm->page_table_lock);
  295. ret = mm->context.pte_frag;
  296. if (ret) {
  297. pte_frag = ret + PTE_FRAG_SIZE;
  298. /*
  299. * If we have taken up all the fragments mark PTE page NULL
  300. */
  301. if (((unsigned long)pte_frag & ~PAGE_MASK) == 0)
  302. pte_frag = NULL;
  303. mm->context.pte_frag = pte_frag;
  304. }
  305. spin_unlock(&mm->page_table_lock);
  306. return (pte_t *)ret;
  307. }
  308. static pte_t *__alloc_for_cache(struct mm_struct *mm, int kernel)
  309. {
  310. void *ret = NULL;
  311. struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
  312. if (!page)
  313. return NULL;
  314. if (!kernel && !pgtable_page_ctor(page)) {
  315. __free_page(page);
  316. return NULL;
  317. }
  318. ret = page_address(page);
  319. spin_lock(&mm->page_table_lock);
  320. /*
  321. * If we find pgtable_page set, we return
  322. * the allocated page with single fragement
  323. * count.
  324. */
  325. if (likely(!mm->context.pte_frag)) {
  326. set_page_count(page, PTE_FRAG_NR);
  327. mm->context.pte_frag = ret + PTE_FRAG_SIZE;
  328. }
  329. spin_unlock(&mm->page_table_lock);
  330. return (pte_t *)ret;
  331. }
  332. pte_t *pte_fragment_alloc(struct mm_struct *mm, unsigned long vmaddr, int kernel)
  333. {
  334. pte_t *pte;
  335. pte = get_from_cache(mm);
  336. if (pte)
  337. return pte;
  338. return __alloc_for_cache(mm, kernel);
  339. }
  340. #endif /* CONFIG_PPC_64K_PAGES */
  /*
   * Drop one reference on the page that holds the PTE fragment @table.
   * When the last reference goes away the page is freed; for user page
   * tables (@kernel == 0) pgtable_page_dtor() is run first.
   */
  void pte_fragment_free(unsigned long *table, int kernel)
  {
  	struct page *page = virt_to_page(table);

  	/* Other fragments of this page are still referenced. */
  	if (!put_page_testzero(page))
  		return;

  	if (!kernel)
  		pgtable_page_dtor(page);

  	free_hot_cold_page(page, 0);
  }
  350. #ifdef CONFIG_SMP
  351. void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
  352. {
  353. unsigned long pgf = (unsigned long)table;
  354. BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
  355. pgf |= shift;
  356. tlb_remove_table(tlb, (void *)pgf);
  357. }
  358. void __tlb_remove_table(void *_table)
  359. {
  360. void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE);
  361. unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE;
  362. if (!shift)
  363. /* PTE page needs special handling */
  364. pte_fragment_free(table, 0);
  365. else {
  366. BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
  367. kmem_cache_free(PGT_CACHE(shift), table);
  368. }
  369. }
  370. #else
  371. void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
  372. {
  373. if (!shift) {
  374. /* PTE page needs special handling */
  375. pte_fragment_free(table, 0);
  376. } else {
  377. BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
  378. kmem_cache_free(PGT_CACHE(shift), table);
  379. }
  380. }
  381. #endif
  382. #ifdef CONFIG_PPC_BOOK3S_64
  383. void __init mmu_partition_table_init(void)
  384. {
  385. unsigned long patb_size = 1UL << PATB_SIZE_SHIFT;
  386. unsigned long ptcr;
  387. BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 36), "Partition table size too large.");
  388. partition_tb = __va(memblock_alloc_base(patb_size, patb_size,
  389. MEMBLOCK_ALLOC_ANYWHERE));
  390. /* Initialize the Partition Table with no entries */
  391. memset((void *)partition_tb, 0, patb_size);
  392. /*
  393. * update partition table control register,
  394. * 64 K size.
  395. */
  396. ptcr = __pa(partition_tb) | (PATB_SIZE_SHIFT - 12);
  397. mtspr(SPRN_PTCR, ptcr);
  398. powernv_set_nmmu_ptcr(ptcr);
  399. }
  400. void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
  401. unsigned long dw1)
  402. {
  403. unsigned long old = be64_to_cpu(partition_tb[lpid].patb0);
  404. partition_tb[lpid].patb0 = cpu_to_be64(dw0);
  405. partition_tb[lpid].patb1 = cpu_to_be64(dw1);
  406. /*
  407. * Global flush of TLBs and partition table caches for this lpid.
  408. * The type of flush (hash or radix) depends on what the previous
  409. * use of this partition ID was, not the new use.
  410. */
  411. asm volatile("ptesync" : : : "memory");
  412. if (old & PATB_HR)
  413. asm volatile(PPC_TLBIE_5(%0,%1,2,0,1) : :
  414. "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
  415. else
  416. asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : :
  417. "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
  418. asm volatile("eieio; tlbsync; ptesync" : : : "memory");
  419. }
  420. EXPORT_SYMBOL_GPL(mmu_partition_table_set_entry);
  421. #endif /* CONFIG_PPC_BOOK3S_64 */