/*
 * PPC64 (POWER4) Huge TLB Page Support for Kernel.
 *
 * Copyright (C) 2003 David Gibson, IBM Corporation.
 *
 * Based on the IA-32 version:
 * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
 */

#include <linux/init.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/sysctl.h>
#include <asm/mman.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/machdep.h>
#include <asm/cputable.h>
#include <asm/spu.h>

#define PAGE_SHIFT_64K  16
#define PAGE_SHIFT_16M  24
#define PAGE_SHIFT_16G  34

#define NUM_LOW_AREAS   (0x100000000UL >> SID_SHIFT)
#define NUM_HIGH_AREAS  (PGTABLE_RANGE >> HTLB_AREA_SHIFT)

#define MAX_NUMBER_GPAGES       1024

/* Tracks the 16G pages after the device tree is scanned and before the
 * huge_boot_pages list is ready. */
static unsigned long gpage_freearray[MAX_NUMBER_GPAGES];
static unsigned nr_gpages;

/* Flag to mark huge PD pointers. This means pmd_bad() and pud_bad()
 * will choke on pointers to hugepte tables, which is handy for
 * catching screwups early. */

static inline int shift_to_mmu_psize(unsigned int shift)
{
        int psize;

        for (psize = 0; psize < MMU_PAGE_COUNT; ++psize)
                if (mmu_psize_defs[psize].shift == shift)
                        return psize;
        return -1;
}

static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize)
{
        if (mmu_psize_defs[mmu_psize].shift)
                return mmu_psize_defs[mmu_psize].shift;
        BUG();
}
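
/*
 * A hugepd entry encodes both the address of the hugepte table it points
 * to and, in its low HUGEPD_SHIFT_MASK bits, the page-size shift of the
 * hugepages mapped through that table.  The helpers below unpack those
 * two fields.
 */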
#define hugepd_none(hpd)        ((hpd).pd == 0)

static inline pte_t *hugepd_page(hugepd_t hpd)
{
        BUG_ON(!hugepd_ok(hpd));
        return (pte_t *)((hpd.pd & ~HUGEPD_SHIFT_MASK) | 0xc000000000000000);
}

static inline unsigned int hugepd_shift(hugepd_t hpd)
{
        return hpd.pd & HUGEPD_SHIFT_MASK;
}

static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr,
                                    unsigned pdshift)
{
        unsigned long idx = (addr & ((1UL << pdshift) - 1)) >> hugepd_shift(*hpdp);
        pte_t *dir = hugepd_page(*hpdp);

        return dir + idx;
}
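
/*
 * Walk the Linux page tables for @ea and return a pointer to the PTE that
 * maps it, or NULL if nothing is mapped there.  For a hugepage mapping,
 * *@shift is set to the page-size shift of the mapping; for a normal page
 * it is left at 0.
 */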
pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift)
{
        pgd_t *pg;
        pud_t *pu;
        pmd_t *pm;
        hugepd_t *hpdp = NULL;
        unsigned pdshift = PGDIR_SHIFT;

        if (shift)
                *shift = 0;

        pg = pgdir + pgd_index(ea);
        if (is_hugepd(pg)) {
                hpdp = (hugepd_t *)pg;
        } else if (!pgd_none(*pg)) {
                pdshift = PUD_SHIFT;
                pu = pud_offset(pg, ea);
                if (is_hugepd(pu))
                        hpdp = (hugepd_t *)pu;
                else if (!pud_none(*pu)) {
                        pdshift = PMD_SHIFT;
                        pm = pmd_offset(pu, ea);
                        if (is_hugepd(pm))
                                hpdp = (hugepd_t *)pm;
                        else if (!pmd_none(*pm)) {
                                return pte_offset_map(pm, ea);
                        }
                }
        }

        if (!hpdp)
                return NULL;

        if (shift)
                *shift = hugepd_shift(*hpdp);
        return hugepte_offset(hpdp, ea, pdshift);
}

pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
        return find_linux_pte_or_hugepte(mm->pgd, addr, NULL);
}
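
/*
 * Allocate a fresh hugepte table from the pgtable cache sized for
 * (pdshift - pshift) index bits and install it in *hpdp.  The update is
 * made under mm->page_table_lock; if another thread has already filled
 * the entry, the newly allocated table is simply freed again.
 */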
static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
                           unsigned long address, unsigned pdshift, unsigned pshift)
{
        pte_t *new = kmem_cache_zalloc(PGT_CACHE(pdshift - pshift),
                                       GFP_KERNEL|__GFP_REPEAT);

        BUG_ON(pshift > HUGEPD_SHIFT_MASK);
        BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK);

        if (! new)
                return -ENOMEM;

        spin_lock(&mm->page_table_lock);
        if (!hugepd_none(*hpdp))
                kmem_cache_free(PGT_CACHE(pdshift - pshift), new);
        else
                hpdp->pd = ((unsigned long)new & ~0x8000000000000000) | pshift;
        spin_unlock(&mm->page_table_lock);
        return 0;
}
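
/*
 * Find, or create, the hugepte for a hugepage mapping of size @sz at
 * @addr.  The level at which the hugepd entry lives is chosen from the
 * page size: sizes of at least PUD_SIZE hang the hugepd directly off the
 * PGD, sizes of at least PMD_SIZE off the PUD, and anything smaller off
 * the PMD.
 */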
pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz)
{
        pgd_t *pg;
        pud_t *pu;
        pmd_t *pm;
        hugepd_t *hpdp = NULL;
        unsigned pshift = __ffs(sz);
        unsigned pdshift = PGDIR_SHIFT;

        addr &= ~(sz-1);

        pg = pgd_offset(mm, addr);
        if (pshift >= PUD_SHIFT) {
                hpdp = (hugepd_t *)pg;
        } else {
                pdshift = PUD_SHIFT;
                pu = pud_alloc(mm, pg, addr);
                if (pshift >= PMD_SHIFT) {
                        hpdp = (hugepd_t *)pu;
                } else {
                        pdshift = PMD_SHIFT;
                        pm = pmd_alloc(mm, pu, addr);
                        hpdp = (hugepd_t *)pm;
                }
        }

        if (!hpdp)
                return NULL;

        BUG_ON(!hugepd_none(*hpdp) && !hugepd_ok(*hpdp));

        if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift, pshift))
                return NULL;

        return hugepte_offset(hpdp, addr, pdshift);
}

/* Build list of addresses of gigantic pages.  This function is used in early
 * boot before the buddy or bootmem allocator is setup.
 */
void add_gpage(unsigned long addr, unsigned long page_size,
               unsigned long number_of_pages)
{
        if (!addr)
                return;

        while (number_of_pages > 0) {
                gpage_freearray[nr_gpages] = addr;
                nr_gpages++;
                number_of_pages--;
                addr += page_size;
        }
}

/* Moves the gigantic page addresses from the temporary list to the
 * huge_boot_pages list.
 */
int alloc_bootmem_huge_page(struct hstate *hstate)
{
        struct huge_bootmem_page *m;

        if (nr_gpages == 0)
                return 0;
        m = phys_to_virt(gpage_freearray[--nr_gpages]);
        gpage_freearray[nr_gpages] = 0;
        list_add(&m->list, &huge_boot_pages);
        m->hstate = hstate;
        return 1;
}
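
/*
 * PMD sharing of hugepage page tables is not implemented here, so there
 * is never anything to unshare.
 */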
int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
{
        return 0;
}
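
/*
 * Free the hugepte table that *hpdp points to and clear the hugepd entry,
 * but only if the region being torn down (clamped to [floor, ceiling))
 * covers the whole pdshift-sized range that the entry maps.
 */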
static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshift,
                              unsigned long start, unsigned long end,
                              unsigned long floor, unsigned long ceiling)
{
        pte_t *hugepte = hugepd_page(*hpdp);
        unsigned shift = hugepd_shift(*hpdp);
        unsigned long pdmask = ~((1UL << pdshift) - 1);

        start &= pdmask;
        if (start < floor)
                return;
        if (ceiling) {
                ceiling &= pdmask;
                if (! ceiling)
                        return;
        }
        if (end - 1 > ceiling - 1)
                return;

        hpdp->pd = 0;
        tlb->need_flush = 1;
        pgtable_free_tlb(tlb, hugepte, pdshift - shift);
}
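
/*
 * hugetlb_free_{pmd,pud}_range() each walk one directory level of a
 * hugepage region being unmapped, handing hugepd entries to
 * free_hugepd_range() and finally freeing the directory page itself once
 * the surrounding [floor, ceiling) range permits it.
 */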
static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
                                   unsigned long addr, unsigned long end,
                                   unsigned long floor, unsigned long ceiling)
{
        pmd_t *pmd;
        unsigned long next;
        unsigned long start;

        start = addr;
        pmd = pmd_offset(pud, addr);
        do {
                next = pmd_addr_end(addr, end);
                if (pmd_none(*pmd))
                        continue;
                free_hugepd_range(tlb, (hugepd_t *)pmd, PMD_SHIFT,
                                  addr, next, floor, ceiling);
        } while (pmd++, addr = next, addr != end);

        start &= PUD_MASK;
        if (start < floor)
                return;
        if (ceiling) {
                ceiling &= PUD_MASK;
                if (!ceiling)
                        return;
        }
        if (end - 1 > ceiling - 1)
                return;

        pmd = pmd_offset(pud, start);
        pud_clear(pud);
        pmd_free_tlb(tlb, pmd, start);
}

static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
                                   unsigned long addr, unsigned long end,
                                   unsigned long floor, unsigned long ceiling)
{
        pud_t *pud;
        unsigned long next;
        unsigned long start;

        start = addr;
        pud = pud_offset(pgd, addr);
        do {
                next = pud_addr_end(addr, end);
                if (!is_hugepd(pud)) {
                        if (pud_none_or_clear_bad(pud))
                                continue;
                        hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
                                               ceiling);
                } else {
                        free_hugepd_range(tlb, (hugepd_t *)pud, PUD_SHIFT,
                                          addr, next, floor, ceiling);
                }
        } while (pud++, addr = next, addr != end);

        start &= PGDIR_MASK;
        if (start < floor)
                return;
        if (ceiling) {
                ceiling &= PGDIR_MASK;
                if (!ceiling)
                        return;
        }
        if (end - 1 > ceiling - 1)
                return;

        pud = pud_offset(pgd, start);
        pgd_clear(pgd);
        pud_free_tlb(tlb, pud, start);
}

/*
 * This function frees user-level page tables of a process.
 *
 * Must be called with pagetable lock held.
 */
void hugetlb_free_pgd_range(struct mmu_gather *tlb,
                            unsigned long addr, unsigned long end,
                            unsigned long floor, unsigned long ceiling)
{
        pgd_t *pgd;
        unsigned long next;

        /*
         * Because there are a number of different possible pagetable
         * layouts for hugepage ranges, we limit knowledge of how
         * things should be laid out to the allocation path
         * (huge_pte_alloc(), above).  Everything else works out the
         * structure as it goes from information in the hugepd
         * pointers.  That means that we can't here use the
         * optimization used in the normal page free_pgd_range(), of
         * checking whether we're actually covering a large enough
         * range to have to do anything at the top level of the walk
         * instead of at the bottom.
         *
         * To make sense of this, you should probably go read the big
         * block comment at the top of the normal free_pgd_range(),
         * too.
         */

        pgd = pgd_offset(tlb->mm, addr);
        do {
                next = pgd_addr_end(addr, end);
                if (!is_hugepd(pgd)) {
                        if (pgd_none_or_clear_bad(pgd))
                                continue;
                        hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling);
                } else {
                        free_hugepd_range(tlb, (hugepd_t *)pgd, PGDIR_SHIFT,
                                          addr, next, floor, ceiling);
                }
        } while (pgd++, addr = next, addr != end);
}
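
/*
 * Install a hugepage PTE.  Any existing translation is torn down first via
 * pte_update() so that the corresponding hash-table entry is flushed with
 * the correct (huge) page size.
 */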
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
                     pte_t *ptep, pte_t pte)
{
        if (pte_present(*ptep)) {
                /* We open-code pte_clear because we need to pass the right
                 * argument to hpte_need_flush (huge / !huge). Might not be
                 * necessary anymore if we make hpte_need_flush() get the
                 * page size from the slices
                 */
                pte_update(mm, addr, ptep, ~0UL, 1);
        }
        *ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
}

pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
                              pte_t *ptep)
{
        unsigned long old = pte_update(mm, addr, ptep, ~0UL, 1);
        return __pte(old);
}

struct page *
follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
{
        pte_t *ptep;
        struct page *page;
        unsigned shift;
        unsigned long mask;

        ptep = find_linux_pte_or_hugepte(mm->pgd, address, &shift);

        /* Verify it is a huge page else bail. */
        if (!ptep || !shift)
                return ERR_PTR(-EINVAL);

        mask = (1UL << shift) - 1;
        page = pte_page(*ptep);
        if (page)
                page += (address & mask) / PAGE_SIZE;

        return page;
}
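
/*
 * Hugepages here are reached through hugepd pointers rather than through
 * huge leaf entries at the PMD/PUD level, so pmd_huge() and pud_huge()
 * never see one, and follow_huge_pmd() should never be reached.
 */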
int pmd_huge(pmd_t pmd)
{
        return 0;
}

int pud_huge(pud_t pud)
{
        return 0;
}

struct page *
follow_huge_pmd(struct mm_struct *mm, unsigned long address,
                pmd_t *pmd, int write)
{
        BUG();
        return NULL;
}
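
/*
 * Lockless get_user_pages_fast() helper for a single hugepte: record the
 * subpages covered by [addr, end), take the corresponding number of
 * speculative references on the compound head, then recheck that the PTE
 * has not changed underneath us.
 */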
static noinline int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
                                unsigned long end, int write, struct page **pages,
                                int *nr)
{
        unsigned long mask;
        unsigned long pte_end;
        struct page *head, *page;
        pte_t pte;
        int refs;

        pte_end = (addr + sz) & ~(sz-1);
        if (pte_end < end)
                end = pte_end;

        pte = *ptep;
        mask = _PAGE_PRESENT | _PAGE_USER;
        if (write)
                mask |= _PAGE_RW;

        if ((pte_val(pte) & mask) != mask)
                return 0;

        /* hugepages are never "special" */
        VM_BUG_ON(!pfn_valid(pte_pfn(pte)));

        refs = 0;
        head = pte_page(pte);

        page = head + ((addr & (sz-1)) >> PAGE_SHIFT);
        do {
                VM_BUG_ON(compound_head(page) != head);
                pages[*nr] = page;
                (*nr)++;
                page++;
                refs++;
        } while (addr += PAGE_SIZE, addr != end);

        if (!page_cache_add_speculative(head, refs)) {
                *nr -= refs;
                return 0;
        }

        if (unlikely(pte_val(pte) != pte_val(*ptep))) {
                /* Could be optimized better */
                while (*nr) {
                        put_page(page);
                        (*nr)--;
                }
        }

        return 1;
}
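
/*
 * Iterate gup_hugepte() over every hugepte in the part of a hugepd that
 * overlaps [addr, end); each hugepte maps a naturally aligned region of
 * size 1 << hugepd_shift().
 */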
int gup_hugepd(hugepd_t *hugepd, unsigned pdshift,
               unsigned long addr, unsigned long end,
               int write, struct page **pages, int *nr)
{
        pte_t *ptep;
        unsigned long sz = 1UL << hugepd_shift(*hugepd);

        ptep = hugepte_offset(hugepd, addr, pdshift);
        do {
                if (!gup_hugepte(ptep, sz, addr, end, write, pages, nr))
                        return 0;
        } while (ptep++, addr += sz, addr != end);

        return 1;
}
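
/*
 * mmap() of a hugetlbfs file: defer to the slice allocator so that the
 * mapping lands in address slices configured for this hugepage size.
 */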
unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
                                        unsigned long len, unsigned long pgoff,
                                        unsigned long flags)
{
        struct hstate *hstate = hstate_file(file);
        int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate));

        return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1, 0);
}

unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
{
        unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start);

        return 1UL << mmu_psize_to_shift(psize);
}

/*
 * Called by asm hashtable.S for doing lazy icache flush
 */
static unsigned int hash_huge_page_do_lazy_icache(unsigned long rflags,
                                                  pte_t pte, int trap,
                                                  unsigned long sz)
{
        struct page *page;
        int i;

        if (!pfn_valid(pte_pfn(pte)))
                return rflags;

        page = pte_page(pte);

        /* page is dirty */
        if (!test_bit(PG_arch_1, &page->flags) && !PageReserved(page)) {
                if (trap == 0x400) {
                        for (i = 0; i < (sz / PAGE_SIZE); i++)
                                __flush_dcache_icache(page_address(page+i));
                        set_bit(PG_arch_1, &page->flags);
                } else {
                        rflags |= HPTE_R_N;
                }
        }
        return rflags;
}
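
/*
 * Hash-fault handler for huge pages: build or update the hash page table
 * entry (HPTE) for a faulting hugepage access.  Returns 0 on success and
 * 1 if access is not permitted, in which case the fault is sent up to
 * do_page_fault().
 */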
int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
                     pte_t *ptep, unsigned long trap, int local, int ssize,
                     unsigned int shift, unsigned int mmu_psize)
{
        unsigned long old_pte, new_pte;
        unsigned long va, rflags, pa, sz;
        long slot;
        int err = 1;

        BUG_ON(shift != mmu_psize_defs[mmu_psize].shift);

        /* Search the Linux page table for a match with va */
        va = hpt_va(ea, vsid, ssize);

        /*
         * Check the user's access rights to the page.  If access should be
         * prevented then send the problem up to do_page_fault.
         */
        if (unlikely(access & ~pte_val(*ptep)))
                goto out;
        /*
         * At this point, we have a pte (old_pte) which can be used to build
         * or update an HPTE. There are 2 cases:
         *
         * 1. There is a valid (present) pte with no associated HPTE (this is
         *      the most common case)
         * 2. There is a valid (present) pte with an associated HPTE. The
         *      current values of the pp bits in the HPTE prevent access
         *      because we are doing software DIRTY bit management and the
         *      page is currently not DIRTY.
         */

        do {
                old_pte = pte_val(*ptep);
                if (old_pte & _PAGE_BUSY)
                        goto out;
                new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED;
        } while(old_pte != __cmpxchg_u64((unsigned long *)ptep,
                                         old_pte, new_pte));

        rflags = 0x2 | (!(new_pte & _PAGE_RW));
        /* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */
        rflags |= ((new_pte & _PAGE_EXEC) ? 0 : HPTE_R_N);
        sz = ((1UL) << shift);
        if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
                /* No CPU has hugepages but lacks no execute, so we
                 * don't need to worry about that case */
                rflags = hash_huge_page_do_lazy_icache(rflags, __pte(old_pte),
                                                       trap, sz);

        /* Check if pte already has an hpte (case 2) */
        if (unlikely(old_pte & _PAGE_HASHPTE)) {
                /* There MIGHT be an HPTE for this pte */
                unsigned long hash, slot;

                hash = hpt_hash(va, shift, ssize);
                if (old_pte & _PAGE_F_SECOND)
                        hash = ~hash;
                slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
                slot += (old_pte & _PAGE_F_GIX) >> 12;

                if (ppc_md.hpte_updatepp(slot, rflags, va, mmu_psize,
                                         ssize, local) == -1)
                        old_pte &= ~_PAGE_HPTEFLAGS;
        }

        if (likely(!(old_pte & _PAGE_HASHPTE))) {
                unsigned long hash = hpt_hash(va, shift, ssize);
                unsigned long hpte_group;

                pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT;

repeat:
                hpte_group = ((hash & htab_hash_mask) *
                              HPTES_PER_GROUP) & ~0x7UL;

                /* clear HPTE slot information in new PTE */
#ifdef CONFIG_PPC_64K_PAGES
                new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HPTE_SUB0;
#else
                new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE;
#endif
                /* Add in WIMG bits */
                rflags |= (new_pte & (_PAGE_WRITETHRU | _PAGE_NO_CACHE |
                                      _PAGE_COHERENT | _PAGE_GUARDED));

                /* Insert into the hash table, primary slot */
                slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, 0,
                                          mmu_psize, ssize);

                /* Primary is full, try the secondary */
                if (unlikely(slot == -1)) {
                        hpte_group = ((~hash & htab_hash_mask) *
                                      HPTES_PER_GROUP) & ~0x7UL;
                        slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags,
                                                  HPTE_V_SECONDARY,
                                                  mmu_psize, ssize);
                        if (slot == -1) {
                                if (mftb() & 0x1)
                                        hpte_group = ((hash & htab_hash_mask) *
                                                      HPTES_PER_GROUP)&~0x7UL;

                                ppc_md.hpte_remove(hpte_group);
                                goto repeat;
                        }
                }

                if (unlikely(slot == -2))
                        panic("hash_huge_page: pte_insert failed\n");

                new_pte |= (slot << 12) & (_PAGE_F_SECOND | _PAGE_F_GIX);
        }

        /*
         * No need to use ldarx/stdcx here
         */
        *ptep = __pte(new_pte & ~_PAGE_BUSY);

        err = 0;

out:
        return err;
}
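
/*
 * Validate a requested huge page size and register it with the generic
 * hugetlb code if the MMU actually supports it on this machine.
 */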
static int __init add_huge_page_size(unsigned long long size)
{
        int shift = __ffs(size);
        int mmu_psize;

        /* Check that it is a page size supported by the hardware and
         * that it fits within pagetable and slice limits. */
        if (!is_power_of_2(size)
            || (shift > SLICE_HIGH_SHIFT) || (shift <= PAGE_SHIFT))
                return -EINVAL;

        if ((mmu_psize = shift_to_mmu_psize(shift)) < 0)
                return -EINVAL;

#ifdef CONFIG_SPU_FS_64K_LS
        /* Disable support for 64K huge pages when 64K SPU local store
         * support is enabled as the current implementation conflicts.
         */
        if (shift == PAGE_SHIFT_64K)
                return -EINVAL;
#endif /* CONFIG_SPU_FS_64K_LS */

        BUG_ON(mmu_psize_defs[mmu_psize].shift != shift);

        /* Return if huge page size has already been setup */
        if (size_to_hstate(size))
                return 0;

        hugetlb_add_hstate(shift - PAGE_SHIFT);

        return 0;
}

static int __init hugepage_setup_sz(char *str)
{
        unsigned long long size;

        size = memparse(str, &str);

        if (add_huge_page_size(size) != 0)
                printk(KERN_WARNING "Invalid huge page size specified(%llu)\n", size);

        return 1;
}
__setup("hugepagesz=", hugepage_setup_sz);
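
/*
 * Boot-time initialisation: create a pgtable cache for every supported
 * huge page size (sized by the gap between the hugepage shift and the
 * directory shift above it) and pick the default huge page size.
 */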
static int __init hugetlbpage_init(void)
{
        int psize;

        if (!cpu_has_feature(CPU_FTR_16M_PAGE))
                return -ENODEV;

        for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
                unsigned shift;
                unsigned pdshift;

                if (!mmu_psize_defs[psize].shift)
                        continue;

                shift = mmu_psize_to_shift(psize);

                if (add_huge_page_size(1ULL << shift) < 0)
                        continue;

                if (shift < PMD_SHIFT)
                        pdshift = PMD_SHIFT;
                else if (shift < PUD_SHIFT)
                        pdshift = PUD_SHIFT;
                else
                        pdshift = PGDIR_SHIFT;

                pgtable_cache_add(pdshift - shift, NULL);
                if (!PGT_CACHE(pdshift - shift))
                        panic("hugetlbpage_init(): could not create "
                              "pgtable cache for %d bit pagesize\n", shift);
        }

        /* Set default large page size. Currently, we pick 16M or 1M
         * depending on what is available
         */
        if (mmu_psize_defs[MMU_PAGE_16M].shift)
                HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_16M].shift;
        else if (mmu_psize_defs[MMU_PAGE_1M].shift)
                HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_1M].shift;

        return 0;
}

module_init(hugetlbpage_init);