// SPDX-License-Identifier: GPL-2.0
/*
 *  IBM System z Huge TLB Page Support for Kernel.
 *
 *    Copyright IBM Corp. 2007,2016
 *    Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com>
 */

#define KMSG_COMPONENT "hugetlb"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/mm.h>
#include <linux/hugetlb.h>

/*
 * If the bit selected by single-bit bitmask "a" is set within "x", move
 * it to the position indicated by single-bit bitmask "b".
 */
#define move_set_bit(x, a, b)	(((x) & (a)) >> ilog2(a) << ilog2(b))
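
/*
 * A worked example with illustrative masks (not actual pte bits):
 * move_set_bit(0x104, 0x004, 0x400) isolates bit 2 of the value, shifts
 * it down by ilog2(0x004) = 2 and back up by ilog2(0x400) = 10, yielding
 * 0x400. If bit 2 were clear, the result would be 0.
 */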

static inline unsigned long __pte_to_rste(pte_t pte)
{
        unsigned long rste;

        /*
         * Convert encoding               pte bits      pmd / pud bits
         *                              lIR.uswrdy.p    dy..R...I...wr
         * empty                        010.000000.0 -> 00..0...1...00
         * prot-none, clean, old        111.000000.1 -> 00..1...1...00
         * prot-none, clean, young      111.000001.1 -> 01..1...1...00
         * prot-none, dirty, old        111.000010.1 -> 10..1...1...00
         * prot-none, dirty, young      111.000011.1 -> 11..1...1...00
         * read-only, clean, old        111.000100.1 -> 00..1...1...01
         * read-only, clean, young      101.000101.1 -> 01..1...0...01
         * read-only, dirty, old        111.000110.1 -> 10..1...1...01
         * read-only, dirty, young      101.000111.1 -> 11..1...0...01
         * read-write, clean, old       111.001100.1 -> 00..1...1...11
         * read-write, clean, young     101.001101.1 -> 01..1...0...11
         * read-write, dirty, old       110.001110.1 -> 10..0...1...11
         * read-write, dirty, young     100.001111.1 -> 11..0...0...11
         * HW-bits: R read-only, I invalid
         * SW-bits: p present, y young, d dirty, r read, w write, s special,
         *          u unused, l large
         */
        if (pte_present(pte)) {
                rste = pte_val(pte) & PAGE_MASK;
                rste |= move_set_bit(pte_val(pte), _PAGE_READ,
                                     _SEGMENT_ENTRY_READ);
                rste |= move_set_bit(pte_val(pte), _PAGE_WRITE,
                                     _SEGMENT_ENTRY_WRITE);
                rste |= move_set_bit(pte_val(pte), _PAGE_INVALID,
                                     _SEGMENT_ENTRY_INVALID);
                rste |= move_set_bit(pte_val(pte), _PAGE_PROTECT,
                                     _SEGMENT_ENTRY_PROTECT);
                rste |= move_set_bit(pte_val(pte), _PAGE_DIRTY,
                                     _SEGMENT_ENTRY_DIRTY);
                rste |= move_set_bit(pte_val(pte), _PAGE_YOUNG,
                                     _SEGMENT_ENTRY_YOUNG);
#ifdef CONFIG_MEM_SOFT_DIRTY
                rste |= move_set_bit(pte_val(pte), _PAGE_SOFT_DIRTY,
                                     _SEGMENT_ENTRY_SOFT_DIRTY);
#endif
                rste |= move_set_bit(pte_val(pte), _PAGE_NOEXEC,
                                     _SEGMENT_ENTRY_NOEXEC);
        } else
                rste = _SEGMENT_ENTRY_EMPTY;
        return rste;
}
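
/*
 * Tracing one row of the table above: a present read-write, dirty, young
 * pte (100.001111.1) has _PAGE_READ, _PAGE_WRITE, _PAGE_DIRTY and
 * _PAGE_YOUNG set while _PAGE_INVALID and _PAGE_PROTECT are clear, so
 * __pte_to_rste() produces an rste with the READ, WRITE, DIRTY and YOUNG
 * segment bits set and INVALID and PROTECT clear (11..0...0...11).
 */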

static inline pte_t __rste_to_pte(unsigned long rste)
{
        int present;
        pte_t pte;

        if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
                present = pud_present(__pud(rste));
        else
                present = pmd_present(__pmd(rste));
        /*
         * Convert encoding             pmd / pud bits      pte bits
         *                              dy..R...I...wr    lIR.uswrdy.p
         * empty                        00..0...1...00 -> 010.000000.0
         * prot-none, clean, old        00..1...1...00 -> 111.000000.1
         * prot-none, clean, young      01..1...1...00 -> 111.000001.1
         * prot-none, dirty, old        10..1...1...00 -> 111.000010.1
         * prot-none, dirty, young      11..1...1...00 -> 111.000011.1
         * read-only, clean, old        00..1...1...01 -> 111.000100.1
         * read-only, clean, young      01..1...0...01 -> 101.000101.1
         * read-only, dirty, old        10..1...1...01 -> 111.000110.1
         * read-only, dirty, young      11..1...0...01 -> 101.000111.1
         * read-write, clean, old       00..1...1...11 -> 111.001100.1
         * read-write, clean, young     01..1...0...11 -> 101.001101.1
         * read-write, dirty, old       10..0...1...11 -> 110.001110.1
         * read-write, dirty, young     11..0...0...11 -> 100.001111.1
         * HW-bits: R read-only, I invalid
         * SW-bits: p present, y young, d dirty, r read, w write, s special,
         *          u unused, l large
         */
        if (present) {
                pte_val(pte) = rste & _SEGMENT_ENTRY_ORIGIN_LARGE;
                pte_val(pte) |= _PAGE_LARGE | _PAGE_PRESENT;
                pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_READ,
                                             _PAGE_READ);
                pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_WRITE,
                                             _PAGE_WRITE);
                pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_INVALID,
                                             _PAGE_INVALID);
                pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_PROTECT,
                                             _PAGE_PROTECT);
                pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_DIRTY,
                                             _PAGE_DIRTY);
                pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_YOUNG,
                                             _PAGE_YOUNG);
#ifdef CONFIG_MEM_SOFT_DIRTY
                /* Soft-dirty moves to _PAGE_SOFT_DIRTY, not _PAGE_DIRTY */
                pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_SOFT_DIRTY,
                                             _PAGE_SOFT_DIRTY);
#endif
                pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_NOEXEC,
                                             _PAGE_NOEXEC);
        } else
                pte_val(pte) = _PAGE_INVALID;
        return pte;
}
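
/*
 * For the encodings listed, the table above is the inverse of the one in
 * __pte_to_rste(): converting a present pte to an rste and back recovers
 * the same pte bits, with _PAGE_LARGE set in addition on the way back.
 */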

static void clear_huge_pte_skeys(struct mm_struct *mm, unsigned long rste)
{
        struct page *page;
        unsigned long size, paddr;

        if (!mm_uses_skeys(mm) ||
            rste & _SEGMENT_ENTRY_INVALID)
                return;

        if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) {
                page = pud_page(__pud(rste));
                size = PUD_SIZE;
                paddr = rste & PUD_MASK;
        } else {
                page = pmd_page(__pmd(rste));
                size = PMD_SIZE;
                paddr = rste & PMD_MASK;
        }

        /* Initialize the storage keys of the range only once */
        if (!test_and_set_bit(PG_arch_1, &page->flags))
                __storage_key_init_range(paddr, paddr + size - 1);
}

void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
                     pte_t *ptep, pte_t pte)
{
        unsigned long rste;

        rste = __pte_to_rste(pte);
        if (!MACHINE_HAS_NX)
                rste &= ~_SEGMENT_ENTRY_NOEXEC;

        /* Set correct table type for 2G hugepages */
        if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
                rste |= _REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE;
        else
                rste |= _SEGMENT_ENTRY_LARGE;
        clear_huge_pte_skeys(mm, rste);
        pte_val(*ptep) = rste;
}

pte_t huge_ptep_get(pte_t *ptep)
{
        return __rste_to_pte(pte_val(*ptep));
}

pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
                              unsigned long addr, pte_t *ptep)
{
        pte_t pte = huge_ptep_get(ptep);
        pmd_t *pmdp = (pmd_t *) ptep;
        pud_t *pudp = (pud_t *) ptep;

        if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
                pudp_xchg_direct(mm, addr, pudp, __pud(_REGION3_ENTRY_EMPTY));
        else
                pmdp_xchg_direct(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
        return pte;
}

pte_t *huge_pte_alloc(struct mm_struct *mm,
                      unsigned long addr, unsigned long sz)
{
        pgd_t *pgdp;
        p4d_t *p4dp;
        pud_t *pudp;
        pmd_t *pmdp = NULL;

        pgdp = pgd_offset(mm, addr);
        p4dp = p4d_alloc(mm, pgdp, addr);
        if (p4dp) {
                pudp = pud_alloc(mm, p4dp, addr);
                if (pudp) {
                        if (sz == PUD_SIZE)
                                return (pte_t *) pudp;
                        else if (sz == PMD_SIZE)
                                pmdp = pmd_alloc(mm, pudp, addr);
                }
        }
        return (pte_t *) pmdp;
}

pte_t *huge_pte_offset(struct mm_struct *mm,
                       unsigned long addr, unsigned long sz)
{
        pgd_t *pgdp;
        p4d_t *p4dp;
        pud_t *pudp;
        pmd_t *pmdp = NULL;

        pgdp = pgd_offset(mm, addr);
        if (pgd_present(*pgdp)) {
                p4dp = p4d_offset(pgdp, addr);
                if (p4d_present(*p4dp)) {
                        pudp = pud_offset(p4dp, addr);
                        if (pud_present(*pudp)) {
                                if (pud_large(*pudp))
                                        return (pte_t *) pudp;
                                pmdp = pmd_offset(pudp, addr);
                        }
                }
        }
        return (pte_t *) pmdp;
}
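
/*
 * In both walks above, the returned "pte_t *" actually points at a pud
 * entry for 2 GB (PUD_SIZE) mappings and at a pmd entry for 1 MB
 * (PMD_SIZE) mappings; set_huge_pte_at() and huge_ptep_get() use the
 * region/segment type bits of the entry to tell the two cases apart.
 */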

int pmd_huge(pmd_t pmd)
{
        return pmd_large(pmd);
}

int pud_huge(pud_t pud)
{
        return pud_large(pud);
}

struct page *
follow_huge_pud(struct mm_struct *mm, unsigned long address,
                pud_t *pud, int flags)
{
        if (flags & FOLL_GET)
                return NULL;

        return pud_page(*pud) + ((address & ~PUD_MASK) >> PAGE_SHIFT);
}
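
/*
 * The arithmetic in the return statement above is the usual tail-page
 * calculation: (address & ~PUD_MASK) is the byte offset into the 2 GB
 * region, and shifting it right by PAGE_SHIFT turns it into a page index
 * relative to the head page returned by pud_page().
 */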

static __init int setup_hugepagesz(char *opt)
{
        unsigned long size;
        char *string = opt;

        size = memparse(opt, &opt);
        if (MACHINE_HAS_EDAT1 && size == PMD_SIZE) {
                hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
        } else if (MACHINE_HAS_EDAT2 && size == PUD_SIZE) {
                hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
        } else {
                hugetlb_bad_size();
                pr_err("hugepagesz= specifies an unsupported page size %s\n",
                       string);
                return 0;
        }
        return 1;
}
__setup("hugepagesz=", setup_hugepagesz);
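
/*
 * Example usage, assuming a machine with both EDAT1 and EDAT2: booting
 * with "hugepagesz=1M hugepagesz=2G" on the kernel command line registers
 * both the 1 MB (PMD_SIZE) and the 2 GB (PUD_SIZE) hugepage sizes.
 */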