/*
 *    Copyright IBM Corp. 2007, 2011
 *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 */

#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/spinlock.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/swapops.h>
#include <linux/sysctl.h>
#include <linux/ksm.h>
#include <linux/mman.h>

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/page-states.h>

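/*
 * TLB flush helpers for pte entries: the "direct" variant invalidates the
 * entry and flushes the TLB immediately with IPTE (local-only if this CPU
 * is the sole user of the mm), while the "lazy" variant only marks the
 * entry invalid and defers the flush when no other CPU has the mm attached.
 */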
static inline pte_t ptep_flush_direct(struct mm_struct *mm,
                                      unsigned long addr, pte_t *ptep)
{
        pte_t old;

        old = *ptep;
        if (unlikely(pte_val(old) & _PAGE_INVALID))
                return old;
        atomic_inc(&mm->context.flush_count);
        if (MACHINE_HAS_TLB_LC &&
            cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
                __ptep_ipte(addr, ptep, IPTE_LOCAL);
        else
                __ptep_ipte(addr, ptep, IPTE_GLOBAL);
        atomic_dec(&mm->context.flush_count);
        return old;
}

static inline pte_t ptep_flush_lazy(struct mm_struct *mm,
                                    unsigned long addr, pte_t *ptep)
{
        pte_t old;

        old = *ptep;
        if (unlikely(pte_val(old) & _PAGE_INVALID))
                return old;
        atomic_inc(&mm->context.flush_count);
        if (cpumask_equal(&mm->context.cpu_attach_mask,
                          cpumask_of(smp_processor_id()))) {
                pte_val(*ptep) |= _PAGE_INVALID;
                mm->context.flush_mm = 1;
        } else
                __ptep_ipte(addr, ptep, IPTE_GLOBAL);
        atomic_dec(&mm->context.flush_count);
        return old;
}

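/*
 * Each pte has a corresponding PGSTE located PTRS_PER_PTE entries behind it
 * in the same page table page. It holds the guest storage key and usage
 * state used by KVM. The PCL bit in the PGSTE serves as a per-entry lock,
 * taken with a compare-and-swap loop in pgste_get_lock() and released by
 * pgste_set_unlock().
 */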
static inline pgste_t pgste_get_lock(pte_t *ptep)
{
        unsigned long new = 0;
#ifdef CONFIG_PGSTE
        unsigned long old;

        asm(
                " lg %0,%2\n"
                "0: lgr %1,%0\n"
                " nihh %0,0xff7f\n"     /* clear PCL bit in old */
                " oihh %1,0x0080\n"     /* set PCL bit in new */
                " csg %0,%1,%2\n"
                " jl 0b\n"
                : "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE])
                : "Q" (ptep[PTRS_PER_PTE]) : "cc", "memory");
#endif
        return __pgste(new);
}

static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
        asm(
                " nihh %1,0xff7f\n"     /* clear PCL bit */
                " stg %1,%0\n"
                : "=Q" (ptep[PTRS_PER_PTE])
                : "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE])
                : "cc", "memory");
#endif
}

static inline pgste_t pgste_get(pte_t *ptep)
{
        unsigned long pgste = 0;
#ifdef CONFIG_PGSTE
        pgste = *(unsigned long *)(ptep + PTRS_PER_PTE);
#endif
        return __pgste(pgste);
}

static inline void pgste_set(pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
        *(pgste_t *)(ptep + PTRS_PER_PTE) = pgste;
#endif
}

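/*
 * Fold the hardware storage key of the mapped page into the PGSTE: the
 * changed and referenced bits become the guest GC/GR bits, and the access
 * key and fetch-protection bit are copied into the PGSTE as well.
 */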
static inline pgste_t pgste_update_all(pte_t pte, pgste_t pgste,
                                       struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
        unsigned long address, bits, skey;

        if (!mm_use_skey(mm) || pte_val(pte) & _PAGE_INVALID)
                return pgste;
        address = pte_val(pte) & PAGE_MASK;
        skey = (unsigned long) page_get_storage_key(address);
        bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
        /* Transfer page changed & referenced bit to guest bits in pgste */
        pgste_val(pgste) |= bits << 48;         /* GR bit & GC bit */
        /* Copy page access key and fetch protection bit to pgste */
        pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
        pgste_val(pgste) |= (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
#endif
        return pgste;
}

static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry,
                                 struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
        unsigned long address;
        unsigned long nkey;

        if (!mm_use_skey(mm) || pte_val(entry) & _PAGE_INVALID)
                return;
        VM_BUG_ON(!(pte_val(*ptep) & _PAGE_INVALID));
        address = pte_val(entry) & PAGE_MASK;
        /*
         * Set page access key and fetch protection bit from pgste.
         * The guest C/R information is still in the PGSTE, set real
         * key C/R to 0.
         */
        nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
        nkey |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
        page_set_storage_key(address, nkey, 0);
#endif
}

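/*
 * Install a new pte; if the entry allows write access, record a pending
 * user-dirty state (PGSTE_UC_BIT) so that test_and_clear_guest_dirty()
 * will report the page as dirty.
 */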
static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
{
#ifdef CONFIG_PGSTE
        if ((pte_val(entry) & _PAGE_PRESENT) &&
            (pte_val(entry) & _PAGE_WRITE) &&
            !(pte_val(entry) & _PAGE_INVALID)) {
                if (!MACHINE_HAS_ESOP) {
                        /*
                         * Without enhanced suppression-on-protection force
                         * the dirty bit on for all writable ptes.
                         */
                        pte_val(entry) |= _PAGE_DIRTY;
                        pte_val(entry) &= ~_PAGE_PROTECT;
                }
                if (!(pte_val(entry) & _PAGE_PROTECT))
                        /* This pte allows write access, set user-dirty */
                        pgste_val(pgste) |= PGSTE_UC_BIT;
        }
#endif
        *ptep = entry;
        return pgste;
}

static inline pgste_t pgste_pte_notify(struct mm_struct *mm,
                                       unsigned long addr,
                                       pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
        unsigned long bits;

        bits = pgste_val(pgste) & (PGSTE_IN_BIT | PGSTE_VSIE_BIT);
        if (bits) {
                pgste_val(pgste) ^= bits;
                ptep_notify(mm, addr, ptep, bits);
        }
#endif
        return pgste;
}

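/*
 * ptep_xchg_start() and ptep_xchg_commit() bracket a pte exchange: start
 * takes the PGSTE lock and delivers pending notifications, commit transfers
 * storage key and usage state between pte and PGSTE, installs the new pte
 * and drops the lock. In between the caller flushes the old entry.
 */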
static inline pgste_t ptep_xchg_start(struct mm_struct *mm,
                                      unsigned long addr, pte_t *ptep)
{
        pgste_t pgste = __pgste(0);

        if (mm_has_pgste(mm)) {
                pgste = pgste_get_lock(ptep);
                pgste = pgste_pte_notify(mm, addr, ptep, pgste);
        }
        return pgste;
}

static inline pte_t ptep_xchg_commit(struct mm_struct *mm,
                                     unsigned long addr, pte_t *ptep,
                                     pgste_t pgste, pte_t old, pte_t new)
{
        if (mm_has_pgste(mm)) {
                if (pte_val(old) & _PAGE_INVALID)
                        pgste_set_key(ptep, pgste, new, mm);
                if (pte_val(new) & _PAGE_INVALID) {
                        pgste = pgste_update_all(old, pgste, mm);
                        if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) ==
                            _PGSTE_GPS_USAGE_UNUSED)
                                pte_val(old) |= _PAGE_UNUSED;
                }
                pgste = pgste_set_pte(ptep, pgste, new);
                pgste_set_unlock(ptep, pgste);
        } else {
                *ptep = new;
        }
        return old;
}

pte_t ptep_xchg_direct(struct mm_struct *mm, unsigned long addr,
                       pte_t *ptep, pte_t new)
{
        pgste_t pgste;
        pte_t old;

        preempt_disable();
        pgste = ptep_xchg_start(mm, addr, ptep);
        old = ptep_flush_direct(mm, addr, ptep);
        old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
        preempt_enable();
        return old;
}
EXPORT_SYMBOL(ptep_xchg_direct);

pte_t ptep_xchg_lazy(struct mm_struct *mm, unsigned long addr,
                     pte_t *ptep, pte_t new)
{
        pgste_t pgste;
        pte_t old;

        preempt_disable();
        pgste = ptep_xchg_start(mm, addr, ptep);
        old = ptep_flush_lazy(mm, addr, ptep);
        old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
        preempt_enable();
        return old;
}
EXPORT_SYMBOL(ptep_xchg_lazy);

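/*
 * ptep_modify_prot_start() invalidates the pte, keeps the PGSTE locked (if
 * the mm has PGSTEs) and leaves preemption disabled; ptep_modify_prot_commit()
 * installs the modified pte, unlocks the PGSTE and re-enables preemption.
 */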
pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr,
                             pte_t *ptep)
{
        pgste_t pgste;
        pte_t old;

        preempt_disable();
        pgste = ptep_xchg_start(mm, addr, ptep);
        old = ptep_flush_lazy(mm, addr, ptep);
        if (mm_has_pgste(mm)) {
                pgste = pgste_update_all(old, pgste, mm);
                pgste_set(ptep, pgste);
        }
        return old;
}
EXPORT_SYMBOL(ptep_modify_prot_start);

void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
                             pte_t *ptep, pte_t pte)
{
        pgste_t pgste;

        if (!MACHINE_HAS_NX)
                pte_val(pte) &= ~_PAGE_NOEXEC;
        if (mm_has_pgste(mm)) {
                pgste = pgste_get(ptep);
                pgste_set_key(ptep, pgste, pte, mm);
                pgste = pgste_set_pte(ptep, pgste, pte);
                pgste_set_unlock(ptep, pgste);
        } else {
                *ptep = pte;
        }
        preempt_enable();
}
EXPORT_SYMBOL(ptep_modify_prot_commit);

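/*
 * Flush helpers for segment (pmd) and region (pud) table entries: IDTE is
 * used when available, older machines fall back to CSP on the entry.
 */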
static inline pmd_t pmdp_flush_direct(struct mm_struct *mm,
                                      unsigned long addr, pmd_t *pmdp)
{
        pmd_t old;

        old = *pmdp;
        if (pmd_val(old) & _SEGMENT_ENTRY_INVALID)
                return old;
        if (!MACHINE_HAS_IDTE) {
                __pmdp_csp(pmdp);
                return old;
        }
        atomic_inc(&mm->context.flush_count);
        if (MACHINE_HAS_TLB_LC &&
            cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
                __pmdp_idte(addr, pmdp, IDTE_LOCAL);
        else
                __pmdp_idte(addr, pmdp, IDTE_GLOBAL);
        atomic_dec(&mm->context.flush_count);
        return old;
}

static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm,
                                    unsigned long addr, pmd_t *pmdp)
{
        pmd_t old;

        old = *pmdp;
        if (pmd_val(old) & _SEGMENT_ENTRY_INVALID)
                return old;
        atomic_inc(&mm->context.flush_count);
        if (cpumask_equal(&mm->context.cpu_attach_mask,
                          cpumask_of(smp_processor_id()))) {
                pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID;
                mm->context.flush_mm = 1;
        } else if (MACHINE_HAS_IDTE)
                __pmdp_idte(addr, pmdp, IDTE_GLOBAL);
        else
                __pmdp_csp(pmdp);
        atomic_dec(&mm->context.flush_count);
        return old;
}

pmd_t pmdp_xchg_direct(struct mm_struct *mm, unsigned long addr,
                       pmd_t *pmdp, pmd_t new)
{
        pmd_t old;

        preempt_disable();
        old = pmdp_flush_direct(mm, addr, pmdp);
        *pmdp = new;
        preempt_enable();
        return old;
}
EXPORT_SYMBOL(pmdp_xchg_direct);

pmd_t pmdp_xchg_lazy(struct mm_struct *mm, unsigned long addr,
                     pmd_t *pmdp, pmd_t new)
{
        pmd_t old;

        preempt_disable();
        old = pmdp_flush_lazy(mm, addr, pmdp);
        *pmdp = new;
        preempt_enable();
        return old;
}
EXPORT_SYMBOL(pmdp_xchg_lazy);

static inline pud_t pudp_flush_direct(struct mm_struct *mm,
                                      unsigned long addr, pud_t *pudp)
{
        pud_t old;

        old = *pudp;
        if (pud_val(old) & _REGION_ENTRY_INVALID)
                return old;
        if (!MACHINE_HAS_IDTE) {
                /*
                 * Invalid bit position is the same for pmd and pud, so we
                 * can reuse __pmdp_csp() here.
                 */
                __pmdp_csp((pmd_t *) pudp);
                return old;
        }
        atomic_inc(&mm->context.flush_count);
        if (MACHINE_HAS_TLB_LC &&
            cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
                __pudp_idte(addr, pudp, IDTE_LOCAL);
        else
                __pudp_idte(addr, pudp, IDTE_GLOBAL);
        atomic_dec(&mm->context.flush_count);
        return old;
}

pud_t pudp_xchg_direct(struct mm_struct *mm, unsigned long addr,
                       pud_t *pudp, pud_t new)
{
        pud_t old;

        preempt_disable();
        old = pudp_flush_direct(mm, addr, pudp);
        *pudp = new;
        preempt_enable();
        return old;
}
EXPORT_SYMBOL(pudp_xchg_direct);

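/*
 * For transparent huge pages the preallocated page table is kept on a
 * per-pmd FIFO list; the list_head is embedded in the page table itself.
 */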
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
                                pgtable_t pgtable)
{
        struct list_head *lh = (struct list_head *) pgtable;

        assert_spin_locked(pmd_lockptr(mm, pmdp));

        /* FIFO */
        if (!pmd_huge_pte(mm, pmdp))
                INIT_LIST_HEAD(lh);
        else
                list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
        pmd_huge_pte(mm, pmdp) = pgtable;
}

pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
{
        struct list_head *lh;
        pgtable_t pgtable;
        pte_t *ptep;

        assert_spin_locked(pmd_lockptr(mm, pmdp));

        /* FIFO */
        pgtable = pmd_huge_pte(mm, pmdp);
        lh = (struct list_head *) pgtable;
        if (list_empty(lh))
                pmd_huge_pte(mm, pmdp) = NULL;
        else {
                pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
                list_del(lh);
        }
        ptep = (pte_t *) pgtable;
        pte_val(*ptep) = _PAGE_INVALID;
        ptep++;
        pte_val(*ptep) = _PAGE_INVALID;
        return pgtable;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

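/*
 * Everything from here on is only built with CONFIG_PGSTE and is used for
 * KVM guests: it manipulates PGSTEs, guest storage keys and the CMMA (ESSA)
 * page usage states.
 */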
#ifdef CONFIG_PGSTE
void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr,
                     pte_t *ptep, pte_t entry)
{
        pgste_t pgste;

        /* the mm_has_pgste() check is done in set_pte_at() */
        preempt_disable();
        pgste = pgste_get_lock(ptep);
        pgste_val(pgste) &= ~_PGSTE_GPS_ZERO;
        pgste_set_key(ptep, pgste, entry, mm);
        pgste = pgste_set_pte(ptep, pgste, entry);
        pgste_set_unlock(ptep, pgste);
        preempt_enable();
}

void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
        pgste_t pgste;

        preempt_disable();
        pgste = pgste_get_lock(ptep);
        pgste_val(pgste) |= PGSTE_IN_BIT;
        pgste_set_unlock(ptep, pgste);
        preempt_enable();
}

/**
 * ptep_force_prot - change access rights of a locked pte
 * @mm: pointer to the process mm_struct
 * @addr: virtual address in the guest address space
 * @ptep: pointer to the page table entry
 * @prot: indicates guest access rights: PROT_NONE, PROT_READ or PROT_WRITE
 * @bit: pgste bit to set (e.g. for notification)
 *
 * Returns 0 if the access rights were changed and -EAGAIN if the current
 * and requested access rights are incompatible.
 */
int ptep_force_prot(struct mm_struct *mm, unsigned long addr,
                    pte_t *ptep, int prot, unsigned long bit)
{
        pte_t entry;
        pgste_t pgste;
        int pte_i, pte_p;

        pgste = pgste_get_lock(ptep);
        entry = *ptep;
        /* Check pte entry after all locks have been acquired */
        pte_i = pte_val(entry) & _PAGE_INVALID;
        pte_p = pte_val(entry) & _PAGE_PROTECT;
        if ((pte_i && (prot != PROT_NONE)) ||
            (pte_p && (prot & PROT_WRITE))) {
                pgste_set_unlock(ptep, pgste);
                return -EAGAIN;
        }
        /* Change access rights and set pgste bit */
        if (prot == PROT_NONE && !pte_i) {
                ptep_flush_direct(mm, addr, ptep);
                pgste = pgste_update_all(entry, pgste, mm);
                pte_val(entry) |= _PAGE_INVALID;
        }
        if (prot == PROT_READ && !pte_p) {
                ptep_flush_direct(mm, addr, ptep);
                pte_val(entry) &= ~_PAGE_INVALID;
                pte_val(entry) |= _PAGE_PROTECT;
        }
        pgste_val(pgste) |= bit;
        pgste = pgste_set_pte(ptep, pgste, entry);
        pgste_set_unlock(ptep, pgste);
        return 0;
}

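/*
 * ptep_shadow_pte() links a pte of a shadow (VSIE) address space to the pte
 * of the parent: the shadow pte receives the parent's page frame with the
 * protection bits combined, and the parent PGSTE is marked with
 * PGSTE_VSIE_BIT so that changes to the parent trigger a notification.
 */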
int ptep_shadow_pte(struct mm_struct *mm, unsigned long saddr,
                    pte_t *sptep, pte_t *tptep, pte_t pte)
{
        pgste_t spgste, tpgste;
        pte_t spte, tpte;
        int rc = -EAGAIN;

        if (!(pte_val(*tptep) & _PAGE_INVALID))
                return 0;       /* already shadowed */
        spgste = pgste_get_lock(sptep);
        spte = *sptep;
        if (!(pte_val(spte) & _PAGE_INVALID) &&
            !((pte_val(spte) & _PAGE_PROTECT) &&
              !(pte_val(pte) & _PAGE_PROTECT))) {
                pgste_val(spgste) |= PGSTE_VSIE_BIT;
                tpgste = pgste_get_lock(tptep);
                pte_val(tpte) = (pte_val(spte) & PAGE_MASK) |
                                (pte_val(pte) & _PAGE_PROTECT);
                /* don't touch the storage key - it belongs to parent pgste */
                tpgste = pgste_set_pte(tptep, tpgste, tpte);
                pgste_set_unlock(tptep, tpgste);
                rc = 1;
        }
        pgste_set_unlock(sptep, spgste);
        return rc;
}

void ptep_unshadow_pte(struct mm_struct *mm, unsigned long saddr, pte_t *ptep)
{
        pgste_t pgste;

        pgste = pgste_get_lock(ptep);
        /* notifier is called by the caller */
        ptep_flush_direct(mm, saddr, ptep);
        /* don't touch the storage key - it belongs to parent pgste */
        pgste = pgste_set_pte(ptep, pgste, __pte(_PAGE_INVALID));
        pgste_set_unlock(ptep, pgste);
}

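/*
 * ptep_zap_unused() discards ptes whose guest usage state marks them as
 * unused or logically zero: swap entries are released and the mm counters
 * are adjusted accordingly.
 */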
static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry)
{
        if (!non_swap_entry(entry))
                dec_mm_counter(mm, MM_SWAPENTS);
        else if (is_migration_entry(entry)) {
                struct page *page = migration_entry_to_page(entry);

                dec_mm_counter(mm, mm_counter(page));
        }
        free_swap_and_cache(entry);
}

void ptep_zap_unused(struct mm_struct *mm, unsigned long addr,
                     pte_t *ptep, int reset)
{
        unsigned long pgstev;
        pgste_t pgste;
        pte_t pte;

        /* Zap unused and logically-zero pages */
        preempt_disable();
        pgste = pgste_get_lock(ptep);
        pgstev = pgste_val(pgste);
        pte = *ptep;
        if (!reset && pte_swap(pte) &&
            ((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED ||
             (pgstev & _PGSTE_GPS_ZERO))) {
                ptep_zap_swap_entry(mm, pte_to_swp_entry(pte));
                pte_clear(mm, addr, ptep);
        }
        if (reset)
                pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK;
        pgste_set_unlock(ptep, pgste);
        preempt_enable();
}

void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
        unsigned long ptev;
        pgste_t pgste;

        /* Clear storage key */
        preempt_disable();
        pgste = pgste_get_lock(ptep);
        pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT |
                              PGSTE_GR_BIT | PGSTE_GC_BIT);
        ptev = pte_val(*ptep);
        if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE))
                page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 1);
        pgste_set_unlock(ptep, pgste);
        preempt_enable();
}

/*
 * Test and reset if a guest page is dirty
 */
bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr)
{
        spinlock_t *ptl;
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pgste_t pgste;
        pte_t *ptep;
        pte_t pte;
        bool dirty;

        pgd = pgd_offset(mm, addr);
        pud = pud_alloc(mm, pgd, addr);
        if (!pud)
                return false;
        pmd = pmd_alloc(mm, pud, addr);
        if (!pmd)
                return false;
        /* We can't run guests backed by huge pages, but userspace can
         * still set them up and then try to migrate them without any
         * migration support.
         */
        if (pmd_large(*pmd))
                return true;

        ptep = pte_alloc_map_lock(mm, pmd, addr, &ptl);
        if (unlikely(!ptep))
                return false;

        pgste = pgste_get_lock(ptep);
        dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT);
        pgste_val(pgste) &= ~PGSTE_UC_BIT;
        pte = *ptep;
        if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
                pgste = pgste_pte_notify(mm, addr, ptep, pgste);
                __ptep_ipte(addr, ptep, IPTE_GLOBAL);
                if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
                        pte_val(pte) |= _PAGE_PROTECT;
                else
                        pte_val(pte) |= _PAGE_INVALID;
                *ptep = pte;
        }
        pgste_set_unlock(ptep, pgste);

        spin_unlock(ptl);
        return dirty;
}
EXPORT_SYMBOL_GPL(test_and_clear_guest_dirty);

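/*
 * set_guest_storage_key() stores a guest storage key: the key and its guest
 * change/referenced state go into the PGSTE, and for resident (valid) ptes
 * the real storage key of the page is updated as well.
 */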
int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
                          unsigned char key, bool nq)
{
        unsigned long keyul;
        spinlock_t *ptl;
        pgste_t old, new;
        pte_t *ptep;

        ptep = get_locked_pte(mm, addr, &ptl);
        if (unlikely(!ptep))
                return -EFAULT;

        new = old = pgste_get_lock(ptep);
        pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT |
                            PGSTE_ACC_BITS | PGSTE_FP_BIT);
        keyul = (unsigned long) key;
        pgste_val(new) |= (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48;
        pgste_val(new) |= (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
        if (!(pte_val(*ptep) & _PAGE_INVALID)) {
                unsigned long address, bits, skey;

                address = pte_val(*ptep) & PAGE_MASK;
                skey = (unsigned long) page_get_storage_key(address);
                bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
                skey = key & (_PAGE_ACC_BITS | _PAGE_FP_BIT);
                /* Set storage key ACC and FP */
                page_set_storage_key(address, skey, !nq);
                /* Merge host changed & referenced into pgste */
                pgste_val(new) |= bits << 52;
        }
        /* changing the guest storage key is considered a change of the page */
        if ((pgste_val(new) ^ pgste_val(old)) &
            (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT))
                pgste_val(new) |= PGSTE_UC_BIT;

        pgste_set_unlock(ptep, new);
        pte_unmap_unlock(ptep, ptl);
        return 0;
}
EXPORT_SYMBOL(set_guest_storage_key);

/**
 * cond_set_guest_storage_key - conditionally set a guest storage key
 *                              (handling csske)
 * oldkey will be updated when either mr or mc is set and a pointer is given.
 *
 * Returns 0 if a guest's storage key update wasn't necessary, 1 if the guest
 * storage key was updated and -EFAULT on access errors.
 */
int cond_set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
                               unsigned char key, unsigned char *oldkey,
                               bool nq, bool mr, bool mc)
{
        unsigned char tmp, mask = _PAGE_ACC_BITS | _PAGE_FP_BIT;
        int rc;

        /* we can drop the pgste lock between getting and setting the key */
        if (mr | mc) {
                rc = get_guest_storage_key(current->mm, addr, &tmp);
                if (rc)
                        return rc;
                if (oldkey)
                        *oldkey = tmp;
                if (!mr)
                        mask |= _PAGE_REFERENCED;
                if (!mc)
                        mask |= _PAGE_CHANGED;
                if (!((tmp ^ key) & mask))
                        return 0;
        }
        rc = set_guest_storage_key(current->mm, addr, key, nq);
        return rc < 0 ? rc : 1;
}
EXPORT_SYMBOL(cond_set_guest_storage_key);

/**
 * reset_guest_reference_bit - reset a guest reference bit (rrbe), returning
 *                             the reference and changed bit.
 *
 * Returns < 0 in case of error, otherwise the cc to be reported to the guest.
 */
int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr)
{
        spinlock_t *ptl;
        pgste_t old, new;
        pte_t *ptep;
        int cc = 0;

        ptep = get_locked_pte(mm, addr, &ptl);
        if (unlikely(!ptep))
                return -EFAULT;

        new = old = pgste_get_lock(ptep);
        /* Reset guest reference bit only */
        pgste_val(new) &= ~PGSTE_GR_BIT;

        if (!(pte_val(*ptep) & _PAGE_INVALID)) {
                cc = page_reset_referenced(pte_val(*ptep) & PAGE_MASK);
                /* Merge real referenced bit into host-set */
                pgste_val(new) |= ((unsigned long) cc << 53) & PGSTE_HR_BIT;
        }
        /* Reflect guest's logical view, not physical */
        cc |= (pgste_val(old) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 49;
        /* Changing the guest storage key is considered a change of the page */
        if ((pgste_val(new) ^ pgste_val(old)) & PGSTE_GR_BIT)
                pgste_val(new) |= PGSTE_UC_BIT;

        pgste_set_unlock(ptep, new);
        pte_unmap_unlock(ptep, ptl);
        return cc;
}
EXPORT_SYMBOL(reset_guest_reference_bit);

int get_guest_storage_key(struct mm_struct *mm, unsigned long addr,
                          unsigned char *key)
{
        spinlock_t *ptl;
        pgste_t pgste;
        pte_t *ptep;

        ptep = get_locked_pte(mm, addr, &ptl);
        if (unlikely(!ptep))
                return -EFAULT;

        pgste = pgste_get_lock(ptep);
        *key = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
        if (!(pte_val(*ptep) & _PAGE_INVALID))
                *key = page_get_storage_key(pte_val(*ptep) & PAGE_MASK);
        /* Reflect guest's logical view, not physical */
        *key |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
        pgste_set_unlock(ptep, pgste);
        pte_unmap_unlock(ptep, ptl);
        return 0;
}
EXPORT_SYMBOL(get_guest_storage_key);

/**
 * pgste_perform_essa - perform ESSA actions on the PGSTE.
 * @mm: the memory context. It must have PGSTEs, no check is performed here!
 * @hva: the host virtual address of the page whose PGSTE is to be processed
 * @orc: the specific action to perform, see the ESSA_SET_* macros.
 * @oldpte: the PTE will be saved there if the pointer is not NULL.
 * @oldpgste: the old PGSTE will be saved there if the pointer is not NULL.
 *
 * Return: 1 if the page is to be added to the CBRL, otherwise 0,
 *         or < 0 in case of error. -EINVAL is returned for invalid values
 *         of orc, -EFAULT for invalid addresses.
 */
int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc,
                       unsigned long *oldpte, unsigned long *oldpgste)
{
        unsigned long pgstev;
        spinlock_t *ptl;
        pgste_t pgste;
        pte_t *ptep;
        int res = 0;

        WARN_ON_ONCE(orc > ESSA_MAX);
        if (unlikely(orc > ESSA_MAX))
                return -EINVAL;
        ptep = get_locked_pte(mm, hva, &ptl);
        if (unlikely(!ptep))
                return -EFAULT;
        pgste = pgste_get_lock(ptep);
        pgstev = pgste_val(pgste);
        if (oldpte)
                *oldpte = pte_val(*ptep);
        if (oldpgste)
                *oldpgste = pgstev;

        switch (orc) {
        case ESSA_GET_STATE:
                break;
        case ESSA_SET_STABLE:
                pgstev &= ~_PGSTE_GPS_USAGE_MASK;
                pgstev |= _PGSTE_GPS_USAGE_STABLE;
                break;
        case ESSA_SET_UNUSED:
                pgstev &= ~_PGSTE_GPS_USAGE_MASK;
                pgstev |= _PGSTE_GPS_USAGE_UNUSED;
                if (pte_val(*ptep) & _PAGE_INVALID)
                        res = 1;
                break;
        case ESSA_SET_VOLATILE:
                pgstev &= ~_PGSTE_GPS_USAGE_MASK;
                pgstev |= _PGSTE_GPS_USAGE_VOLATILE;
                if (pte_val(*ptep) & _PAGE_INVALID)
                        res = 1;
                break;
        case ESSA_SET_POT_VOLATILE:
                pgstev &= ~_PGSTE_GPS_USAGE_MASK;
                if (!(pte_val(*ptep) & _PAGE_INVALID)) {
                        pgstev |= _PGSTE_GPS_USAGE_POT_VOLATILE;
                        break;
                }
                if (pgstev & _PGSTE_GPS_ZERO) {
                        pgstev |= _PGSTE_GPS_USAGE_VOLATILE;
                        break;
                }
                if (!(pgstev & PGSTE_GC_BIT)) {
                        pgstev |= _PGSTE_GPS_USAGE_VOLATILE;
                        res = 1;
                        break;
                }
                break;
        case ESSA_SET_STABLE_RESIDENT:
                pgstev &= ~_PGSTE_GPS_USAGE_MASK;
                pgstev |= _PGSTE_GPS_USAGE_STABLE;
                /*
                 * Since the resident state can go away any time after this
                 * call, we will not make this page resident. We can revisit
                 * this decision if a guest will ever start using this.
                 */
                break;
        case ESSA_SET_STABLE_IF_RESIDENT:
                if (!(pte_val(*ptep) & _PAGE_INVALID)) {
                        pgstev &= ~_PGSTE_GPS_USAGE_MASK;
                        pgstev |= _PGSTE_GPS_USAGE_STABLE;
                }
                break;
        default:
                /* we should never get here! */
                break;
        }
        /* If we are discarding a page, set it to logical zero */
        if (res)
                pgstev |= _PGSTE_GPS_ZERO;

        pgste_val(pgste) = pgstev;
        pgste_set_unlock(ptep, pgste);
        pte_unmap_unlock(ptep, ptl);
        return res;
}
EXPORT_SYMBOL(pgste_perform_essa);

/**
 * set_pgste_bits - set specific PGSTE bits.
 * @mm: the memory context. It must have PGSTEs, no check is performed here!
 * @hva: the host virtual address of the page whose PGSTE is to be processed
 * @bits: a bitmask representing the bits that will be touched
 * @value: the values of the bits to be written. Only the bits in the mask
 *         will be written.
 *
 * Return: 0 on success, < 0 in case of error.
 */
int set_pgste_bits(struct mm_struct *mm, unsigned long hva,
                   unsigned long bits, unsigned long value)
{
        spinlock_t *ptl;
        pgste_t new;
        pte_t *ptep;

        ptep = get_locked_pte(mm, hva, &ptl);
        if (unlikely(!ptep))
                return -EFAULT;
        new = pgste_get_lock(ptep);

        pgste_val(new) &= ~bits;
        pgste_val(new) |= value & bits;

        pgste_set_unlock(ptep, new);
        pte_unmap_unlock(ptep, ptl);
        return 0;
}
EXPORT_SYMBOL(set_pgste_bits);

/**
 * get_pgste - get the current PGSTE for the given address.
 * @mm: the memory context. It must have PGSTEs, no check is performed here!
 * @hva: the host virtual address of the page whose PGSTE is to be processed
 * @pgstep: will be written with the current PGSTE for the given address.
 *
 * Return: 0 on success, < 0 in case of error.
 */
int get_pgste(struct mm_struct *mm, unsigned long hva, unsigned long *pgstep)
{
        spinlock_t *ptl;
        pte_t *ptep;

        ptep = get_locked_pte(mm, hva, &ptl);
        if (unlikely(!ptep))
                return -EFAULT;
        *pgstep = pgste_val(pgste_get(ptep));
        pte_unmap_unlock(ptep, ptl);
        return 0;
}
EXPORT_SYMBOL(get_pgste);
#endif /* CONFIG_PGSTE */