pgtable.c

/*
 * Copyright IBM Corp. 2007, 2011
 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 */

#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/spinlock.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/swapops.h>
#include <linux/sysctl.h>
#include <linux/ksm.h>
#include <linux/mman.h>

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/page-states.h>
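
/*
 * Helpers to invalidate a pte with the IPTE instruction, either on the
 * local CPU only or on all CPUs. With TLB_GUEST support the guest ASCE
 * and the NODAT hint are passed to the instruction where applicable.
 */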
static inline void ptep_ipte_local(struct mm_struct *mm, unsigned long addr,
				   pte_t *ptep)
{
	unsigned long opt, asce;

	if (MACHINE_HAS_TLB_GUEST) {
		opt = 0;
		asce = READ_ONCE(mm->context.gmap_asce);
		if (asce == 0UL)
			opt |= IPTE_NODAT;
		if (asce != -1UL) {
			asce = asce ? : mm->context.asce;
			opt |= IPTE_GUEST_ASCE;
		}
		__ptep_ipte(addr, ptep, opt, asce, IPTE_LOCAL);
	} else {
		__ptep_ipte(addr, ptep, 0, 0, IPTE_LOCAL);
	}
}

static inline void ptep_ipte_global(struct mm_struct *mm, unsigned long addr,
				    pte_t *ptep)
{
	unsigned long opt, asce;

	if (MACHINE_HAS_TLB_GUEST) {
		opt = 0;
		asce = READ_ONCE(mm->context.gmap_asce);
		if (asce == 0UL)
			opt |= IPTE_NODAT;
		if (asce != -1UL) {
			asce = asce ? : mm->context.asce;
			opt |= IPTE_GUEST_ASCE;
		}
		__ptep_ipte(addr, ptep, opt, asce, IPTE_GLOBAL);
	} else {
		__ptep_ipte(addr, ptep, 0, 0, IPTE_GLOBAL);
	}
}
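
/*
 * Invalidate a valid pte and flush the TLB entry right away; the flush is
 * done on the local CPU only if the mm is attached to just this CPU and
 * local TLB clearing is available, otherwise on all CPUs.
 */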
static inline pte_t ptep_flush_direct(struct mm_struct *mm,
				      unsigned long addr, pte_t *ptep)
{
	pte_t old;

	old = *ptep;
	if (unlikely(pte_val(old) & _PAGE_INVALID))
		return old;
	atomic_inc(&mm->context.flush_count);
	if (MACHINE_HAS_TLB_LC &&
	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
		ptep_ipte_local(mm, addr, ptep);
	else
		ptep_ipte_global(mm, addr, ptep);
	atomic_dec(&mm->context.flush_count);
	return old;
}
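
/*
 * Lazy flush variant: if the mm is attached to this CPU only, just mark
 * the pte invalid and record a pending flush in flush_mm, otherwise do a
 * global IPTE immediately.
 */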
static inline pte_t ptep_flush_lazy(struct mm_struct *mm,
				    unsigned long addr, pte_t *ptep)
{
	pte_t old;

	old = *ptep;
	if (unlikely(pte_val(old) & _PAGE_INVALID))
		return old;
	atomic_inc(&mm->context.flush_count);
	if (cpumask_equal(&mm->context.cpu_attach_mask,
			  cpumask_of(smp_processor_id()))) {
		pte_val(*ptep) |= _PAGE_INVALID;
		mm->context.flush_mm = 1;
	} else
		ptep_ipte_global(mm, addr, ptep);
	atomic_dec(&mm->context.flush_count);
	return old;
}
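
/*
 * Lock the PGSTE slot that lives PTRS_PER_PTE entries behind the pte by
 * atomically setting the PCL bit with compare-and-swap; pgste_set_unlock()
 * stores the (possibly modified) value back and releases the bit. Both are
 * no-ops without CONFIG_PGSTE.
 */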
static inline pgste_t pgste_get_lock(pte_t *ptep)
{
	unsigned long new = 0;
#ifdef CONFIG_PGSTE
	unsigned long old;

	asm(
		" lg %0,%2\n"
		"0: lgr %1,%0\n"
		" nihh %0,0xff7f\n"	/* clear PCL bit in old */
		" oihh %1,0x0080\n"	/* set PCL bit in new */
		" csg %0,%1,%2\n"
		" jl 0b\n"
		: "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE])
		: "Q" (ptep[PTRS_PER_PTE]) : "cc", "memory");
#endif
	return __pgste(new);
}

static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
	asm(
		" nihh %1,0xff7f\n"	/* clear PCL bit */
		" stg %1,%0\n"
		: "=Q" (ptep[PTRS_PER_PTE])
		: "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE])
		: "cc", "memory");
#endif
}

static inline pgste_t pgste_get(pte_t *ptep)
{
	unsigned long pgste = 0;
#ifdef CONFIG_PGSTE
	pgste = *(unsigned long *)(ptep + PTRS_PER_PTE);
#endif
	return __pgste(pgste);
}

static inline void pgste_set(pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
	*(pgste_t *)(ptep + PTRS_PER_PTE) = pgste;
#endif
}
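
/*
 * Fold the current storage key state of the page into the PGSTE: the
 * hardware change/reference bits are merged into the guest GC/GR bits and
 * the access key plus fetch protection bit are copied over. Only done for
 * valid ptes of mms that use storage keys.
 */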
static inline pgste_t pgste_update_all(pte_t pte, pgste_t pgste,
				       struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
	unsigned long address, bits, skey;

	if (!mm_use_skey(mm) || pte_val(pte) & _PAGE_INVALID)
		return pgste;
	address = pte_val(pte) & PAGE_MASK;
	skey = (unsigned long) page_get_storage_key(address);
	bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
	/* Transfer page changed & referenced bit to guest bits in pgste */
	pgste_val(pgste) |= bits << 48;		/* GR bit & GC bit */
	/* Copy page access key and fetch protection bit to pgste */
	pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
	pgste_val(pgste) |= (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
#endif
	return pgste;
}

static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry,
				 struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
	unsigned long address;
	unsigned long nkey;

	if (!mm_use_skey(mm) || pte_val(entry) & _PAGE_INVALID)
		return;
	VM_BUG_ON(!(pte_val(*ptep) & _PAGE_INVALID));
	address = pte_val(entry) & PAGE_MASK;
	/*
	 * Set page access key and fetch protection bit from pgste.
	 * The guest C/R information is still in the PGSTE, set real
	 * key C/R to 0.
	 */
	nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
	nkey |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
	page_set_storage_key(address, nkey, 0);
#endif
}
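
/*
 * Store a pte and maintain the user-dirty state in the PGSTE: machines
 * without ESOP get the dirty bit forced on for valid, writable ptes, and
 * every pte that allows write access sets PGSTE_UC_BIT.
 */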
static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
{
#ifdef CONFIG_PGSTE
	if ((pte_val(entry) & _PAGE_PRESENT) &&
	    (pte_val(entry) & _PAGE_WRITE) &&
	    !(pte_val(entry) & _PAGE_INVALID)) {
		if (!MACHINE_HAS_ESOP) {
			/*
			 * Without enhanced suppression-on-protection force
			 * the dirty bit on for all writable ptes.
			 */
			pte_val(entry) |= _PAGE_DIRTY;
			pte_val(entry) &= ~_PAGE_PROTECT;
		}
		if (!(pte_val(entry) & _PAGE_PROTECT))
			/* This pte allows write access, set user-dirty */
			pgste_val(pgste) |= PGSTE_UC_BIT;
	}
#endif
	*ptep = entry;
	return pgste;
}

static inline pgste_t pgste_pte_notify(struct mm_struct *mm,
				       unsigned long addr,
				       pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
	unsigned long bits;

	bits = pgste_val(pgste) & (PGSTE_IN_BIT | PGSTE_VSIE_BIT);
	if (bits) {
		pgste_val(pgste) ^= bits;
		ptep_notify(mm, addr, ptep, bits);
	}
#endif
	return pgste;
}
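
/*
 * Begin/commit pair used by the ptep_xchg functions: the start helper
 * takes the PGSTE lock and delivers pending notification bits, the commit
 * helper transfers storage key and usage information and installs the new
 * pte before dropping the lock.
 */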
static inline pgste_t ptep_xchg_start(struct mm_struct *mm,
				      unsigned long addr, pte_t *ptep)
{
	pgste_t pgste = __pgste(0);

	if (mm_has_pgste(mm)) {
		pgste = pgste_get_lock(ptep);
		pgste = pgste_pte_notify(mm, addr, ptep, pgste);
	}
	return pgste;
}

static inline pte_t ptep_xchg_commit(struct mm_struct *mm,
				     unsigned long addr, pte_t *ptep,
				     pgste_t pgste, pte_t old, pte_t new)
{
	if (mm_has_pgste(mm)) {
		if (pte_val(old) & _PAGE_INVALID)
			pgste_set_key(ptep, pgste, new, mm);
		if (pte_val(new) & _PAGE_INVALID) {
			pgste = pgste_update_all(old, pgste, mm);
			if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) ==
			    _PGSTE_GPS_USAGE_UNUSED)
				pte_val(old) |= _PAGE_UNUSED;
		}
		pgste = pgste_set_pte(ptep, pgste, new);
		pgste_set_unlock(ptep, pgste);
	} else {
		*ptep = new;
	}
	return old;
}
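
/*
 * Exchange a pte for a new value, either with an immediate TLB flush
 * (direct) or with a lazily deferred flush (lazy); the old pte is returned.
 */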
pte_t ptep_xchg_direct(struct mm_struct *mm, unsigned long addr,
		       pte_t *ptep, pte_t new)
{
	pgste_t pgste;
	pte_t old;

	preempt_disable();
	pgste = ptep_xchg_start(mm, addr, ptep);
	old = ptep_flush_direct(mm, addr, ptep);
	old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
	preempt_enable();
	return old;
}
EXPORT_SYMBOL(ptep_xchg_direct);

pte_t ptep_xchg_lazy(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, pte_t new)
{
	pgste_t pgste;
	pte_t old;

	preempt_disable();
	pgste = ptep_xchg_start(mm, addr, ptep);
	old = ptep_flush_lazy(mm, addr, ptep);
	old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
	preempt_enable();
	return old;
}
EXPORT_SYMBOL(ptep_xchg_lazy);

pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr,
			     pte_t *ptep)
{
	pgste_t pgste;
	pte_t old;

	preempt_disable();
	pgste = ptep_xchg_start(mm, addr, ptep);
	old = ptep_flush_lazy(mm, addr, ptep);
	if (mm_has_pgste(mm)) {
		pgste = pgste_update_all(old, pgste, mm);
		pgste_set(ptep, pgste);
	}
	return old;
}
EXPORT_SYMBOL(ptep_modify_prot_start);

void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
			     pte_t *ptep, pte_t pte)
{
	pgste_t pgste;

	if (!MACHINE_HAS_NX)
		pte_val(pte) &= ~_PAGE_NOEXEC;
	if (mm_has_pgste(mm)) {
		pgste = pgste_get(ptep);
		pgste_set_key(ptep, pgste, pte, mm);
		pgste = pgste_set_pte(ptep, pgste, pte);
		pgste_set_unlock(ptep, pgste);
	} else {
		*ptep = pte;
	}
	preempt_enable();
}
EXPORT_SYMBOL(ptep_modify_prot_commit);
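
/*
 * The pmd and pud helpers below mirror the pte variants above, using IDTE
 * (or CSP on machines without IDTE) to invalidate segment and region
 * table entries.
 */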
static inline void pmdp_idte_local(struct mm_struct *mm,
				   unsigned long addr, pmd_t *pmdp)
{
	if (MACHINE_HAS_TLB_GUEST)
		__pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE,
			    mm->context.asce, IDTE_LOCAL);
	else
		__pmdp_idte(addr, pmdp, 0, 0, IDTE_LOCAL);
}

static inline void pmdp_idte_global(struct mm_struct *mm,
				    unsigned long addr, pmd_t *pmdp)
{
	if (MACHINE_HAS_TLB_GUEST)
		__pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE,
			    mm->context.asce, IDTE_GLOBAL);
	else if (MACHINE_HAS_IDTE)
		__pmdp_idte(addr, pmdp, 0, 0, IDTE_GLOBAL);
	else
		__pmdp_csp(pmdp);
}

static inline pmd_t pmdp_flush_direct(struct mm_struct *mm,
				      unsigned long addr, pmd_t *pmdp)
{
	pmd_t old;

	old = *pmdp;
	if (pmd_val(old) & _SEGMENT_ENTRY_INVALID)
		return old;
	atomic_inc(&mm->context.flush_count);
	if (MACHINE_HAS_TLB_LC &&
	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
		pmdp_idte_local(mm, addr, pmdp);
	else
		pmdp_idte_global(mm, addr, pmdp);
	atomic_dec(&mm->context.flush_count);
	return old;
}

static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm,
				    unsigned long addr, pmd_t *pmdp)
{
	pmd_t old;

	old = *pmdp;
	if (pmd_val(old) & _SEGMENT_ENTRY_INVALID)
		return old;
	atomic_inc(&mm->context.flush_count);
	if (cpumask_equal(&mm->context.cpu_attach_mask,
			  cpumask_of(smp_processor_id()))) {
		pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID;
		mm->context.flush_mm = 1;
	} else {
		pmdp_idte_global(mm, addr, pmdp);
	}
	atomic_dec(&mm->context.flush_count);
	return old;
}

pmd_t pmdp_xchg_direct(struct mm_struct *mm, unsigned long addr,
		       pmd_t *pmdp, pmd_t new)
{
	pmd_t old;

	preempt_disable();
	old = pmdp_flush_direct(mm, addr, pmdp);
	*pmdp = new;
	preempt_enable();
	return old;
}
EXPORT_SYMBOL(pmdp_xchg_direct);

pmd_t pmdp_xchg_lazy(struct mm_struct *mm, unsigned long addr,
		     pmd_t *pmdp, pmd_t new)
{
	pmd_t old;

	preempt_disable();
	old = pmdp_flush_lazy(mm, addr, pmdp);
	*pmdp = new;
	preempt_enable();
	return old;
}
EXPORT_SYMBOL(pmdp_xchg_lazy);

static inline void pudp_idte_local(struct mm_struct *mm,
				   unsigned long addr, pud_t *pudp)
{
	if (MACHINE_HAS_TLB_GUEST)
		__pudp_idte(addr, pudp, IDTE_NODAT | IDTE_GUEST_ASCE,
			    mm->context.asce, IDTE_LOCAL);
	else
		__pudp_idte(addr, pudp, 0, 0, IDTE_LOCAL);
}

static inline void pudp_idte_global(struct mm_struct *mm,
				    unsigned long addr, pud_t *pudp)
{
	if (MACHINE_HAS_TLB_GUEST)
		__pudp_idte(addr, pudp, IDTE_NODAT | IDTE_GUEST_ASCE,
			    mm->context.asce, IDTE_GLOBAL);
	else if (MACHINE_HAS_IDTE)
		__pudp_idte(addr, pudp, 0, 0, IDTE_GLOBAL);
	else
		/*
		 * Invalid bit position is the same for pmd and pud, so we can
		 * reuse __pmdp_csp() here.
		 */
		__pmdp_csp((pmd_t *) pudp);
}

static inline pud_t pudp_flush_direct(struct mm_struct *mm,
				      unsigned long addr, pud_t *pudp)
{
	pud_t old;

	old = *pudp;
	if (pud_val(old) & _REGION_ENTRY_INVALID)
		return old;
	atomic_inc(&mm->context.flush_count);
	if (MACHINE_HAS_TLB_LC &&
	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
		pudp_idte_local(mm, addr, pudp);
	else
		pudp_idte_global(mm, addr, pudp);
	atomic_dec(&mm->context.flush_count);
	return old;
}

pud_t pudp_xchg_direct(struct mm_struct *mm, unsigned long addr,
		       pud_t *pudp, pud_t new)
{
	pud_t old;

	preempt_disable();
	old = pudp_flush_direct(mm, addr, pudp);
	*pudp = new;
	preempt_enable();
	return old;
}
EXPORT_SYMBOL(pudp_xchg_direct);

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
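/*
 * Deposit and withdraw of preallocated page tables for transparent huge
 * pages; the deposited tables are kept on a list anchored at
 * pmd_huge_pte() and must be manipulated with the pmd lock held.
 */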
void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
				pgtable_t pgtable)
{
	struct list_head *lh = (struct list_head *) pgtable;

	assert_spin_locked(pmd_lockptr(mm, pmdp));

	/* FIFO */
	if (!pmd_huge_pte(mm, pmdp))
		INIT_LIST_HEAD(lh);
	else
		list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
	pmd_huge_pte(mm, pmdp) = pgtable;
}

pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
{
	struct list_head *lh;
	pgtable_t pgtable;
	pte_t *ptep;

	assert_spin_locked(pmd_lockptr(mm, pmdp));

	/* FIFO */
	pgtable = pmd_huge_pte(mm, pmdp);
	lh = (struct list_head *) pgtable;
	if (list_empty(lh))
		pmd_huge_pte(mm, pmdp) = NULL;
	else {
		pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
		list_del(lh);
	}
	ptep = (pte_t *) pgtable;
	pte_val(*ptep) = _PAGE_INVALID;
	ptep++;
	pte_val(*ptep) = _PAGE_INVALID;
	return pgtable;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

#ifdef CONFIG_PGSTE
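/*
 * KVM guest handling below: set_pte_at() variant that also updates the
 * storage key and the PGSTE of the page.
 */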
void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, pte_t entry)
{
	pgste_t pgste;

	/* the mm_has_pgste() check is done in set_pte_at() */
	preempt_disable();
	pgste = pgste_get_lock(ptep);
	pgste_val(pgste) &= ~_PGSTE_GPS_ZERO;
	pgste_set_key(ptep, pgste, entry, mm);
	pgste = pgste_set_pte(ptep, pgste, entry);
	pgste_set_unlock(ptep, pgste);
	preempt_enable();
}

void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
	pgste_t pgste;

	preempt_disable();
	pgste = pgste_get_lock(ptep);
	pgste_val(pgste) |= PGSTE_IN_BIT;
	pgste_set_unlock(ptep, pgste);
	preempt_enable();
}

/**
 * ptep_force_prot - change access rights of a locked pte
 * @mm: pointer to the process mm_struct
 * @addr: virtual address in the guest address space
 * @ptep: pointer to the page table entry
 * @prot: indicates guest access rights: PROT_NONE, PROT_READ or PROT_WRITE
 * @bit: pgste bit to set (e.g. for notification)
 *
 * Returns 0 if the access rights were changed and -EAGAIN if the current
 * and requested access rights are incompatible.
 */
int ptep_force_prot(struct mm_struct *mm, unsigned long addr,
		    pte_t *ptep, int prot, unsigned long bit)
{
	pte_t entry;
	pgste_t pgste;
	int pte_i, pte_p;

	pgste = pgste_get_lock(ptep);
	entry = *ptep;
	/* Check pte entry after all locks have been acquired */
	pte_i = pte_val(entry) & _PAGE_INVALID;
	pte_p = pte_val(entry) & _PAGE_PROTECT;
	if ((pte_i && (prot != PROT_NONE)) ||
	    (pte_p && (prot & PROT_WRITE))) {
		pgste_set_unlock(ptep, pgste);
		return -EAGAIN;
	}
	/* Change access rights and set pgste bit */
	if (prot == PROT_NONE && !pte_i) {
		ptep_flush_direct(mm, addr, ptep);
		pgste = pgste_update_all(entry, pgste, mm);
		pte_val(entry) |= _PAGE_INVALID;
	}
	if (prot == PROT_READ && !pte_p) {
		ptep_flush_direct(mm, addr, ptep);
		pte_val(entry) &= ~_PAGE_INVALID;
		pte_val(entry) |= _PAGE_PROTECT;
	}
	pgste_val(pgste) |= bit;
	pgste = pgste_set_pte(ptep, pgste, entry);
	pgste_set_unlock(ptep, pgste);
	return 0;
}
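
/*
 * Create a shadow (vSIE) pte at tptep from the parent pte at sptep and
 * mark the parent PGSTE with PGSTE_VSIE_BIT so that changes are propagated.
 * Returns 1 if a shadow pte was installed, 0 if one already existed and
 * -EAGAIN if the parent pte is not usable.
 */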
int ptep_shadow_pte(struct mm_struct *mm, unsigned long saddr,
		    pte_t *sptep, pte_t *tptep, pte_t pte)
{
	pgste_t spgste, tpgste;
	pte_t spte, tpte;
	int rc = -EAGAIN;

	if (!(pte_val(*tptep) & _PAGE_INVALID))
		return 0;	/* already shadowed */
	spgste = pgste_get_lock(sptep);
	spte = *sptep;
	if (!(pte_val(spte) & _PAGE_INVALID) &&
	    !((pte_val(spte) & _PAGE_PROTECT) &&
	      !(pte_val(pte) & _PAGE_PROTECT))) {
		pgste_val(spgste) |= PGSTE_VSIE_BIT;
		tpgste = pgste_get_lock(tptep);
		pte_val(tpte) = (pte_val(spte) & PAGE_MASK) |
				(pte_val(pte) & _PAGE_PROTECT);
		/* don't touch the storage key - it belongs to parent pgste */
		tpgste = pgste_set_pte(tptep, tpgste, tpte);
		pgste_set_unlock(tptep, tpgste);
		rc = 1;
	}
	pgste_set_unlock(sptep, spgste);
	return rc;
}

void ptep_unshadow_pte(struct mm_struct *mm, unsigned long saddr, pte_t *ptep)
{
	pgste_t pgste;

	pgste = pgste_get_lock(ptep);
	/* notifier is called by the caller */
	ptep_flush_direct(mm, saddr, ptep);
	/* don't touch the storage key - it belongs to parent pgste */
	pgste = pgste_set_pte(ptep, pgste, __pte(_PAGE_INVALID));
	pgste_set_unlock(ptep, pgste);
}

static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry)
{
	if (!non_swap_entry(entry))
		dec_mm_counter(mm, MM_SWAPENTS);
	else if (is_migration_entry(entry)) {
		struct page *page = migration_entry_to_page(entry);

		dec_mm_counter(mm, mm_counter(page));
	}
	free_swap_and_cache(entry);
}
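
/*
 * Zap a swap pte whose guest usage state is "unused" or logically zero;
 * called with the reset argument set it only clears the guest usage state
 * in the PGSTE and leaves the pte alone.
 */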
void ptep_zap_unused(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, int reset)
{
	unsigned long pgstev;
	pgste_t pgste;
	pte_t pte;

	/* Zap unused and logically-zero pages */
	preempt_disable();
	pgste = pgste_get_lock(ptep);
	pgstev = pgste_val(pgste);
	pte = *ptep;
	if (!reset && pte_swap(pte) &&
	    ((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED ||
	     (pgstev & _PGSTE_GPS_ZERO))) {
		ptep_zap_swap_entry(mm, pte_to_swp_entry(pte));
		pte_clear(mm, addr, ptep);
	}
	if (reset)
		pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK;
	pgste_set_unlock(ptep, pgste);
	preempt_enable();
}

void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
	unsigned long ptev;
	pgste_t pgste;

	/* Clear storage key */
	preempt_disable();
	pgste = pgste_get_lock(ptep);
	pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT |
			      PGSTE_GR_BIT | PGSTE_GC_BIT);
	ptev = pte_val(*ptep);
	if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE))
		page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 1);
	pgste_set_unlock(ptep, pgste);
	preempt_enable();
}

/*
 * Test and reset if a guest page is dirty
 */
bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr)
{
	spinlock_t *ptl;
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;
	pgste_t pgste;
	pte_t *ptep;
	pte_t pte;
	bool dirty;

	pgd = pgd_offset(mm, addr);
	p4d = p4d_alloc(mm, pgd, addr);
	if (!p4d)
		return false;
	pud = pud_alloc(mm, p4d, addr);
	if (!pud)
		return false;
	pmd = pmd_alloc(mm, pud, addr);
	if (!pmd)
		return false;
	/* We can't run guests backed by huge pages, but userspace can
	 * still set them up and then try to migrate them without any
	 * migration support.
	 */
	if (pmd_large(*pmd))
		return true;

	ptep = pte_alloc_map_lock(mm, pmd, addr, &ptl);
	if (unlikely(!ptep))
		return false;

	pgste = pgste_get_lock(ptep);
	dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT);
	pgste_val(pgste) &= ~PGSTE_UC_BIT;
	pte = *ptep;
	if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
		pgste = pgste_pte_notify(mm, addr, ptep, pgste);
		ptep_ipte_global(mm, addr, ptep);
		if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
			pte_val(pte) |= _PAGE_PROTECT;
		else
			pte_val(pte) |= _PAGE_INVALID;
		*ptep = pte;
	}
	pgste_set_unlock(ptep, pgste);

	spin_unlock(ptl);
	return dirty;
}
EXPORT_SYMBOL_GPL(test_and_clear_guest_dirty);
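
/*
 * Set the guest storage key for a page: access bits, fetch protection and
 * the guest change/reference state are stored in the PGSTE and, for mapped
 * pages, the real storage key is updated as well.
 */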
int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
			  unsigned char key, bool nq)
{
	unsigned long keyul;
	spinlock_t *ptl;
	pgste_t old, new;
	pte_t *ptep;

	ptep = get_locked_pte(mm, addr, &ptl);
	if (unlikely(!ptep))
		return -EFAULT;

	new = old = pgste_get_lock(ptep);
	pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT |
			    PGSTE_ACC_BITS | PGSTE_FP_BIT);
	keyul = (unsigned long) key;
	pgste_val(new) |= (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48;
	pgste_val(new) |= (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
	if (!(pte_val(*ptep) & _PAGE_INVALID)) {
		unsigned long address, bits, skey;

		address = pte_val(*ptep) & PAGE_MASK;
		skey = (unsigned long) page_get_storage_key(address);
		bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
		skey = key & (_PAGE_ACC_BITS | _PAGE_FP_BIT);
		/* Set storage key ACC and FP */
		page_set_storage_key(address, skey, !nq);
		/* Merge host changed & referenced into pgste */
		pgste_val(new) |= bits << 52;
	}
	/* changing the guest storage key is considered a change of the page */
	if ((pgste_val(new) ^ pgste_val(old)) &
	    (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT))
		pgste_val(new) |= PGSTE_UC_BIT;

	pgste_set_unlock(ptep, new);
	pte_unmap_unlock(ptep, ptl);
	return 0;
}
EXPORT_SYMBOL(set_guest_storage_key);

/**
 * Conditionally set a guest storage key (handling csske).
 * oldkey will be updated when either mr or mc is set and a pointer is given.
 *
 * Returns 0 if a guest's storage key update wasn't necessary, 1 if the guest
 * storage key was updated and -EFAULT on access errors.
 */
int cond_set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
			       unsigned char key, unsigned char *oldkey,
			       bool nq, bool mr, bool mc)
{
	unsigned char tmp, mask = _PAGE_ACC_BITS | _PAGE_FP_BIT;
	int rc;

	/* we can drop the pgste lock between getting and setting the key */
	if (mr | mc) {
		rc = get_guest_storage_key(current->mm, addr, &tmp);
		if (rc)
			return rc;
		if (oldkey)
			*oldkey = tmp;
		if (!mr)
			mask |= _PAGE_REFERENCED;
		if (!mc)
			mask |= _PAGE_CHANGED;
		if (!((tmp ^ key) & mask))
			return 0;
	}
	rc = set_guest_storage_key(current->mm, addr, key, nq);
	return rc < 0 ? rc : 1;
}
EXPORT_SYMBOL(cond_set_guest_storage_key);

/**
 * Reset a guest reference bit (rrbe), returning the reference and changed bit.
 *
 * Returns < 0 in case of error, otherwise the cc to be reported to the guest.
 */
int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr)
{
	spinlock_t *ptl;
	pgste_t old, new;
	pte_t *ptep;
	int cc = 0;

	ptep = get_locked_pte(mm, addr, &ptl);
	if (unlikely(!ptep))
		return -EFAULT;

	new = old = pgste_get_lock(ptep);
	/* Reset guest reference bit only */
	pgste_val(new) &= ~PGSTE_GR_BIT;

	if (!(pte_val(*ptep) & _PAGE_INVALID)) {
		cc = page_reset_referenced(pte_val(*ptep) & PAGE_MASK);
		/* Merge real referenced bit into host-set */
		pgste_val(new) |= ((unsigned long) cc << 53) & PGSTE_HR_BIT;
	}
	/* Reflect guest's logical view, not physical */
	cc |= (pgste_val(old) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 49;
	/* Changing the guest storage key is considered a change of the page */
	if ((pgste_val(new) ^ pgste_val(old)) & PGSTE_GR_BIT)
		pgste_val(new) |= PGSTE_UC_BIT;

	pgste_set_unlock(ptep, new);
	pte_unmap_unlock(ptep, ptl);
	return cc;
}
EXPORT_SYMBOL(reset_guest_reference_bit);
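
/*
 * Read back the guest storage key: taken from the real storage key for
 * mapped pages and from the PGSTE otherwise, with the guest reference and
 * change bits merged in.
 */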
int get_guest_storage_key(struct mm_struct *mm, unsigned long addr,
			  unsigned char *key)
{
	spinlock_t *ptl;
	pgste_t pgste;
	pte_t *ptep;

	ptep = get_locked_pte(mm, addr, &ptl);
	if (unlikely(!ptep))
		return -EFAULT;

	pgste = pgste_get_lock(ptep);
	*key = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
	if (!(pte_val(*ptep) & _PAGE_INVALID))
		*key = page_get_storage_key(pte_val(*ptep) & PAGE_MASK);
	/* Reflect guest's logical view, not physical */
	*key |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
	pgste_set_unlock(ptep, pgste);
	pte_unmap_unlock(ptep, ptl);
	return 0;
}
EXPORT_SYMBOL(get_guest_storage_key);

/**
 * pgste_perform_essa - perform ESSA actions on the PGSTE.
 * @mm: the memory context. It must have PGSTEs, no check is performed here!
 * @hva: the host virtual address of the page whose PGSTE is to be processed
 * @orc: the specific action to perform, see the ESSA_SET_* macros.
 * @oldpte: the PTE will be saved there if the pointer is not NULL.
 * @oldpgste: the old PGSTE will be saved there if the pointer is not NULL.
 *
 * Return: 1 if the page is to be added to the CBRL, otherwise 0,
 *	   or < 0 in case of error. -EINVAL is returned for invalid values
 *	   of orc, -EFAULT for invalid addresses.
 */
int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc,
		       unsigned long *oldpte, unsigned long *oldpgste)
{
	unsigned long pgstev;
	spinlock_t *ptl;
	pgste_t pgste;
	pte_t *ptep;
	int res = 0;

	WARN_ON_ONCE(orc > ESSA_MAX);
	if (unlikely(orc > ESSA_MAX))
		return -EINVAL;
	ptep = get_locked_pte(mm, hva, &ptl);
	if (unlikely(!ptep))
		return -EFAULT;
	pgste = pgste_get_lock(ptep);
	pgstev = pgste_val(pgste);
	if (oldpte)
		*oldpte = pte_val(*ptep);
	if (oldpgste)
		*oldpgste = pgstev;

	switch (orc) {
	case ESSA_GET_STATE:
		break;
	case ESSA_SET_STABLE:
		pgstev &= ~_PGSTE_GPS_USAGE_MASK;
		pgstev |= _PGSTE_GPS_USAGE_STABLE;
		break;
	case ESSA_SET_UNUSED:
		pgstev &= ~_PGSTE_GPS_USAGE_MASK;
		pgstev |= _PGSTE_GPS_USAGE_UNUSED;
		if (pte_val(*ptep) & _PAGE_INVALID)
			res = 1;
		break;
	case ESSA_SET_VOLATILE:
		pgstev &= ~_PGSTE_GPS_USAGE_MASK;
		pgstev |= _PGSTE_GPS_USAGE_VOLATILE;
		if (pte_val(*ptep) & _PAGE_INVALID)
			res = 1;
		break;
	case ESSA_SET_POT_VOLATILE:
		pgstev &= ~_PGSTE_GPS_USAGE_MASK;
		if (!(pte_val(*ptep) & _PAGE_INVALID)) {
			pgstev |= _PGSTE_GPS_USAGE_POT_VOLATILE;
			break;
		}
		if (pgstev & _PGSTE_GPS_ZERO) {
			pgstev |= _PGSTE_GPS_USAGE_VOLATILE;
			break;
		}
		if (!(pgstev & PGSTE_GC_BIT)) {
			pgstev |= _PGSTE_GPS_USAGE_VOLATILE;
			res = 1;
			break;
		}
		break;
	case ESSA_SET_STABLE_RESIDENT:
		pgstev &= ~_PGSTE_GPS_USAGE_MASK;
		pgstev |= _PGSTE_GPS_USAGE_STABLE;
		/*
		 * Since the resident state can go away any time after this
		 * call, we will not make this page resident. We can revisit
		 * this decision if a guest will ever start using this.
		 */
		break;
	case ESSA_SET_STABLE_IF_RESIDENT:
		if (!(pte_val(*ptep) & _PAGE_INVALID)) {
			pgstev &= ~_PGSTE_GPS_USAGE_MASK;
			pgstev |= _PGSTE_GPS_USAGE_STABLE;
		}
		break;
	default:
		/* we should never get here! */
		break;
	}
	/* If we are discarding a page, set it to logical zero */
	if (res)
		pgstev |= _PGSTE_GPS_ZERO;

	pgste_val(pgste) = pgstev;
	pgste_set_unlock(ptep, pgste);
	pte_unmap_unlock(ptep, ptl);
	return res;
}
EXPORT_SYMBOL(pgste_perform_essa);

/**
 * set_pgste_bits - set specific PGSTE bits.
 * @mm: the memory context. It must have PGSTEs, no check is performed here!
 * @hva: the host virtual address of the page whose PGSTE is to be processed
 * @bits: a bitmask representing the bits that will be touched
 * @value: the values of the bits to be written. Only the bits in the mask
 *	   will be written.
 *
 * Return: 0 on success, < 0 in case of error.
 */
int set_pgste_bits(struct mm_struct *mm, unsigned long hva,
		   unsigned long bits, unsigned long value)
{
	spinlock_t *ptl;
	pgste_t new;
	pte_t *ptep;

	ptep = get_locked_pte(mm, hva, &ptl);
	if (unlikely(!ptep))
		return -EFAULT;
	new = pgste_get_lock(ptep);

	pgste_val(new) &= ~bits;
	pgste_val(new) |= value & bits;

	pgste_set_unlock(ptep, new);
	pte_unmap_unlock(ptep, ptl);
	return 0;
}
EXPORT_SYMBOL(set_pgste_bits);

/**
 * get_pgste - get the current PGSTE for the given address.
 * @mm: the memory context. It must have PGSTEs, no check is performed here!
 * @hva: the host virtual address of the page whose PGSTE is to be processed
 * @pgstep: will be written with the current PGSTE for the given address.
 *
 * Return: 0 on success, < 0 in case of error.
 */
int get_pgste(struct mm_struct *mm, unsigned long hva, unsigned long *pgstep)
{
	spinlock_t *ptl;
	pte_t *ptep;

	ptep = get_locked_pte(mm, hva, &ptl);
	if (unlikely(!ptep))
		return -EFAULT;
	*pgstep = pgste_val(pgste_get(ptep));
	pte_unmap_unlock(ptep, ptl);
	return 0;
}
EXPORT_SYMBOL(get_pgste);
#endif