kvm_book3s_64.h

/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 *
 * Copyright SUSE Linux Products GmbH 2010
 *
 * Authors: Alexander Graf <agraf@suse.de>
 */

#ifndef __ASM_KVM_BOOK3S_64_H__
#define __ASM_KVM_BOOK3S_64_H__

#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
static inline struct kvmppc_book3s_shadow_vcpu *svcpu_get(struct kvm_vcpu *vcpu)
{
	preempt_disable();
	return &get_paca()->shadow_vcpu;
}

static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu)
{
	preempt_enable();
}
#endif
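
/*
 * Usage sketch for the two helpers above (illustrative, not part of the
 * original file): svcpu_get() returns the per-CPU shadow vcpu with
 * preemption disabled, so every call must be paired with svcpu_put():
 *
 *	svcpu = svcpu_get(vcpu);
 *	... access svcpu fields with preemption disabled ...
 *	svcpu_put(svcpu);
 */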

#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
#define KVM_DEFAULT_HPT_ORDER	24	/* 16MB HPT by default */
#endif

#define VRMA_VSID	0x1ffffffUL	/* 1TB VSID reserved for VRMA */

/*
 * We use a lock bit in HPTE dword 0 to synchronize updates and
 * accesses to each HPTE, and another bit to indicate non-present
 * HPTEs.
 */
#define HPTE_V_HVLOCK	0x40UL
#define HPTE_V_ABSENT	0x20UL

/*
 * We use this bit in the guest_rpte field of the revmap entry
 * to indicate a modified HPTE.
 */
#define HPTE_GR_MODIFIED	(1ul << 62)

/* These bits are reserved in the guest view of the HPTE */
#define HPTE_GR_RESERVED	HPTE_GR_MODIFIED

static inline long try_lock_hpte(__be64 *hpte, unsigned long bits)
{
	unsigned long tmp, old;
	__be64 be_lockbit, be_bits;

	/*
	 * We load/store in native endian, but the HTAB is in big endian. If
	 * we byte swap all data we apply on the PTE we're implicitly correct
	 * again.
	 */
	be_lockbit = cpu_to_be64(HPTE_V_HVLOCK);
	be_bits = cpu_to_be64(bits);

	asm volatile("	ldarx	%0,0,%2\n"
		     "	and.	%1,%0,%3\n"
		     "	bne	2f\n"
		     "	or	%0,%0,%4\n"
		     "	stdcx.	%0,0,%2\n"
		     "	beq+	2f\n"
		     "	mr	%1,%3\n"
		     "2:	isync"
		     : "=&r" (tmp), "=&r" (old)
		     : "r" (hpte), "r" (be_bits), "r" (be_lockbit)
		     : "cc", "memory");
	return old == 0;
}

static inline void unlock_hpte(__be64 *hpte, unsigned long hpte_v)
{
	hpte_v &= ~HPTE_V_HVLOCK;
	asm volatile(PPC_RELEASE_BARRIER "" : : : "memory");
	hpte[0] = cpu_to_be64(hpte_v);
}

/* Without barrier */
static inline void __unlock_hpte(__be64 *hpte, unsigned long hpte_v)
{
	hpte_v &= ~HPTE_V_HVLOCK;
	hpte[0] = cpu_to_be64(hpte_v);
}
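
/*
 * Typical pattern for the HPTE lock primitives above (an illustrative
 * sketch only; hptep is assumed to point at the HPTE's first doubleword):
 *
 *	while (!try_lock_hpte(hptep, HPTE_V_HVLOCK))
 *		cpu_relax();
 *	... examine or update the HPTE ...
 *	unlock_hpte(hptep, be64_to_cpu(hptep[0]));
 */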

static inline int __hpte_actual_psize(unsigned int lp, int psize)
{
	int i, shift;
	unsigned int mask;

	/* start from 1 ignoring MMU_PAGE_4K */
	for (i = 1; i < MMU_PAGE_COUNT; i++) {

		/* invalid penc */
		if (mmu_psize_defs[psize].penc[i] == -1)
			continue;
		/*
		 * encoding bits per actual page size
		 *        PTE LP     actual page size
		 *    rrrr rrrz	     >= 8KB
		 *    rrrr rrzz	     >= 16KB
		 *    rrrr rzzz	     >= 32KB
		 *    rrrr zzzz	     >= 64KB
		 *    .......
		 */
		shift = mmu_psize_defs[i].shift - LP_SHIFT;
		if (shift > LP_BITS)
			shift = LP_BITS;
		mask = (1 << shift) - 1;
		if ((lp & mask) == mmu_psize_defs[psize].penc[i])
			return i;
	}
	return -1;
}

static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
					     unsigned long pte_index)
{
	int b_psize = MMU_PAGE_4K, a_psize = MMU_PAGE_4K;
	unsigned int penc;
	unsigned long rb = 0, va_low, sllp;
	unsigned int lp = (r >> LP_SHIFT) & ((1 << LP_BITS) - 1);

	if (v & HPTE_V_LARGE) {
		for (b_psize = 0; b_psize < MMU_PAGE_COUNT; b_psize++) {

			/* valid entries have a shift value */
			if (!mmu_psize_defs[b_psize].shift)
				continue;

			a_psize = __hpte_actual_psize(lp, b_psize);
			if (a_psize != -1)
				break;
		}
	}
	/*
	 * Ignore the top 14 bits of va.
	 * v has its top two bits covering segment size, hence move
	 * by 16 bits; also clear the lower HPTE_V_AVPN_SHIFT (7) bits.
	 * The AVA field in v also has the lower 23 bits ignored.
	 * For a 4K base page size we need va bits 14..65 (so we need to
	 * collect 11 extra bits); for the others we need bits 14..14+i.
	 */
	/* This covers bits 14..54 of va */
	rb = (v & ~0x7fUL) << 16;		/* AVA field */
	rb |= (v >> HPTE_V_SSIZE_SHIFT) << 8;	/* B field */
	/*
	 * The AVA in v has the lower 23 bits cleared; we need to derive
	 * those from the pteg index.
	 */
	va_low = pte_index >> 3;
	if (v & HPTE_V_SECONDARY)
		va_low = ~va_low;
	/*
	 * Get the vpn bits from va_low using the reverse of hashing.
	 * In v we have the va with 23 bits dropped and then left shifted
	 * HPTE_V_AVPN_SHIFT (7) bits. Now to find the vsid we need to
	 * right shift it by (SID_SHIFT - (23 - 7)).
	 */
	if (!(v & HPTE_V_1TB_SEG))
		va_low ^= v >> (SID_SHIFT - 16);
	else
		va_low ^= v >> (SID_SHIFT_1T - 16);
	va_low &= 0x7ff;

	switch (b_psize) {
	case MMU_PAGE_4K:
		sllp = ((mmu_psize_defs[a_psize].sllp & SLB_VSID_L) >> 6) |
			((mmu_psize_defs[a_psize].sllp & SLB_VSID_LP) >> 4);
		rb |= sllp << 5;	/* AP field */
		rb |= (va_low & 0x7ff) << 12;	/* remaining 11 bits of AVA */
		break;
	default:
	{
		int aval_shift;
		/*
		 * remaining bits of AVA/LP fields,
		 * which also contain the rr bits of LP
		 */
		rb |= (va_low << mmu_psize_defs[b_psize].shift) & 0x7ff000;
		/*
		 * Now clear the LP bits not needed for the actual psize
		 */
		rb &= ~((1ul << mmu_psize_defs[a_psize].shift) - 1);
		/*
		 * AVAL field gets bits 58..77 - base_page_shift of the va;
		 * we have space for bits 58..64, so the missing bits must
		 * be zero filled.  The +1 takes care of the L bit shift.
		 */
		aval_shift = 64 - (77 - mmu_psize_defs[b_psize].shift) + 1;
		rb |= ((va_low << aval_shift) & 0xfe);

		rb |= 1;		/* L field */
		penc = mmu_psize_defs[b_psize].penc[a_psize];
		rb |= penc << 12;	/* LP field */
		break;
	}
	}
	rb |= (v >> 54) & 0x300;		/* B field */
	return rb;
}
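
/*
 * The value built above is the RB operand of the tlbie instruction that
 * invalidates this HPTE's translation.  A hedged usage sketch (kvm and
 * the HPTE words come from the caller; the required eieio/ptesync
 * barriers are omitted):
 *
 *	rb = compute_tlbie_rb(hpte_v, hpte_r, pte_index);
 *	asm volatile(PPC_TLBIE(%1, %0) : : "r" (rb), "r" (kvm->arch.lpid));
 */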

static inline unsigned long __hpte_page_size(unsigned long h, unsigned long l,
					     bool is_base_size)
{
	int size, a_psize;
	/* Look at the 8 bit LP value */
	unsigned int lp = (l >> LP_SHIFT) & ((1 << LP_BITS) - 1);

	/* only handle 4k, 64k and 16M pages for now */
	if (!(h & HPTE_V_LARGE))
		return 1ul << 12;
	else {
		for (size = 0; size < MMU_PAGE_COUNT; size++) {
			/* valid entries have a shift value */
			if (!mmu_psize_defs[size].shift)
				continue;

			a_psize = __hpte_actual_psize(lp, size);
			if (a_psize != -1) {
				if (is_base_size)
					return 1ul << mmu_psize_defs[size].shift;
				return 1ul << mmu_psize_defs[a_psize].shift;
			}
		}
	}
	return 0;
}

static inline unsigned long hpte_page_size(unsigned long h, unsigned long l)
{
	return __hpte_page_size(h, l, 0);
}

static inline unsigned long hpte_base_page_size(unsigned long h, unsigned long l)
{
	return __hpte_page_size(h, l, 1);
}
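
/*
 * Example (illustrative): with MPSS (multiple page sizes per segment),
 * an HPTE can map a 16M actual page in a segment whose base page size
 * is 64K.  For such an entry hpte_base_page_size() returns 0x10000
 * (64K) while hpte_page_size() returns 0x1000000 (16M).
 */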

static inline unsigned long hpte_rpn(unsigned long ptel, unsigned long psize)
{
	return ((ptel & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT;
}
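
/*
 * Page-protection (PP) helpers.  An HPTE allows writing unless its
 * PP0/PP encoding is one of the two combinations that never permit a
 * store (PP_RXRX and PP_RXXX); hpte_make_readonly() converts any
 * encoding to its closest read-only equivalent.
 */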

static inline int hpte_is_writable(unsigned long ptel)
{
	unsigned long pp = ptel & (HPTE_R_PP0 | HPTE_R_PP);

	return pp != PP_RXRX && pp != PP_RXXX;
}

static inline unsigned long hpte_make_readonly(unsigned long ptel)
{
	if ((ptel & HPTE_R_PP0) || (ptel & HPTE_R_PP) == PP_RWXX)
		ptel = (ptel & ~HPTE_R_PP) | PP_RXXX;
	else
		ptel |= PP_RXRX;
	return ptel;
}

static inline bool hpte_cache_flags_ok(unsigned long hptel, bool is_ci)
{
	unsigned int wimg = hptel & HPTE_R_WIMG;

	/* Handle SAO */
	if (wimg == (HPTE_R_W | HPTE_R_I | HPTE_R_M) &&
	    cpu_has_feature(CPU_FTR_ARCH_206))
		wimg = HPTE_R_M;

	if (!is_ci)
		return wimg == HPTE_R_M;
	/*
	 * If the host mapping is cache-inhibited, make sure the hptel
	 * is cache-inhibited as well.
	 */
	if (wimg & HPTE_R_W) /* FIXME!! is this ok for all guests? */
		return false;
	return !!(wimg & HPTE_R_I);
}

/*
 * If it's present and writable, atomically set the dirty and referenced
 * bits and return the PTE, otherwise return a zero PTE.
 */
static inline pte_t kvmppc_read_update_linux_pte(pte_t *ptep, int writing)
{
	pte_t old_pte, new_pte = __pte(0);

	while (1) {
		/*
		 * Make sure we don't reload from ptep
		 */
		old_pte = READ_ONCE(*ptep);
		/*
		 * wait until H_PAGE_BUSY is clear, then update atomically
		 */
		if (unlikely(pte_val(old_pte) & H_PAGE_BUSY)) {
			cpu_relax();
			continue;
		}
		/* If pte is not present return None */
		if (unlikely(!(pte_val(old_pte) & _PAGE_PRESENT)))
			return __pte(0);

		new_pte = pte_mkyoung(old_pte);
		if (writing && pte_write(old_pte))
			new_pte = pte_mkdirty(new_pte);

		if (pte_xchg(ptep, old_pte, new_pte))
			break;
	}
	return new_pte;
}

static inline bool hpte_read_permission(unsigned long pp, unsigned long key)
{
	if (key)
		return PP_RWRX <= pp && pp <= PP_RXRX;
	return true;
}

static inline bool hpte_write_permission(unsigned long pp, unsigned long key)
{
	if (key)
		return pp == PP_RWRW;
	return pp <= PP_RWRW;
}
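
/*
 * Assemble the 5-bit storage key from the KEY_HI/KEY_LO bits of the
 * second HPTE doubleword and return the matching two-bit access-control
 * field from the AMR (key 0 lives in AMR bits 62..63, key 31 in bits
 * 0..1).
 */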
static inline int hpte_get_skey_perm(unsigned long hpte_r, unsigned long amr)
{
	unsigned long skey;

	skey = ((hpte_r & HPTE_R_KEY_HI) >> 57) |
		((hpte_r & HPTE_R_KEY_LO) >> 9);
	return (amr >> (62 - 2 * skey)) & 3;
}

static inline void lock_rmap(unsigned long *rmap)
{
	do {
		while (test_bit(KVMPPC_RMAP_LOCK_BIT, rmap))
			cpu_relax();
	} while (test_and_set_bit_lock(KVMPPC_RMAP_LOCK_BIT, rmap));
}

static inline void unlock_rmap(unsigned long *rmap)
{
	__clear_bit_unlock(KVMPPC_RMAP_LOCK_BIT, rmap);
}
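
/*
 * Pairing sketch for the rmap lock above (illustrative only):
 *
 *	lock_rmap(rmap);
 *	... walk or modify the reverse-mapping chain ...
 *	unlock_rmap(rmap);
 */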

static inline bool slot_is_aligned(struct kvm_memory_slot *memslot,
				   unsigned long pagesize)
{
	unsigned long mask = (pagesize >> PAGE_SHIFT) - 1;

	if (pagesize <= PAGE_SIZE)
		return true;
	return !(memslot->base_gfn & mask) && !(memslot->npages & mask);
}

/*
 * This works for 4k, 64k and 16M pages on POWER7,
 * and 4k and 16M pages on PPC970.
 */
static inline unsigned long slb_pgsize_encoding(unsigned long psize)
{
	unsigned long senc = 0;

	if (psize > 0x1000) {
		senc = SLB_VSID_L;
		if (psize == 0x10000)
			senc |= SLB_VSID_LP_01;
	}
	return senc;
}
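
/*
 * Worked example (directly from the cases above): psize 0x1000 (4K)
 * yields 0 (no L/LP bits set), 0x10000 (64K) yields
 * SLB_VSID_L | SLB_VSID_LP_01, and anything larger, e.g. 0x1000000
 * (16M), yields SLB_VSID_L alone.
 */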

static inline int is_vrma_hpte(unsigned long hpte_v)
{
	return (hpte_v & ~0xffffffUL) ==
		(HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)));
}

#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
/*
 * Note modification of an HPTE; set the HPTE modified bit
 * if anyone is interested.
 */
static inline void note_hpte_modification(struct kvm *kvm,
					  struct revmap_entry *rev)
{
	if (atomic_read(&kvm->arch.hpte_mod_interest))
		rev->guest_rpte |= HPTE_GR_MODIFIED;
}

/*
 * Like kvm_memslots(), but for use in real mode when we can't do
 * any RCU stuff (since the secondary threads are offline from the
 * kernel's point of view), and we can't print anything.
 * Thus we use rcu_dereference_raw() rather than rcu_dereference_check().
 */
static inline struct kvm_memslots *kvm_memslots_raw(struct kvm *kvm)
{
	return rcu_dereference_raw_notrace(kvm->memslots[0]);
}

extern void kvmppc_mmu_debugfs_init(struct kvm *kvm);

extern void kvmhv_rm_send_ipi(int cpu);

#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */

#endif /* __ASM_KVM_BOOK3S_64_H__ */