book3s_hv_nested.c

// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright IBM Corporation, 2018
 * Authors Suraj Jitindar Singh <sjitindarsingh@gmail.com>
 *	   Paul Mackerras <paulus@ozlabs.org>
 *
 * Description: KVM functions specific to running nested KVM-HV guests
 * on Book3S processors (specifically POWER9 and later).
 */

#include <linux/kernel.h>
#include <linux/kvm_host.h>
#include <linux/llist.h>

#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
#include <asm/mmu.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/pte-walk.h>
#include <asm/reg.h>

static struct patb_entry *pseries_partition_tb;

static void kvmhv_update_ptbl_cache(struct kvm_nested_guest *gp);
static void kvmhv_free_memslot_nest_rmap(struct kvm_memory_slot *free);
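
/*
 * Snapshot the L1 guest's hypervisor-privileged register state (taken from
 * the vcpu and its virtual core) into a hv_guest_state image, so that it
 * can be restored after an L2 guest has been run on this vcpu.
 */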
void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
{
	struct kvmppc_vcore *vc = vcpu->arch.vcore;

	hr->pcr = vc->pcr;
	hr->dpdes = vc->dpdes;
	hr->hfscr = vcpu->arch.hfscr;
	hr->tb_offset = vc->tb_offset;
	hr->dawr0 = vcpu->arch.dawr;
	hr->dawrx0 = vcpu->arch.dawrx;
	hr->ciabr = vcpu->arch.ciabr;
	hr->purr = vcpu->arch.purr;
	hr->spurr = vcpu->arch.spurr;
	hr->ic = vcpu->arch.ic;
	hr->vtb = vc->vtb;
	hr->srr0 = vcpu->arch.shregs.srr0;
	hr->srr1 = vcpu->arch.shregs.srr1;
	hr->sprg[0] = vcpu->arch.shregs.sprg0;
	hr->sprg[1] = vcpu->arch.shregs.sprg1;
	hr->sprg[2] = vcpu->arch.shregs.sprg2;
	hr->sprg[3] = vcpu->arch.shregs.sprg3;
	hr->pidr = vcpu->arch.pid;
	hr->cfar = vcpu->arch.cfar;
	hr->ppr = vcpu->arch.ppr;
}
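
/*
 * The register-state buffers are read from and written back to L1 memory
 * in L1's byte order; when that differs from the host's (as reported by
 * kvmppc_need_byteswap()), every field is byte-swapped on the way through.
 */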
static void byteswap_pt_regs(struct pt_regs *regs)
{
	unsigned long *addr = (unsigned long *) regs;

	for (; addr < ((unsigned long *) (regs + 1)); addr++)
		*addr = swab64(*addr);
}

static void byteswap_hv_regs(struct hv_guest_state *hr)
{
	hr->version = swab64(hr->version);
	hr->lpid = swab32(hr->lpid);
	hr->vcpu_token = swab32(hr->vcpu_token);
	hr->lpcr = swab64(hr->lpcr);
	hr->pcr = swab64(hr->pcr);
	hr->amor = swab64(hr->amor);
	hr->dpdes = swab64(hr->dpdes);
	hr->hfscr = swab64(hr->hfscr);
	hr->tb_offset = swab64(hr->tb_offset);
	hr->dawr0 = swab64(hr->dawr0);
	hr->dawrx0 = swab64(hr->dawrx0);
	hr->ciabr = swab64(hr->ciabr);
	hr->hdec_expiry = swab64(hr->hdec_expiry);
	hr->purr = swab64(hr->purr);
	hr->spurr = swab64(hr->spurr);
	hr->ic = swab64(hr->ic);
	hr->vtb = swab64(hr->vtb);
	hr->hdar = swab64(hr->hdar);
	hr->hdsisr = swab64(hr->hdsisr);
	hr->heir = swab64(hr->heir);
	hr->asdr = swab64(hr->asdr);
	hr->srr0 = swab64(hr->srr0);
	hr->srr1 = swab64(hr->srr1);
	hr->sprg[0] = swab64(hr->sprg[0]);
	hr->sprg[1] = swab64(hr->sprg[1]);
	hr->sprg[2] = swab64(hr->sprg[2]);
	hr->sprg[3] = swab64(hr->sprg[3]);
	hr->pidr = swab64(hr->pidr);
	hr->cfar = swab64(hr->cfar);
	hr->ppr = swab64(hr->ppr);
}
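
/*
 * Capture the L2 exit state that is returned to L1: the registers the L2
 * run may have changed, plus the interrupt-specific fields (hdar/hdsisr,
 * asdr, heir) that correspond to the trap which ended the L2 run.
 */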
static void save_hv_return_state(struct kvm_vcpu *vcpu, int trap,
				 struct hv_guest_state *hr)
{
	struct kvmppc_vcore *vc = vcpu->arch.vcore;

	hr->dpdes = vc->dpdes;
	hr->hfscr = vcpu->arch.hfscr;
	hr->purr = vcpu->arch.purr;
	hr->spurr = vcpu->arch.spurr;
	hr->ic = vcpu->arch.ic;
	hr->vtb = vc->vtb;
	hr->srr0 = vcpu->arch.shregs.srr0;
	hr->srr1 = vcpu->arch.shregs.srr1;
	hr->sprg[0] = vcpu->arch.shregs.sprg0;
	hr->sprg[1] = vcpu->arch.shregs.sprg1;
	hr->sprg[2] = vcpu->arch.shregs.sprg2;
	hr->sprg[3] = vcpu->arch.shregs.sprg3;
	hr->pidr = vcpu->arch.pid;
	hr->cfar = vcpu->arch.cfar;
	hr->ppr = vcpu->arch.ppr;
	switch (trap) {
	case BOOK3S_INTERRUPT_H_DATA_STORAGE:
		hr->hdar = vcpu->arch.fault_dar;
		hr->hdsisr = vcpu->arch.fault_dsisr;
		hr->asdr = vcpu->arch.fault_gpa;
		break;
	case BOOK3S_INTERRUPT_H_INST_STORAGE:
		hr->asdr = vcpu->arch.fault_gpa;
		break;
	case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
		hr->heir = vcpu->arch.emul_inst;
		break;
	}
}

static void sanitise_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
{
	/*
	 * Don't let L1 enable features for L2 which we've disabled for L1,
	 * but preserve the interrupt cause field.
	 */
	hr->hfscr &= (HFSCR_INTR_CAUSE | vcpu->arch.hfscr);

	/* Don't let data address watchpoint match in hypervisor state */
	hr->dawrx0 &= ~DAWRX_HYP;

	/* Don't let completed instruction address breakpt match in HV state */
	if ((hr->ciabr & CIABR_PRIV) == CIABR_PRIV_HYPER)
		hr->ciabr &= ~CIABR_PRIV;
}
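
/*
 * Load a hv_guest_state image into the vcpu and its virtual core.  Used
 * both to switch the vcpu to the L2 state supplied by L1 and to put the
 * saved L1 state back afterwards.
 */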
static void restore_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
{
	struct kvmppc_vcore *vc = vcpu->arch.vcore;

	vc->pcr = hr->pcr;
	vc->dpdes = hr->dpdes;
	vcpu->arch.hfscr = hr->hfscr;
	vcpu->arch.dawr = hr->dawr0;
	vcpu->arch.dawrx = hr->dawrx0;
	vcpu->arch.ciabr = hr->ciabr;
	vcpu->arch.purr = hr->purr;
	vcpu->arch.spurr = hr->spurr;
	vcpu->arch.ic = hr->ic;
	vc->vtb = hr->vtb;
	vcpu->arch.shregs.srr0 = hr->srr0;
	vcpu->arch.shregs.srr1 = hr->srr1;
	vcpu->arch.shregs.sprg0 = hr->sprg[0];
	vcpu->arch.shregs.sprg1 = hr->sprg[1];
	vcpu->arch.shregs.sprg2 = hr->sprg[2];
	vcpu->arch.shregs.sprg3 = hr->sprg[3];
	vcpu->arch.pid = hr->pidr;
	vcpu->arch.cfar = hr->cfar;
	vcpu->arch.ppr = hr->ppr;
}

void kvmhv_restore_hv_return_state(struct kvm_vcpu *vcpu,
				   struct hv_guest_state *hr)
{
	struct kvmppc_vcore *vc = vcpu->arch.vcore;

	vc->dpdes = hr->dpdes;
	vcpu->arch.hfscr = hr->hfscr;
	vcpu->arch.purr = hr->purr;
	vcpu->arch.spurr = hr->spurr;
	vcpu->arch.ic = hr->ic;
	vc->vtb = hr->vtb;
	vcpu->arch.fault_dar = hr->hdar;
	vcpu->arch.fault_dsisr = hr->hdsisr;
	vcpu->arch.fault_gpa = hr->asdr;
	vcpu->arch.emul_inst = hr->heir;
	vcpu->arch.shregs.srr0 = hr->srr0;
	vcpu->arch.shregs.srr1 = hr->srr1;
	vcpu->arch.shregs.sprg0 = hr->sprg[0];
	vcpu->arch.shregs.sprg1 = hr->sprg[1];
	vcpu->arch.shregs.sprg2 = hr->sprg[2];
	vcpu->arch.shregs.sprg3 = hr->sprg[3];
	vcpu->arch.pid = hr->pidr;
	vcpu->arch.cfar = hr->cfar;
	vcpu->arch.ppr = hr->ppr;
}
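
/*
 * Handle the H_ENTER_NESTED hcall from L1.  r4 points to a hv_guest_state
 * and r5 to a pt_regs image in L1 memory describing the L2 vcpu to run.
 * The L1 state of this vcpu is saved, the L2 state is loaded (after being
 * sanitised), the L2 vcpu runs until it exits, and the resulting L2 state
 * is written back to L1 before the L1 state is restored.  The return value
 * is the trap that ended the L2 run, or an H_xxx error code.
 */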
long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
{
	long int err, r;
	struct kvm_nested_guest *l2;
	struct pt_regs l2_regs, saved_l1_regs;
	struct hv_guest_state l2_hv, saved_l1_hv;
	struct kvmppc_vcore *vc = vcpu->arch.vcore;
	u64 hv_ptr, regs_ptr;
	u64 hdec_exp;
	s64 delta_purr, delta_spurr, delta_ic, delta_vtb;
	u64 mask;
	unsigned long lpcr;

	if (vcpu->kvm->arch.l1_ptcr == 0)
		return H_NOT_AVAILABLE;

	/* copy parameters in */
	hv_ptr = kvmppc_get_gpr(vcpu, 4);
	err = kvm_vcpu_read_guest(vcpu, hv_ptr, &l2_hv,
				  sizeof(struct hv_guest_state));
	if (err)
		return H_PARAMETER;
	if (kvmppc_need_byteswap(vcpu))
		byteswap_hv_regs(&l2_hv);
	if (l2_hv.version != HV_GUEST_STATE_VERSION)
		return H_P2;

	regs_ptr = kvmppc_get_gpr(vcpu, 5);
	err = kvm_vcpu_read_guest(vcpu, regs_ptr, &l2_regs,
				  sizeof(struct pt_regs));
	if (err)
		return H_PARAMETER;
	if (kvmppc_need_byteswap(vcpu))
		byteswap_pt_regs(&l2_regs);
	if (l2_hv.vcpu_token >= NR_CPUS)
		return H_PARAMETER;

	/* translate lpid */
	l2 = kvmhv_get_nested(vcpu->kvm, l2_hv.lpid, true);
	if (!l2)
		return H_PARAMETER;
	if (!l2->l1_gr_to_hr) {
		mutex_lock(&l2->tlb_lock);
		kvmhv_update_ptbl_cache(l2);
		mutex_unlock(&l2->tlb_lock);
	}

	/* save l1 values of things */
	vcpu->arch.regs.msr = vcpu->arch.shregs.msr;
	saved_l1_regs = vcpu->arch.regs;
	kvmhv_save_hv_regs(vcpu, &saved_l1_hv);

	/* convert TB values/offsets to host (L0) values */
	hdec_exp = l2_hv.hdec_expiry - vc->tb_offset;
	vc->tb_offset += l2_hv.tb_offset;

	/* set L1 state to L2 state */
	vcpu->arch.nested = l2;
	vcpu->arch.nested_vcpu_id = l2_hv.vcpu_token;
	vcpu->arch.regs = l2_regs;
	vcpu->arch.shregs.msr = vcpu->arch.regs.msr;
	mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD |
		LPCR_LPES | LPCR_MER;
	lpcr = (vc->lpcr & ~mask) | (l2_hv.lpcr & mask);
	sanitise_hv_regs(vcpu, &l2_hv);
	restore_hv_regs(vcpu, &l2_hv);

	vcpu->arch.ret = RESUME_GUEST;
	vcpu->arch.trap = 0;
	do {
		if (mftb() >= hdec_exp) {
			vcpu->arch.trap = BOOK3S_INTERRUPT_HV_DECREMENTER;
			r = RESUME_HOST;
			break;
		}
		r = kvmhv_run_single_vcpu(vcpu->arch.kvm_run, vcpu, hdec_exp,
					  lpcr);
	} while (is_kvmppc_resume_guest(r));

	/* save L2 state for return */
	l2_regs = vcpu->arch.regs;
	l2_regs.msr = vcpu->arch.shregs.msr;
	delta_purr = vcpu->arch.purr - l2_hv.purr;
	delta_spurr = vcpu->arch.spurr - l2_hv.spurr;
	delta_ic = vcpu->arch.ic - l2_hv.ic;
	delta_vtb = vc->vtb - l2_hv.vtb;
	save_hv_return_state(vcpu, vcpu->arch.trap, &l2_hv);

	/* restore L1 state */
	vcpu->arch.nested = NULL;
	vcpu->arch.regs = saved_l1_regs;
	vcpu->arch.shregs.msr = saved_l1_regs.msr & ~MSR_TS_MASK;
	/* set L1 MSR TS field according to L2 transaction state */
	if (l2_regs.msr & MSR_TS_MASK)
		vcpu->arch.shregs.msr |= MSR_TS_S;
	vc->tb_offset = saved_l1_hv.tb_offset;
	restore_hv_regs(vcpu, &saved_l1_hv);
	vcpu->arch.purr += delta_purr;
	vcpu->arch.spurr += delta_spurr;
	vcpu->arch.ic += delta_ic;
	vc->vtb += delta_vtb;

	kvmhv_put_nested(l2);

	/* copy l2_hv_state and regs back to guest */
	if (kvmppc_need_byteswap(vcpu)) {
		byteswap_hv_regs(&l2_hv);
		byteswap_pt_regs(&l2_regs);
	}
	err = kvm_vcpu_write_guest(vcpu, hv_ptr, &l2_hv,
				   sizeof(struct hv_guest_state));
	if (err)
		return H_AUTHORITY;
	err = kvm_vcpu_write_guest(vcpu, regs_ptr, &l2_regs,
				   sizeof(struct pt_regs));
	if (err)
		return H_AUTHORITY;

	if (r == -EINTR)
		return H_INTERRUPT;

	return vcpu->arch.trap;
}
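
/*
 * Set-up for the case where this kernel is itself running as a nested
 * hypervisor under a pseries L0: allocate a partition table for its own
 * guests and register it with the L0 via H_SET_PARTITION_TABLE.  Returns 0
 * when there is nothing to do (bare metal) and -ENODEV if radix is not in
 * use or the parent hypervisor does not support nesting.
 */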
long kvmhv_nested_init(void)
{
	long int ptb_order;
	unsigned long ptcr;
	long rc;

	if (!kvmhv_on_pseries())
		return 0;
	if (!radix_enabled())
		return -ENODEV;

	/* find log base 2 of KVMPPC_NR_LPIDS, rounding up */
	ptb_order = __ilog2(KVMPPC_NR_LPIDS - 1) + 1;
	if (ptb_order < 8)
		ptb_order = 8;
	pseries_partition_tb = kmalloc(sizeof(struct patb_entry) << ptb_order,
				       GFP_KERNEL);
	if (!pseries_partition_tb) {
		pr_err("kvm-hv: failed to allocate nested partition table\n");
		return -ENOMEM;
	}

	ptcr = __pa(pseries_partition_tb) | (ptb_order - 8);
	rc = plpar_hcall_norets(H_SET_PARTITION_TABLE, ptcr);
	if (rc != H_SUCCESS) {
		pr_err("kvm-hv: Parent hypervisor does not support nesting (rc=%ld)\n",
		       rc);
		kfree(pseries_partition_tb);
		pseries_partition_tb = NULL;
		return -ENODEV;
	}

	return 0;
}

void kvmhv_nested_exit(void)
{
	/*
	 * N.B. the kvmhv_on_pseries() test is there because it enables
	 * the compiler to remove the call to plpar_hcall_norets()
	 * when CONFIG_PPC_PSERIES=n.
	 */
	if (kvmhv_on_pseries() && pseries_partition_tb) {
		plpar_hcall_norets(H_SET_PARTITION_TABLE, 0);
		kfree(pseries_partition_tb);
		pseries_partition_tb = NULL;
	}
}
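
/*
 * Invalidate all partition-scoped TLB entries for an LPID: directly with
 * radix__flush_tlb_lpid() on bare metal, or by asking the L0 hypervisor to
 * do it through the H_TLB_INVALIDATE hcall when running nested.
 */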
static void kvmhv_flush_lpid(unsigned int lpid)
{
	long rc;

	if (!kvmhv_on_pseries()) {
		radix__flush_tlb_lpid(lpid);
		return;
	}

	rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(2, 0, 1),
				lpid, TLBIEL_INVAL_SET_LPID);
	if (rc)
		pr_err("KVM: TLB LPID invalidation hcall failed, rc=%ld\n", rc);
}

void kvmhv_set_ptbl_entry(unsigned int lpid, u64 dw0, u64 dw1)
{
	if (!kvmhv_on_pseries()) {
		mmu_partition_table_set_entry(lpid, dw0, dw1);
		return;
	}

	pseries_partition_tb[lpid].patb0 = cpu_to_be64(dw0);
	pseries_partition_tb[lpid].patb1 = cpu_to_be64(dw1);
	/* L0 will do the necessary barriers */
	kvmhv_flush_lpid(lpid);
}

static void kvmhv_set_nested_ptbl(struct kvm_nested_guest *gp)
{
	unsigned long dw0;

	dw0 = PATB_HR | radix__get_tree_size() |
		__pa(gp->shadow_pgtable) | RADIX_PGD_INDEX_SIZE;
	kvmhv_set_ptbl_entry(gp->shadow_lpid, dw0, gp->process_table);
}

void kvmhv_vm_nested_init(struct kvm *kvm)
{
	kvm->arch.max_nested_lpid = -1;
}

/*
 * Handle the H_SET_PARTITION_TABLE hcall.
 * r4 = guest real address of partition table + log_2(size) - 12
 * (formatted as for the PTCR).
 */
long kvmhv_set_partition_table(struct kvm_vcpu *vcpu)
{
	struct kvm *kvm = vcpu->kvm;
	unsigned long ptcr = kvmppc_get_gpr(vcpu, 4);
	int srcu_idx;
	long ret = H_SUCCESS;

	srcu_idx = srcu_read_lock(&kvm->srcu);
	/*
	 * Limit the partition table to 4096 entries (because that's what
	 * hardware supports), and check the base address.
	 */
	if ((ptcr & PRTS_MASK) > 12 - 8 ||
	    !kvm_is_visible_gfn(vcpu->kvm, (ptcr & PRTB_MASK) >> PAGE_SHIFT))
		ret = H_PARAMETER;
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	if (ret == H_SUCCESS)
		kvm->arch.l1_ptcr = ptcr;
	return ret;
}

/*
 * Reload the partition table entry for a guest.
 * Caller must hold gp->tlb_lock.
 */
static void kvmhv_update_ptbl_cache(struct kvm_nested_guest *gp)
{
	int ret;
	struct patb_entry ptbl_entry;
	unsigned long ptbl_addr;
	struct kvm *kvm = gp->l1_host;

	ret = -EFAULT;
	ptbl_addr = (kvm->arch.l1_ptcr & PRTB_MASK) + (gp->l1_lpid << 4);
	if (gp->l1_lpid < (1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 8)))
		ret = kvm_read_guest(kvm, ptbl_addr,
				     &ptbl_entry, sizeof(ptbl_entry));
	if (ret) {
		gp->l1_gr_to_hr = 0;
		gp->process_table = 0;
	} else {
		gp->l1_gr_to_hr = be64_to_cpu(ptbl_entry.patb0);
		gp->process_table = be64_to_cpu(ptbl_entry.patb1);
	}
	kvmhv_set_nested_ptbl(gp);
}
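
/*
 * Allocate and initialise the state for one L2 guest: a shadow page table
 * holding the combined L2-real-address to host-real-address mapping, and a
 * host ("shadow") LPID of our own under which to run it.
 */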
struct kvm_nested_guest *kvmhv_alloc_nested(struct kvm *kvm, unsigned int lpid)
{
	struct kvm_nested_guest *gp;
	long shadow_lpid;

	gp = kzalloc(sizeof(*gp), GFP_KERNEL);
	if (!gp)
		return NULL;
	gp->l1_host = kvm;
	gp->l1_lpid = lpid;
	mutex_init(&gp->tlb_lock);
	gp->shadow_pgtable = pgd_alloc(kvm->mm);
	if (!gp->shadow_pgtable)
		goto out_free;
	shadow_lpid = kvmppc_alloc_lpid();
	if (shadow_lpid < 0)
		goto out_free2;
	gp->shadow_lpid = shadow_lpid;

	memset(gp->prev_cpu, -1, sizeof(gp->prev_cpu));

	return gp;

 out_free2:
	pgd_free(kvm->mm, gp->shadow_pgtable);
 out_free:
	kfree(gp);
	return NULL;
}

/*
 * Free up any resources allocated for a nested guest.
 */
static void kvmhv_release_nested(struct kvm_nested_guest *gp)
{
	struct kvm *kvm = gp->l1_host;

	if (gp->shadow_pgtable) {
		/*
		 * No vcpu is using this struct and no call to
		 * kvmhv_get_nested can find this struct,
		 * so we don't need to hold kvm->mmu_lock.
		 */
		kvmppc_free_pgtable_radix(kvm, gp->shadow_pgtable,
					  gp->shadow_lpid);
		pgd_free(kvm->mm, gp->shadow_pgtable);
	}
	kvmhv_set_ptbl_entry(gp->shadow_lpid, 0, 0);
	kvmppc_free_lpid(gp->shadow_lpid);
	kfree(gp);
}
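
/*
 * Unhook a nested guest from the L1 lpid -> guest table and drop the
 * table's reference; the structure itself is only freed once the last
 * reference (e.g. from a vcpu currently running the L2) goes away.
 */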
static void kvmhv_remove_nested(struct kvm_nested_guest *gp)
{
	struct kvm *kvm = gp->l1_host;
	int lpid = gp->l1_lpid;
	long ref;

	spin_lock(&kvm->mmu_lock);
	if (gp == kvm->arch.nested_guests[lpid]) {
		kvm->arch.nested_guests[lpid] = NULL;
		if (lpid == kvm->arch.max_nested_lpid) {
			while (--lpid >= 0 && !kvm->arch.nested_guests[lpid])
				;
			kvm->arch.max_nested_lpid = lpid;
		}
		--gp->refcnt;
	}
	ref = gp->refcnt;
	spin_unlock(&kvm->mmu_lock);
	if (ref == 0)
		kvmhv_release_nested(gp);
}

/*
 * Free up all nested resources allocated for this guest.
 * This is called with no vcpus of the guest running, when
 * switching the guest to HPT mode or when destroying the
 * guest.
 */
void kvmhv_release_all_nested(struct kvm *kvm)
{
	int i;
	struct kvm_nested_guest *gp;
	struct kvm_nested_guest *freelist = NULL;
	struct kvm_memory_slot *memslot;
	int srcu_idx;

	spin_lock(&kvm->mmu_lock);
	for (i = 0; i <= kvm->arch.max_nested_lpid; i++) {
		gp = kvm->arch.nested_guests[i];
		if (!gp)
			continue;
		kvm->arch.nested_guests[i] = NULL;
		if (--gp->refcnt == 0) {
			gp->next = freelist;
			freelist = gp;
		}
	}
	kvm->arch.max_nested_lpid = -1;
	spin_unlock(&kvm->mmu_lock);
	while ((gp = freelist) != NULL) {
		freelist = gp->next;
		kvmhv_release_nested(gp);
	}

	srcu_idx = srcu_read_lock(&kvm->srcu);
	kvm_for_each_memslot(memslot, kvm_memslots(kvm))
		kvmhv_free_memslot_nest_rmap(memslot);
	srcu_read_unlock(&kvm->srcu, srcu_idx);
}

/* caller must hold gp->tlb_lock */
static void kvmhv_flush_nested(struct kvm_nested_guest *gp)
{
	struct kvm *kvm = gp->l1_host;

	spin_lock(&kvm->mmu_lock);
	kvmppc_free_pgtable_radix(kvm, gp->shadow_pgtable, gp->shadow_lpid);
	spin_unlock(&kvm->mmu_lock);
	kvmhv_flush_lpid(gp->shadow_lpid);
	kvmhv_update_ptbl_cache(gp);
	if (gp->l1_gr_to_hr == 0)
		kvmhv_remove_nested(gp);
}
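
/*
 * Look up the kvm_nested_guest struct for a given L1 LPID, optionally
 * creating it if it does not exist yet.  A reference is taken on the
 * returned struct; the caller drops it with kvmhv_put_nested().
 */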
struct kvm_nested_guest *kvmhv_get_nested(struct kvm *kvm, int l1_lpid,
					  bool create)
{
	struct kvm_nested_guest *gp, *newgp;

	if (l1_lpid >= KVM_MAX_NESTED_GUESTS ||
	    l1_lpid >= (1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 12 - 4)))
		return NULL;

	spin_lock(&kvm->mmu_lock);
	gp = kvm->arch.nested_guests[l1_lpid];
	if (gp)
		++gp->refcnt;
	spin_unlock(&kvm->mmu_lock);

	if (gp || !create)
		return gp;

	newgp = kvmhv_alloc_nested(kvm, l1_lpid);
	if (!newgp)
		return NULL;
	spin_lock(&kvm->mmu_lock);
	if (kvm->arch.nested_guests[l1_lpid]) {
		/* someone else beat us to it */
		gp = kvm->arch.nested_guests[l1_lpid];
	} else {
		kvm->arch.nested_guests[l1_lpid] = newgp;
		++newgp->refcnt;
		gp = newgp;
		newgp = NULL;
		if (l1_lpid > kvm->arch.max_nested_lpid)
			kvm->arch.max_nested_lpid = l1_lpid;
	}
	++gp->refcnt;
	spin_unlock(&kvm->mmu_lock);

	if (newgp)
		kvmhv_release_nested(newgp);

	return gp;
}

void kvmhv_put_nested(struct kvm_nested_guest *gp)
{
	struct kvm *kvm = gp->l1_host;
	long ref;

	spin_lock(&kvm->mmu_lock);
	ref = --gp->refcnt;
	spin_unlock(&kvm->mmu_lock);
	if (ref == 0)
		kvmhv_release_nested(gp);
}

static struct kvm_nested_guest *kvmhv_find_nested(struct kvm *kvm, int lpid)
{
	if (lpid > kvm->arch.max_nested_lpid)
		return NULL;
	return kvm->arch.nested_guests[lpid];
}

static inline bool kvmhv_n_rmap_is_equal(u64 rmap_1, u64 rmap_2)
{
	return !((rmap_1 ^ rmap_2) & (RMAP_NESTED_LPID_MASK |
				      RMAP_NESTED_GPA_MASK));
}
void kvmhv_insert_nest_rmap(struct kvm *kvm, unsigned long *rmapp,
			    struct rmap_nested **n_rmap)
{
	struct llist_node *entry = ((struct llist_head *) rmapp)->first;
	struct rmap_nested *cursor;
	u64 rmap, new_rmap = (*n_rmap)->rmap;

	/* Are there any existing entries? */
	if (!(*rmapp)) {
		/* No -> use the rmap as a single entry */
		*rmapp = new_rmap | RMAP_NESTED_IS_SINGLE_ENTRY;
		return;
	}

	/* Do any entries match what we're trying to insert? */
	for_each_nest_rmap_safe(cursor, entry, &rmap) {
		if (kvmhv_n_rmap_is_equal(rmap, new_rmap))
			return;
	}

	/* Do we need to create a list or just add the new entry? */
	rmap = *rmapp;
	if (rmap & RMAP_NESTED_IS_SINGLE_ENTRY) /* Not previously a list */
		*rmapp = 0UL;
	llist_add(&((*n_rmap)->list), (struct llist_head *) rmapp);
	if (rmap & RMAP_NESTED_IS_SINGLE_ENTRY) /* Not previously a list */
		(*n_rmap)->list.next = (struct llist_node *) rmap;

	/* Set NULL so not freed by caller */
	*n_rmap = NULL;
}

static void kvmhv_remove_nest_rmap(struct kvm *kvm, u64 n_rmap,
				   unsigned long hpa, unsigned long mask)
{
	struct kvm_nested_guest *gp;
	unsigned long gpa;
	unsigned int shift, lpid;
	pte_t *ptep;

	gpa = n_rmap & RMAP_NESTED_GPA_MASK;
	lpid = (n_rmap & RMAP_NESTED_LPID_MASK) >> RMAP_NESTED_LPID_SHIFT;
	gp = kvmhv_find_nested(kvm, lpid);
	if (!gp)
		return;

	/* Find and invalidate the pte */
	ptep = __find_linux_pte(gp->shadow_pgtable, gpa, NULL, &shift);
	/* Don't spuriously invalidate ptes if the pfn has changed */
	if (ptep && pte_present(*ptep) && ((pte_val(*ptep) & mask) == hpa))
		kvmppc_unmap_pte(kvm, ptep, gpa, shift, NULL, gp->shadow_lpid);
}

static void kvmhv_remove_nest_rmap_list(struct kvm *kvm, unsigned long *rmapp,
					unsigned long hpa, unsigned long mask)
{
	struct llist_node *entry = llist_del_all((struct llist_head *) rmapp);
	struct rmap_nested *cursor;
	unsigned long rmap;

	for_each_nest_rmap_safe(cursor, entry, &rmap) {
		kvmhv_remove_nest_rmap(kvm, rmap, hpa, mask);
		kfree(cursor);
	}
}

/* called with kvm->mmu_lock held */
void kvmhv_remove_nest_rmap_range(struct kvm *kvm,
				  struct kvm_memory_slot *memslot,
				  unsigned long gpa, unsigned long hpa,
				  unsigned long nbytes)
{
	unsigned long gfn, end_gfn;
	unsigned long addr_mask;

	if (!memslot)
		return;
	gfn = (gpa >> PAGE_SHIFT) - memslot->base_gfn;
	end_gfn = gfn + (nbytes >> PAGE_SHIFT);

	addr_mask = PTE_RPN_MASK & ~(nbytes - 1);
	hpa &= addr_mask;

	for (; gfn < end_gfn; gfn++) {
		unsigned long *rmap = &memslot->arch.rmap[gfn];

		kvmhv_remove_nest_rmap_list(kvm, rmap, hpa, addr_mask);
	}
}

static void kvmhv_free_memslot_nest_rmap(struct kvm_memory_slot *free)
{
	unsigned long page;

	for (page = 0; page < free->npages; page++) {
		unsigned long rmap, *rmapp = &free->arch.rmap[page];
		struct rmap_nested *cursor;
		struct llist_node *entry;

		entry = llist_del_all((struct llist_head *) rmapp);
		for_each_nest_rmap_safe(cursor, entry, &rmap)
			kfree(cursor);
	}
}
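
/*
 * Remove the shadow pte (if any) covering an L2 guest real address and
 * report the page shift at that level of the shadow tree, so callers can
 * tell how much address space a single invalidation covered.
 */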
static bool kvmhv_invalidate_shadow_pte(struct kvm_vcpu *vcpu,
					struct kvm_nested_guest *gp,
					long gpa, int *shift_ret)
{
	struct kvm *kvm = vcpu->kvm;
	bool ret = false;
	pte_t *ptep;
	int shift;

	spin_lock(&kvm->mmu_lock);
	ptep = __find_linux_pte(gp->shadow_pgtable, gpa, NULL, &shift);
	if (!shift)
		shift = PAGE_SHIFT;
	if (ptep && pte_present(*ptep)) {
		kvmppc_unmap_pte(kvm, ptep, gpa, shift, NULL, gp->shadow_lpid);
		ret = true;
	}
	spin_unlock(&kvm->mmu_lock);

	if (shift_ret)
		*shift_ret = shift;
	return ret;
}
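
/*
 * Helpers for decoding the tlbie instruction image and its RS/RB operands
 * as passed in by H_TLB_INVALIDATE: RIC/PRS/R are taken from the
 * instruction word, the LPID from RS, and IS/AP/EPN from RB.
 */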
static inline int get_ric(unsigned int instr)
{
	return (instr >> 18) & 0x3;
}

static inline int get_prs(unsigned int instr)
{
	return (instr >> 17) & 0x1;
}

static inline int get_r(unsigned int instr)
{
	return (instr >> 16) & 0x1;
}

static inline int get_lpid(unsigned long r_val)
{
	return r_val & 0xffffffff;
}

static inline int get_is(unsigned long r_val)
{
	return (r_val >> 10) & 0x3;
}

static inline int get_ap(unsigned long r_val)
{
	return (r_val >> 5) & 0x7;
}

static inline long get_epn(unsigned long r_val)
{
	return r_val >> 12;
}

static int kvmhv_emulate_tlbie_tlb_addr(struct kvm_vcpu *vcpu, int lpid,
					int ap, long epn)
{
	struct kvm *kvm = vcpu->kvm;
	struct kvm_nested_guest *gp;
	long npages;
	int shift, shadow_shift;
	unsigned long addr;

	shift = ap_to_shift(ap);
	addr = epn << 12;
	if (shift < 0)
		/* Invalid ap encoding */
		return -EINVAL;

	addr &= ~((1UL << shift) - 1);
	npages = 1UL << (shift - PAGE_SHIFT);

	gp = kvmhv_get_nested(kvm, lpid, false);
	if (!gp) /* No such guest -> nothing to do */
		return 0;
	mutex_lock(&gp->tlb_lock);

	/* There may be more than one host page backing this single guest pte */
	do {
		kvmhv_invalidate_shadow_pte(vcpu, gp, addr, &shadow_shift);

		npages -= 1UL << (shadow_shift - PAGE_SHIFT);
		addr += 1UL << shadow_shift;
	} while (npages > 0);

	mutex_unlock(&gp->tlb_lock);
	kvmhv_put_nested(gp);
	return 0;
}

static void kvmhv_emulate_tlbie_lpid(struct kvm_vcpu *vcpu,
				     struct kvm_nested_guest *gp, int ric)
{
	struct kvm *kvm = vcpu->kvm;

	mutex_lock(&gp->tlb_lock);
	switch (ric) {
	case 0:
		/* Invalidate TLB */
		spin_lock(&kvm->mmu_lock);
		kvmppc_free_pgtable_radix(kvm, gp->shadow_pgtable,
					  gp->shadow_lpid);
		kvmhv_flush_lpid(gp->shadow_lpid);
		spin_unlock(&kvm->mmu_lock);
		break;
	case 1:
		/*
		 * Invalidate PWC
		 * We don't cache this -> nothing to do
		 */
		break;
	case 2:
		/* Invalidate TLB, PWC and caching of partition table entries */
		kvmhv_flush_nested(gp);
		break;
	default:
		break;
	}
	mutex_unlock(&gp->tlb_lock);
}

static void kvmhv_emulate_tlbie_all_lpid(struct kvm_vcpu *vcpu, int ric)
{
	struct kvm *kvm = vcpu->kvm;
	struct kvm_nested_guest *gp;
	int i;

	spin_lock(&kvm->mmu_lock);
	for (i = 0; i <= kvm->arch.max_nested_lpid; i++) {
		gp = kvm->arch.nested_guests[i];
		if (gp) {
			spin_unlock(&kvm->mmu_lock);
			kvmhv_emulate_tlbie_lpid(vcpu, gp, ric);
			spin_lock(&kvm->mmu_lock);
		}
	}
	spin_unlock(&kvm->mmu_lock);
}
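
/*
 * Emulate an HV-privileged tlbie issued by L1 (passed in via
 * H_TLB_INVALIDATE): validate the field combination, then invalidate the
 * corresponding shadow state for one address, one LPID, or all LPIDs.
 */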
static int kvmhv_emulate_priv_tlbie(struct kvm_vcpu *vcpu, unsigned int instr,
				    unsigned long rsval, unsigned long rbval)
{
	struct kvm *kvm = vcpu->kvm;
	struct kvm_nested_guest *gp;
	int r, ric, prs, is, ap;
	int lpid;
	long epn;
	int ret = 0;

	ric = get_ric(instr);
	prs = get_prs(instr);
	r = get_r(instr);
	lpid = get_lpid(rsval);
	is = get_is(rbval);

	/*
	 * These cases are invalid and are not handled:
	 * r != 1 -> Only radix supported
	 * prs == 1 -> Not HV privileged
	 * ric == 3 -> No cluster bombs for radix
	 * is == 1 -> Partition scoped translations not associated with pid
	 * (!is) && (ric == 1 || ric == 2) -> Not supported by ISA
	 */
	if ((!r) || (prs) || (ric == 3) || (is == 1) ||
	    ((!is) && (ric == 1 || ric == 2)))
		return -EINVAL;

	switch (is) {
	case 0:
		/*
		 * We know ric == 0
		 * Invalidate TLB for a given target address
		 */
		epn = get_epn(rbval);
		ap = get_ap(rbval);
		ret = kvmhv_emulate_tlbie_tlb_addr(vcpu, lpid, ap, epn);
		break;
	case 2:
		/* Invalidate matching LPID */
		gp = kvmhv_get_nested(kvm, lpid, false);
		if (gp) {
			kvmhv_emulate_tlbie_lpid(vcpu, gp, ric);
			kvmhv_put_nested(gp);
		}
		break;
	case 3:
		/* Invalidate ALL LPIDs */
		kvmhv_emulate_tlbie_all_lpid(vcpu, ric);
		break;
	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

/*
 * This handles the H_TLB_INVALIDATE hcall.
 * Parameters are (r4) tlbie instruction code, (r5) rS contents,
 * (r6) rB contents.
 */
long kvmhv_do_nested_tlbie(struct kvm_vcpu *vcpu)
{
	int ret;

	ret = kvmhv_emulate_priv_tlbie(vcpu, kvmppc_get_gpr(vcpu, 4),
			kvmppc_get_gpr(vcpu, 5), kvmppc_get_gpr(vcpu, 6));
	if (ret)
		return H_PARAMETER;
	return H_SUCCESS;
}

/* Used to convert a nested guest real address to a L1 guest real address */
static int kvmhv_translate_addr_nested(struct kvm_vcpu *vcpu,
				       struct kvm_nested_guest *gp,
				       unsigned long n_gpa, unsigned long dsisr,
				       struct kvmppc_pte *gpte_p)
{
	u64 fault_addr, flags = dsisr & DSISR_ISSTORE;
	int ret;

	ret = kvmppc_mmu_walk_radix_tree(vcpu, n_gpa, gpte_p, gp->l1_gr_to_hr,
					 &fault_addr);
	if (ret) {
		/* We didn't find a pte */
		if (ret == -EINVAL) {
			/* Unsupported mmu config */
			flags |= DSISR_UNSUPP_MMU;
		} else if (ret == -ENOENT) {
			/* No translation found */
			flags |= DSISR_NOHPTE;
		} else if (ret == -EFAULT) {
			/* Couldn't access L1 real address */
			flags |= DSISR_PRTABLE_FAULT;
			vcpu->arch.fault_gpa = fault_addr;
		} else {
			/* Unknown error */
			return ret;
		}
		goto forward_to_l1;
	} else {
		/* We found a pte -> check permissions */
		if (dsisr & DSISR_ISSTORE) {
			/* Can we write? */
			if (!gpte_p->may_write) {
				flags |= DSISR_PROTFAULT;
				goto forward_to_l1;
			}
		} else if (vcpu->arch.trap == BOOK3S_INTERRUPT_H_INST_STORAGE) {
			/* Can we execute? */
			if (!gpte_p->may_execute) {
				flags |= SRR1_ISI_N_OR_G;
				goto forward_to_l1;
			}
		} else {
			/* Can we read? */
			if (!gpte_p->may_read && !gpte_p->may_write) {
				flags |= DSISR_PROTFAULT;
				goto forward_to_l1;
			}
		}
	}

	return 0;

forward_to_l1:
	vcpu->arch.fault_dsisr = flags;
	if (vcpu->arch.trap == BOOK3S_INTERRUPT_H_INST_STORAGE) {
		vcpu->arch.shregs.msr &= ~0x783f0000ul;
		vcpu->arch.shregs.msr |= flags;
	}
	return RESUME_HOST;
}
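
/*
 * A fault was flagged as needing reference/change-bit updates.  Only set
 * R/C if L1's own partition-scoped pte already allows it; then set the
 * bits both in our pte for L1 (kvm->arch.pgtable) and in the shadow pte
 * for the L2 guest.
 */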
static long kvmhv_handle_nested_set_rc(struct kvm_vcpu *vcpu,
				       struct kvm_nested_guest *gp,
				       unsigned long n_gpa,
				       struct kvmppc_pte gpte,
				       unsigned long dsisr)
{
	struct kvm *kvm = vcpu->kvm;
	bool writing = !!(dsisr & DSISR_ISSTORE);
	u64 pgflags;
	bool ret;

	/* Are the rc bits set in the L1 partition scoped pte? */
	pgflags = _PAGE_ACCESSED;
	if (writing)
		pgflags |= _PAGE_DIRTY;
	if (pgflags & ~gpte.rc)
		return RESUME_HOST;

	spin_lock(&kvm->mmu_lock);
	/* Set the rc bit in the pte of our (L0) pgtable for the L1 guest */
	ret = kvmppc_hv_handle_set_rc(kvm, kvm->arch.pgtable, writing,
				      gpte.raddr, kvm->arch.lpid);
	spin_unlock(&kvm->mmu_lock);
	if (!ret)
		return -EINVAL;

	/* Set the rc bit in the pte of the shadow_pgtable for the nested guest */
	ret = kvmppc_hv_handle_set_rc(kvm, gp->shadow_pgtable, writing, n_gpa,
				      gp->shadow_lpid);
	if (!ret)
		return -EINVAL;
	return 0;
}

static inline int kvmppc_radix_level_to_shift(int level)
{
	switch (level) {
	case 2:
		return PUD_SHIFT;
	case 1:
		return PMD_SHIFT;
	default:
		return PAGE_SHIFT;
	}
}

static inline int kvmppc_radix_shift_to_level(int shift)
{
	if (shift == PUD_SHIFT)
		return 2;
	if (shift == PMD_SHIFT)
		return 1;
	if (shift == PAGE_SHIFT)
		return 0;
	WARN_ON_ONCE(1);
	return 0;
}
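
/*
 * Resolve an HDSI/HISI taken while running an L2 guest: translate the L2
 * real address through L1's partition-scoped table, handle R/C updates,
 * find (or instantiate) the host pte for the resulting L1 real address,
 * combine the two sets of permissions, and insert the result into the
 * shadow page table along with a reverse-map entry.  Faults that L1 must
 * resolve itself are forwarded by returning RESUME_HOST.
 */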
/* called with gp->tlb_lock held */
static long int __kvmhv_nested_page_fault(struct kvm_vcpu *vcpu,
					  struct kvm_nested_guest *gp)
{
	struct kvm *kvm = vcpu->kvm;
	struct kvm_memory_slot *memslot;
	struct rmap_nested *n_rmap;
	struct kvmppc_pte gpte;
	pte_t pte, *pte_p;
	unsigned long mmu_seq;
	unsigned long dsisr = vcpu->arch.fault_dsisr;
	unsigned long ea = vcpu->arch.fault_dar;
	unsigned long *rmapp;
	unsigned long n_gpa, gpa, gfn, perm = 0UL;
	unsigned int shift, l1_shift, level;
	bool writing = !!(dsisr & DSISR_ISSTORE);
	bool kvm_ro = false;
	long int ret;

	if (!gp->l1_gr_to_hr) {
		kvmhv_update_ptbl_cache(gp);
		if (!gp->l1_gr_to_hr)
			return RESUME_HOST;
	}

	/* Convert the nested guest real address into a L1 guest real address */
	n_gpa = vcpu->arch.fault_gpa & ~0xF000000000000FFFULL;
	if (!(dsisr & DSISR_PRTABLE_FAULT))
		n_gpa |= ea & 0xFFF;
	ret = kvmhv_translate_addr_nested(vcpu, gp, n_gpa, dsisr, &gpte);

	/*
	 * If the hardware found a translation but we don't now have a usable
	 * translation in the l1 partition-scoped tree, remove the shadow pte
	 * and let the guest retry.
	 */
	if (ret == RESUME_HOST &&
	    (dsisr & (DSISR_PROTFAULT | DSISR_BADACCESS | DSISR_NOEXEC_OR_G |
		      DSISR_BAD_COPYPASTE)))
		goto inval;
	if (ret)
		return ret;

	/* Failed to set the reference/change bits */
	if (dsisr & DSISR_SET_RC) {
		ret = kvmhv_handle_nested_set_rc(vcpu, gp, n_gpa, gpte, dsisr);
		if (ret == RESUME_HOST)
			return ret;
		if (ret)
			goto inval;
		dsisr &= ~DSISR_SET_RC;
		if (!(dsisr & (DSISR_BAD_FAULT_64S | DSISR_NOHPTE |
			       DSISR_PROTFAULT)))
			return RESUME_GUEST;
	}

	/*
	 * We took an HISI or HDSI while we were running a nested guest which
	 * means we have no partition scoped translation for that. This means
	 * we need to insert a pte for the mapping into our shadow_pgtable.
	 */

	l1_shift = gpte.page_shift;
	if (l1_shift < PAGE_SHIFT) {
		/* We don't support l1 using a page size smaller than our own */
		pr_err("KVM: L1 guest page shift (%d) less than our own (%d)\n",
		       l1_shift, PAGE_SHIFT);
		return -EINVAL;
	}
	gpa = gpte.raddr;
	gfn = gpa >> PAGE_SHIFT;

	/* 1. Get the corresponding host memslot */

	memslot = gfn_to_memslot(kvm, gfn);
	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) {
		if (dsisr & (DSISR_PRTABLE_FAULT | DSISR_BADACCESS)) {
			/* unusual error -> reflect to the guest as a DSI */
			kvmppc_core_queue_data_storage(vcpu, ea, dsisr);
			return RESUME_GUEST;
		}
		/* passthrough of emulated MMIO case... */
		pr_err("emulated MMIO passthrough?\n");
		return -EINVAL;
	}
	if (memslot->flags & KVM_MEM_READONLY) {
		if (writing) {
			/* Give the guest a DSI */
			kvmppc_core_queue_data_storage(vcpu, ea,
					DSISR_ISSTORE | DSISR_PROTFAULT);
			return RESUME_GUEST;
		}
		kvm_ro = true;
	}

	/* 2. Find the host pte for this L1 guest real address */

	/* Used to check for invalidations in progress */
	mmu_seq = kvm->mmu_notifier_seq;
	smp_rmb();

	/* See if can find translation in our partition scoped tables for L1 */
	pte = __pte(0);
	spin_lock(&kvm->mmu_lock);
	pte_p = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
	if (!shift)
		shift = PAGE_SHIFT;
	if (pte_p)
		pte = *pte_p;
	spin_unlock(&kvm->mmu_lock);

	if (!pte_present(pte) || (writing && !(pte_val(pte) & _PAGE_WRITE))) {
		/* No suitable pte found -> try to insert a mapping */
		ret = kvmppc_book3s_instantiate_page(vcpu, gpa, memslot,
					writing, kvm_ro, &pte, &level);
		if (ret == -EAGAIN)
			return RESUME_GUEST;
		else if (ret)
			return ret;
		shift = kvmppc_radix_level_to_shift(level);
	}

	/* 3. Compute the pte we need to insert for nest_gpa -> host r_addr */

	/* The permissions are the combination of the host and l1 guest ptes */
	perm |= gpte.may_read ? 0UL : _PAGE_READ;
	perm |= gpte.may_write ? 0UL : _PAGE_WRITE;
	perm |= gpte.may_execute ? 0UL : _PAGE_EXEC;
	pte = __pte(pte_val(pte) & ~perm);

	/* What size pte can we insert? */
	if (shift > l1_shift) {
		u64 mask;
		unsigned int actual_shift = PAGE_SHIFT;

		if (PMD_SHIFT < l1_shift)
			actual_shift = PMD_SHIFT;
		mask = (1UL << shift) - (1UL << actual_shift);
		pte = __pte(pte_val(pte) | (gpa & mask));
		shift = actual_shift;
	}
	level = kvmppc_radix_shift_to_level(shift);
	n_gpa &= ~((1UL << shift) - 1);

	/* 4. Insert the pte into our shadow_pgtable */

	n_rmap = kzalloc(sizeof(*n_rmap), GFP_KERNEL);
	if (!n_rmap)
		return RESUME_GUEST; /* Let the guest try again */
	n_rmap->rmap = (n_gpa & RMAP_NESTED_GPA_MASK) |
		(((unsigned long) gp->l1_lpid) << RMAP_NESTED_LPID_SHIFT);
	rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
	ret = kvmppc_create_pte(kvm, gp->shadow_pgtable, pte, n_gpa, level,
				mmu_seq, gp->shadow_lpid, rmapp, &n_rmap);
	if (n_rmap)
		kfree(n_rmap);
	if (ret == -EAGAIN)
		ret = RESUME_GUEST; /* Let the guest try again */

	return ret;

 inval:
	kvmhv_invalidate_shadow_pte(vcpu, gp, n_gpa, NULL);
	return RESUME_GUEST;
}

long int kvmhv_nested_page_fault(struct kvm_vcpu *vcpu)
{
	struct kvm_nested_guest *gp = vcpu->arch.nested;
	long int ret;

	mutex_lock(&gp->tlb_lock);
	ret = __kvmhv_nested_page_fault(vcpu, gp);
	mutex_unlock(&gp->tlb_lock);
	return ret;
}

int kvmhv_nested_next_lpid(struct kvm *kvm, int lpid)
{
	int ret = -1;

	spin_lock(&kvm->mmu_lock);
	while (++lpid <= kvm->arch.max_nested_lpid) {
		if (kvm->arch.nested_guests[lpid]) {
			ret = lpid;
			break;
		}
	}
	spin_unlock(&kvm->mmu_lock);
	return ret;
}