// SPDX-License-Identifier: GPL-2.0
/*
 * kvm nested virtualization support for s390x
 *
 * Copyright IBM Corp. 2016
 *
 * Author(s): David Hildenbrand <dahi@linux.vnet.ibm.com>
 */
#include <linux/vmalloc.h>
#include <linux/kvm_host.h>
#include <linux/bug.h>
#include <linux/list.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>

#include <asm/gmap.h>
#include <asm/mmu_context.h>
#include <asm/sclp.h>
#include <asm/nmi.h>
#include <asm/dis.h>
#include "kvm-s390.h"
#include "gaccess.h"

struct vsie_page {
        struct kvm_s390_sie_block scb_s;        /* 0x0000 */
        /*
         * the backup info for machine checks. Ensure it's at
         * the same offset as that in struct sie_page!
         */
        struct mcck_volatile_info mcck_info;    /* 0x0200 */
        /*
         * The pinned original scb. Be aware that other VCPUs can modify
         * it while we read from it. Values that are used for conditions or
         * are reused conditionally should be accessed via READ_ONCE.
         */
        struct kvm_s390_sie_block *scb_o;       /* 0x0218 */
        /* the shadow gmap in use by the vsie_page */
        struct gmap *gmap;                      /* 0x0220 */
        /* address of the last reported fault to guest2 */
        unsigned long fault_addr;               /* 0x0228 */
        /* calculated guest addresses of satellite control blocks */
        gpa_t sca_gpa;                          /* 0x0230 */
        gpa_t itdba_gpa;                        /* 0x0238 */
        gpa_t gvrd_gpa;                         /* 0x0240 */
        gpa_t riccbd_gpa;                       /* 0x0248 */
        gpa_t sdnx_gpa;                         /* 0x0250 */
        __u8 reserved[0x0700 - 0x0258];         /* 0x0258 */
        struct kvm_s390_crypto_cb crycb;        /* 0x0700 */
        __u8 fac[S390_ARCH_FAC_LIST_SIZE_BYTE]; /* 0x0800 */
};

/* trigger a validity icpt for the given scb */
static int set_validity_icpt(struct kvm_s390_sie_block *scb,
                             __u16 reason_code)
{
        scb->ipa = 0x1000;
        scb->ipb = ((__u32) reason_code) << 16;
        scb->icptcode = ICPT_VALIDITY;
        return 1;
}
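
/*
 * Illustrative sketch (not part of the kernel sources): guest 2 can
 * recover the reason code from the upper halfword of ipb, e.g.:
 *
 *        __u16 reason = scb->ipb >> 16;  // e.g. 0x0037, see map_prefix()
 */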

/* mark the prefix as unmapped, this will block the VSIE */
static void prefix_unmapped(struct vsie_page *vsie_page)
{
        atomic_or(PROG_REQUEST, &vsie_page->scb_s.prog20);
}

/* mark the prefix as unmapped and wait until the VSIE has been left */
static void prefix_unmapped_sync(struct vsie_page *vsie_page)
{
        prefix_unmapped(vsie_page);
        if (vsie_page->scb_s.prog0c & PROG_IN_SIE)
                atomic_or(CPUSTAT_STOP_INT, &vsie_page->scb_s.cpuflags);
        while (vsie_page->scb_s.prog0c & PROG_IN_SIE)
                cpu_relax();
}

/* mark the prefix as mapped, this will allow the VSIE to run */
static void prefix_mapped(struct vsie_page *vsie_page)
{
        atomic_andnot(PROG_REQUEST, &vsie_page->scb_s.prog20);
}

/* test if the prefix is mapped into the gmap shadow */
static int prefix_is_mapped(struct vsie_page *vsie_page)
{
        return !(atomic_read(&vsie_page->scb_s.prog20) & PROG_REQUEST);
}
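
/*
 * Minimal usage sketch of the PROG_REQUEST handshake (assumes the vsie
 * entry path in this file; illustrative only):
 *
 *        prefix_unmapped_sync(vsie_page);  // kick g3, wait until out of SIE
 *        // ... prefix pages are guaranteed not to be in use here ...
 *        prefix_mapped(vsie_page);         // allow the VSIE to run again
 */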

/* copy the updated intervention request bits into the shadow scb */
static void update_intervention_requests(struct vsie_page *vsie_page)
{
        const int bits = CPUSTAT_STOP_INT | CPUSTAT_IO_INT | CPUSTAT_EXT_INT;
        int cpuflags;

        cpuflags = atomic_read(&vsie_page->scb_o->cpuflags);
        atomic_andnot(bits, &vsie_page->scb_s.cpuflags);
        atomic_or(cpuflags & bits, &vsie_page->scb_s.cpuflags);
}

/* shadow (filter and validate) the cpuflags */
static int prepare_cpuflags(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
        struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
        struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
        int newflags, cpuflags = atomic_read(&scb_o->cpuflags);

        /* we don't allow ESA/390 guests */
        if (!(cpuflags & CPUSTAT_ZARCH))
                return set_validity_icpt(scb_s, 0x0001U);

        if (cpuflags & (CPUSTAT_RRF | CPUSTAT_MCDS))
                return set_validity_icpt(scb_s, 0x0001U);
        else if (cpuflags & (CPUSTAT_SLSV | CPUSTAT_SLSR))
                return set_validity_icpt(scb_s, 0x0007U);

        /* intervention requests will be set later */
        newflags = CPUSTAT_ZARCH;
        if (cpuflags & CPUSTAT_GED && test_kvm_facility(vcpu->kvm, 8))
                newflags |= CPUSTAT_GED;
        if (cpuflags & CPUSTAT_GED2 && test_kvm_facility(vcpu->kvm, 78)) {
                /* setting GED and GED2 at the same time is invalid */
                if (cpuflags & CPUSTAT_GED)
                        return set_validity_icpt(scb_s, 0x0001U);
                newflags |= CPUSTAT_GED2;
        }
        if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_GPERE))
                newflags |= cpuflags & CPUSTAT_P;
        if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_GSLS))
                newflags |= cpuflags & CPUSTAT_SM;
        if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_IBS))
                newflags |= cpuflags & CPUSTAT_IBS;
        if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_KSS))
                newflags |= cpuflags & CPUSTAT_KSS;

        atomic_set(&scb_s->cpuflags, newflags);
        return 0;
}

/*
 * Create a shadow copy of the crycb block and set up key wrapping, if
 * requested for guest 3 and enabled for guest 2.
 *
 * We only accept format-1 (no AP in g2), but convert it into format-2.
 * There is nothing to do for format-0.
 *
 * Returns: - 0 if shadowed or nothing to do
 *          - > 0 if control has to be given to guest 2
 */
static int shadow_crycb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
        struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
        struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
        const uint32_t crycbd_o = READ_ONCE(scb_o->crycbd);
        const u32 crycb_addr = crycbd_o & 0x7ffffff8U;
        unsigned long *b1, *b2;
        u8 ecb3_flags;

        scb_s->crycbd = 0;
        if (!(crycbd_o & vcpu->arch.sie_block->crycbd & CRYCB_FORMAT1))
                return 0;
        /* format-1 is supported with message-security-assist extension 3 */
        if (!test_kvm_facility(vcpu->kvm, 76))
                return 0;
        /* we may only allow it if enabled for guest 2 */
        ecb3_flags = scb_o->ecb3 & vcpu->arch.sie_block->ecb3 &
                     (ECB3_AES | ECB3_DEA);
        if (!ecb3_flags)
                return 0;

        if ((crycb_addr & PAGE_MASK) != ((crycb_addr + 128) & PAGE_MASK))
                return set_validity_icpt(scb_s, 0x003CU);
        else if (!crycb_addr)
                return set_validity_icpt(scb_s, 0x0039U);

        /* copy only the wrapping keys */
        if (read_guest_real(vcpu, crycb_addr + 72, &vsie_page->crycb, 56))
                return set_validity_icpt(scb_s, 0x0035U);

        scb_s->ecb3 |= ecb3_flags;
        scb_s->crycbd = ((__u32)(__u64) &vsie_page->crycb) | CRYCB_FORMAT1 |
                        CRYCB_FORMAT2;

        /* xor both blocks in one run */
        b1 = (unsigned long *) vsie_page->crycb.dea_wrapping_key_mask;
        b2 = (unsigned long *)
                        vcpu->kvm->arch.crypto.crycb->dea_wrapping_key_mask;
        /* as 56 % 8 == 0, bitmap_xor won't overwrite any data */
        bitmap_xor(b1, b1, b2, BITS_PER_BYTE * 56);
        return 0;
}
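
/*
 * The bitmap_xor() above is equivalent to the following byte loop
 * (sketch only): 56 bytes are 448 bits, a multiple of BITS_PER_LONG on
 * s390x, so no partial-word tail needs special handling.
 *
 *        for (i = 0; i < 56; i++)
 *                ((u8 *)b1)[i] ^= ((const u8 *)b2)[i];
 */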

/* shadow (round up/down) the ibc to avoid validity icpt */
static void prepare_ibc(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
        struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
        struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
        /* READ_ONCE does not work on bitfields - use a temporary variable */
        const uint32_t __new_ibc = scb_o->ibc;
        const uint32_t new_ibc = READ_ONCE(__new_ibc) & 0x0fffU;
        __u64 min_ibc = (sclp.ibc >> 16) & 0x0fffU;

        scb_s->ibc = 0;
        /* ibc installed in g2 and requested for g3 */
        if (vcpu->kvm->arch.model.ibc && new_ibc) {
                scb_s->ibc = new_ibc;
                /* take care of the minimum ibc level of the machine */
                if (scb_s->ibc < min_ibc)
                        scb_s->ibc = min_ibc;
                /* take care of the maximum ibc level set for the guest */
                if (scb_s->ibc > vcpu->kvm->arch.model.ibc)
                        scb_s->ibc = vcpu->kvm->arch.model.ibc;
        }
}
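
/*
 * In effect the requested ibc is clamped to [min_ibc, model.ibc]. With
 * made-up example values min_ibc = 0x0a00 and model.ibc = 0x0c00:
 *
 *        g3 requests 0x0900 -> shadow ibc 0x0a00 (rounded up)
 *        g3 requests 0x0b00 -> shadow ibc 0x0b00 (unchanged)
 *        g3 requests 0x0d00 -> shadow ibc 0x0c00 (rounded down)
 */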

/* unshadow the scb, copying parameters back to the real scb */
static void unshadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
        struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
        struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;

        /* interception */
        scb_o->icptcode = scb_s->icptcode;
        scb_o->icptstatus = scb_s->icptstatus;
        scb_o->ipa = scb_s->ipa;
        scb_o->ipb = scb_s->ipb;
        scb_o->gbea = scb_s->gbea;

        /* timer */
        scb_o->cputm = scb_s->cputm;
        scb_o->ckc = scb_s->ckc;
        scb_o->todpr = scb_s->todpr;

        /* guest state */
        scb_o->gpsw = scb_s->gpsw;
        scb_o->gg14 = scb_s->gg14;
        scb_o->gg15 = scb_s->gg15;
        memcpy(scb_o->gcr, scb_s->gcr, 128);
        scb_o->pp = scb_s->pp;

        /* branch prediction */
        if (test_kvm_facility(vcpu->kvm, 82)) {
                scb_o->fpf &= ~FPF_BPBC;
                scb_o->fpf |= scb_s->fpf & FPF_BPBC;
        }

        /* interrupt intercept */
        switch (scb_s->icptcode) {
        case ICPT_PROGI:
        case ICPT_INSTPROGI:
        case ICPT_EXTINT:
                memcpy((void *)((u64)scb_o + 0xc0),
                       (void *)((u64)scb_s + 0xc0), 0xf0 - 0xc0);
                break;
        case ICPT_PARTEXEC:
                /* MVPG only */
                memcpy((void *)((u64)scb_o + 0xc0),
                       (void *)((u64)scb_s + 0xc0), 0xd0 - 0xc0);
                break;
        }

        if (scb_s->ihcpu != 0xffffU)
                scb_o->ihcpu = scb_s->ihcpu;
}
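
/*
 * Note on the raw offsets above (sketch, offsets relative to struct
 * kvm_s390_sie_block): bytes 0xc0-0xef hold the interruption parameters
 * written by SIE on an intercept; for a partial-execution intercept
 * (MVPG) only the first 16 bytes (0xc0-0xcf) carry data, hence the
 * shorter copy.
 */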

/*
 * Set up the shadow scb by copying and checking the relevant parts of the g2
 * provided scb.
 *
 * Returns: - 0 if the scb has been shadowed
 *          - > 0 if control has to be given to guest 2
 */
static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
        struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
        struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
        /* READ_ONCE does not work on bitfields - use a temporary variable */
        const uint32_t __new_prefix = scb_o->prefix;
        const uint32_t new_prefix = READ_ONCE(__new_prefix);
        const bool wants_tx = READ_ONCE(scb_o->ecb) & ECB_TE;
        bool had_tx = scb_s->ecb & ECB_TE;
        unsigned long new_mso = 0;
        int rc;

        /* make sure we don't have any leftovers when reusing the scb */
        scb_s->icptcode = 0;
        scb_s->eca = 0;
        scb_s->ecb = 0;
        scb_s->ecb2 = 0;
        scb_s->ecb3 = 0;
        scb_s->ecd = 0;
        scb_s->fac = 0;
        scb_s->fpf = 0;

        rc = prepare_cpuflags(vcpu, vsie_page);
        if (rc)
                goto out;

        /* timer */
        scb_s->cputm = scb_o->cputm;
        scb_s->ckc = scb_o->ckc;
        scb_s->todpr = scb_o->todpr;
        scb_s->epoch = scb_o->epoch;

        /* guest state */
        scb_s->gpsw = scb_o->gpsw;
        scb_s->gg14 = scb_o->gg14;
        scb_s->gg15 = scb_o->gg15;
        memcpy(scb_s->gcr, scb_o->gcr, 128);
        scb_s->pp = scb_o->pp;

        /* interception / execution handling */
        scb_s->gbea = scb_o->gbea;
        scb_s->lctl = scb_o->lctl;
        scb_s->svcc = scb_o->svcc;
        scb_s->ictl = scb_o->ictl;
        /*
         * SKEY handling functions can't deal with false setting of PTE invalid
         * bits. Therefore we cannot provide interpretation and would later
         * have to provide our own emulation handlers.
         */
        if (!(atomic_read(&scb_s->cpuflags) & CPUSTAT_KSS))
                scb_s->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
        scb_s->icpua = scb_o->icpua;

        if (!(atomic_read(&scb_s->cpuflags) & CPUSTAT_SM))
                new_mso = READ_ONCE(scb_o->mso) & 0xfffffffffff00000UL;
        /* if the hva of the prefix changes, we have to remap the prefix */
        if (scb_s->mso != new_mso || scb_s->prefix != new_prefix)
                prefix_unmapped(vsie_page);
        /* SIE will do mso/msl validity and exception checks for us */
        scb_s->msl = scb_o->msl & 0xfffffffffff00000UL;
        scb_s->mso = new_mso;
        scb_s->prefix = new_prefix;

        /* We definitely have to flush the TLB if this scb never ran */
        if (scb_s->ihcpu != 0xffffU)
                scb_s->ihcpu = scb_o->ihcpu;

        /* MVPG and Protection Exception Interpretation are always available */
        scb_s->eca |= scb_o->eca & (ECA_MVPGI | ECA_PROTEXCI);
        /* Host-protection-interruption introduced with ESOP */
        if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_ESOP))
                scb_s->ecb |= scb_o->ecb & ECB_HOSTPROTINT;
        /* transactional execution */
        if (test_kvm_facility(vcpu->kvm, 73) && wants_tx) {
                /* remap the prefix if tx is toggled on */
                if (!had_tx)
                        prefix_unmapped(vsie_page);
                scb_s->ecb |= ECB_TE;
        }
        /* branch prediction */
        if (test_kvm_facility(vcpu->kvm, 82))
                scb_s->fpf |= scb_o->fpf & FPF_BPBC;
        /* SIMD */
        if (test_kvm_facility(vcpu->kvm, 129)) {
                scb_s->eca |= scb_o->eca & ECA_VX;
                scb_s->ecd |= scb_o->ecd & ECD_HOSTREGMGMT;
        }
        /* Run-time-Instrumentation */
        if (test_kvm_facility(vcpu->kvm, 64))
                scb_s->ecb3 |= scb_o->ecb3 & ECB3_RI;
        /* Instruction Execution Prevention */
        if (test_kvm_facility(vcpu->kvm, 130))
                scb_s->ecb2 |= scb_o->ecb2 & ECB2_IEP;
        /* Guarded Storage */
        if (test_kvm_facility(vcpu->kvm, 133)) {
                scb_s->ecb |= scb_o->ecb & ECB_GS;
                scb_s->ecd |= scb_o->ecd & ECD_HOSTREGMGMT;
        }
        if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_SIIF))
                scb_s->eca |= scb_o->eca & ECA_SII;
        if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_IB))
                scb_s->eca |= scb_o->eca & ECA_IB;
        if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_CEI))
                scb_s->eca |= scb_o->eca & ECA_CEI;
        /* Epoch Extension */
        if (test_kvm_facility(vcpu->kvm, 139))
                scb_s->ecd |= scb_o->ecd & ECD_MEF;

        prepare_ibc(vcpu, vsie_page);
        rc = shadow_crycb(vcpu, vsie_page);
out:
        if (rc)
                unshadow_scb(vcpu, vsie_page);
        return rc;
}

void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start,
                                 unsigned long end)
{
        struct kvm *kvm = gmap->private;
        struct vsie_page *cur;
        unsigned long prefix;
        struct page *page;
        int i;

        if (!gmap_is_shadow(gmap))
                return;
        if (start >= 1UL << 31)
                /* We are only interested in prefix pages */
                return;

        /*
         * Only new shadow blocks are added to the list during runtime,
         * therefore we can safely reference them all the time.
         */
        for (i = 0; i < kvm->arch.vsie.page_count; i++) {
                page = READ_ONCE(kvm->arch.vsie.pages[i]);
                if (!page)
                        continue;
                cur = page_to_virt(page);
                if (READ_ONCE(cur->gmap) != gmap)
                        continue;
                prefix = cur->scb_s.prefix << GUEST_PREFIX_SHIFT;
                /* with mso/msl, the prefix lies at an offset */
                prefix += cur->scb_s.mso;
                if (prefix <= end && start <= prefix + 2 * PAGE_SIZE - 1)
                        prefix_unmapped_sync(cur);
        }
}
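
/*
 * Overlap check sketch with made-up numbers: mso = 0x100000 and a guest
 * prefix of 0x2000 guard the two pages starting at 0x102000, i.e.
 * [0x102000, 0x103fff]. An unmap touching that range forces a
 * prefix_unmapped_sync(); an unmap of, say, [0x200000, 0x200fff] does not.
 */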

/*
 * Map the first prefix page and if tx is enabled also the second prefix page.
 *
 * The prefix will be protected, a gmap notifier will inform about unmaps.
 * The shadow scb must not be executed until the prefix is remapped, this is
 * guaranteed by properly handling PROG_REQUEST.
 *
 * Returns: - 0 if successfully mapped or already mapped
 *          - > 0 if control has to be given to guest 2
 *          - -EAGAIN if the caller can retry immediately
 *          - -ENOMEM if out of memory
 */
static int map_prefix(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
        struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
        u64 prefix = scb_s->prefix << GUEST_PREFIX_SHIFT;
        int rc;

        if (prefix_is_mapped(vsie_page))
                return 0;

        /* mark it as mapped so we can catch any concurrent unmappers */
        prefix_mapped(vsie_page);

        /* with mso/msl, the prefix lies at offset *mso* */
        prefix += scb_s->mso;

        rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix);
        if (!rc && (scb_s->ecb & ECB_TE))
                rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
                                           prefix + PAGE_SIZE);
        /*
         * We don't have to mprotect, we will be called for all unshadows.
         * SIE will detect if protection applies and trigger a validity.
         */
        if (rc)
                prefix_unmapped(vsie_page);
        if (rc > 0 || rc == -EFAULT)
                rc = set_validity_icpt(scb_s, 0x0037U);
        return rc;
}

/*
 * Pin the guest page given by gpa and set hpa to the pinned host address.
 * Will always be pinned writable.
 *
 * Returns: - 0 on success
 *          - -EINVAL if the gpa is not valid guest storage
 */
static int pin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t *hpa)
{
        struct page *page;

        page = gfn_to_page(kvm, gpa_to_gfn(gpa));
        if (is_error_page(page))
                return -EINVAL;
        *hpa = (hpa_t) page_to_virt(page) + (gpa & ~PAGE_MASK);
        return 0;
}

/* Unpins a page previously pinned via pin_guest_page, marking it as dirty. */
static void unpin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t hpa)
{
        kvm_release_pfn_dirty(hpa >> PAGE_SHIFT);
        /* mark the page always as dirty for migration */
        mark_page_dirty(kvm, gpa_to_gfn(gpa));
}
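
/*
 * Typical pin/unpin pairing, as done for the sca by pin_blocks() and
 * unpin_blocks() below (sketch only):
 *
 *        hpa_t hpa;
 *
 *        if (pin_guest_page(vcpu->kvm, gpa, &hpa))
 *                return set_validity_icpt(scb_s, 0x0034U);
 *        scb_s->scaoh = (u32)((u64)hpa >> 32);
 *        scb_s->scaol = (u32)(u64)hpa;
 *        // ... run the vsie ...
 *        unpin_guest_page(vcpu->kvm, gpa, hpa);
 */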

/* unpin all blocks previously pinned by pin_blocks(), marking them dirty */
static void unpin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
        struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
        hpa_t hpa;

        hpa = (u64) scb_s->scaoh << 32 | scb_s->scaol;
        if (hpa) {
                unpin_guest_page(vcpu->kvm, vsie_page->sca_gpa, hpa);
                vsie_page->sca_gpa = 0;
                scb_s->scaol = 0;
                scb_s->scaoh = 0;
        }

        hpa = scb_s->itdba;
        if (hpa) {
                unpin_guest_page(vcpu->kvm, vsie_page->itdba_gpa, hpa);
                vsie_page->itdba_gpa = 0;
                scb_s->itdba = 0;
        }

        hpa = scb_s->gvrd;
        if (hpa) {
                unpin_guest_page(vcpu->kvm, vsie_page->gvrd_gpa, hpa);
                vsie_page->gvrd_gpa = 0;
                scb_s->gvrd = 0;
        }

        hpa = scb_s->riccbd;
        if (hpa) {
                unpin_guest_page(vcpu->kvm, vsie_page->riccbd_gpa, hpa);
                vsie_page->riccbd_gpa = 0;
                scb_s->riccbd = 0;
        }

        hpa = scb_s->sdnxo;
        if (hpa) {
                unpin_guest_page(vcpu->kvm, vsie_page->sdnx_gpa, hpa);
                vsie_page->sdnx_gpa = 0;
                scb_s->sdnxo = 0;
        }
}

/*
 * Instead of shadowing some blocks, we can simply forward them because the
 * addresses in the scb are 64 bit long.
 *
 * This works as long as the data lies in one page. If blocks ever exceed one
 * page, we have to fall back to shadowing.
 *
 * As we reuse the sca, the vcpu pointers contained in it are invalid. We must
 * therefore not enable any facilities that access these pointers (e.g. SIGPIF).
 *
 * Returns: - 0 if all blocks were pinned
 *          - > 0 if control has to be given to guest 2
 *          - -ENOMEM if out of memory
 */
static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
        struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
        struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
        hpa_t hpa;
        gpa_t gpa;
        int rc = 0;

        gpa = READ_ONCE(scb_o->scaol) & ~0xfUL;
        if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_64BSCAO))
                gpa |= (u64) READ_ONCE(scb_o->scaoh) << 32;
        if (gpa) {
                if (!(gpa & ~0x1fffUL))
                        rc = set_validity_icpt(scb_s, 0x0038U);
                else if ((gpa & ~0x1fffUL) == kvm_s390_get_prefix(vcpu))
                        rc = set_validity_icpt(scb_s, 0x0011U);
                else if ((gpa & PAGE_MASK) !=
                         ((gpa + sizeof(struct bsca_block) - 1) & PAGE_MASK))
                        rc = set_validity_icpt(scb_s, 0x003bU);
                if (!rc) {
                        rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
                        if (rc)
                                rc = set_validity_icpt(scb_s, 0x0034U);
                }
                if (rc)
                        goto unpin;
                vsie_page->sca_gpa = gpa;
                scb_s->scaoh = (u32)((u64)hpa >> 32);
                scb_s->scaol = (u32)(u64)hpa;
        }

        gpa = READ_ONCE(scb_o->itdba) & ~0xffUL;
        if (gpa && (scb_s->ecb & ECB_TE)) {
                if (!(gpa & ~0x1fffU)) {
                        rc = set_validity_icpt(scb_s, 0x0080U);
                        goto unpin;
                }
                /* 256 bytes cannot cross page boundaries */
                rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
                if (rc) {
                        rc = set_validity_icpt(scb_s, 0x0080U);
                        goto unpin;
                }
                vsie_page->itdba_gpa = gpa;
                scb_s->itdba = hpa;
        }

        gpa = READ_ONCE(scb_o->gvrd) & ~0x1ffUL;
        if (gpa && (scb_s->eca & ECA_VX) && !(scb_s->ecd & ECD_HOSTREGMGMT)) {
                if (!(gpa & ~0x1fffUL)) {
                        rc = set_validity_icpt(scb_s, 0x1310U);
                        goto unpin;
                }
                /*
                 * 512 bytes of vector registers cannot cross page boundaries.
                 * If this block gets bigger, we have to shadow it.
                 */
                rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
                if (rc) {
                        rc = set_validity_icpt(scb_s, 0x1310U);
                        goto unpin;
                }
                vsie_page->gvrd_gpa = gpa;
                scb_s->gvrd = hpa;
        }

        gpa = READ_ONCE(scb_o->riccbd) & ~0x3fUL;
        if (gpa && (scb_s->ecb3 & ECB3_RI)) {
                if (!(gpa & ~0x1fffUL)) {
                        rc = set_validity_icpt(scb_s, 0x0043U);
                        goto unpin;
                }
                /* 64 bytes cannot cross page boundaries */
                rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
                if (rc) {
                        rc = set_validity_icpt(scb_s, 0x0043U);
                        goto unpin;
                }
                /* Validity 0x0044 will be checked by SIE */
                vsie_page->riccbd_gpa = gpa;
                scb_s->riccbd = hpa;
        }

        if ((scb_s->ecb & ECB_GS) && !(scb_s->ecd & ECD_HOSTREGMGMT)) {
                unsigned long sdnxc;

                gpa = READ_ONCE(scb_o->sdnxo) & ~0xfUL;
                sdnxc = READ_ONCE(scb_o->sdnxo) & 0xfUL;
                if (!gpa || !(gpa & ~0x1fffUL)) {
                        rc = set_validity_icpt(scb_s, 0x10b0U);
                        goto unpin;
                }
                if (sdnxc < 6 || sdnxc > 12) {
                        rc = set_validity_icpt(scb_s, 0x10b1U);
                        goto unpin;
                }
                if (gpa & ((1 << sdnxc) - 1)) {
                        rc = set_validity_icpt(scb_s, 0x10b2U);
                        goto unpin;
                }
                /*
                 * Due to alignment rules (checked above) this cannot
                 * cross page boundaries.
                 */
                rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
                if (rc) {
                        rc = set_validity_icpt(scb_s, 0x10b0U);
                        goto unpin;
                }
                vsie_page->sdnx_gpa = gpa;
                scb_s->sdnxo = hpa | sdnxc;
        }
        return 0;
unpin:
        unpin_blocks(vcpu, vsie_page);
        return rc;
}

/* unpin the scb provided by guest 2, marking it as dirty */
static void unpin_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page,
                      gpa_t gpa)
{
        hpa_t hpa = (hpa_t) vsie_page->scb_o;

        if (hpa)
                unpin_guest_page(vcpu->kvm, gpa, hpa);
        vsie_page->scb_o = NULL;
}

/*
 * Pin the scb at gpa provided by guest 2 at vsie_page->scb_o.
 *
 * Returns: - 0 if the scb was pinned
 *          - > 0 if control has to be given to guest 2
 */
static int pin_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page,
                   gpa_t gpa)
{
        hpa_t hpa;
        int rc;

        rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
        if (rc) {
                rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
                WARN_ON_ONCE(rc);
                return 1;
        }
        vsie_page->scb_o = (struct kvm_s390_sie_block *) hpa;
        return 0;
}

/*
 * Inject a fault into guest 2.
 *
 * Returns: - > 0 if control has to be given to guest 2
 *          - < 0 if an error occurred during injection
 */
static int inject_fault(struct kvm_vcpu *vcpu, __u16 code, __u64 vaddr,
                        bool write_flag)
{
        struct kvm_s390_pgm_info pgm = {
                .code = code,
                .trans_exc_code =
                        /* 0-51: virtual address */
                        (vaddr & 0xfffffffffffff000UL) |
                        /* 52-53: store / fetch */
                        (((unsigned int) !write_flag) + 1) << 10,
                        /* 62-63: asce id (always primary == 0) */
                .exc_access_id = 0, /* always primary */
                .op_access_id = 0, /* not MVPG */
        };
        int rc;

        if (code == PGM_PROTECTION)
                pgm.trans_exc_code |= 0x4UL;

        rc = kvm_s390_inject_prog_irq(vcpu, &pgm);
        return rc ? rc : 1;
}
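
/*
 * Bit-layout sketch for trans_exc_code as built above (made-up address):
 * a write fault at 0x12345000 yields
 *
 *        trans_exc_code = 0x12345000 | (1 << 10);  // store access
 *
 * while a fetch fault uses (2 << 10) instead, and PGM_PROTECTION
 * additionally sets the 0x4 bit.
 */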

/*
 * Handle a fault during vsie execution on a gmap shadow.
 *
 * Returns: - 0 if the fault was resolved
 *          - > 0 if control has to be given to guest 2
 *          - < 0 if an error occurred
 */
static int handle_fault(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
        int rc;

        if (current->thread.gmap_int_code == PGM_PROTECTION)
                /* we can directly forward all protection exceptions */
                return inject_fault(vcpu, PGM_PROTECTION,
                                    current->thread.gmap_addr, 1);

        rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
                                   current->thread.gmap_addr);
        if (rc > 0) {
                rc = inject_fault(vcpu, rc,
                                  current->thread.gmap_addr,
                                  current->thread.gmap_write_flag);
                if (rc >= 0)
                        vsie_page->fault_addr = current->thread.gmap_addr;
        }
        return rc;
}

/*
 * Retry the previous fault that required guest 2 intervention. This avoids
 * one superfluous SIE re-entry and direct exit.
 *
 * Will ignore any errors. The next SIE fault will do proper fault handling.
 */
static void handle_last_fault(struct kvm_vcpu *vcpu,
                              struct vsie_page *vsie_page)
{
        if (vsie_page->fault_addr)
                kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
                                      vsie_page->fault_addr);
        vsie_page->fault_addr = 0;
}

static inline void clear_vsie_icpt(struct vsie_page *vsie_page)
{
        vsie_page->scb_s.icptcode = 0;
}

/* rewind the psw and clear the vsie icpt, so we can retry execution */
static void retry_vsie_icpt(struct vsie_page *vsie_page)
{
        struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
        int ilen = insn_length(scb_s->ipa >> 8);

        /* take care of EXECUTE instructions */
        if (scb_s->icptstatus & 1) {
                ilen = (scb_s->icptstatus >> 4) & 0x6;
                if (!ilen)
                        ilen = 4;
        }
        scb_s->gpsw.addr = __rewind_psw(scb_s->gpsw, ilen);
        clear_vsie_icpt(vsie_page);
}
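
/*
 * ilen sketch: for a plain intercept the length is derived from the
 * opcode, e.g. ipa == 0xb2b0 (STFLE) gives insn_length(0xb2) == 4, so
 * the PSW is rewound by 4 bytes before the retry. For EXECUTE, the
 * instruction-length code in icptstatus wins instead.
 */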

/*
 * Try to shadow + enable the guest 2 provided facility list.
 * Retry instruction execution if enabled for and provided by guest 2.
 *
 * Returns: - 0 if handled (retry or guest 2 icpt)
 *          - > 0 if control has to be given to guest 2
 */
static int handle_stfle(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
        struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
        __u32 fac = READ_ONCE(vsie_page->scb_o->fac) & 0x7ffffff8U;

        if (fac && test_kvm_facility(vcpu->kvm, 7)) {
                retry_vsie_icpt(vsie_page);
                if (read_guest_real(vcpu, fac, &vsie_page->fac,
                                    sizeof(vsie_page->fac)))
                        return set_validity_icpt(scb_s, 0x1090U);
                scb_s->fac = (__u32)(__u64) &vsie_page->fac;
        }
        return 0;
}

/*
 * Run the vsie on a shadow scb and a shadow gmap, without any further
 * sanity checks, handling SIE faults.
 *
 * Returns: - 0 everything went fine
 *          - > 0 if control has to be given to guest 2
 *          - < 0 if an error occurred
 */
static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
        struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
        struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
        int rc;

        handle_last_fault(vcpu, vsie_page);

        if (need_resched())
                schedule();
        if (test_cpu_flag(CIF_MCCK_PENDING))
                s390_handle_mcck();

        srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
        local_irq_disable();
        guest_enter_irqoff();
        local_irq_enable();

        rc = sie64a(scb_s, vcpu->run->s.regs.gprs);

        local_irq_disable();
        guest_exit_irqoff();
        local_irq_enable();
        vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

        if (rc == -EINTR) {
                VCPU_EVENT(vcpu, 3, "%s", "machine check");
                kvm_s390_reinject_machine_check(vcpu, &vsie_page->mcck_info);
                return 0;
        }

        if (rc > 0)
                rc = 0; /* we could still have an icpt */
        else if (rc == -EFAULT)
                return handle_fault(vcpu, vsie_page);

        switch (scb_s->icptcode) {
        case ICPT_INST:
                if (scb_s->ipa == 0xb2b0)
                        rc = handle_stfle(vcpu, vsie_page);
                break;
        case ICPT_STOP:
                /* stop not requested by g2 - must have been a kick */
                if (!(atomic_read(&scb_o->cpuflags) & CPUSTAT_STOP_INT))
                        clear_vsie_icpt(vsie_page);
                break;
        case ICPT_VALIDITY:
                if ((scb_s->ipa & 0xf000) != 0xf000)
                        scb_s->ipa += 0x1000;
                break;
        }
        return rc;
}

static void release_gmap_shadow(struct vsie_page *vsie_page)
{
        if (vsie_page->gmap)
                gmap_put(vsie_page->gmap);
        WRITE_ONCE(vsie_page->gmap, NULL);
        prefix_unmapped(vsie_page);
}

static int acquire_gmap_shadow(struct kvm_vcpu *vcpu,
                               struct vsie_page *vsie_page)
{
        unsigned long asce;
        union ctlreg0 cr0;
        struct gmap *gmap;
        int edat;

        asce = vcpu->arch.sie_block->gcr[1];
        cr0.val = vcpu->arch.sie_block->gcr[0];
        edat = cr0.edat && test_kvm_facility(vcpu->kvm, 8);
        edat += edat && test_kvm_facility(vcpu->kvm, 78);

        /*
         * ASCE or EDAT could have changed since last icpt, or the gmap
         * we're holding has been unshadowed. If the gmap is still valid,
         * we can safely reuse it.
         */
        if (vsie_page->gmap && gmap_shadow_valid(vsie_page->gmap, asce, edat))
                return 0;

        /* release the old shadow - if any, and mark the prefix as unmapped */
        release_gmap_shadow(vsie_page);
        gmap = gmap_shadow(vcpu->arch.gmap, asce, edat);
        if (IS_ERR(gmap))
                return PTR_ERR(gmap);
        gmap->private = vcpu->kvm;
        WRITE_ONCE(vsie_page->gmap, gmap);
        return 0;
}

/*
 * Register the shadow scb at the VCPU, e.g. for kicking out of vsie.
 */
static void register_shadow_scb(struct kvm_vcpu *vcpu,
                                struct vsie_page *vsie_page)
{
        struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;

        WRITE_ONCE(vcpu->arch.vsie_block, &vsie_page->scb_s);
        /*
         * External calls have to lead to a kick of the vcpu and
         * therefore the vsie -> Simulate Wait state.
         */
        kvm_s390_set_cpuflags(vcpu, CPUSTAT_WAIT);
        /*
         * We have to adjust the g3 epoch by the g2 epoch. The epoch will
         * automatically be adjusted on tod clock changes via kvm_sync_clock.
         */
        preempt_disable();
        scb_s->epoch += vcpu->kvm->arch.epoch;

        if (scb_s->ecd & ECD_MEF) {
                scb_s->epdx += vcpu->kvm->arch.epdx;
                if (scb_s->epoch < vcpu->kvm->arch.epoch)
                        scb_s->epdx += 1;
        }

        preempt_enable();
}
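
/*
 * Epoch carry sketch with made-up numbers: a g3 epoch of
 * 0xffffffffffffff00 plus a g2 epoch of 0x200 wraps to 0x100, which is
 * smaller than the g2 epoch, so epdx is incremented by one to carry the
 * 64-bit overflow into the epoch extension.
 */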

/*
 * Unregister a shadow scb from a VCPU.
 */
static void unregister_shadow_scb(struct kvm_vcpu *vcpu)
{
        kvm_s390_clear_cpuflags(vcpu, CPUSTAT_WAIT);
        WRITE_ONCE(vcpu->arch.vsie_block, NULL);
}

/*
 * Run the vsie on a shadowed scb, managing the gmap shadow, handling
 * prefix pages and faults.
 *
 * Returns: - 0 if no errors occurred
 *          - > 0 if control has to be given to guest 2
 *          - -ENOMEM if out of memory
 */
static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
        struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
        int rc = 0;

        while (1) {
                rc = acquire_gmap_shadow(vcpu, vsie_page);
                if (!rc)
                        rc = map_prefix(vcpu, vsie_page);
                if (!rc) {
                        gmap_enable(vsie_page->gmap);
                        update_intervention_requests(vsie_page);
                        rc = do_vsie_run(vcpu, vsie_page);
                        gmap_enable(vcpu->arch.gmap);
                }
                atomic_andnot(PROG_BLOCK_SIE, &scb_s->prog20);

                if (rc == -EAGAIN)
                        rc = 0;
                if (rc || scb_s->icptcode || signal_pending(current) ||
                    kvm_s390_vcpu_has_irq(vcpu, 0))
                        break;
        }

        if (rc == -EFAULT) {
                /*
                 * Addressing exceptions are always presented as intercepts.
                 * As addressing exceptions are suppressing and our guest 3 PSW
                 * points at the responsible instruction, we have to
                 * forward the PSW and set the ilc. If we can't read the guest 3
                 * instruction, we can use an arbitrary ilc. Let's always use
                 * ilen = 4 for now, so we can avoid reading in guest 3 virtual
                 * memory. (we could also fake the shadow so the hardware
                 * handles it).
                 */
                scb_s->icptcode = ICPT_PROGI;
                scb_s->iprcc = PGM_ADDRESSING;
                scb_s->pgmilc = 4;
                scb_s->gpsw.addr = __rewind_psw(scb_s->gpsw, 4);
        }
        return rc;
}

/*
 * Get or create a vsie page for a scb address.
 *
 * Returns: - address of a vsie page (cached or new one)
 *          - NULL if the same scb address is already used by another VCPU
 *          - ERR_PTR(-ENOMEM) if out of memory
 */
static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr)
{
        struct vsie_page *vsie_page;
        struct page *page;
        int nr_vcpus;

        rcu_read_lock();
        page = radix_tree_lookup(&kvm->arch.vsie.addr_to_page, addr >> 9);
        rcu_read_unlock();
        if (page) {
                if (page_ref_inc_return(page) == 2)
                        return page_to_virt(page);
                page_ref_dec(page);
        }

        /*
         * We want at least #online_vcpus shadows, so every VCPU can execute
         * the VSIE in parallel.
         */
        nr_vcpus = atomic_read(&kvm->online_vcpus);

        mutex_lock(&kvm->arch.vsie.mutex);
        if (kvm->arch.vsie.page_count < nr_vcpus) {
                page = alloc_page(GFP_KERNEL | __GFP_ZERO | GFP_DMA);
                if (!page) {
                        mutex_unlock(&kvm->arch.vsie.mutex);
                        return ERR_PTR(-ENOMEM);
                }
                page_ref_inc(page);
                kvm->arch.vsie.pages[kvm->arch.vsie.page_count] = page;
                kvm->arch.vsie.page_count++;
        } else {
                /* reuse an existing entry that belongs to nobody */
                while (true) {
                        page = kvm->arch.vsie.pages[kvm->arch.vsie.next];
                        if (page_ref_inc_return(page) == 2)
                                break;
                        page_ref_dec(page);
                        kvm->arch.vsie.next++;
                        kvm->arch.vsie.next %= nr_vcpus;
                }
                radix_tree_delete(&kvm->arch.vsie.addr_to_page, page->index >> 9);
        }
        page->index = addr;
        /* double use of the same address */
        if (radix_tree_insert(&kvm->arch.vsie.addr_to_page, addr >> 9, page)) {
                page_ref_dec(page);
                mutex_unlock(&kvm->arch.vsie.mutex);
                return NULL;
        }
        mutex_unlock(&kvm->arch.vsie.mutex);

        vsie_page = page_to_virt(page);
        memset(&vsie_page->scb_s, 0, sizeof(struct kvm_s390_sie_block));
        release_gmap_shadow(vsie_page);
        vsie_page->fault_addr = 0;
        vsie_page->scb_s.ihcpu = 0xffffU;
        return vsie_page;
}

/* put a vsie page acquired via get_vsie_page */
static void put_vsie_page(struct kvm *kvm, struct vsie_page *vsie_page)
{
        struct page *page = pfn_to_page(__pa(vsie_page) >> PAGE_SHIFT);

        page_ref_dec(page);
}
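
/*
 * Refcount lifecycle sketch (illustrative): cached pages idle at
 * refcount 1, so exclusive ownership is claimed via
 *
 *        if (page_ref_inc_return(page) == 2)
 *                ;  // page was free, we now own it exclusively
 *        else
 *                page_ref_dec(page);  // in use elsewhere, back off
 *
 * and put_vsie_page() drops the count back to 1.
 */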

int kvm_s390_handle_vsie(struct kvm_vcpu *vcpu)
{
        struct vsie_page *vsie_page;
        unsigned long scb_addr;
        int rc;

        vcpu->stat.instruction_sie++;
        if (!test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_SIEF2))
                return -EOPNOTSUPP;
        if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
                return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);

        BUILD_BUG_ON(sizeof(struct vsie_page) != PAGE_SIZE);
        scb_addr = kvm_s390_get_base_disp_s(vcpu, NULL);

        /* 512 byte alignment */
        if (unlikely(scb_addr & 0x1ffUL))
                return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);

        if (signal_pending(current) || kvm_s390_vcpu_has_irq(vcpu, 0))
                return 0;

        vsie_page = get_vsie_page(vcpu->kvm, scb_addr);
        if (IS_ERR(vsie_page))
                return PTR_ERR(vsie_page);
        else if (!vsie_page)
                /* double use of sie control block - simply do nothing */
                return 0;

        rc = pin_scb(vcpu, vsie_page, scb_addr);
        if (rc)
                goto out_put;
        rc = shadow_scb(vcpu, vsie_page);
        if (rc)
                goto out_unpin_scb;
        rc = pin_blocks(vcpu, vsie_page);
        if (rc)
                goto out_unshadow;
        register_shadow_scb(vcpu, vsie_page);
        rc = vsie_run(vcpu, vsie_page);
        unregister_shadow_scb(vcpu);
        unpin_blocks(vcpu, vsie_page);
out_unshadow:
        unshadow_scb(vcpu, vsie_page);
out_unpin_scb:
        unpin_scb(vcpu, vsie_page, scb_addr);
out_put:
        put_vsie_page(vcpu->kvm, vsie_page);

        return rc < 0 ? rc : 0;
}
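
/*
 * Flow sketch for one successful VSIE invocation (error paths omitted):
 *
 *        pin_scb() -> shadow_scb() -> pin_blocks() -> register_shadow_scb()
 *        -> vsie_run() -> unregister_shadow_scb() -> unpin_blocks()
 *        -> unshadow_scb() -> unpin_scb() -> put_vsie_page()
 */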

/* Init the vsie data structures. To be called when a vm is initialized. */
void kvm_s390_vsie_init(struct kvm *kvm)
{
        mutex_init(&kvm->arch.vsie.mutex);
        INIT_RADIX_TREE(&kvm->arch.vsie.addr_to_page, GFP_KERNEL);
}

/* Destroy the vsie data structures. To be called when a vm is destroyed. */
void kvm_s390_vsie_destroy(struct kvm *kvm)
{
        struct vsie_page *vsie_page;
        struct page *page;
        int i;

        mutex_lock(&kvm->arch.vsie.mutex);
        for (i = 0; i < kvm->arch.vsie.page_count; i++) {
                page = kvm->arch.vsie.pages[i];
                kvm->arch.vsie.pages[i] = NULL;
                vsie_page = page_to_virt(page);
                release_gmap_shadow(vsie_page);
                /* free the radix tree entry */
                radix_tree_delete(&kvm->arch.vsie.addr_to_page, page->index >> 9);
                __free_page(page);
        }
        kvm->arch.vsie.page_count = 0;
        mutex_unlock(&kvm->arch.vsie.mutex);
}

void kvm_s390_vsie_kick(struct kvm_vcpu *vcpu)
{
        struct kvm_s390_sie_block *scb = READ_ONCE(vcpu->arch.vsie_block);

        /*
         * Even if the VCPU lets go of the shadow sie block reference, it is
         * still valid in the cache. So we can safely kick it.
         */
        if (scb) {
                atomic_or(PROG_BLOCK_SIE, &scb->prog20);
                if (scb->prog0c & PROG_IN_SIE)
                        atomic_or(CPUSTAT_STOP_INT, &scb->cpuflags);
        }
}