// SPDX-License-Identifier: GPL-2.0
/*
 * kvm nested virtualization support for s390x
 *
 * Copyright IBM Corp. 2016
 *
 * Author(s): David Hildenbrand <dahi@linux.vnet.ibm.com>
 */
#include <linux/vmalloc.h>
#include <linux/kvm_host.h>
#include <linux/bug.h>
#include <linux/list.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>

#include <asm/gmap.h>
#include <asm/mmu_context.h>
#include <asm/sclp.h>
#include <asm/nmi.h>
#include <asm/dis.h>
#include "kvm-s390.h"
#include "gaccess.h"

struct vsie_page {
        struct kvm_s390_sie_block scb_s;        /* 0x0000 */
        /*
         * the backup info for machine check. ensure it's at
         * the same offset as that in struct sie_page!
         */
        struct mcck_volatile_info mcck_info;    /* 0x0200 */
        /*
         * The pinned original scb. Be aware that other VCPUs can modify
         * it while we read from it. Values that are used for conditions or
         * are reused conditionally, should be accessed via READ_ONCE.
         */
        struct kvm_s390_sie_block *scb_o;       /* 0x0218 */
        /* the shadow gmap in use by the vsie_page */
        struct gmap *gmap;                      /* 0x0220 */
        /* address of the last reported fault to guest2 */
        unsigned long fault_addr;               /* 0x0228 */
        /* calculated guest addresses of satellite control blocks */
        gpa_t sca_gpa;                          /* 0x0230 */
        gpa_t itdba_gpa;                        /* 0x0238 */
        gpa_t gvrd_gpa;                         /* 0x0240 */
        gpa_t riccbd_gpa;                       /* 0x0248 */
        gpa_t sdnx_gpa;                         /* 0x0250 */
        __u8 reserved[0x0700 - 0x0258];         /* 0x0258 */
        struct kvm_s390_crypto_cb crycb;        /* 0x0700 */
        __u8 fac[S390_ARCH_FAC_LIST_SIZE_BYTE]; /* 0x0800 */
};
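
/*
 * Note: the offset comments above imply that struct vsie_page occupies
 * exactly one page; kvm_s390_handle_vsie() below enforces this via
 * BUILD_BUG_ON(sizeof(struct vsie_page) != PAGE_SIZE). Keeping mcck_info
 * at offset 0x0200 mirrors struct sie_page, so machine check data can be
 * handled at the same location in both layouts.
 */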

/* trigger a validity icpt for the given scb */
static int set_validity_icpt(struct kvm_s390_sie_block *scb,
                             __u16 reason_code)
{
        scb->ipa = 0x1000;
        scb->ipb = ((__u32) reason_code) << 16;
        scb->icptcode = ICPT_VALIDITY;
        return 1;
}

/* mark the prefix as unmapped, this will block the VSIE */
static void prefix_unmapped(struct vsie_page *vsie_page)
{
        atomic_or(PROG_REQUEST, &vsie_page->scb_s.prog20);
}

/* mark the prefix as unmapped and wait until the VSIE has been left */
static void prefix_unmapped_sync(struct vsie_page *vsie_page)
{
        prefix_unmapped(vsie_page);
        if (vsie_page->scb_s.prog0c & PROG_IN_SIE)
                atomic_or(CPUSTAT_STOP_INT, &vsie_page->scb_s.cpuflags);
        while (vsie_page->scb_s.prog0c & PROG_IN_SIE)
                cpu_relax();
}
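
/*
 * The synchronization above works in two steps: setting PROG_REQUEST in
 * prog20 keeps the VSIE from being (re-)entered, while CPUSTAT_STOP_INT
 * kicks a currently running VSIE out of SIE. We then spin on PROG_IN_SIE
 * in prog0c, which is cleared once SIE has actually been left.
 */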

/* mark the prefix as mapped, this will allow the VSIE to run */
static void prefix_mapped(struct vsie_page *vsie_page)
{
        atomic_andnot(PROG_REQUEST, &vsie_page->scb_s.prog20);
}

/* test if the prefix is mapped into the gmap shadow */
static int prefix_is_mapped(struct vsie_page *vsie_page)
{
        return !(atomic_read(&vsie_page->scb_s.prog20) & PROG_REQUEST);
}

/* copy the updated intervention request bits into the shadow scb */
static void update_intervention_requests(struct vsie_page *vsie_page)
{
        const int bits = CPUSTAT_STOP_INT | CPUSTAT_IO_INT | CPUSTAT_EXT_INT;
        int cpuflags;

        cpuflags = atomic_read(&vsie_page->scb_o->cpuflags);
        atomic_andnot(bits, &vsie_page->scb_s.cpuflags);
        atomic_or(cpuflags & bits, &vsie_page->scb_s.cpuflags);
}

/* shadow (filter and validate) the cpuflags */
static int prepare_cpuflags(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
        struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
        struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
        int newflags, cpuflags = atomic_read(&scb_o->cpuflags);

        /* we don't allow ESA/390 guests */
        if (!(cpuflags & CPUSTAT_ZARCH))
                return set_validity_icpt(scb_s, 0x0001U);

        if (cpuflags & (CPUSTAT_RRF | CPUSTAT_MCDS))
                return set_validity_icpt(scb_s, 0x0001U);
        else if (cpuflags & (CPUSTAT_SLSV | CPUSTAT_SLSR))
                return set_validity_icpt(scb_s, 0x0007U);

        /* intervention requests will be set later */
        newflags = CPUSTAT_ZARCH;
        if (cpuflags & CPUSTAT_GED && test_kvm_facility(vcpu->kvm, 8))
                newflags |= CPUSTAT_GED;
        if (cpuflags & CPUSTAT_GED2 && test_kvm_facility(vcpu->kvm, 78)) {
                if (cpuflags & CPUSTAT_GED)
                        return set_validity_icpt(scb_s, 0x0001U);
                newflags |= CPUSTAT_GED2;
        }
        if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_GPERE))
                newflags |= cpuflags & CPUSTAT_P;
        if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_GSLS))
                newflags |= cpuflags & CPUSTAT_SM;
        if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_IBS))
                newflags |= cpuflags & CPUSTAT_IBS;
        if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_KSS))
                newflags |= cpuflags & CPUSTAT_KSS;

        atomic_set(&scb_s->cpuflags, newflags);
        return 0;
}

/*
 * Create a shadow copy of the crycb block and set up key wrapping, if
 * requested for guest 3 and enabled for guest 2.
 *
 * We only accept format-1 (no AP in g2), but convert it into format-2.
 * There is nothing to do for format-0.
 *
 * Returns: - 0 if shadowed or nothing to do
 *          - > 0 if control has to be given to guest 2
 */
static int shadow_crycb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
        struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
        struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
        const uint32_t crycbd_o = READ_ONCE(scb_o->crycbd);
        const u32 crycb_addr = crycbd_o & 0x7ffffff8U;
        unsigned long *b1, *b2;
        u8 ecb3_flags;

        scb_s->crycbd = 0;
        if (!(crycbd_o & vcpu->arch.sie_block->crycbd & CRYCB_FORMAT1))
                return 0;
        /* format-1 is supported with message-security-assist extension 3 */
        if (!test_kvm_facility(vcpu->kvm, 76))
                return 0;
        /* we may only allow it if enabled for guest 2 */
        ecb3_flags = scb_o->ecb3 & vcpu->arch.sie_block->ecb3 &
                     (ECB3_AES | ECB3_DEA);
        if (!ecb3_flags)
                return 0;

        if ((crycb_addr & PAGE_MASK) != ((crycb_addr + 128) & PAGE_MASK))
                return set_validity_icpt(scb_s, 0x003CU);
        else if (!crycb_addr)
                return set_validity_icpt(scb_s, 0x0039U);

        /* copy only the wrapping keys */
        if (read_guest_real(vcpu, crycb_addr + 72,
                            vsie_page->crycb.dea_wrapping_key_mask, 56))
                return set_validity_icpt(scb_s, 0x0035U);

        scb_s->ecb3 |= ecb3_flags;
        scb_s->crycbd = ((__u32)(__u64) &vsie_page->crycb) | CRYCB_FORMAT1 |
                        CRYCB_FORMAT2;

        /* xor both blocks in one run */
        b1 = (unsigned long *) vsie_page->crycb.dea_wrapping_key_mask;
        b2 = (unsigned long *)
                        vcpu->kvm->arch.crypto.crycb->dea_wrapping_key_mask;
        /* as 56%8 == 0, bitmap_xor won't overwrite any data */
        bitmap_xor(b1, b1, b2, BITS_PER_BYTE * 56);
        return 0;
}
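
/*
 * On the copy above: in a format-1 crycb, the dea (24 bytes) and aes
 * (32 bytes) wrapping key masks sit back to back starting at offset 72,
 * hence the single 56-byte read. The destination must be the matching
 * field in the shadow crycb (dea_wrapping_key_mask), so the hardware
 * finds the keys where the format expects them; the bitmap_xor() then
 * folds g2's own wrapping key masks into the masks handed to g3, so the
 * effective keys depend on both levels.
 */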

/* shadow (round up/down) the ibc to avoid validity icpt */
static void prepare_ibc(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
        struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
        struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
        /* READ_ONCE does not work on bitfields - use a temporary variable */
        const uint32_t __new_ibc = scb_o->ibc;
        const uint32_t new_ibc = READ_ONCE(__new_ibc) & 0x0fffU;
        __u64 min_ibc = (sclp.ibc >> 16) & 0x0fffU;

        scb_s->ibc = 0;
        /* ibc installed in g2 and requested for g3 */
        if (vcpu->kvm->arch.model.ibc && new_ibc) {
                scb_s->ibc = new_ibc;
                /* take care of the minimum ibc level of the machine */
                if (scb_s->ibc < min_ibc)
                        scb_s->ibc = min_ibc;
                /* take care of the maximum ibc level set for the guest */
                if (scb_s->ibc > vcpu->kvm->arch.model.ibc)
                        scb_s->ibc = vcpu->kvm->arch.model.ibc;
        }
}
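
/*
 * In effect, the requested ibc is clamped into [min_ibc, model.ibc].
 * For example (hypothetical values): with a machine minimum of 0x0f0
 * and a guest maximum of 0x123, a g3 request of 0x042 is raised to
 * 0x0f0, a request of 0x200 is lowered to 0x123, and 0x100 is kept.
 */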

/* unshadow the scb, copying parameters back to the real scb */
static void unshadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
        struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
        struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;

        /* interception */
        scb_o->icptcode = scb_s->icptcode;
        scb_o->icptstatus = scb_s->icptstatus;
        scb_o->ipa = scb_s->ipa;
        scb_o->ipb = scb_s->ipb;
        scb_o->gbea = scb_s->gbea;

        /* timer */
        scb_o->cputm = scb_s->cputm;
        scb_o->ckc = scb_s->ckc;
        scb_o->todpr = scb_s->todpr;

        /* guest state */
        scb_o->gpsw = scb_s->gpsw;
        scb_o->gg14 = scb_s->gg14;
        scb_o->gg15 = scb_s->gg15;
        memcpy(scb_o->gcr, scb_s->gcr, 128);
        scb_o->pp = scb_s->pp;

        /* interrupt intercept */
        switch (scb_s->icptcode) {
        case ICPT_PROGI:
        case ICPT_INSTPROGI:
        case ICPT_EXTINT:
                memcpy((void *)((u64)scb_o + 0xc0),
                       (void *)((u64)scb_s + 0xc0), 0xf0 - 0xc0);
                break;
        case ICPT_PARTEXEC:
                /* MVPG only */
                memcpy((void *)((u64)scb_o + 0xc0),
                       (void *)((u64)scb_s + 0xc0), 0xd0 - 0xc0);
                break;
        }

        if (scb_s->ihcpu != 0xffffU)
                scb_o->ihcpu = scb_s->ihcpu;
}

/*
 * Setup the shadow scb by copying and checking the relevant parts of the g2
 * provided scb.
 *
 * Returns: - 0 if the scb has been shadowed
 *          - > 0 if control has to be given to guest 2
 */
static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
        struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
        struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
        /* READ_ONCE does not work on bitfields - use a temporary variable */
        const uint32_t __new_prefix = scb_o->prefix;
        const uint32_t new_prefix = READ_ONCE(__new_prefix);
        const bool wants_tx = READ_ONCE(scb_o->ecb) & ECB_TE;
        bool had_tx = scb_s->ecb & ECB_TE;
        unsigned long new_mso = 0;
        int rc;

        /* make sure we don't have any leftovers when reusing the scb */
        scb_s->icptcode = 0;
        scb_s->eca = 0;
        scb_s->ecb = 0;
        scb_s->ecb2 = 0;
        scb_s->ecb3 = 0;
        scb_s->ecd = 0;
        scb_s->fac = 0;

        rc = prepare_cpuflags(vcpu, vsie_page);
        if (rc)
                goto out;

        /* timer */
        scb_s->cputm = scb_o->cputm;
        scb_s->ckc = scb_o->ckc;
        scb_s->todpr = scb_o->todpr;
        scb_s->epoch = scb_o->epoch;

        /* guest state */
        scb_s->gpsw = scb_o->gpsw;
        scb_s->gg14 = scb_o->gg14;
        scb_s->gg15 = scb_o->gg15;
        memcpy(scb_s->gcr, scb_o->gcr, 128);
        scb_s->pp = scb_o->pp;

        /* interception / execution handling */
        scb_s->gbea = scb_o->gbea;
        scb_s->lctl = scb_o->lctl;
        scb_s->svcc = scb_o->svcc;
        scb_s->ictl = scb_o->ictl;
        /*
         * SKEY handling functions can't deal with false setting of PTE invalid
         * bits. Therefore we cannot provide interpretation and would later
         * have to provide own emulation handlers.
         */
        if (!(atomic_read(&scb_s->cpuflags) & CPUSTAT_KSS))
                scb_s->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;

        scb_s->icpua = scb_o->icpua;

        if (!(atomic_read(&scb_s->cpuflags) & CPUSTAT_SM))
                new_mso = READ_ONCE(scb_o->mso) & 0xfffffffffff00000UL;
        /* if the hva of the prefix changes, we have to remap the prefix */
        if (scb_s->mso != new_mso || scb_s->prefix != new_prefix)
                prefix_unmapped(vsie_page);
        /* SIE will do mso/msl validity and exception checks for us */
        scb_s->msl = scb_o->msl & 0xfffffffffff00000UL;
        scb_s->mso = new_mso;
        scb_s->prefix = new_prefix;

        /* We have to definitely flush the tlb if this scb never ran */
        if (scb_s->ihcpu != 0xffffU)
                scb_s->ihcpu = scb_o->ihcpu;

        /* MVPG and Protection Exception Interpretation are always available */
        scb_s->eca |= scb_o->eca & (ECA_MVPGI | ECA_PROTEXCI);
        /* Host-protection-interruption introduced with ESOP */
        if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_ESOP))
                scb_s->ecb |= scb_o->ecb & ECB_HOSTPROTINT;
        /* transactional execution */
        if (test_kvm_facility(vcpu->kvm, 73) && wants_tx) {
                /* remap the prefix if tx is toggled on */
                if (!had_tx)
                        prefix_unmapped(vsie_page);
                scb_s->ecb |= ECB_TE;
        }
        /* SIMD */
        if (test_kvm_facility(vcpu->kvm, 129)) {
                scb_s->eca |= scb_o->eca & ECA_VX;
                scb_s->ecd |= scb_o->ecd & ECD_HOSTREGMGMT;
        }
        /* Run-time-Instrumentation */
        if (test_kvm_facility(vcpu->kvm, 64))
                scb_s->ecb3 |= scb_o->ecb3 & ECB3_RI;
        /* Instruction Execution Prevention */
        if (test_kvm_facility(vcpu->kvm, 130))
                scb_s->ecb2 |= scb_o->ecb2 & ECB2_IEP;
        /* Guarded Storage */
        if (test_kvm_facility(vcpu->kvm, 133)) {
                scb_s->ecb |= scb_o->ecb & ECB_GS;
                scb_s->ecd |= scb_o->ecd & ECD_HOSTREGMGMT;
        }
        if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_SIIF))
                scb_s->eca |= scb_o->eca & ECA_SII;
        if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_IB))
                scb_s->eca |= scb_o->eca & ECA_IB;
        if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_CEI))
                scb_s->eca |= scb_o->eca & ECA_CEI;
        /* Epoch Extension */
        if (test_kvm_facility(vcpu->kvm, 139))
                scb_s->ecd |= scb_o->ecd & ECD_MEF;

        prepare_ibc(vcpu, vsie_page);
        rc = shadow_crycb(vcpu, vsie_page);
out:
        if (rc)
                unshadow_scb(vcpu, vsie_page);
        return rc;
}

void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start,
                                 unsigned long end)
{
        struct kvm *kvm = gmap->private;
        struct vsie_page *cur;
        unsigned long prefix;
        struct page *page;
        int i;

        if (!gmap_is_shadow(gmap))
                return;
        if (start >= 1UL << 31)
                /* We are only interested in prefix pages */
                return;

        /*
         * Only new shadow blocks are added to the list during runtime,
         * therefore we can safely reference them all the time.
         */
        for (i = 0; i < kvm->arch.vsie.page_count; i++) {
                page = READ_ONCE(kvm->arch.vsie.pages[i]);
                if (!page)
                        continue;
                cur = page_to_virt(page);
                if (READ_ONCE(cur->gmap) != gmap)
                        continue;
                prefix = cur->scb_s.prefix << GUEST_PREFIX_SHIFT;
                /* with mso/msl, the prefix lies at an offset */
                prefix += cur->scb_s.mso;
                if (prefix <= end && start <= prefix + 2 * PAGE_SIZE - 1)
                        prefix_unmapped_sync(cur);
        }
}

/*
 * Map the first prefix page and if tx is enabled also the second prefix page.
 *
 * The prefix will be protected, a gmap notifier will inform about unmaps.
 * The shadow scb must not be executed until the prefix is remapped, this is
 * guaranteed by properly handling PROG_REQUEST.
 *
 * Returns: - 0 if successfully mapped or already mapped
 *          - > 0 if control has to be given to guest 2
 *          - -EAGAIN if the caller can retry immediately
 *          - -ENOMEM if out of memory
 */
static int map_prefix(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
        struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
        u64 prefix = scb_s->prefix << GUEST_PREFIX_SHIFT;
        int rc;

        if (prefix_is_mapped(vsie_page))
                return 0;

        /* mark it as mapped so we can catch any concurrent unmappers */
        prefix_mapped(vsie_page);

        /* with mso/msl, the prefix lies at offset *mso* */
        prefix += scb_s->mso;

        rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix);
        if (!rc && (scb_s->ecb & ECB_TE))
                rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
                                           prefix + PAGE_SIZE);
        /*
         * We don't have to mprotect, we will be called for all unshadows.
         * SIE will detect if protection applies and trigger a validity.
         */
        if (rc)
                prefix_unmapped(vsie_page);
        if (rc > 0 || rc == -EFAULT)
                rc = set_validity_icpt(scb_s, 0x0037U);
        return rc;
}
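
/*
 * Note the ordering above: the prefix is marked as mapped *before* the
 * pages are faulted in, so a concurrent unshadow (which sets PROG_REQUEST
 * via prefix_unmapped()) is never lost. The prefix area spans two pages
 * when transactional execution is enabled, matching the 2 * PAGE_SIZE
 * window checked in kvm_s390_vsie_gmap_notifier().
 */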

/*
 * Pin the guest page given by gpa and set hpa to the pinned host address.
 * Will always be pinned writable.
 *
 * Returns: - 0 on success
 *          - -EINVAL if the gpa is not valid guest storage
 */
static int pin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t *hpa)
{
        struct page *page;

        page = gfn_to_page(kvm, gpa_to_gfn(gpa));
        if (is_error_page(page))
                return -EINVAL;
        *hpa = (hpa_t) page_to_virt(page) + (gpa & ~PAGE_MASK);
        return 0;
}

/* Unpins a page previously pinned via pin_guest_page, marking it as dirty. */
static void unpin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t hpa)
{
        kvm_release_pfn_dirty(hpa >> PAGE_SHIFT);
        /* mark the page always as dirty for migration */
        mark_page_dirty(kvm, gpa_to_gfn(gpa));
}

/* unpin all blocks previously pinned by pin_blocks(), marking them dirty */
static void unpin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
        struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
        hpa_t hpa;

        hpa = (u64) scb_s->scaoh << 32 | scb_s->scaol;
        if (hpa) {
                unpin_guest_page(vcpu->kvm, vsie_page->sca_gpa, hpa);
                vsie_page->sca_gpa = 0;
                scb_s->scaol = 0;
                scb_s->scaoh = 0;
        }

        hpa = scb_s->itdba;
        if (hpa) {
                unpin_guest_page(vcpu->kvm, vsie_page->itdba_gpa, hpa);
                vsie_page->itdba_gpa = 0;
                scb_s->itdba = 0;
        }

        hpa = scb_s->gvrd;
        if (hpa) {
                unpin_guest_page(vcpu->kvm, vsie_page->gvrd_gpa, hpa);
                vsie_page->gvrd_gpa = 0;
                scb_s->gvrd = 0;
        }

        hpa = scb_s->riccbd;
        if (hpa) {
                unpin_guest_page(vcpu->kvm, vsie_page->riccbd_gpa, hpa);
                vsie_page->riccbd_gpa = 0;
                scb_s->riccbd = 0;
        }

        hpa = scb_s->sdnxo;
        if (hpa) {
                unpin_guest_page(vcpu->kvm, vsie_page->sdnx_gpa, hpa);
                vsie_page->sdnx_gpa = 0;
                scb_s->sdnxo = 0;
        }
}

/*
 * Instead of shadowing some blocks, we can simply forward them because the
 * addresses in the scb are 64 bit long.
 *
 * This works as long as the data lies in one page. If blocks ever exceed one
 * page, we have to fall back to shadowing.
 *
 * As we reuse the sca, the vcpu pointers contained in it are invalid. We must
 * therefore not enable any facilities that access these pointers (e.g. SIGPIF).
 *
 * Returns: - 0 if all blocks were pinned.
 *          - > 0 if control has to be given to guest 2
 *          - -ENOMEM if out of memory
 */
static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
        struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
        struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
        hpa_t hpa;
        gpa_t gpa;
        int rc = 0;

        gpa = READ_ONCE(scb_o->scaol) & ~0xfUL;
        if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_64BSCAO))
                gpa |= (u64) READ_ONCE(scb_o->scaoh) << 32;
        if (gpa) {
                if (!(gpa & ~0x1fffUL))
                        rc = set_validity_icpt(scb_s, 0x0038U);
                else if ((gpa & ~0x1fffUL) == kvm_s390_get_prefix(vcpu))
                        rc = set_validity_icpt(scb_s, 0x0011U);
                else if ((gpa & PAGE_MASK) !=
                         ((gpa + sizeof(struct bsca_block) - 1) & PAGE_MASK))
                        rc = set_validity_icpt(scb_s, 0x003bU);
                if (!rc) {
                        rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
                        if (rc)
                                rc = set_validity_icpt(scb_s, 0x0034U);
                }
                if (rc)
                        goto unpin;
                vsie_page->sca_gpa = gpa;
                scb_s->scaoh = (u32)((u64)hpa >> 32);
                scb_s->scaol = (u32)(u64)hpa;
        }

        gpa = READ_ONCE(scb_o->itdba) & ~0xffUL;
        if (gpa && (scb_s->ecb & ECB_TE)) {
                if (!(gpa & ~0x1fffU)) {
                        rc = set_validity_icpt(scb_s, 0x0080U);
                        goto unpin;
                }
                /* 256 bytes cannot cross page boundaries */
                rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
                if (rc) {
                        rc = set_validity_icpt(scb_s, 0x0080U);
                        goto unpin;
                }
                vsie_page->itdba_gpa = gpa;
                scb_s->itdba = hpa;
        }

        gpa = READ_ONCE(scb_o->gvrd) & ~0x1ffUL;
        if (gpa && (scb_s->eca & ECA_VX) && !(scb_s->ecd & ECD_HOSTREGMGMT)) {
                if (!(gpa & ~0x1fffUL)) {
                        rc = set_validity_icpt(scb_s, 0x1310U);
                        goto unpin;
                }
                /*
                 * 512 bytes of vector registers cannot cross page boundaries.
                 * If this block gets bigger, we have to shadow it.
                 */
                rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
                if (rc) {
                        rc = set_validity_icpt(scb_s, 0x1310U);
                        goto unpin;
                }
                vsie_page->gvrd_gpa = gpa;
                scb_s->gvrd = hpa;
        }

        gpa = READ_ONCE(scb_o->riccbd) & ~0x3fUL;
        if (gpa && (scb_s->ecb3 & ECB3_RI)) {
                if (!(gpa & ~0x1fffUL)) {
                        rc = set_validity_icpt(scb_s, 0x0043U);
                        goto unpin;
                }
                /* 64 bytes cannot cross page boundaries */
                rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
                if (rc) {
                        rc = set_validity_icpt(scb_s, 0x0043U);
                        goto unpin;
                }
                /* Validity 0x0044 will be checked by SIE */
                vsie_page->riccbd_gpa = gpa;
                scb_s->riccbd = hpa;
        }

        if ((scb_s->ecb & ECB_GS) && !(scb_s->ecd & ECD_HOSTREGMGMT)) {
                unsigned long sdnxc;

                gpa = READ_ONCE(scb_o->sdnxo) & ~0xfUL;
                sdnxc = READ_ONCE(scb_o->sdnxo) & 0xfUL;
                if (!gpa || !(gpa & ~0x1fffUL)) {
                        rc = set_validity_icpt(scb_s, 0x10b0U);
                        goto unpin;
                }
                if (sdnxc < 6 || sdnxc > 12) {
                        rc = set_validity_icpt(scb_s, 0x10b1U);
                        goto unpin;
                }
                if (gpa & ((1 << sdnxc) - 1)) {
                        rc = set_validity_icpt(scb_s, 0x10b2U);
                        goto unpin;
                }
                /*
                 * Due to alignment rules (checked above) this cannot
                 * cross page boundaries.
                 */
                rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
                if (rc) {
                        rc = set_validity_icpt(scb_s, 0x10b0U);
                        goto unpin;
                }
                vsie_page->sdnx_gpa = gpa;
                scb_s->sdnxo = hpa | sdnxc;
        }
        return 0;
unpin:
        unpin_blocks(vcpu, vsie_page);
        return rc;
}
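
/*
 * The low-bit masks applied above encode each block's architectural
 * alignment: the sca origin is 16-byte aligned (~0xf), the itdb 256-byte
 * (~0xff), the vector register save area 512-byte (~0x1ff), the riccb
 * 64-byte (~0x3f), and the sdnx origin 16-byte, with its low four bits
 * carrying the block size as a power of two (2^sdnxc bytes, with
 * 6 <= sdnxc <= 12).
 */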

/* unpin the scb provided by guest 2, marking it as dirty */
static void unpin_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page,
                      gpa_t gpa)
{
        hpa_t hpa = (hpa_t) vsie_page->scb_o;

        if (hpa)
                unpin_guest_page(vcpu->kvm, gpa, hpa);
        vsie_page->scb_o = NULL;
}

/*
 * Pin the scb at gpa provided by guest 2 at vsie_page->scb_o.
 *
 * Returns: - 0 if the scb was pinned.
 *          - > 0 if control has to be given to guest 2
 */
static int pin_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page,
                   gpa_t gpa)
{
        hpa_t hpa;
        int rc;

        rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
        if (rc) {
                rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
                WARN_ON_ONCE(rc);
                return 1;
        }
        vsie_page->scb_o = (struct kvm_s390_sie_block *) hpa;
        return 0;
}

/*
 * Inject a fault into guest 2.
 *
 * Returns: - > 0 if control has to be given to guest 2
 *            < 0 if an error occurred during injection.
 */
static int inject_fault(struct kvm_vcpu *vcpu, __u16 code, __u64 vaddr,
                        bool write_flag)
{
        struct kvm_s390_pgm_info pgm = {
                .code = code,
                .trans_exc_code =
                        /* 0-51: virtual address */
                        (vaddr & 0xfffffffffffff000UL) |
                        /* 52-53: store / fetch */
                        (((unsigned int) !write_flag) + 1) << 10,
                        /* 62-63: asce id (always primary == 0) */
                .exc_access_id = 0, /* always primary */
                .op_access_id = 0, /* not MVPG */
        };
        int rc;

        if (code == PGM_PROTECTION)
                pgm.trans_exc_code |= 0x4UL;

        rc = kvm_s390_inject_prog_irq(vcpu, &pgm);
        return rc ? rc : 1;
}
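
/*
 * Worked example of the trans_exc_code encoding above (hypothetical
 * values): a write fault at vaddr 0x12345678 yields the page-aligned
 * address 0x12345000 in bits 0-51, and !write_flag + 1 == 1 shifted
 * into bits 52-53 marks it as a store; a read fault would encode 2
 * (fetch) instead. For PGM_PROTECTION, bit 61 (0x4) is set in addition.
 */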

/*
 * Handle a fault during vsie execution on a gmap shadow.
 *
 * Returns: - 0 if the fault was resolved
 *          - > 0 if control has to be given to guest 2
 *          - < 0 if an error occurred
 */
static int handle_fault(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
        int rc;

        if (current->thread.gmap_int_code == PGM_PROTECTION)
                /* we can directly forward all protection exceptions */
                return inject_fault(vcpu, PGM_PROTECTION,
                                    current->thread.gmap_addr, 1);

        rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
                                   current->thread.gmap_addr);
        if (rc > 0) {
                rc = inject_fault(vcpu, rc,
                                  current->thread.gmap_addr,
                                  current->thread.gmap_write_flag);
                if (rc >= 0)
                        vsie_page->fault_addr = current->thread.gmap_addr;
        }
        return rc;
}

/*
 * Retry the previous fault that required guest 2 intervention. This avoids
 * one superfluous SIE re-entry and direct exit.
 *
 * Will ignore any errors. The next SIE fault will do proper fault handling.
 */
static void handle_last_fault(struct kvm_vcpu *vcpu,
                              struct vsie_page *vsie_page)
{
        if (vsie_page->fault_addr)
                kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
                                      vsie_page->fault_addr);
        vsie_page->fault_addr = 0;
}

static inline void clear_vsie_icpt(struct vsie_page *vsie_page)
{
        vsie_page->scb_s.icptcode = 0;
}

/* rewind the psw and clear the vsie icpt, so we can retry execution */
static void retry_vsie_icpt(struct vsie_page *vsie_page)
{
        struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
        int ilen = insn_length(scb_s->ipa >> 8);

        /* take care of EXECUTE instructions */
        if (scb_s->icptstatus & 1) {
                ilen = (scb_s->icptstatus >> 4) & 0x6;
                if (!ilen)
                        ilen = 4;
        }
        scb_s->gpsw.addr = __rewind_psw(scb_s->gpsw, ilen);
        clear_vsie_icpt(vsie_page);
}
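
/*
 * On the EXECUTE handling above: if bit 0 of icptstatus is set, the
 * intercepted instruction was run via EXECUTE, so the rewind length is
 * taken from icptstatus instead (an even value 2, 4 or 6 after the
 * >> 4 and & 0x6, with 0 standing for a length of 4). This keeps the
 * psw pointing at the EXECUTE instruction rather than its target.
 */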

/*
 * Try to shadow + enable the guest 2 provided facility list.
 * Retry instruction execution if enabled for and provided by guest 2.
 *
 * Returns: - 0 if handled (retry or guest 2 icpt)
 *          - > 0 if control has to be given to guest 2
 */
static int handle_stfle(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
        struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
        __u32 fac = READ_ONCE(vsie_page->scb_o->fac) & 0x7ffffff8U;

        if (fac && test_kvm_facility(vcpu->kvm, 7)) {
                retry_vsie_icpt(vsie_page);
                if (read_guest_real(vcpu, fac, &vsie_page->fac,
                                    sizeof(vsie_page->fac)))
                        return set_validity_icpt(scb_s, 0x1090U);
                scb_s->fac = (__u32)(__u64) &vsie_page->fac;
        }
        return 0;
}

/*
 * Run the vsie on a shadow scb and a shadow gmap, without any further
 * sanity checks, handling SIE faults.
 *
 * Returns: - 0 everything went fine
 *          - > 0 if control has to be given to guest 2
 *          - < 0 if an error occurred
 */
static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
        struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
        struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
        int rc;

        handle_last_fault(vcpu, vsie_page);

        if (need_resched())
                schedule();
        if (test_cpu_flag(CIF_MCCK_PENDING))
                s390_handle_mcck();

        srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
        local_irq_disable();
        guest_enter_irqoff();
        local_irq_enable();

        rc = sie64a(scb_s, vcpu->run->s.regs.gprs);

        local_irq_disable();
        guest_exit_irqoff();
        local_irq_enable();
        vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

        if (rc == -EINTR) {
                VCPU_EVENT(vcpu, 3, "%s", "machine check");
                kvm_s390_reinject_machine_check(vcpu, &vsie_page->mcck_info);
                return 0;
        }

        if (rc > 0)
                rc = 0; /* we could still have an icpt */
        else if (rc == -EFAULT)
                return handle_fault(vcpu, vsie_page);

        switch (scb_s->icptcode) {
        case ICPT_INST:
                if (scb_s->ipa == 0xb2b0)
                        rc = handle_stfle(vcpu, vsie_page);
                break;
        case ICPT_STOP:
                /* stop not requested by g2 - must have been a kick */
                if (!(atomic_read(&scb_o->cpuflags) & CPUSTAT_STOP_INT))
                        clear_vsie_icpt(vsie_page);
                break;
        case ICPT_VALIDITY:
                if ((scb_s->ipa & 0xf000) != 0xf000)
                        scb_s->ipa += 0x1000;
                break;
        }
        return rc;
}

static void release_gmap_shadow(struct vsie_page *vsie_page)
{
        if (vsie_page->gmap)
                gmap_put(vsie_page->gmap);
        WRITE_ONCE(vsie_page->gmap, NULL);
        prefix_unmapped(vsie_page);
}

static int acquire_gmap_shadow(struct kvm_vcpu *vcpu,
                               struct vsie_page *vsie_page)
{
        unsigned long asce;
        union ctlreg0 cr0;
        struct gmap *gmap;
        int edat;

        asce = vcpu->arch.sie_block->gcr[1];
        cr0.val = vcpu->arch.sie_block->gcr[0];
        edat = cr0.edat && test_kvm_facility(vcpu->kvm, 8);
        edat += edat && test_kvm_facility(vcpu->kvm, 78);

        /*
         * ASCE or EDAT could have changed since last icpt, or the gmap
         * we're holding has been unshadowed. If the gmap is still valid,
         * we can safely reuse it.
         */
        if (vsie_page->gmap && gmap_shadow_valid(vsie_page->gmap, asce, edat))
                return 0;

        /* release the old shadow - if any, and mark the prefix as unmapped */
        release_gmap_shadow(vsie_page);
        gmap = gmap_shadow(vcpu->arch.gmap, asce, edat);
        if (IS_ERR(gmap))
                return PTR_ERR(gmap);
        gmap->private = vcpu->kvm;
        WRITE_ONCE(vsie_page->gmap, gmap);
        return 0;
}
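
/*
 * The edat computation above folds the facility checks into a level:
 * edat == 0 when g2's CR0 has edat disabled or facility 8 (EDAT-1) is
 * missing, edat == 1 with facility 8 only, and edat == 2 when facility
 * 78 (EDAT-2) is installed as well. The shadow gmap is keyed on both
 * the asce and this level, which is why gmap_shadow_valid() compares
 * them before an existing shadow is reused.
 */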

/*
 * Register the shadow scb at the VCPU, e.g. for kicking out of vsie.
 */
static void register_shadow_scb(struct kvm_vcpu *vcpu,
                                struct vsie_page *vsie_page)
{
        struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;

        WRITE_ONCE(vcpu->arch.vsie_block, &vsie_page->scb_s);
        /*
         * External calls have to lead to a kick of the vcpu and
         * therefore the vsie -> Simulate Wait state.
         */
        kvm_s390_set_cpuflags(vcpu, CPUSTAT_WAIT);
        /*
         * We have to adjust the g3 epoch by the g2 epoch. The epoch will
         * automatically be adjusted on tod clock changes via kvm_sync_clock.
         */
        preempt_disable();
        scb_s->epoch += vcpu->kvm->arch.epoch;

        if (scb_s->ecd & ECD_MEF) {
                scb_s->epdx += vcpu->kvm->arch.epdx;
                if (scb_s->epoch < vcpu->kvm->arch.epoch)
                        scb_s->epdx += 1;
        }

        preempt_enable();
}

/*
 * Unregister a shadow scb from a VCPU.
 */
static void unregister_shadow_scb(struct kvm_vcpu *vcpu)
{
        atomic_andnot(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags);
        WRITE_ONCE(vcpu->arch.vsie_block, NULL);
}

/*
 * Run the vsie on a shadowed scb, managing the gmap shadow, handling
 * prefix pages and faults.
 *
 * Returns: - 0 if no errors occurred
 *          - > 0 if control has to be given to guest 2
 *          - -ENOMEM if out of memory
 */
static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
        struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
        int rc = 0;

        while (1) {
                rc = acquire_gmap_shadow(vcpu, vsie_page);
                if (!rc)
                        rc = map_prefix(vcpu, vsie_page);
                if (!rc) {
                        gmap_enable(vsie_page->gmap);
                        update_intervention_requests(vsie_page);
                        rc = do_vsie_run(vcpu, vsie_page);
                        gmap_enable(vcpu->arch.gmap);
                }
                atomic_andnot(PROG_BLOCK_SIE, &scb_s->prog20);

                if (rc == -EAGAIN)
                        rc = 0;
                if (rc || scb_s->icptcode || signal_pending(current) ||
                    kvm_s390_vcpu_has_irq(vcpu, 0))
                        break;
        }

        if (rc == -EFAULT) {
                /*
                 * Addressing exceptions are always presented as intercepts.
                 * As addressing exceptions are suppressing and our guest 3 PSW
                 * points at the responsible instruction, we have to
                 * forward the PSW and set the ilc. If we can't read guest 3
                 * instruction, we can use an arbitrary ilc. Let's always use
                 * ilen = 4 for now, so we can avoid reading in guest 3 virtual
                 * memory. (we could also fake the shadow so the hardware
                 * handles it).
                 */
                scb_s->icptcode = ICPT_PROGI;
                scb_s->iprcc = PGM_ADDRESSING;
                scb_s->pgmilc = 4;
                scb_s->gpsw.addr = __rewind_psw(scb_s->gpsw, -4);
        }
        return rc;
}

/*
 * Get or create a vsie page for a scb address.
 *
 * Returns: - address of a vsie page (cached or new one)
 *          - NULL if the same scb address is already used by another VCPU
 *          - ERR_PTR(-ENOMEM) if out of memory
 */
static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr)
{
        struct vsie_page *vsie_page;
        struct page *page;
        int nr_vcpus;

        rcu_read_lock();
        page = radix_tree_lookup(&kvm->arch.vsie.addr_to_page, addr >> 9);
        rcu_read_unlock();
        if (page) {
                if (page_ref_inc_return(page) == 2)
                        return page_to_virt(page);
                page_ref_dec(page);
        }

        /*
         * We want at least #online_vcpus shadows, so every VCPU can execute
         * the VSIE in parallel.
         */
        nr_vcpus = atomic_read(&kvm->online_vcpus);

        mutex_lock(&kvm->arch.vsie.mutex);
        if (kvm->arch.vsie.page_count < nr_vcpus) {
                page = alloc_page(GFP_KERNEL | __GFP_ZERO | GFP_DMA);
                if (!page) {
                        mutex_unlock(&kvm->arch.vsie.mutex);
                        return ERR_PTR(-ENOMEM);
                }
                page_ref_inc(page);
                kvm->arch.vsie.pages[kvm->arch.vsie.page_count] = page;
                kvm->arch.vsie.page_count++;
        } else {
                /* reuse an existing entry that belongs to nobody */
                while (true) {
                        page = kvm->arch.vsie.pages[kvm->arch.vsie.next];
                        if (page_ref_inc_return(page) == 2)
                                break;
                        page_ref_dec(page);
                        kvm->arch.vsie.next++;
                        kvm->arch.vsie.next %= nr_vcpus;
                }
                radix_tree_delete(&kvm->arch.vsie.addr_to_page,
                                  page->index >> 9);
        }
        page->index = addr;
        /* double use of the same address */
        if (radix_tree_insert(&kvm->arch.vsie.addr_to_page, addr >> 9, page)) {
                page_ref_dec(page);
                mutex_unlock(&kvm->arch.vsie.mutex);
                return NULL;
        }
        mutex_unlock(&kvm->arch.vsie.mutex);

        vsie_page = page_to_virt(page);
        memset(&vsie_page->scb_s, 0, sizeof(struct kvm_s390_sie_block));
        release_gmap_shadow(vsie_page);
        vsie_page->fault_addr = 0;
        vsie_page->scb_s.ihcpu = 0xffffU;
        return vsie_page;
}
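
/*
 * The page reference count doubles as an ownership flag here: a cached
 * but unused vsie page has a refcount of 1, so page_ref_inc_return()
 * returning 2 means we atomically took exclusive ownership; any other
 * value means another VCPU holds the page and we back off. The radix
 * tree is keyed by addr >> 9, matching the 512-byte alignment of the
 * scb address enforced in kvm_s390_handle_vsie().
 */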

/* put a vsie page acquired via get_vsie_page */
static void put_vsie_page(struct kvm *kvm, struct vsie_page *vsie_page)
{
        struct page *page = pfn_to_page(__pa(vsie_page) >> PAGE_SHIFT);

        page_ref_dec(page);
}

int kvm_s390_handle_vsie(struct kvm_vcpu *vcpu)
{
        struct vsie_page *vsie_page;
        unsigned long scb_addr;
        int rc;

        vcpu->stat.instruction_sie++;
        if (!test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_SIEF2))
                return -EOPNOTSUPP;
        if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
                return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);

        BUILD_BUG_ON(sizeof(struct vsie_page) != PAGE_SIZE);
        scb_addr = kvm_s390_get_base_disp_s(vcpu, NULL);

        /* 512 byte alignment */
        if (unlikely(scb_addr & 0x1ffUL))
                return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);

        if (signal_pending(current) || kvm_s390_vcpu_has_irq(vcpu, 0))
                return 0;

        vsie_page = get_vsie_page(vcpu->kvm, scb_addr);
        if (IS_ERR(vsie_page))
                return PTR_ERR(vsie_page);
        else if (!vsie_page)
                /* double use of sie control block - simply do nothing */
                return 0;

        rc = pin_scb(vcpu, vsie_page, scb_addr);
        if (rc)
                goto out_put;
        rc = shadow_scb(vcpu, vsie_page);
        if (rc)
                goto out_unpin_scb;
        rc = pin_blocks(vcpu, vsie_page);
        if (rc)
                goto out_unshadow;
        register_shadow_scb(vcpu, vsie_page);
        rc = vsie_run(vcpu, vsie_page);
        unregister_shadow_scb(vcpu);
        unpin_blocks(vcpu, vsie_page);
out_unshadow:
        unshadow_scb(vcpu, vsie_page);
out_unpin_scb:
        unpin_scb(vcpu, vsie_page, scb_addr);
out_put:
        put_vsie_page(vcpu->kvm, vsie_page);

        return rc < 0 ? rc : 0;
}

/* Init the vsie data structures. To be called when a vm is initialized. */
void kvm_s390_vsie_init(struct kvm *kvm)
{
        mutex_init(&kvm->arch.vsie.mutex);
        INIT_RADIX_TREE(&kvm->arch.vsie.addr_to_page, GFP_KERNEL);
}

/* Destroy the vsie data structures. To be called when a vm is destroyed. */
void kvm_s390_vsie_destroy(struct kvm *kvm)
{
        struct vsie_page *vsie_page;
        struct page *page;
        int i;

        mutex_lock(&kvm->arch.vsie.mutex);
        for (i = 0; i < kvm->arch.vsie.page_count; i++) {
                page = kvm->arch.vsie.pages[i];
                kvm->arch.vsie.pages[i] = NULL;
                vsie_page = page_to_virt(page);
                release_gmap_shadow(vsie_page);
                /* free the radix tree entry */
                radix_tree_delete(&kvm->arch.vsie.addr_to_page,
                                  page->index >> 9);
                __free_page(page);
        }
        kvm->arch.vsie.page_count = 0;
        mutex_unlock(&kvm->arch.vsie.mutex);
}

void kvm_s390_vsie_kick(struct kvm_vcpu *vcpu)
{
        struct kvm_s390_sie_block *scb = READ_ONCE(vcpu->arch.vsie_block);

        /*
         * Even if the VCPU lets go of the shadow sie block reference, it is
         * still valid in the cache. So we can safely kick it.
         */
        if (scb) {
                atomic_or(PROG_BLOCK_SIE, &scb->prog20);
                if (scb->prog0c & PROG_IN_SIE)
                        atomic_or(CPUSTAT_STOP_INT, &scb->cpuflags);
        }
}