book3s_xive_template.c

/*
 * Copyright 2017 Benjamin Herrenschmidt, IBM Corporation
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 */

/* File to be included by other .c files */

#define XGLUE(a,b) a##b
#define GLUE(a,b) XGLUE(a,b)

/* Dummy interrupt used when taking interrupts out of a queue in H_CPPR */
#define XICS_DUMMY	1
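
/*
 * Acknowledge pending interrupts at the OS level.
 *
 * This performs the "ACK OS" special load on the thread management
 * area, which returns the NSR byte and the CPPR of the most favored
 * pending interrupt. The corresponding priority bit is latched in
 * xc->pending so a later scan_interrupts() pass can pull the actual
 * interrupt out of the relevant queue.
 */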
static void GLUE(X_PFX,ack_pending)(struct kvmppc_xive_vcpu *xc)
{
        u8 cppr;
        u16 ack;

        /*
         * Ensure any previous store to CPPR is ordered vs.
         * the subsequent loads from PIPR or ACK.
         */
        eieio();

        /* Perform the acknowledge OS to register cycle. */
        ack = be16_to_cpu(__x_readw(__x_tima + TM_SPC_ACK_OS_REG));

        /* Synchronize subsequent queue accesses */
        mb();

        /* XXX Check grouping level */

        /* Anything ? */
        if (!((ack >> 8) & TM_QW1_NSR_EO))
                return;

        /* Grab CPPR of the most favored pending interrupt */
        cppr = ack & 0xff;
        if (cppr < 8)
                xc->pending |= 1 << cppr;

#ifdef XIVE_RUNTIME_CHECKS
        /* Check consistency */
        if (cppr >= xc->hw_cppr)
                pr_warn("KVM-XIVE: CPU %d odd ack CPPR, got %d at %d\n",
                        smp_processor_id(), cppr, xc->hw_cppr);
#endif

        /*
         * Update our image of the HW CPPR. We don't yet modify
         * xc->cppr, this will be done as we scan for interrupts
         * in the queues.
         */
        xc->hw_cppr = cppr;
}
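
/*
 * Load from an interrupt source ESB management page.
 *
 * The "offset" selects the ESB operation (e.g. XIVE_ESB_SET_PQ_00 or
 * XIVE_ESB_SET_PQ_11) and the load returns the previous PQ state in
 * the low bits: roughly, P means the interrupt has been presented and
 * Q means a new occurrence was coalesced while P was set. On
 * little-endian the useful byte comes back at the top of the 64-bit
 * value, hence the shift below.
 */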
static u8 GLUE(X_PFX,esb_load)(struct xive_irq_data *xd, u32 offset)
{
        u64 val;

        if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG)
                offset |= offset << 4;

        val = __x_readq(__x_eoi_page(xd) + offset);
#ifdef __LITTLE_ENDIAN__
        val >>= 64 - 8;
#endif
        return (u8)val;
}
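
/*
 * EOI an interrupt at the source.
 *
 * Depending on the source flags this uses, in order of preference:
 * a "store EOI" write, a firmware (OPAL) EOI, the LSI "EOI cycle"
 * load, or a PQ=00 management load followed by a manual re-trigger
 * if the old Q bit was set.
 */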
static void GLUE(X_PFX,source_eoi)(u32 hw_irq, struct xive_irq_data *xd)
{
        /* If the XIVE supports the new "store EOI" facility, use it */
        if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
                __x_writeq(0, __x_eoi_page(xd) + XIVE_ESB_STORE_EOI);
        else if (hw_irq && xd->flags & XIVE_IRQ_FLAG_EOI_FW)
                opal_int_eoi(hw_irq);
        else if (xd->flags & XIVE_IRQ_FLAG_LSI) {
                /*
                 * For LSIs the HW EOI cycle is used rather than PQ bits,
                 * as they are automatically re-triggered in HW when still
                 * pending.
                 */
                __x_readq(__x_eoi_page(xd) + XIVE_ESB_LOAD_EOI);
        } else {
                uint64_t eoi_val;

                /*
                 * Otherwise for EOI, we use the special MMIO that does
                 * a clear of both P and Q and returns the old Q,
                 * except for LSIs where we use the "EOI cycle" special
                 * load.
                 *
                 * This allows us to then do a re-trigger if Q was set
                 * rather than synthesizing an interrupt in software.
                 */
                eoi_val = GLUE(X_PFX,esb_load)(xd, XIVE_ESB_SET_PQ_00);

                /* Re-trigger if needed */
                if ((eoi_val & 1) && __x_trig_page(xd))
                        __x_writeq(0, __x_trig_page(xd));
        }
}
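
/*
 * The scan type passed to scan_interrupts() below:
 *
 *  scan_fetch - fetch the interrupt and commit the queue pointers,
 *               used by H_XIRR.
 *  scan_poll  - only peek at the queues without side effects,
 *               used by H_IPOLL.
 *  scan_eoi   - re-evaluate stale pending bits after an EOI without
 *               adjusting the CPPR, used by H_EOI.
 */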
enum {
        scan_fetch,
        scan_poll,
        scan_eoi,
};
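
/*
 * Scan the queues for the most favored pending interrupt.
 *
 * "pending" is a bitmap of priorities with a potentially non-empty
 * queue; ffs() picks the most favored (numerically lowest) one first.
 * For example, with pending == 0x28 (priorities 3 and 5 set),
 * ffs(pending) - 1 yields 3, so queue 3 is scanned before queue 5.
 * A pending IPI (xc->mfrr != 0xff) competes with the queues on the
 * same priority scale.
 */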
static u32 GLUE(X_PFX,scan_interrupts)(struct kvmppc_xive_vcpu *xc,
                                       u8 pending, int scan_type)
{
        u32 hirq = 0;
        u8 prio = 0xff;

        /* Find highest pending priority */
        while ((xc->mfrr != 0xff || pending != 0) && hirq == 0) {
                struct xive_q *q;
                u32 idx, toggle;
                __be32 *qpage;

                /*
                 * If pending is 0 this will return 0xff which is what
                 * we want
                 */
                prio = ffs(pending) - 1;

                /*
                 * If the most favored prio we found pending is less
                 * favored than (or equal to) a pending IPI, we return
                 * the IPI instead.
                 *
                 * Note: If pending was 0 and mfrr is 0xff, we will
                 * not spuriously take an IPI because mfrr cannot
                 * then be smaller than cppr.
                 */
                if (prio >= xc->mfrr && xc->mfrr < xc->cppr) {
                        prio = xc->mfrr;
                        hirq = XICS_IPI;
                        break;
                }

                /* Don't scan past the guest cppr */
                if (prio >= xc->cppr || prio > 7)
                        break;

                /* Grab queue and pointers */
                q = &xc->queues[prio];
                idx = q->idx;
                toggle = q->toggle;

                /*
                 * Snapshot the queue page. The test further down for EOI
                 * must use the same "copy" that was used by __xive_read_eq
                 * since qpage can be set concurrently and we don't want
                 * to miss an EOI.
                 */
                qpage = READ_ONCE(q->qpage);

skip_ipi:
                /*
                 * Try to fetch from the queue. Will return 0 for a
                 * non-queueing priority (ie, qpage = 0).
                 */
                hirq = __xive_read_eq(qpage, q->msk, &idx, &toggle);

                /*
                 * If this was a signal for an MFRR change done by
                 * H_IPI we skip it. Additionally, if we were fetching
                 * we EOI it now, thus re-enabling reception of a new
                 * such signal.
                 *
                 * We also need to do that if prio is 0 and we had no
                 * page for the queue. In this case, we have a non-queued
                 * IPI that needs to be EOId.
                 *
                 * This is safe because if we have another pending MFRR
                 * change that wasn't observed above, the Q bit will have
                 * been set and another occurrence of the IPI will trigger.
                 */
                if (hirq == XICS_IPI || (prio == 0 && !qpage)) {
                        if (scan_type == scan_fetch)
                                GLUE(X_PFX,source_eoi)(xc->vp_ipi,
                                                       &xc->vp_ipi_data);
                        /* Loop back on same queue with updated idx/toggle */
#ifdef XIVE_RUNTIME_CHECKS
                        WARN_ON(hirq && hirq != XICS_IPI);
#endif
                        if (hirq)
                                goto skip_ipi;
                }

                /* If it's the dummy interrupt, continue searching */
                if (hirq == XICS_DUMMY)
                        goto skip_ipi;

                /* If fetching, update queue pointers */
                if (scan_type == scan_fetch) {
                        q->idx = idx;
                        q->toggle = toggle;
                }

                /* Something found, stop searching */
                if (hirq)
                        break;

                /* Clear the pending bit on the now empty queue */
                pending &= ~(1 << prio);

                /*
                 * Check if the queue count needs adjusting due to
                 * interrupts being moved away.
                 */
                if (atomic_read(&q->pending_count)) {
                        int p = atomic_xchg(&q->pending_count, 0);
                        if (p) {
#ifdef XIVE_RUNTIME_CHECKS
                                WARN_ON(p > atomic_read(&q->count));
#endif
                                atomic_sub(p, &q->count);
                        }
                }
        }

        /* If we are just taking a "peek", do nothing else */
        if (scan_type == scan_poll)
                return hirq;

        /* Update the pending bits */
        xc->pending = pending;

        /*
         * If this is an EOI that's it, no CPPR adjustment done here,
         * all we needed was to clean up the stale pending bits and
         * check if there's anything left.
         */
        if (scan_type == scan_eoi)
                return hirq;

        /*
         * If we found an interrupt, adjust what the guest CPPR should
         * be as if we had just fetched that interrupt from HW.
         *
         * Note: This can only make xc->cppr smaller as the previous
         * loop will only exit with hirq != 0 if prio is lower than
         * the current xc->cppr. Thus we don't need to re-check xc->mfrr
         * for pending IPIs.
         */
        if (hirq)
                xc->cppr = prio;

        /*
         * If it was an IPI the HW CPPR might have been lowered too much
         * as the HW interrupt we use for IPIs is routed to priority 0.
         *
         * We re-sync it here.
         */
        if (xc->cppr != xc->hw_cppr) {
                xc->hw_cppr = xc->cppr;
                __x_writeb(xc->cppr, __x_tima + TM_QW1_OS + TM_CPPR);
        }

        return hirq;
}
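
/*
 * H_XIRR hcall: acknowledge and fetch the next interrupt.
 *
 * The value returned in GPR4 uses the XICS xirr layout, i.e.
 * (old_cppr << 24) | hirq, with the CPPR converted back to guest
 * numbering by xive_prio_to_guest().
 */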
X_STATIC unsigned long GLUE(X_PFX,h_xirr)(struct kvm_vcpu *vcpu)
{
        struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
        u8 old_cppr;
        u32 hirq;

        pr_devel("H_XIRR\n");

        xc->GLUE(X_STAT_PFX,h_xirr)++;

        /* First collect pending bits from HW */
        GLUE(X_PFX,ack_pending)(xc);

        pr_devel(" new pending=0x%02x hw_cppr=%d cppr=%d\n",
                 xc->pending, xc->hw_cppr, xc->cppr);

        /* Grab previous CPPR and reverse map it */
        old_cppr = xive_prio_to_guest(xc->cppr);

        /* Scan for actual interrupts */
        hirq = GLUE(X_PFX,scan_interrupts)(xc, xc->pending, scan_fetch);

        pr_devel(" got hirq=0x%x hw_cppr=%d cppr=%d\n",
                 hirq, xc->hw_cppr, xc->cppr);

#ifdef XIVE_RUNTIME_CHECKS
        /* That should never hit */
        if (hirq & 0xff000000)
                pr_warn("XIVE: Weird guest interrupt number 0x%08x\n", hirq);
#endif

        /*
         * XXX We could check if the interrupt is masked here and
         * filter it. If we chose to do so, we would need to do:
         *
         * if (masked) {
         *        lock();
         *        if (masked) {
         *                old_Q = true;
         *                hirq = 0;
         *        }
         *        unlock();
         * }
         */

        /* Return interrupt and old CPPR in GPR4 */
        vcpu->arch.regs.gpr[4] = hirq | (old_cppr << 24);

        return H_SUCCESS;
}
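
/*
 * H_IPOLL hcall: poll for a pending interrupt without dequeueing it.
 *
 * When polling another server, all priorities (0xff) are scanned;
 * for the current vcpu, the PIPR read from the TIMA is merged into
 * the cached pending bits instead.
 */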
X_STATIC unsigned long GLUE(X_PFX,h_ipoll)(struct kvm_vcpu *vcpu, unsigned long server)
{
        struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
        u8 pending = xc->pending;
        u32 hirq;

        pr_devel("H_IPOLL(server=%ld)\n", server);

        xc->GLUE(X_STAT_PFX,h_ipoll)++;

        /* Grab the target VCPU if not the current one */
        if (xc->server_num != server) {
                vcpu = kvmppc_xive_find_server(vcpu->kvm, server);
                if (!vcpu)
                        return H_PARAMETER;
                xc = vcpu->arch.xive_vcpu;

                /* Scan all priorities */
                pending = 0xff;
        } else {
                /* Grab pending interrupt if any */
                __be64 qw1 = __x_readq(__x_tima + TM_QW1_OS);
                u8 pipr = be64_to_cpu(qw1) & 0xff;
                if (pipr < 8)
                        pending |= 1 << pipr;
        }

        hirq = GLUE(X_PFX,scan_interrupts)(xc, pending, scan_poll);

        /* Return interrupt and CPPR in GPR4 */
        vcpu->arch.regs.gpr[4] = hirq | (xc->cppr << 24);

        return H_SUCCESS;
}
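
/*
 * Push the most favored pending priority (including a pending
 * MFRR-based IPI) back to the hardware via the "set OS pending"
 * TIMA register so the VP gets (re-)notified.
 */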
static void GLUE(X_PFX,push_pending_to_hw)(struct kvmppc_xive_vcpu *xc)
{
        u8 pending, prio;

        pending = xc->pending;
        if (xc->mfrr != 0xff) {
                if (xc->mfrr < 8)
                        pending |= 1 << xc->mfrr;
                else
                        pending |= 0x80;
        }
        if (!pending)
                return;

        prio = ffs(pending) - 1;
        __x_writeb(prio, __x_tima + TM_SPC_SET_OS_PENDING);
}
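
/*
 * Walk the queues of priorities that are now masked by the CPPR and
 * neutralize any entry whose source has been rerouted to another
 * server: the entry is overwritten with XICS_DUMMY, PQ is set to 11
 * for non-LSI sources, and the source is EOId so it re-triggers at
 * its new target.
 */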
static void GLUE(X_PFX,scan_for_rerouted_irqs)(struct kvmppc_xive *xive,
                                               struct kvmppc_xive_vcpu *xc)
{
        unsigned int prio;

        /* For each priority that is now masked */
        for (prio = xc->cppr; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
                struct xive_q *q = &xc->queues[prio];
                struct kvmppc_xive_irq_state *state;
                struct kvmppc_xive_src_block *sb;
                u32 idx, toggle, entry, irq, hw_num;
                struct xive_irq_data *xd;
                __be32 *qpage;
                u16 src;

                idx = q->idx;
                toggle = q->toggle;
                qpage = READ_ONCE(q->qpage);
                if (!qpage)
                        continue;

                /* For each interrupt in the queue */
                for (;;) {
                        entry = be32_to_cpup(qpage + idx);

                        /* No more ? */
                        if ((entry >> 31) == toggle)
                                break;
                        irq = entry & 0x7fffffff;

                        /* Skip dummies and IPIs */
                        if (irq == XICS_DUMMY || irq == XICS_IPI)
                                goto next;

                        sb = kvmppc_xive_find_source(xive, irq, &src);
                        if (!sb)
                                goto next;
                        state = &sb->irq_state[src];

                        /* Has it been rerouted ? */
                        if (xc->server_num == state->act_server)
                                goto next;

                        /*
                         * Alright, it *has* been re-routed, kill it from
                         * the queue.
                         */
                        qpage[idx] = cpu_to_be32((entry & 0x80000000) | XICS_DUMMY);

                        /* Find the HW interrupt */
                        kvmppc_xive_select_irq(state, &hw_num, &xd);

                        /* If it's not an LSI, set PQ to 11 so the EOI will force a resend */
                        if (!(xd->flags & XIVE_IRQ_FLAG_LSI))
                                GLUE(X_PFX,esb_load)(xd, XIVE_ESB_SET_PQ_11);

                        /* EOI the source */
                        GLUE(X_PFX,source_eoi)(hw_num, xd);

next:
                        idx = (idx + 1) & q->msk;
                        if (idx == 0)
                                toggle ^= 1;
                }
        }
}
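
/*
 * H_CPPR hcall: set the processor priority.
 *
 * When the new value masks less (cppr > old_cppr after mapping),
 * pending interrupts we previously chose not to signal must be pushed
 * back to the HW; when it masks more, the queues are scanned for
 * interrupts that have since been rerouted elsewhere.
 */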
X_STATIC int GLUE(X_PFX,h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr)
{
        struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
        struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
        u8 old_cppr;

        pr_devel("H_CPPR(cppr=%ld)\n", cppr);

        xc->GLUE(X_STAT_PFX,h_cppr)++;

        /* Map CPPR */
        cppr = xive_prio_from_guest(cppr);

        /* Remember old and update SW state */
        old_cppr = xc->cppr;
        xc->cppr = cppr;

        /*
         * Order the above update of xc->cppr with the subsequent
         * read of xc->mfrr inside push_pending_to_hw()
         */
        smp_mb();

        if (cppr > old_cppr) {
                /*
                 * We are masking less, we need to look for pending things
                 * to deliver and set VP pending bits accordingly to trigger
                 * a new interrupt otherwise we might miss MFRR changes for
                 * which we have optimized out sending an IPI signal.
                 */
                GLUE(X_PFX,push_pending_to_hw)(xc);
        } else {
                /*
                 * We are masking more, we need to check the queue for any
                 * interrupt that has been routed to another CPU, take
                 * it out (replace it with the dummy) and retrigger it.
                 *
                 * This is necessary since those interrupts may otherwise
                 * never be processed, at least not until this CPU restores
                 * its CPPR.
                 *
                 * This is in theory racy vs. HW adding new interrupts to
                 * the queue. In practice this works because the interesting
                 * cases are when the guest has done a set_xive() to move the
                 * interrupt away, which flushes the xive, followed by the
                 * target CPU doing a H_CPPR. So any new interrupt coming into
                 * the queue must still be routed to us and isn't a source
                 * of concern.
                 */
                GLUE(X_PFX,scan_for_rerouted_irqs)(xive, xc);
        }

        /* Apply new CPPR */
        xc->hw_cppr = cppr;
        __x_writeb(cppr, __x_tima + TM_QW1_OS + TM_CPPR);

        return H_SUCCESS;
}
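
/*
 * H_EOI hcall: end of interrupt.
 *
 * The xirr argument carries the new CPPR in its top byte and the
 * interrupt source number in the low 24 bits, mirroring what H_XIRR
 * returned. IPIs (and a source of 0) need no source-level EOI here.
 */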
X_STATIC int GLUE(X_PFX,h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr)
{
        struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
        struct kvmppc_xive_src_block *sb;
        struct kvmppc_xive_irq_state *state;
        struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
        struct xive_irq_data *xd;
        u8 new_cppr = xirr >> 24;
        u32 irq = xirr & 0x00ffffff, hw_num;
        u16 src;
        int rc = 0;

        pr_devel("H_EOI(xirr=%08lx)\n", xirr);

        xc->GLUE(X_STAT_PFX,h_eoi)++;

        xc->cppr = xive_prio_from_guest(new_cppr);

        /*
         * IPIs are synthesized from MFRR and thus don't need
         * any special EOI handling. The underlying interrupt
         * used to signal MFRR changes is EOId when fetched from
         * the queue.
         */
        if (irq == XICS_IPI || irq == 0) {
                /*
                 * This barrier orders the setting of xc->cppr vs.
                 * the subsequent test of xc->mfrr done inside
                 * scan_interrupts and push_pending_to_hw
                 */
                smp_mb();
                goto bail;
        }

        /* Find interrupt source */
        sb = kvmppc_xive_find_source(xive, irq, &src);
        if (!sb) {
                pr_devel(" source not found !\n");
                rc = H_PARAMETER;
                /* Same as above */
                smp_mb();
                goto bail;
        }
        state = &sb->irq_state[src];
        kvmppc_xive_select_irq(state, &hw_num, &xd);

        state->in_eoi = true;

        /*
         * This barrier orders both the setting of in_eoi above vs.
         * the subsequent test of guest_priority, and the setting
         * of xc->cppr vs. the subsequent test of xc->mfrr done inside
         * scan_interrupts and push_pending_to_hw
         */
        smp_mb();

again:
        if (state->guest_priority == MASKED) {
                arch_spin_lock(&sb->lock);
                if (state->guest_priority != MASKED) {
                        arch_spin_unlock(&sb->lock);
                        goto again;
                }
                pr_devel(" EOI on saved P...\n");

                /* Clear old_p, that will cause unmask to perform an EOI */
                state->old_p = false;

                arch_spin_unlock(&sb->lock);
        } else {
                pr_devel(" EOI on source...\n");

                /* Perform EOI on the source */
                GLUE(X_PFX,source_eoi)(hw_num, xd);

                /* If it's an emulated LSI, check level and resend */
                if (state->lsi && state->asserted)
                        __x_writeq(0, __x_trig_page(xd));
        }

        /*
         * This barrier orders the above guest_priority check
         * and spin_lock/unlock with clearing in_eoi below.
         *
         * It also has to be a full mb() as it must ensure
         * the MMIOs done in source_eoi() are completed before
         * state->in_eoi is visible.
         */
        mb();
        state->in_eoi = false;

bail:
        /* Re-evaluate pending IRQs and update HW */
        GLUE(X_PFX,scan_interrupts)(xc, xc->pending, scan_eoi);
        GLUE(X_PFX,push_pending_to_hw)(xc);
        pr_devel(" after scan pending=%02x\n", xc->pending);

        /* Apply new CPPR */
        xc->hw_cppr = xc->cppr;
        __x_writeb(xc->cppr, __x_tima + TM_QW1_OS + TM_CPPR);

        return rc;
}
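
/*
 * H_IPI hcall: write the target's MFRR and, if that priority is more
 * favored than the target's current CPPR, fire the backing hardware
 * IPI by storing to its trigger page.
 */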
X_STATIC int GLUE(X_PFX,h_ipi)(struct kvm_vcpu *vcpu, unsigned long server,
                               unsigned long mfrr)
{
        struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;

        pr_devel("H_IPI(server=%08lx,mfrr=%ld)\n", server, mfrr);

        xc->GLUE(X_STAT_PFX,h_ipi)++;

        /* Find target */
        vcpu = kvmppc_xive_find_server(vcpu->kvm, server);
        if (!vcpu)
                return H_PARAMETER;
        xc = vcpu->arch.xive_vcpu;

        /* Locklessly write over MFRR */
        xc->mfrr = mfrr;

        /*
         * The load of xc->cppr below and the subsequent MMIO store
         * to the IPI must happen after the above mfrr update is
         * globally visible so that:
         *
         * - We synchronize with another CPU doing an H_EOI or a H_CPPR
         *   that updates xc->cppr and then reads xc->mfrr.
         *
         * - The target of the IPI sees the xc->mfrr update.
         */
        mb();

        /* Shoot the IPI if more favored than the target CPPR */
        if (mfrr < xc->cppr)
                __x_writeq(0, __x_trig_page(&xc->vp_ipi_data));

        return H_SUCCESS;
}