eventfd.c 22 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932
  1. /*
  2. * kvm eventfd support - use eventfd objects to signal various KVM events
  3. *
  4. * Copyright 2009 Novell. All Rights Reserved.
  5. * Copyright 2010 Red Hat, Inc. and/or its affiliates.
  6. *
  7. * Author:
  8. * Gregory Haskins <ghaskins@novell.com>
  9. *
  10. * This file is free software; you can redistribute it and/or modify
  11. * it under the terms of version 2 of the GNU General Public License
  12. * as published by the Free Software Foundation.
  13. *
  14. * This program is distributed in the hope that it will be useful,
  15. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  17. * GNU General Public License for more details.
  18. *
  19. * You should have received a copy of the GNU General Public License
  20. * along with this program; if not, write to the Free Software Foundation,
  21. * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
  22. */
  23. #include <linux/kvm_host.h>
  24. #include <linux/kvm.h>
  25. #include <linux/workqueue.h>
  26. #include <linux/syscalls.h>
  27. #include <linux/wait.h>
  28. #include <linux/poll.h>
  29. #include <linux/file.h>
  30. #include <linux/list.h>
  31. #include <linux/eventfd.h>
  32. #include <linux/kernel.h>
  33. #include <linux/srcu.h>
  34. #include <linux/slab.h>
  35. #include <linux/seqlock.h>
  36. #include <trace/events/kvm.h>
  37. #ifdef __KVM_HAVE_IOAPIC
  38. #include "ioapic.h"
  39. #endif
  40. #include "iodev.h"
  41. #ifdef CONFIG_HAVE_KVM_IRQFD
  42. /*
  43. * --------------------------------------------------------------------
  44. * irqfd: Allows an fd to be used to inject an interrupt to the guest
  45. *
  46. * Credit goes to Avi Kivity for the original idea.
  47. * --------------------------------------------------------------------
  48. */
  49. /*
  50. * Resampling irqfds are a special variety of irqfds used to emulate
  51. * level triggered interrupts. The interrupt is asserted on eventfd
  52. * trigger. On acknowledgement through the irq ack notifier, the
  53. * interrupt is de-asserted and userspace is notified through the
  54. * resamplefd. All resamplers on the same gsi are de-asserted
  55. * together, so we don't need to track the state of each individual
  56. * user. We can also therefore share the same irq source ID.
  57. */
  58. struct _irqfd_resampler {
  59. struct kvm *kvm;
  60. /*
  61. * List of resampling struct _irqfd objects sharing this gsi.
  62. * RCU list modified under kvm->irqfds.resampler_lock
  63. */
  64. struct list_head list;
  65. struct kvm_irq_ack_notifier notifier;
  66. /*
  67. * Entry in list of kvm->irqfd.resampler_list. Use for sharing
  68. * resamplers among irqfds on the same gsi.
  69. * Accessed and modified under kvm->irqfds.resampler_lock
  70. */
  71. struct list_head link;
  72. };
  73. struct _irqfd {
  74. /* Used for MSI fast-path */
  75. struct kvm *kvm;
  76. wait_queue_t wait;
  77. /* Update side is protected by irqfds.lock */
  78. struct kvm_kernel_irq_routing_entry irq_entry;
  79. seqcount_t irq_entry_sc;
  80. /* Used for level IRQ fast-path */
  81. int gsi;
  82. struct work_struct inject;
  83. /* The resampler used by this irqfd (resampler-only) */
  84. struct _irqfd_resampler *resampler;
  85. /* Eventfd notified on resample (resampler-only) */
  86. struct eventfd_ctx *resamplefd;
  87. /* Entry in list of irqfds for a resampler (resampler-only) */
  88. struct list_head resampler_link;
  89. /* Used for setup/shutdown */
  90. struct eventfd_ctx *eventfd;
  91. struct list_head list;
  92. poll_table pt;
  93. struct work_struct shutdown;
  94. };
  /* Workqueue on which deferred irqfd shutdown work is queued. */
  static struct workqueue_struct *irqfd_cleanup_wq;
  96. static void
  97. irqfd_inject(struct work_struct *work)
  98. {
  99. struct _irqfd *irqfd = container_of(work, struct _irqfd, inject);
  100. struct kvm *kvm = irqfd->kvm;
  101. if (!irqfd->resampler) {
  102. kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1,
  103. false);
  104. kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0,
  105. false);
  106. } else
  107. kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
  108. irqfd->gsi, 1, false);
  109. }
  110. /*
  111. * Since resampler irqfds share an IRQ source ID, we de-assert once
  112. * then notify all of the resampler irqfds using this GSI. We can't
  113. * do multiple de-asserts or we risk racing with incoming re-asserts.
  114. */
  115. static void
  116. irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian)
  117. {
  118. struct _irqfd_resampler *resampler;
  119. struct kvm *kvm;
  120. struct _irqfd *irqfd;
  121. int idx;
  122. resampler = container_of(kian, struct _irqfd_resampler, notifier);
  123. kvm = resampler->kvm;
  124. kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
  125. resampler->notifier.gsi, 0, false);
  126. idx = srcu_read_lock(&kvm->irq_srcu);
  127. list_for_each_entry_rcu(irqfd, &resampler->list, resampler_link)
  128. eventfd_signal(irqfd->resamplefd, 1);
  129. srcu_read_unlock(&kvm->irq_srcu, idx);
  130. }
  131. static void
  132. irqfd_resampler_shutdown(struct _irqfd *irqfd)
  133. {
  134. struct _irqfd_resampler *resampler = irqfd->resampler;
  135. struct kvm *kvm = resampler->kvm;
  136. mutex_lock(&kvm->irqfds.resampler_lock);
  137. list_del_rcu(&irqfd->resampler_link);
  138. synchronize_srcu(&kvm->irq_srcu);
  139. if (list_empty(&resampler->list)) {
  140. list_del(&resampler->link);
  141. kvm_unregister_irq_ack_notifier(kvm, &resampler->notifier);
  142. kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
  143. resampler->notifier.gsi, 0, false);
  144. kfree(resampler);
  145. }
  146. mutex_unlock(&kvm->irqfds.resampler_lock);
  147. }
  148. /*
  149. * Race-free decouple logic (ordering is critical)
  150. */
  151. static void
  152. irqfd_shutdown(struct work_struct *work)
  153. {
  154. struct _irqfd *irqfd = container_of(work, struct _irqfd, shutdown);
  155. u64 cnt;
  156. /*
  157. * Synchronize with the wait-queue and unhook ourselves to prevent
  158. * further events.
  159. */
  160. eventfd_ctx_remove_wait_queue(irqfd->eventfd, &irqfd->wait, &cnt);
  161. /*
  162. * We know no new events will be scheduled at this point, so block
  163. * until all previously outstanding events have completed
  164. */
  165. flush_work(&irqfd->inject);
  166. if (irqfd->resampler) {
  167. irqfd_resampler_shutdown(irqfd);
  168. eventfd_ctx_put(irqfd->resamplefd);
  169. }
  170. /*
  171. * It is now safe to release the object's resources
  172. */
  173. eventfd_ctx_put(irqfd->eventfd);
  174. kfree(irqfd);
  175. }
  176. /* assumes kvm->irqfds.lock is held */
  177. static bool
  178. irqfd_is_active(struct _irqfd *irqfd)
  179. {
  180. return list_empty(&irqfd->list) ? false : true;
  181. }
  182. /*
  183. * Mark the irqfd as inactive and schedule it for removal
  184. *
  185. * assumes kvm->irqfds.lock is held
  186. */
  187. static void
  188. irqfd_deactivate(struct _irqfd *irqfd)
  189. {
  190. BUG_ON(!irqfd_is_active(irqfd));
  191. list_del_init(&irqfd->list);
  192. queue_work(irqfd_cleanup_wq, &irqfd->shutdown);
  193. }
  194. /*
  195. * Called with wqh->lock held and interrupts disabled
  196. */
  197. static int
  198. irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
  199. {
  200. struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait);
  201. unsigned long flags = (unsigned long)key;
  202. struct kvm_kernel_irq_routing_entry irq;
  203. struct kvm *kvm = irqfd->kvm;
  204. unsigned seq;
  205. int idx;
  206. if (flags & POLLIN) {
  207. idx = srcu_read_lock(&kvm->irq_srcu);
  208. do {
  209. seq = read_seqcount_begin(&irqfd->irq_entry_sc);
  210. irq = irqfd->irq_entry;
  211. } while (read_seqcount_retry(&irqfd->irq_entry_sc, seq));
  212. /* An event has been signaled, inject an interrupt */
  213. if (irq.type == KVM_IRQ_ROUTING_MSI)
  214. kvm_set_msi(&irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1,
  215. false);
  216. else
  217. schedule_work(&irqfd->inject);
  218. srcu_read_unlock(&kvm->irq_srcu, idx);
  219. }
  220. if (flags & POLLHUP) {
  221. /* The eventfd is closing, detach from KVM */
  222. unsigned long flags;
  223. spin_lock_irqsave(&kvm->irqfds.lock, flags);
  224. /*
  225. * We must check if someone deactivated the irqfd before
  226. * we could acquire the irqfds.lock since the item is
  227. * deactivated from the KVM side before it is unhooked from
  228. * the wait-queue. If it is already deactivated, we can
  229. * simply return knowing the other side will cleanup for us.
  230. * We cannot race against the irqfd going away since the
  231. * other side is required to acquire wqh->lock, which we hold
  232. */
  233. if (irqfd_is_active(irqfd))
  234. irqfd_deactivate(irqfd);
  235. spin_unlock_irqrestore(&kvm->irqfds.lock, flags);
  236. }
  237. return 0;
  238. }
  239. static void
  240. irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
  241. poll_table *pt)
  242. {
  243. struct _irqfd *irqfd = container_of(pt, struct _irqfd, pt);
  244. add_wait_queue(wqh, &irqfd->wait);
  245. }
  246. /* Must be called under irqfds.lock */
  247. static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd)
  248. {
  249. struct kvm_kernel_irq_routing_entry *e;
  250. struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
  251. int i, n_entries;
  252. n_entries = kvm_irq_map_gsi(kvm, entries, irqfd->gsi);
  253. write_seqcount_begin(&irqfd->irq_entry_sc);
  254. irqfd->irq_entry.type = 0;
  255. e = entries;
  256. for (i = 0; i < n_entries; ++i, ++e) {
  257. /* Only fast-path MSI. */
  258. if (e->type == KVM_IRQ_ROUTING_MSI)
  259. irqfd->irq_entry = *e;
  260. }
  261. write_seqcount_end(&irqfd->irq_entry_sc);
  262. }
  263. static int
  264. kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
  265. {
  266. struct _irqfd *irqfd, *tmp;
  267. struct fd f;
  268. struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL;
  269. int ret;
  270. unsigned int events;
  271. int idx;
  272. irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL);
  273. if (!irqfd)
  274. return -ENOMEM;
  275. irqfd->kvm = kvm;
  276. irqfd->gsi = args->gsi;
  277. INIT_LIST_HEAD(&irqfd->list);
  278. INIT_WORK(&irqfd->inject, irqfd_inject);
  279. INIT_WORK(&irqfd->shutdown, irqfd_shutdown);
  280. seqcount_init(&irqfd->irq_entry_sc);
  281. f = fdget(args->fd);
  282. if (!f.file) {
  283. ret = -EBADF;
  284. goto out;
  285. }
  286. eventfd = eventfd_ctx_fileget(f.file);
  287. if (IS_ERR(eventfd)) {
  288. ret = PTR_ERR(eventfd);
  289. goto fail;
  290. }
  291. irqfd->eventfd = eventfd;
  292. if (args->flags & KVM_IRQFD_FLAG_RESAMPLE) {
  293. struct _irqfd_resampler *resampler;
  294. resamplefd = eventfd_ctx_fdget(args->resamplefd);
  295. if (IS_ERR(resamplefd)) {
  296. ret = PTR_ERR(resamplefd);
  297. goto fail;
  298. }
  299. irqfd->resamplefd = resamplefd;
  300. INIT_LIST_HEAD(&irqfd->resampler_link);
  301. mutex_lock(&kvm->irqfds.resampler_lock);
  302. list_for_each_entry(resampler,
  303. &kvm->irqfds.resampler_list, link) {
  304. if (resampler->notifier.gsi == irqfd->gsi) {
  305. irqfd->resampler = resampler;
  306. break;
  307. }
  308. }
  309. if (!irqfd->resampler) {
  310. resampler = kzalloc(sizeof(*resampler), GFP_KERNEL);
  311. if (!resampler) {
  312. ret = -ENOMEM;
  313. mutex_unlock(&kvm->irqfds.resampler_lock);
  314. goto fail;
  315. }
  316. resampler->kvm = kvm;
  317. INIT_LIST_HEAD(&resampler->list);
  318. resampler->notifier.gsi = irqfd->gsi;
  319. resampler->notifier.irq_acked = irqfd_resampler_ack;
  320. INIT_LIST_HEAD(&resampler->link);
  321. list_add(&resampler->link, &kvm->irqfds.resampler_list);
  322. kvm_register_irq_ack_notifier(kvm,
  323. &resampler->notifier);
  324. irqfd->resampler = resampler;
  325. }
  326. list_add_rcu(&irqfd->resampler_link, &irqfd->resampler->list);
  327. synchronize_srcu(&kvm->irq_srcu);
  328. mutex_unlock(&kvm->irqfds.resampler_lock);
  329. }
  330. /*
  331. * Install our own custom wake-up handling so we are notified via
  332. * a callback whenever someone signals the underlying eventfd
  333. */
  334. init_waitqueue_func_entry(&irqfd->wait, irqfd_wakeup);
  335. init_poll_funcptr(&irqfd->pt, irqfd_ptable_queue_proc);
  336. spin_lock_irq(&kvm->irqfds.lock);
  337. ret = 0;
  338. list_for_each_entry(tmp, &kvm->irqfds.items, list) {
  339. if (irqfd->eventfd != tmp->eventfd)
  340. continue;
  341. /* This fd is used for another irq already. */
  342. ret = -EBUSY;
  343. spin_unlock_irq(&kvm->irqfds.lock);
  344. goto fail;
  345. }
  346. idx = srcu_read_lock(&kvm->irq_srcu);
  347. irqfd_update(kvm, irqfd);
  348. srcu_read_unlock(&kvm->irq_srcu, idx);
  349. list_add_tail(&irqfd->list, &kvm->irqfds.items);
  350. spin_unlock_irq(&kvm->irqfds.lock);
  351. /*
  352. * Check if there was an event already pending on the eventfd
  353. * before we registered, and trigger it as if we didn't miss it.
  354. */
  355. events = f.file->f_op->poll(f.file, &irqfd->pt);
  356. if (events & POLLIN)
  357. schedule_work(&irqfd->inject);
  358. /*
  359. * do not drop the file until the irqfd is fully initialized, otherwise
  360. * we might race against the POLLHUP
  361. */
  362. fdput(f);
  363. return 0;
  364. fail:
  365. if (irqfd->resampler)
  366. irqfd_resampler_shutdown(irqfd);
  367. if (resamplefd && !IS_ERR(resamplefd))
  368. eventfd_ctx_put(resamplefd);
  369. if (eventfd && !IS_ERR(eventfd))
  370. eventfd_ctx_put(eventfd);
  371. fdput(f);
  372. out:
  373. kfree(irqfd);
  374. return ret;
  375. }
  376. bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
  377. {
  378. struct kvm_irq_ack_notifier *kian;
  379. int gsi, idx;
  380. idx = srcu_read_lock(&kvm->irq_srcu);
  381. gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
  382. if (gsi != -1)
  383. hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
  384. link)
  385. if (kian->gsi == gsi) {
  386. srcu_read_unlock(&kvm->irq_srcu, idx);
  387. return true;
  388. }
  389. srcu_read_unlock(&kvm->irq_srcu, idx);
  390. return false;
  391. }
  392. EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);
  393. void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
  394. {
  395. struct kvm_irq_ack_notifier *kian;
  396. int gsi, idx;
  397. trace_kvm_ack_irq(irqchip, pin);
  398. idx = srcu_read_lock(&kvm->irq_srcu);
  399. gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
  400. if (gsi != -1)
  401. hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
  402. link)
  403. if (kian->gsi == gsi)
  404. kian->irq_acked(kian);
  405. srcu_read_unlock(&kvm->irq_srcu, idx);
  406. }
  407. void kvm_register_irq_ack_notifier(struct kvm *kvm,
  408. struct kvm_irq_ack_notifier *kian)
  409. {
  410. mutex_lock(&kvm->irq_lock);
  411. hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
  412. mutex_unlock(&kvm->irq_lock);
  413. #ifdef __KVM_HAVE_IOAPIC
  414. kvm_vcpu_request_scan_ioapic(kvm);
  415. #endif
  416. }
  417. void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
  418. struct kvm_irq_ack_notifier *kian)
  419. {
  420. mutex_lock(&kvm->irq_lock);
  421. hlist_del_init_rcu(&kian->link);
  422. mutex_unlock(&kvm->irq_lock);
  423. synchronize_srcu(&kvm->irq_srcu);
  424. #ifdef __KVM_HAVE_IOAPIC
  425. kvm_vcpu_request_scan_ioapic(kvm);
  426. #endif
  427. }
  428. #endif
  429. void
  430. kvm_eventfd_init(struct kvm *kvm)
  431. {
  432. #ifdef CONFIG_HAVE_KVM_IRQFD
  433. spin_lock_init(&kvm->irqfds.lock);
  434. INIT_LIST_HEAD(&kvm->irqfds.items);
  435. INIT_LIST_HEAD(&kvm->irqfds.resampler_list);
  436. mutex_init(&kvm->irqfds.resampler_lock);
  437. #endif
  438. INIT_LIST_HEAD(&kvm->ioeventfds);
  439. }
  440. #ifdef CONFIG_HAVE_KVM_IRQFD
  441. /*
  442. * shutdown any irqfd's that match fd+gsi
  443. */
  444. static int
  445. kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args)
  446. {
  447. struct _irqfd *irqfd, *tmp;
  448. struct eventfd_ctx *eventfd;
  449. eventfd = eventfd_ctx_fdget(args->fd);
  450. if (IS_ERR(eventfd))
  451. return PTR_ERR(eventfd);
  452. spin_lock_irq(&kvm->irqfds.lock);
  453. list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) {
  454. if (irqfd->eventfd == eventfd && irqfd->gsi == args->gsi) {
  455. /*
  456. * This clearing of irq_entry.type is needed for when
  457. * another thread calls kvm_irq_routing_update before
  458. * we flush workqueue below (we synchronize with
  459. * kvm_irq_routing_update using irqfds.lock).
  460. */
  461. write_seqcount_begin(&irqfd->irq_entry_sc);
  462. irqfd->irq_entry.type = 0;
  463. write_seqcount_end(&irqfd->irq_entry_sc);
  464. irqfd_deactivate(irqfd);
  465. }
  466. }
  467. spin_unlock_irq(&kvm->irqfds.lock);
  468. eventfd_ctx_put(eventfd);
  469. /*
  470. * Block until we know all outstanding shutdown jobs have completed
  471. * so that we guarantee there will not be any more interrupts on this
  472. * gsi once this deassign function returns.
  473. */
  474. flush_workqueue(irqfd_cleanup_wq);
  475. return 0;
  476. }
  477. int
  478. kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
  479. {
  480. if (args->flags & ~(KVM_IRQFD_FLAG_DEASSIGN | KVM_IRQFD_FLAG_RESAMPLE))
  481. return -EINVAL;
  482. if (args->flags & KVM_IRQFD_FLAG_DEASSIGN)
  483. return kvm_irqfd_deassign(kvm, args);
  484. return kvm_irqfd_assign(kvm, args);
  485. }
  486. /*
  487. * This function is called as the kvm VM fd is being released. Shutdown all
  488. * irqfds that still remain open
  489. */
  490. void
  491. kvm_irqfd_release(struct kvm *kvm)
  492. {
  493. struct _irqfd *irqfd, *tmp;
  494. spin_lock_irq(&kvm->irqfds.lock);
  495. list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list)
  496. irqfd_deactivate(irqfd);
  497. spin_unlock_irq(&kvm->irqfds.lock);
  498. /*
  499. * Block until we know all outstanding shutdown jobs have completed
  500. * since we do not take a kvm* reference.
  501. */
  502. flush_workqueue(irqfd_cleanup_wq);
  503. }
  504. /*
  505. * Take note of a change in irq routing.
  506. * Caller must invoke synchronize_srcu(&kvm->irq_srcu) afterwards.
  507. */
  508. void kvm_irq_routing_update(struct kvm *kvm)
  509. {
  510. struct _irqfd *irqfd;
  511. spin_lock_irq(&kvm->irqfds.lock);
  512. list_for_each_entry(irqfd, &kvm->irqfds.items, list)
  513. irqfd_update(kvm, irqfd);
  514. spin_unlock_irq(&kvm->irqfds.lock);
  515. }
  516. /*
  517. * create a host-wide workqueue for issuing deferred shutdown requests
  518. * aggregated from all vm* instances. We need our own isolated single-thread
  519. * queue to prevent deadlock against flushing the normal work-queue.
  520. */
  521. int kvm_irqfd_init(void)
  522. {
  523. irqfd_cleanup_wq = create_singlethread_workqueue("kvm-irqfd-cleanup");
  524. if (!irqfd_cleanup_wq)
  525. return -ENOMEM;
  526. return 0;
  527. }
  528. void kvm_irqfd_exit(void)
  529. {
  530. destroy_workqueue(irqfd_cleanup_wq);
  531. }
  532. #endif
  533. /*
  534. * --------------------------------------------------------------------
  535. * ioeventfd: translate a PIO/MMIO memory write to an eventfd signal.
  536. *
  537. * userspace can register a PIO/MMIO address with an eventfd for receiving
  538. * notification when the memory has been touched.
  539. * --------------------------------------------------------------------
  540. */
  541. struct _ioeventfd {
  542. struct list_head list;
  543. u64 addr;
  544. int length;
  545. struct eventfd_ctx *eventfd;
  546. u64 datamatch;
  547. struct kvm_io_device dev;
  548. u8 bus_idx;
  549. bool wildcard;
  550. };
  551. static inline struct _ioeventfd *
  552. to_ioeventfd(struct kvm_io_device *dev)
  553. {
  554. return container_of(dev, struct _ioeventfd, dev);
  555. }
  556. static void
  557. ioeventfd_release(struct _ioeventfd *p)
  558. {
  559. eventfd_ctx_put(p->eventfd);
  560. list_del(&p->list);
  561. kfree(p);
  562. }
  563. static bool
  564. ioeventfd_in_range(struct _ioeventfd *p, gpa_t addr, int len, const void *val)
  565. {
  566. u64 _val;
  567. if (addr != p->addr)
  568. /* address must be precise for a hit */
  569. return false;
  570. if (!p->length)
  571. /* length = 0 means only look at the address, so always a hit */
  572. return true;
  573. if (len != p->length)
  574. /* address-range must be precise for a hit */
  575. return false;
  576. if (p->wildcard)
  577. /* all else equal, wildcard is always a hit */
  578. return true;
  579. /* otherwise, we have to actually compare the data */
  580. BUG_ON(!IS_ALIGNED((unsigned long)val, len));
  581. switch (len) {
  582. case 1:
  583. _val = *(u8 *)val;
  584. break;
  585. case 2:
  586. _val = *(u16 *)val;
  587. break;
  588. case 4:
  589. _val = *(u32 *)val;
  590. break;
  591. case 8:
  592. _val = *(u64 *)val;
  593. break;
  594. default:
  595. return false;
  596. }
  597. return _val == p->datamatch ? true : false;
  598. }
  599. /* MMIO/PIO writes trigger an event if the addr/val match */
  600. static int
  601. ioeventfd_write(struct kvm_io_device *this, gpa_t addr, int len,
  602. const void *val)
  603. {
  604. struct _ioeventfd *p = to_ioeventfd(this);
  605. if (!ioeventfd_in_range(p, addr, len, val))
  606. return -EOPNOTSUPP;
  607. eventfd_signal(p->eventfd, 1);
  608. return 0;
  609. }
  /*
   * Destructor of the ioeventfd I/O device.  It is called as KVM is
   * completely shutting down, so locking is not a concern: simply release
   * the ioeventfd as quickly as possible.
   */
  static void
  ioeventfd_destructor(struct kvm_io_device *this)
  {
      ioeventfd_release(to_ioeventfd(this));
  }
  620. static const struct kvm_io_device_ops ioeventfd_ops = {
  621. .write = ioeventfd_write,
  622. .destructor = ioeventfd_destructor,
  623. };
  624. /* assumes kvm->slots_lock held */
  625. static bool
  626. ioeventfd_check_collision(struct kvm *kvm, struct _ioeventfd *p)
  627. {
  628. struct _ioeventfd *_p;
  629. list_for_each_entry(_p, &kvm->ioeventfds, list)
  630. if (_p->bus_idx == p->bus_idx &&
  631. _p->addr == p->addr &&
  632. (!_p->length || !p->length ||
  633. (_p->length == p->length &&
  634. (_p->wildcard || p->wildcard ||
  635. _p->datamatch == p->datamatch))))
  636. return true;
  637. return false;
  638. }
  639. static enum kvm_bus ioeventfd_bus_from_flags(__u32 flags)
  640. {
  641. if (flags & KVM_IOEVENTFD_FLAG_PIO)
  642. return KVM_PIO_BUS;
  643. if (flags & KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY)
  644. return KVM_VIRTIO_CCW_NOTIFY_BUS;
  645. return KVM_MMIO_BUS;
  646. }
  647. static int
  648. kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
  649. {
  650. enum kvm_bus bus_idx;
  651. struct _ioeventfd *p;
  652. struct eventfd_ctx *eventfd;
  653. int ret;
  654. bus_idx = ioeventfd_bus_from_flags(args->flags);
  655. /* must be natural-word sized, or 0 to ignore length */
  656. switch (args->len) {
  657. case 0:
  658. case 1:
  659. case 2:
  660. case 4:
  661. case 8:
  662. break;
  663. default:
  664. return -EINVAL;
  665. }
  666. /* check for range overflow */
  667. if (args->addr + args->len < args->addr)
  668. return -EINVAL;
  669. /* check for extra flags that we don't understand */
  670. if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK)
  671. return -EINVAL;
  672. /* ioeventfd with no length can't be combined with DATAMATCH */
  673. if (!args->len &&
  674. args->flags & (KVM_IOEVENTFD_FLAG_PIO |
  675. KVM_IOEVENTFD_FLAG_DATAMATCH))
  676. return -EINVAL;
  677. eventfd = eventfd_ctx_fdget(args->fd);
  678. if (IS_ERR(eventfd))
  679. return PTR_ERR(eventfd);
  680. p = kzalloc(sizeof(*p), GFP_KERNEL);
  681. if (!p) {
  682. ret = -ENOMEM;
  683. goto fail;
  684. }
  685. INIT_LIST_HEAD(&p->list);
  686. p->addr = args->addr;
  687. p->bus_idx = bus_idx;
  688. p->length = args->len;
  689. p->eventfd = eventfd;
  690. /* The datamatch feature is optional, otherwise this is a wildcard */
  691. if (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH)
  692. p->datamatch = args->datamatch;
  693. else
  694. p->wildcard = true;
  695. mutex_lock(&kvm->slots_lock);
  696. /* Verify that there isn't a match already */
  697. if (ioeventfd_check_collision(kvm, p)) {
  698. ret = -EEXIST;
  699. goto unlock_fail;
  700. }
  701. kvm_iodevice_init(&p->dev, &ioeventfd_ops);
  702. ret = kvm_io_bus_register_dev(kvm, bus_idx, p->addr, p->length,
  703. &p->dev);
  704. if (ret < 0)
  705. goto unlock_fail;
  706. /* When length is ignored, MMIO is also put on a separate bus, for
  707. * faster lookups.
  708. */
  709. if (!args->len && !(args->flags & KVM_IOEVENTFD_FLAG_PIO)) {
  710. ret = kvm_io_bus_register_dev(kvm, KVM_FAST_MMIO_BUS,
  711. p->addr, 0, &p->dev);
  712. if (ret < 0)
  713. goto register_fail;
  714. }
  715. kvm->buses[bus_idx]->ioeventfd_count++;
  716. list_add_tail(&p->list, &kvm->ioeventfds);
  717. mutex_unlock(&kvm->slots_lock);
  718. return 0;
  719. register_fail:
  720. kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
  721. unlock_fail:
  722. mutex_unlock(&kvm->slots_lock);
  723. fail:
  724. kfree(p);
  725. eventfd_ctx_put(eventfd);
  726. return ret;
  727. }
  728. static int
  729. kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
  730. {
  731. enum kvm_bus bus_idx;
  732. struct _ioeventfd *p, *tmp;
  733. struct eventfd_ctx *eventfd;
  734. int ret = -ENOENT;
  735. bus_idx = ioeventfd_bus_from_flags(args->flags);
  736. eventfd = eventfd_ctx_fdget(args->fd);
  737. if (IS_ERR(eventfd))
  738. return PTR_ERR(eventfd);
  739. mutex_lock(&kvm->slots_lock);
  740. list_for_each_entry_safe(p, tmp, &kvm->ioeventfds, list) {
  741. bool wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH);
  742. if (p->bus_idx != bus_idx ||
  743. p->eventfd != eventfd ||
  744. p->addr != args->addr ||
  745. p->length != args->len ||
  746. p->wildcard != wildcard)
  747. continue;
  748. if (!p->wildcard && p->datamatch != args->datamatch)
  749. continue;
  750. kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
  751. if (!p->length) {
  752. kvm_io_bus_unregister_dev(kvm, KVM_FAST_MMIO_BUS,
  753. &p->dev);
  754. }
  755. kvm->buses[bus_idx]->ioeventfd_count--;
  756. ioeventfd_release(p);
  757. ret = 0;
  758. break;
  759. }
  760. mutex_unlock(&kvm->slots_lock);
  761. eventfd_ctx_put(eventfd);
  762. return ret;
  763. }
  764. int
  765. kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
  766. {
  767. if (args->flags & KVM_IOEVENTFD_FLAG_DEASSIGN)
  768. return kvm_deassign_ioeventfd(kvm, args);
  769. return kvm_assign_ioeventfd(kvm, args);
  770. }