virtio_pci_common.c

/*
 * Virtio PCI driver - common functionality for all device versions
 *
 * This module allows virtio devices to be used over a virtual PCI device.
 * This can be used with QEMU based VMMs like KVM or Xen.
 *
 * Copyright IBM Corp. 2007
 * Copyright Red Hat, Inc. 2014
 *
 * Authors:
 *  Anthony Liguori  <aliguori@us.ibm.com>
 *  Rusty Russell <rusty@rustcorp.com.au>
 *  Michael S. Tsirkin <mst@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "virtio_pci_common.h"

static bool force_legacy = false;

#if IS_ENABLED(CONFIG_VIRTIO_PCI_LEGACY)
module_param(force_legacy, bool, 0444);
MODULE_PARM_DESC(force_legacy,
		 "Force legacy mode for transitional virtio 1 devices");
#endif

/* wait for pending irq handlers */
void vp_synchronize_vectors(struct virtio_device *vdev)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	int i;

	if (vp_dev->intx_enabled)
		synchronize_irq(vp_dev->pci_dev->irq);

	for (i = 0; i < vp_dev->msix_vectors; ++i)
		synchronize_irq(pci_irq_vector(vp_dev->pci_dev, i));
}

/* the notify function used when creating a virt queue */
bool vp_notify(struct virtqueue *vq)
{
	/* we write the queue's selector into the notification register to
	 * signal the other end */
	iowrite16(vq->index, (void __iomem *)vq->priv);
	return true;
}
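
/*
 * Illustrative note (not part of the original file): the version-specific
 * setup_vq() is expected to leave the ioremapped notification address in
 * vq->priv.  For the legacy register layout that is roughly
 *
 *	vq->priv = (void __force *)vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY;
 *
 * so vp_notify() above only needs a single iowrite16() of the queue index.
 */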

/* Handle a configuration change: Tell driver if it wants to know. */
static irqreturn_t vp_config_changed(int irq, void *opaque)
{
	struct virtio_pci_device *vp_dev = opaque;

	virtio_config_changed(&vp_dev->vdev);
	return IRQ_HANDLED;
}

/* Notify all virtqueues on an interrupt. */
static irqreturn_t vp_vring_interrupt(int irq, void *opaque)
{
	struct virtio_pci_device *vp_dev = opaque;
	struct virtio_pci_vq_info *info;
	irqreturn_t ret = IRQ_NONE;
	unsigned long flags;

	spin_lock_irqsave(&vp_dev->lock, flags);
	list_for_each_entry(info, &vp_dev->virtqueues, node) {
		if (vring_interrupt(irq, info->vq) == IRQ_HANDLED)
			ret = IRQ_HANDLED;
	}
	spin_unlock_irqrestore(&vp_dev->lock, flags);

	return ret;
}

/* A small wrapper to also acknowledge the interrupt when it's handled.
 * I really need an EIO hook for the vring so I can ack the interrupt once we
 * know that we'll be handling the IRQ but before we invoke the callback since
 * the callback may notify the host which results in the host attempting to
 * raise an interrupt that we would then mask once we acknowledged the
 * interrupt. */
static irqreturn_t vp_interrupt(int irq, void *opaque)
{
	struct virtio_pci_device *vp_dev = opaque;
	u8 isr;

	/* reading the ISR has the effect of also clearing it so it's very
	 * important to save off the value. */
	isr = ioread8(vp_dev->isr);

	/* It's definitely not us if the ISR was not high */
	if (!isr)
		return IRQ_NONE;

	/* Configuration change? Tell driver if it wants to know. */
	if (isr & VIRTIO_PCI_ISR_CONFIG)
		vp_config_changed(irq, opaque);

	return vp_vring_interrupt(irq, opaque);
}
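
/*
 * For reference (per the virtio PCI spec): bit 0 of the ISR signals a
 * virtqueue interrupt and bit 1 (VIRTIO_PCI_ISR_CONFIG) a configuration
 * change.  The read in vp_interrupt() clears both at once, which is why
 * the value is saved before it is tested.
 */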

static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors,
				   bool per_vq_vectors, struct irq_affinity *desc)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	const char *name = dev_name(&vp_dev->vdev.dev);
	unsigned i, v;
	int err = -ENOMEM;

	vp_dev->msix_vectors = nvectors;

	vp_dev->msix_names = kmalloc(nvectors * sizeof *vp_dev->msix_names,
				     GFP_KERNEL);
	if (!vp_dev->msix_names)
		goto error;
	vp_dev->msix_affinity_masks
		= kzalloc(nvectors * sizeof *vp_dev->msix_affinity_masks,
			  GFP_KERNEL);
	if (!vp_dev->msix_affinity_masks)
		goto error;
	for (i = 0; i < nvectors; ++i)
		if (!alloc_cpumask_var(&vp_dev->msix_affinity_masks[i],
				       GFP_KERNEL))
			goto error;

	err = pci_alloc_irq_vectors_affinity(vp_dev->pci_dev, nvectors,
					     nvectors, PCI_IRQ_MSIX |
					     (desc ? PCI_IRQ_AFFINITY : 0),
					     desc);
	if (err < 0)
		goto error;
	vp_dev->msix_enabled = 1;

	/* Set the vector used for configuration */
	v = vp_dev->msix_used_vectors;
	snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
		 "%s-config", name);
	err = request_irq(pci_irq_vector(vp_dev->pci_dev, v),
			  vp_config_changed, 0, vp_dev->msix_names[v],
			  vp_dev);
	if (err)
		goto error;
	++vp_dev->msix_used_vectors;

	v = vp_dev->config_vector(vp_dev, v);
	/* Verify we had enough resources to assign the vector */
	if (v == VIRTIO_MSI_NO_VECTOR) {
		err = -EBUSY;
		goto error;
	}

	if (!per_vq_vectors) {
		/* Shared vector for all VQs */
		v = vp_dev->msix_used_vectors;
		snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
			 "%s-virtqueues", name);
		err = request_irq(pci_irq_vector(vp_dev->pci_dev, v),
				  vp_vring_interrupt, 0, vp_dev->msix_names[v],
				  vp_dev);
		if (err)
			goto error;
		++vp_dev->msix_used_vectors;
	}
	return 0;
error:
	return err;
}
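
/*
 * Resulting MSI-X layout (illustrative sketch):
 *
 *	per_vq_vectors == true		per_vq_vectors == false
 *	vector 0: config changes	vector 0: config changes
 *	vector 1: first vq with cb	vector 1: shared by all vqs
 *	vector 2: next vq with cb
 *	...
 *
 * vp_find_vqs_msix() below hands out the per-vq vectors starting right
 * after msix_used_vectors; vqs without a callback get VIRTIO_MSI_NO_VECTOR.
 */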

static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned index,
				     void (*callback)(struct virtqueue *vq),
				     const char *name,
				     u16 msix_vec)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	struct virtio_pci_vq_info *info = kmalloc(sizeof *info, GFP_KERNEL);
	struct virtqueue *vq;
	unsigned long flags;

	/* fill out our structure that represents an active queue */
	if (!info)
		return ERR_PTR(-ENOMEM);

	vq = vp_dev->setup_vq(vp_dev, info, index, callback, name,
			      msix_vec);
	if (IS_ERR(vq))
		goto out_info;

	info->vq = vq;
	if (callback) {
		spin_lock_irqsave(&vp_dev->lock, flags);
		list_add(&info->node, &vp_dev->virtqueues);
		spin_unlock_irqrestore(&vp_dev->lock, flags);
	} else {
		INIT_LIST_HEAD(&info->node);
	}

	vp_dev->vqs[index] = info;
	return vq;

out_info:
	kfree(info);
	return vq;
}

static void vp_del_vq(struct virtqueue *vq)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
	struct virtio_pci_vq_info *info = vp_dev->vqs[vq->index];
	unsigned long flags;

	spin_lock_irqsave(&vp_dev->lock, flags);
	list_del(&info->node);
	spin_unlock_irqrestore(&vp_dev->lock, flags);

	vp_dev->del_vq(info);
	kfree(info);
}

/* the config->del_vqs() implementation */
void vp_del_vqs(struct virtio_device *vdev)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	struct virtqueue *vq, *n;
	int i;

	list_for_each_entry_safe(vq, n, &vdev->vqs, list) {
		if (vp_dev->per_vq_vectors) {
			int v = vp_dev->vqs[vq->index]->msix_vector;

			if (v != VIRTIO_MSI_NO_VECTOR) {
				int irq = pci_irq_vector(vp_dev->pci_dev, v);

				irq_set_affinity_hint(irq, NULL);
				free_irq(irq, vq);
			}
		}
		vp_del_vq(vq);
	}
	vp_dev->per_vq_vectors = false;

	if (vp_dev->intx_enabled) {
		free_irq(vp_dev->pci_dev->irq, vp_dev);
		vp_dev->intx_enabled = 0;
	}

	for (i = 0; i < vp_dev->msix_used_vectors; ++i)
		free_irq(pci_irq_vector(vp_dev->pci_dev, i), vp_dev);

	for (i = 0; i < vp_dev->msix_vectors; i++)
		if (vp_dev->msix_affinity_masks[i])
			free_cpumask_var(vp_dev->msix_affinity_masks[i]);

	if (vp_dev->msix_enabled) {
		/* Disable the vector used for configuration */
		vp_dev->config_vector(vp_dev, VIRTIO_MSI_NO_VECTOR);

		pci_free_irq_vectors(vp_dev->pci_dev);
		vp_dev->msix_enabled = 0;
	}

	vp_dev->msix_vectors = 0;
	vp_dev->msix_used_vectors = 0;
	kfree(vp_dev->msix_names);
	vp_dev->msix_names = NULL;
	kfree(vp_dev->msix_affinity_masks);
	vp_dev->msix_affinity_masks = NULL;
	kfree(vp_dev->vqs);
	vp_dev->vqs = NULL;
}

static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs,
		struct virtqueue *vqs[], vq_callback_t *callbacks[],
		const char * const names[], bool per_vq_vectors,
		struct irq_affinity *desc)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	u16 msix_vec;
	int i, err, nvectors, allocated_vectors;

	vp_dev->vqs = kcalloc(nvqs, sizeof(*vp_dev->vqs), GFP_KERNEL);
	if (!vp_dev->vqs)
		return -ENOMEM;

	if (per_vq_vectors) {
		/* Best option: one for change interrupt, one per vq. */
		nvectors = 1;
		for (i = 0; i < nvqs; ++i)
			if (callbacks[i])
				++nvectors;
	} else {
		/* Second best: one for change, shared for all vqs. */
		nvectors = 2;
	}

	err = vp_request_msix_vectors(vdev, nvectors, per_vq_vectors,
				      per_vq_vectors ? desc : NULL);
	if (err)
		goto error_find;

	vp_dev->per_vq_vectors = per_vq_vectors;
	allocated_vectors = vp_dev->msix_used_vectors;
	for (i = 0; i < nvqs; ++i) {
		if (!names[i]) {
			vqs[i] = NULL;
			continue;
		}

		if (!callbacks[i])
			msix_vec = VIRTIO_MSI_NO_VECTOR;
		else if (vp_dev->per_vq_vectors)
			msix_vec = allocated_vectors++;
		else
			msix_vec = VP_MSIX_VQ_VECTOR;
		vqs[i] = vp_setup_vq(vdev, i, callbacks[i], names[i],
				     msix_vec);
		if (IS_ERR(vqs[i])) {
			err = PTR_ERR(vqs[i]);
			goto error_find;
		}

		if (!vp_dev->per_vq_vectors || msix_vec == VIRTIO_MSI_NO_VECTOR)
			continue;

		/* allocate per-vq irq if available and necessary */
		snprintf(vp_dev->msix_names[msix_vec],
			 sizeof *vp_dev->msix_names,
			 "%s-%s",
			 dev_name(&vp_dev->vdev.dev), names[i]);
		err = request_irq(pci_irq_vector(vp_dev->pci_dev, msix_vec),
				  vring_interrupt, 0,
				  vp_dev->msix_names[msix_vec],
				  vqs[i]);
		if (err)
			goto error_find;
	}
	return 0;

error_find:
	vp_del_vqs(vdev);
	return err;
}

static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned nvqs,
		struct virtqueue *vqs[], vq_callback_t *callbacks[],
		const char * const names[])
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	int i, err;

	vp_dev->vqs = kcalloc(nvqs, sizeof(*vp_dev->vqs), GFP_KERNEL);
	if (!vp_dev->vqs)
		return -ENOMEM;

	err = request_irq(vp_dev->pci_dev->irq, vp_interrupt, IRQF_SHARED,
			  dev_name(&vdev->dev), vp_dev);
	if (err)
		goto out_del_vqs;

	vp_dev->intx_enabled = 1;
	vp_dev->per_vq_vectors = false;
	for (i = 0; i < nvqs; ++i) {
		if (!names[i]) {
			vqs[i] = NULL;
			continue;
		}
		vqs[i] = vp_setup_vq(vdev, i, callbacks[i], names[i],
				     VIRTIO_MSI_NO_VECTOR);
		if (IS_ERR(vqs[i])) {
			err = PTR_ERR(vqs[i]);
			goto out_del_vqs;
		}
	}

	return 0;
out_del_vqs:
	vp_del_vqs(vdev);
	return err;
}

/* the config->find_vqs() implementation */
int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs,
		struct virtqueue *vqs[], vq_callback_t *callbacks[],
		const char * const names[], struct irq_affinity *desc)
{
	int err;

	/* Try MSI-X with one vector per queue. */
	err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, true, desc);
	if (!err)
		return 0;
	/* Fallback: MSI-X with one vector for config, one shared for queues. */
	err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, false, desc);
	if (!err)
		return 0;
	/* Finally fall back to regular interrupts. */
	return vp_find_vqs_intx(vdev, nvqs, vqs, callbacks, names);
}
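
/*
 * Example (hypothetical driver, sketch only): a device with an rx and a tx
 * queue reaches this implementation through the config ops, e.g.
 *
 *	static const char * const names[] = { "rx", "tx" };
 *	vq_callback_t *callbacks[] = { rx_done, tx_done };
 *	struct virtqueue *vqs[2];
 *	int err;
 *
 *	err = vdev->config->find_vqs(vdev, 2, vqs, callbacks, names, NULL);
 *
 * where rx_done/tx_done are made-up completion callbacks and the NULL
 * irq_affinity descriptor skips managed affinity spreading.
 */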

const char *vp_bus_name(struct virtio_device *vdev)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);

	return pci_name(vp_dev->pci_dev);
}

/* Setup the affinity for a virtqueue:
 * - force the affinity for per vq vector
 * - OR over all affinities for shared MSI
 * - ignore the affinity request if we're using INTX
 */
int vp_set_vq_affinity(struct virtqueue *vq, int cpu)
{
	struct virtio_device *vdev = vq->vdev;
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	struct virtio_pci_vq_info *info = vp_dev->vqs[vq->index];
	struct cpumask *mask;
	unsigned int irq;

	if (!vq->callback)
		return -EINVAL;

	if (vp_dev->msix_enabled) {
		mask = vp_dev->msix_affinity_masks[info->msix_vector];
		irq = pci_irq_vector(vp_dev->pci_dev, info->msix_vector);
		if (cpu == -1)
			irq_set_affinity_hint(irq, NULL);
		else {
			cpumask_clear(mask);
			cpumask_set_cpu(cpu, mask);
			irq_set_affinity_hint(irq, mask);
		}
	}
	return 0;
}
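
/*
 * Typical use (sketch): a multiqueue driver pinning queue i to CPU i would
 * normally go through the virtqueue_set_affinity() wrapper in
 * virtio_config.h, roughly
 *
 *	for (i = 0; i < num_queues; i++)
 *		virtqueue_set_affinity(vqs[i], i);
 *
 * With INTx (msix_enabled == 0) the request is accepted but has no effect,
 * as documented above.
 */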

const struct cpumask *vp_get_vq_affinity(struct virtio_device *vdev, int index)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);

	if (!vp_dev->per_vq_vectors ||
	    vp_dev->vqs[index]->msix_vector == VIRTIO_MSI_NO_VECTOR)
		return NULL;

	return pci_irq_get_affinity(vp_dev->pci_dev,
				    vp_dev->vqs[index]->msix_vector);
}

#ifdef CONFIG_PM_SLEEP
static int virtio_pci_freeze(struct device *dev)
{
	struct pci_dev *pci_dev = to_pci_dev(dev);
	struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
	int ret;

	ret = virtio_device_freeze(&vp_dev->vdev);

	if (!ret)
		pci_disable_device(pci_dev);
	return ret;
}

static int virtio_pci_restore(struct device *dev)
{
	struct pci_dev *pci_dev = to_pci_dev(dev);
	struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
	int ret;

	ret = pci_enable_device(pci_dev);
	if (ret)
		return ret;

	pci_set_master(pci_dev);
	return virtio_device_restore(&vp_dev->vdev);
}

static const struct dev_pm_ops virtio_pci_pm_ops = {
	SET_SYSTEM_SLEEP_PM_OPS(virtio_pci_freeze, virtio_pci_restore)
};
#endif

/* Qumranet donated their vendor ID for devices 0x1000 thru 0x10FF. */
static const struct pci_device_id virtio_pci_id_table[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_REDHAT_QUMRANET, PCI_ANY_ID) },
	{ 0 }
};

MODULE_DEVICE_TABLE(pci, virtio_pci_id_table);
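
/*
 * For reference: PCI_VENDOR_ID_REDHAT_QUMRANET is 0x1af4.  The virtio spec
 * places transitional (legacy-capable) devices at device IDs 0x1000-0x103f
 * and virtio 1.0 devices at 0x1040 + <virtio device id>, so the table simply
 * matches the whole vendor ID and lets the modern/legacy probes below pick
 * the right interface.
 */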

static void virtio_pci_release_dev(struct device *_d)
{
	struct virtio_device *vdev = dev_to_virtio(_d);
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);

	/* As struct device is a kobject, it's not safe to
	 * free the memory (including the reference counter itself)
	 * until its release callback. */
	kfree(vp_dev);
}

static int virtio_pci_probe(struct pci_dev *pci_dev,
			    const struct pci_device_id *id)
{
	struct virtio_pci_device *vp_dev;
	int rc;

	/* allocate our structure and fill it out */
	vp_dev = kzalloc(sizeof(struct virtio_pci_device), GFP_KERNEL);
	if (!vp_dev)
		return -ENOMEM;

	pci_set_drvdata(pci_dev, vp_dev);
	vp_dev->vdev.dev.parent = &pci_dev->dev;
	vp_dev->vdev.dev.release = virtio_pci_release_dev;
	vp_dev->pci_dev = pci_dev;
	INIT_LIST_HEAD(&vp_dev->virtqueues);
	spin_lock_init(&vp_dev->lock);

	/* enable the device */
	rc = pci_enable_device(pci_dev);
	if (rc)
		goto err_enable_device;

	if (force_legacy) {
		rc = virtio_pci_legacy_probe(vp_dev);
		/* Also try modern mode if we can't map BAR0 (no IO space). */
		if (rc == -ENODEV || rc == -ENOMEM)
			rc = virtio_pci_modern_probe(vp_dev);
		if (rc)
			goto err_probe;
	} else {
		rc = virtio_pci_modern_probe(vp_dev);
		if (rc == -ENODEV)
			rc = virtio_pci_legacy_probe(vp_dev);
		if (rc)
			goto err_probe;
	}

	pci_set_master(pci_dev);

	rc = register_virtio_device(&vp_dev->vdev);
	if (rc)
		goto err_register;

	return 0;

err_register:
	if (vp_dev->ioaddr)
		virtio_pci_legacy_remove(vp_dev);
	else
		virtio_pci_modern_remove(vp_dev);
err_probe:
	pci_disable_device(pci_dev);
err_enable_device:
	kfree(vp_dev);
	return rc;
}

static void virtio_pci_remove(struct pci_dev *pci_dev)
{
	struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
	struct device *dev = get_device(&vp_dev->vdev.dev);

	unregister_virtio_device(&vp_dev->vdev);

	if (vp_dev->ioaddr)
		virtio_pci_legacy_remove(vp_dev);
	else
		virtio_pci_modern_remove(vp_dev);

	pci_disable_device(pci_dev);
	put_device(dev);
}

static struct pci_driver virtio_pci_driver = {
	.name		= "virtio-pci",
	.id_table	= virtio_pci_id_table,
	.probe		= virtio_pci_probe,
	.remove		= virtio_pci_remove,
#ifdef CONFIG_PM_SLEEP
	.driver.pm	= &virtio_pci_pm_ops,
#endif
};

module_pci_driver(virtio_pci_driver);

MODULE_AUTHOR("Anthony Liguori <aliguori@us.ibm.com>");
MODULE_DESCRIPTION("virtio-pci");
MODULE_LICENSE("GPL");
MODULE_VERSION("1");