virtio_pci_common.c

/*
 * Virtio PCI driver - common functionality for all device versions
 *
 * This module allows virtio devices to be used over a virtual PCI device.
 * This can be used with QEMU based VMMs like KVM or Xen.
 *
 * Copyright IBM Corp. 2007
 * Copyright Red Hat, Inc. 2014
 *
 * Authors:
 *  Anthony Liguori  <aliguori@us.ibm.com>
 *  Rusty Russell <rusty@rustcorp.com.au>
 *  Michael S. Tsirkin <mst@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "virtio_pci_common.h"

static bool force_legacy = false;

#if IS_ENABLED(CONFIG_VIRTIO_PCI_LEGACY)
module_param(force_legacy, bool, 0444);
MODULE_PARM_DESC(force_legacy,
                 "Force legacy mode for transitional virtio 1 devices");
#endif

/* wait for pending irq handlers */
void vp_synchronize_vectors(struct virtio_device *vdev)
{
        struct virtio_pci_device *vp_dev = to_vp_device(vdev);
        int i;

        if (vp_dev->intx_enabled)
                synchronize_irq(vp_dev->pci_dev->irq);

        for (i = 0; i < vp_dev->msix_vectors; ++i)
                synchronize_irq(pci_irq_vector(vp_dev->pci_dev, i));
}

/* the notify function used when creating a virt queue */
bool vp_notify(struct virtqueue *vq)
{
        /* we write the queue's selector into the notification register to
         * signal the other end */
        iowrite16(vq->index, (void __iomem *)vq->priv);
        return true;
}

/* Handle a configuration change: Tell driver if it wants to know. */
static irqreturn_t vp_config_changed(int irq, void *opaque)
{
        struct virtio_pci_device *vp_dev = opaque;

        virtio_config_changed(&vp_dev->vdev);
        return IRQ_HANDLED;
}

/* Notify all virtqueues on an interrupt. */
static irqreturn_t vp_vring_interrupt(int irq, void *opaque)
{
        struct virtio_pci_device *vp_dev = opaque;
        struct virtio_pci_vq_info *info;
        irqreturn_t ret = IRQ_NONE;
        unsigned long flags;

        spin_lock_irqsave(&vp_dev->lock, flags);
        list_for_each_entry(info, &vp_dev->virtqueues, node) {
                if (vring_interrupt(irq, info->vq) == IRQ_HANDLED)
                        ret = IRQ_HANDLED;
        }
        spin_unlock_irqrestore(&vp_dev->lock, flags);

        return ret;
}

/* A small wrapper to also acknowledge the interrupt when it's handled.
 * I really need an EIO hook for the vring so I can ack the interrupt once we
 * know that we'll be handling the IRQ but before we invoke the callback since
 * the callback may notify the host which results in the host attempting to
 * raise an interrupt that we would then mask once we acknowledged the
 * interrupt. */
static irqreturn_t vp_interrupt(int irq, void *opaque)
{
        struct virtio_pci_device *vp_dev = opaque;
        u8 isr;

        /* reading the ISR has the effect of also clearing it so it's very
         * important to save off the value. */
        isr = ioread8(vp_dev->isr);

        /* It's definitely not us if the ISR was not high */
        if (!isr)
                return IRQ_NONE;

        /* Configuration change? Tell driver if it wants to know. */
        if (isr & VIRTIO_PCI_ISR_CONFIG)
                vp_config_changed(irq, opaque);

        return vp_vring_interrupt(irq, opaque);
}
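
/*
 * Allocate @nvectors MSI-X vectors for the device and install the handler
 * for the configuration-change vector; when @per_vq_vectors is false, also
 * install a single handler shared by all virtqueues.  On failure the caller
 * is expected to unwind via vp_del_vqs().
 */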
static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors,
                                   bool per_vq_vectors, struct irq_affinity *desc)
{
        struct virtio_pci_device *vp_dev = to_vp_device(vdev);
        const char *name = dev_name(&vp_dev->vdev.dev);
        unsigned i, v;
        int err = -ENOMEM;

        vp_dev->msix_vectors = nvectors;

        vp_dev->msix_names = kmalloc(nvectors * sizeof *vp_dev->msix_names,
                                     GFP_KERNEL);
        if (!vp_dev->msix_names)
                goto error;
        vp_dev->msix_affinity_masks
                = kzalloc(nvectors * sizeof *vp_dev->msix_affinity_masks,
                          GFP_KERNEL);
        if (!vp_dev->msix_affinity_masks)
                goto error;
        for (i = 0; i < nvectors; ++i)
                if (!alloc_cpumask_var(&vp_dev->msix_affinity_masks[i],
                                       GFP_KERNEL))
                        goto error;

        err = pci_alloc_irq_vectors_affinity(vp_dev->pci_dev, nvectors,
                                             nvectors, PCI_IRQ_MSIX |
                                             (desc ? PCI_IRQ_AFFINITY : 0),
                                             desc);
        if (err < 0)
                goto error;
        vp_dev->msix_enabled = 1;

        /* Set the vector used for configuration */
        v = vp_dev->msix_used_vectors;
        snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
                 "%s-config", name);
        err = request_irq(pci_irq_vector(vp_dev->pci_dev, v),
                          vp_config_changed, 0, vp_dev->msix_names[v],
                          vp_dev);
        if (err)
                goto error;
        ++vp_dev->msix_used_vectors;

        v = vp_dev->config_vector(vp_dev, v);
        /* Verify we had enough resources to assign the vector */
        if (v == VIRTIO_MSI_NO_VECTOR) {
                err = -EBUSY;
                goto error;
        }

        if (!per_vq_vectors) {
                /* Shared vector for all VQs */
                v = vp_dev->msix_used_vectors;
                snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
                         "%s-virtqueues", name);
                err = request_irq(pci_irq_vector(vp_dev->pci_dev, v),
                                  vp_vring_interrupt, 0, vp_dev->msix_names[v],
                                  vp_dev);
                if (err)
                        goto error;
                ++vp_dev->msix_used_vectors;
        }
        return 0;
error:
        return err;
}
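
/*
 * Create virtqueue @index through the transport-specific setup_vq() hook.
 * Queues that have a callback are linked onto the list walked by
 * vp_vring_interrupt(); the bookkeeping info is recorded in vp_dev->vqs[].
 */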
static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned index,
                                     void (*callback)(struct virtqueue *vq),
                                     const char *name,
                                     bool ctx,
                                     u16 msix_vec)
{
        struct virtio_pci_device *vp_dev = to_vp_device(vdev);
        struct virtio_pci_vq_info *info = kmalloc(sizeof *info, GFP_KERNEL);
        struct virtqueue *vq;
        unsigned long flags;

        /* fill out our structure that represents an active queue */
        if (!info)
                return ERR_PTR(-ENOMEM);

        vq = vp_dev->setup_vq(vp_dev, info, index, callback, name, ctx,
                              msix_vec);
        if (IS_ERR(vq))
                goto out_info;

        info->vq = vq;
        if (callback) {
                spin_lock_irqsave(&vp_dev->lock, flags);
                list_add(&info->node, &vp_dev->virtqueues);
                spin_unlock_irqrestore(&vp_dev->lock, flags);
        } else {
                INIT_LIST_HEAD(&info->node);
        }

        vp_dev->vqs[index] = info;
        return vq;

out_info:
        kfree(info);
        return vq;
}
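
/*
 * Unlink a single virtqueue from the interrupt dispatch list, destroy it
 * through the transport-specific del_vq() hook and free its bookkeeping.
 */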
static void vp_del_vq(struct virtqueue *vq)
{
        struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
        struct virtio_pci_vq_info *info = vp_dev->vqs[vq->index];
        unsigned long flags;

        spin_lock_irqsave(&vp_dev->lock, flags);
        list_del(&info->node);
        spin_unlock_irqrestore(&vp_dev->lock, flags);

        vp_dev->del_vq(info);
        kfree(info);
}

/* the config->del_vqs() implementation */
void vp_del_vqs(struct virtio_device *vdev)
{
        struct virtio_pci_device *vp_dev = to_vp_device(vdev);
        struct virtqueue *vq, *n;
        int i;

        list_for_each_entry_safe(vq, n, &vdev->vqs, list) {
                if (vp_dev->per_vq_vectors) {
                        int v = vp_dev->vqs[vq->index]->msix_vector;

                        if (v != VIRTIO_MSI_NO_VECTOR) {
                                int irq = pci_irq_vector(vp_dev->pci_dev, v);

                                irq_set_affinity_hint(irq, NULL);
                                free_irq(irq, vq);
                        }
                }
                vp_del_vq(vq);
        }
        vp_dev->per_vq_vectors = false;

        if (vp_dev->intx_enabled) {
                free_irq(vp_dev->pci_dev->irq, vp_dev);
                vp_dev->intx_enabled = 0;
        }

        for (i = 0; i < vp_dev->msix_used_vectors; ++i)
                free_irq(pci_irq_vector(vp_dev->pci_dev, i), vp_dev);

        for (i = 0; i < vp_dev->msix_vectors; i++)
                if (vp_dev->msix_affinity_masks[i])
                        free_cpumask_var(vp_dev->msix_affinity_masks[i]);

        if (vp_dev->msix_enabled) {
                /* Disable the vector used for configuration */
                vp_dev->config_vector(vp_dev, VIRTIO_MSI_NO_VECTOR);

                pci_free_irq_vectors(vp_dev->pci_dev);
                vp_dev->msix_enabled = 0;
        }

        vp_dev->msix_vectors = 0;
        vp_dev->msix_used_vectors = 0;
        kfree(vp_dev->msix_names);
        vp_dev->msix_names = NULL;
        kfree(vp_dev->msix_affinity_masks);
        vp_dev->msix_affinity_masks = NULL;
        kfree(vp_dev->vqs);
        vp_dev->vqs = NULL;
}
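
/*
 * MSI-X variant of find_vqs: request one vector per callback-enabled queue
 * plus one for configuration changes when @per_vq_vectors is set, or just
 * two vectors (one for config, one shared by all queues) otherwise, then
 * create the queues and wire up their interrupt handlers.  Any failure
 * unwinds through vp_del_vqs().
 */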
static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs,
                struct virtqueue *vqs[], vq_callback_t *callbacks[],
                const char * const names[], bool per_vq_vectors,
                const bool *ctx,
                struct irq_affinity *desc)
{
        struct virtio_pci_device *vp_dev = to_vp_device(vdev);
        u16 msix_vec;
        int i, err, nvectors, allocated_vectors;

        vp_dev->vqs = kcalloc(nvqs, sizeof(*vp_dev->vqs), GFP_KERNEL);
        if (!vp_dev->vqs)
                return -ENOMEM;

        if (per_vq_vectors) {
                /* Best option: one for change interrupt, one per vq. */
                nvectors = 1;
                for (i = 0; i < nvqs; ++i)
                        if (callbacks[i])
                                ++nvectors;
        } else {
                /* Second best: one for change, shared for all vqs. */
                nvectors = 2;
        }

        err = vp_request_msix_vectors(vdev, nvectors, per_vq_vectors,
                                      per_vq_vectors ? desc : NULL);
        if (err)
                goto error_find;

        vp_dev->per_vq_vectors = per_vq_vectors;
        allocated_vectors = vp_dev->msix_used_vectors;
        for (i = 0; i < nvqs; ++i) {
                if (!names[i]) {
                        vqs[i] = NULL;
                        continue;
                }

                if (!callbacks[i])
                        msix_vec = VIRTIO_MSI_NO_VECTOR;
                else if (vp_dev->per_vq_vectors)
                        msix_vec = allocated_vectors++;
                else
                        msix_vec = VP_MSIX_VQ_VECTOR;
                vqs[i] = vp_setup_vq(vdev, i, callbacks[i], names[i],
                                     ctx ? ctx[i] : false,
                                     msix_vec);
                if (IS_ERR(vqs[i])) {
                        err = PTR_ERR(vqs[i]);
                        goto error_find;
                }

                if (!vp_dev->per_vq_vectors || msix_vec == VIRTIO_MSI_NO_VECTOR)
                        continue;

                /* allocate per-vq irq if available and necessary */
                snprintf(vp_dev->msix_names[msix_vec],
                         sizeof *vp_dev->msix_names,
                         "%s-%s",
                         dev_name(&vp_dev->vdev.dev), names[i]);
                err = request_irq(pci_irq_vector(vp_dev->pci_dev, msix_vec),
                                  vring_interrupt, 0,
                                  vp_dev->msix_names[msix_vec],
                                  vqs[i]);
                if (err)
                        goto error_find;
        }
        return 0;

error_find:
        vp_del_vqs(vdev);
        return err;
}
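
/*
 * Last-resort variant of find_vqs: a single shared INTx line, demultiplexed
 * by vp_interrupt(), serves both configuration changes and every virtqueue.
 */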
static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned nvqs,
                struct virtqueue *vqs[], vq_callback_t *callbacks[],
                const char * const names[], const bool *ctx)
{
        struct virtio_pci_device *vp_dev = to_vp_device(vdev);
        int i, err;

        vp_dev->vqs = kcalloc(nvqs, sizeof(*vp_dev->vqs), GFP_KERNEL);
        if (!vp_dev->vqs)
                return -ENOMEM;

        err = request_irq(vp_dev->pci_dev->irq, vp_interrupt, IRQF_SHARED,
                          dev_name(&vdev->dev), vp_dev);
        if (err)
                goto out_del_vqs;

        vp_dev->intx_enabled = 1;
        vp_dev->per_vq_vectors = false;
        for (i = 0; i < nvqs; ++i) {
                if (!names[i]) {
                        vqs[i] = NULL;
                        continue;
                }
                vqs[i] = vp_setup_vq(vdev, i, callbacks[i], names[i],
                                     ctx ? ctx[i] : false,
                                     VIRTIO_MSI_NO_VECTOR);
                if (IS_ERR(vqs[i])) {
                        err = PTR_ERR(vqs[i]);
                        goto out_del_vqs;
                }
        }

        return 0;
out_del_vqs:
        vp_del_vqs(vdev);
        return err;
}

/* the config->find_vqs() implementation */
int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs,
                struct virtqueue *vqs[], vq_callback_t *callbacks[],
                const char * const names[], const bool *ctx,
                struct irq_affinity *desc)
{
        int err;

        /* Try MSI-X with one vector per queue. */
        err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, true, ctx, desc);
        if (!err)
                return 0;
        /* Fallback: MSI-X with one vector for config, one shared for queues. */
        err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, false, ctx, desc);
        if (!err)
                return 0;
        /* Finally fall back to regular interrupts. */
        return vp_find_vqs_intx(vdev, nvqs, vqs, callbacks, names, ctx);
}
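
/*
 * Usage sketch (not part of this file): a virtio driver normally reaches
 * vp_find_vqs() indirectly through the config ops, e.g. via the
 * virtio_find_vqs() helper declared in <linux/virtio_config.h>.  The queue
 * names and callbacks below are hypothetical:
 *
 *	vq_callback_t *callbacks[] = { foo_rx_done, foo_tx_done };
 *	static const char * const names[] = { "rx", "tx" };
 *	struct virtqueue *vqs[2];
 *	int err;
 *
 *	err = virtio_find_vqs(vdev, 2, vqs, callbacks, names, NULL);
 *	if (err)
 *		return err;
 *
 * Even with a NULL irq_affinity descriptor the transport still tries the
 * per-queue MSI-X layout first and then degrades as described above.
 */
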
const char *vp_bus_name(struct virtio_device *vdev)
{
        struct virtio_pci_device *vp_dev = to_vp_device(vdev);

        return pci_name(vp_dev->pci_dev);
}

/* Setup the affinity for a virtqueue:
 * - force the affinity for per vq vector
 * - OR over all affinities for shared MSI
 * - ignore the affinity request if we're using INTX
 */
int vp_set_vq_affinity(struct virtqueue *vq, int cpu)
{
        struct virtio_device *vdev = vq->vdev;
        struct virtio_pci_device *vp_dev = to_vp_device(vdev);
        struct virtio_pci_vq_info *info = vp_dev->vqs[vq->index];
        struct cpumask *mask;
        unsigned int irq;

        if (!vq->callback)
                return -EINVAL;

        if (vp_dev->msix_enabled) {
                mask = vp_dev->msix_affinity_masks[info->msix_vector];
                irq = pci_irq_vector(vp_dev->pci_dev, info->msix_vector);
                if (cpu == -1)
                        irq_set_affinity_hint(irq, NULL);
                else {
                        cpumask_clear(mask);
                        cpumask_set_cpu(cpu, mask);
                        irq_set_affinity_hint(irq, mask);
                }
        }
        return 0;
}
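
/*
 * Usage sketch (not part of this file, names hypothetical): drivers that
 * spread their queues across CPUs typically go through the
 * virtqueue_set_affinity() wrapper from <linux/virtio_config.h> rather than
 * calling this op directly, e.g.:
 *
 *	for (i = 0; i < nvqs; i++)
 *		virtqueue_set_affinity(vqs[i], i % num_online_cpus());
 *
 * Passing cpu == -1 drops the affinity hint again.
 */
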
const struct cpumask *vp_get_vq_affinity(struct virtio_device *vdev, int index)
{
        struct virtio_pci_device *vp_dev = to_vp_device(vdev);

        if (!vp_dev->per_vq_vectors ||
            vp_dev->vqs[index]->msix_vector == VIRTIO_MSI_NO_VECTOR)
                return NULL;

        return pci_irq_get_affinity(vp_dev->pci_dev,
                                    vp_dev->vqs[index]->msix_vector);
}

#ifdef CONFIG_PM_SLEEP
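/*
 * System sleep support: freeze quiesces the virtio device before the PCI
 * device is disabled; restore re-enables the PCI device, restores bus
 * mastering and then brings the virtio device back up.
 */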
static int virtio_pci_freeze(struct device *dev)
{
        struct pci_dev *pci_dev = to_pci_dev(dev);
        struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
        int ret;

        ret = virtio_device_freeze(&vp_dev->vdev);

        if (!ret)
                pci_disable_device(pci_dev);
        return ret;
}

static int virtio_pci_restore(struct device *dev)
{
        struct pci_dev *pci_dev = to_pci_dev(dev);
        struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
        int ret;

        ret = pci_enable_device(pci_dev);
        if (ret)
                return ret;

        pci_set_master(pci_dev);
        return virtio_device_restore(&vp_dev->vdev);
}

static const struct dev_pm_ops virtio_pci_pm_ops = {
        SET_SYSTEM_SLEEP_PM_OPS(virtio_pci_freeze, virtio_pci_restore)
};
#endif

/* Qumranet donated their vendor ID for devices 0x1000 thru 0x10FF. */
static const struct pci_device_id virtio_pci_id_table[] = {
        { PCI_DEVICE(PCI_VENDOR_ID_REDHAT_QUMRANET, PCI_ANY_ID) },
        { 0 }
};

MODULE_DEVICE_TABLE(pci, virtio_pci_id_table);

static void virtio_pci_release_dev(struct device *_d)
{
        struct virtio_device *vdev = dev_to_virtio(_d);
        struct virtio_pci_device *vp_dev = to_vp_device(vdev);

        /* As struct device is a kobject, it's not safe to
         * free the memory (including the reference counter itself)
         * until its release callback. */
        kfree(vp_dev);
}
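
/*
 * Bind to a virtio PCI device: enable it, probe the modern (virtio 1)
 * interface first and fall back to the legacy one (or the other way round
 * when force_legacy is set), then register the virtio_device so that
 * driver matching can run.
 */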
static int virtio_pci_probe(struct pci_dev *pci_dev,
                            const struct pci_device_id *id)
{
        struct virtio_pci_device *vp_dev;
        int rc;

        /* allocate our structure and fill it out */
        vp_dev = kzalloc(sizeof(struct virtio_pci_device), GFP_KERNEL);
        if (!vp_dev)
                return -ENOMEM;

        pci_set_drvdata(pci_dev, vp_dev);
        vp_dev->vdev.dev.parent = &pci_dev->dev;
        vp_dev->vdev.dev.release = virtio_pci_release_dev;
        vp_dev->pci_dev = pci_dev;
        INIT_LIST_HEAD(&vp_dev->virtqueues);
        spin_lock_init(&vp_dev->lock);

        /* enable the device */
        rc = pci_enable_device(pci_dev);
        if (rc)
                goto err_enable_device;

        if (force_legacy) {
                rc = virtio_pci_legacy_probe(vp_dev);
                /* Also try modern mode if we can't map BAR0 (no IO space). */
                if (rc == -ENODEV || rc == -ENOMEM)
                        rc = virtio_pci_modern_probe(vp_dev);
                if (rc)
                        goto err_probe;
        } else {
                rc = virtio_pci_modern_probe(vp_dev);
                if (rc == -ENODEV)
                        rc = virtio_pci_legacy_probe(vp_dev);
                if (rc)
                        goto err_probe;
        }

        pci_set_master(pci_dev);

        rc = register_virtio_device(&vp_dev->vdev);
        if (rc)
                goto err_register;

        return 0;

err_register:
        if (vp_dev->ioaddr)
                virtio_pci_legacy_remove(vp_dev);
        else
                virtio_pci_modern_remove(vp_dev);
err_probe:
        pci_disable_device(pci_dev);
err_enable_device:
        kfree(vp_dev);
        return rc;
}

static void virtio_pci_remove(struct pci_dev *pci_dev)
{
        struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
        struct device *dev = get_device(&vp_dev->vdev.dev);

        unregister_virtio_device(&vp_dev->vdev);

        if (vp_dev->ioaddr)
                virtio_pci_legacy_remove(vp_dev);
        else
                virtio_pci_modern_remove(vp_dev);

        pci_disable_device(pci_dev);
        put_device(dev);
}

static struct pci_driver virtio_pci_driver = {
        .name           = "virtio-pci",
        .id_table       = virtio_pci_id_table,
        .probe          = virtio_pci_probe,
        .remove         = virtio_pci_remove,
#ifdef CONFIG_PM_SLEEP
        .driver.pm      = &virtio_pci_pm_ops,
#endif
};

module_pci_driver(virtio_pci_driver);

MODULE_AUTHOR("Anthony Liguori <aliguori@us.ibm.com>");
MODULE_DESCRIPTION("virtio-pci");
MODULE_LICENSE("GPL");
MODULE_VERSION("1");