vop_vringh.c

  1. /*
  2. * Intel MIC Platform Software Stack (MPSS)
  3. *
  4. * Copyright(c) 2016 Intel Corporation.
  5. *
  6. * This program is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License, version 2, as
  8. * published by the Free Software Foundation.
  9. *
  10. * This program is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13. * General Public License for more details.
  14. *
  15. * The full GNU General Public License is included in this distribution in
  16. * the file called "COPYING".
  17. *
  18. * Intel Virtio Over PCIe (VOP) driver.
  19. *
  20. */
  21. #include <linux/sched.h>
  22. #include <linux/poll.h>
  23. #include <linux/dma-mapping.h>
  24. #include <linux/mic_common.h>
  25. #include "../common/mic_dev.h"
  26. #include <linux/mic_ioctl.h>
  27. #include "vop_main.h"
  28. /* Helper API to obtain the VOP PCIe device */
  29. static inline struct device *vop_dev(struct vop_vdev *vdev)
  30. {
  31. return vdev->vpdev->dev.parent;
  32. }
  33. /* Helper API to check if a virtio device is initialized */
  34. static inline int vop_vdev_inited(struct vop_vdev *vdev)
  35. {
  36. if (!vdev)
  37. return -EINVAL;
  38. /* Device has not been created yet */
  39. if (!vdev->dd || !vdev->dd->type) {
  40. dev_err(vop_dev(vdev), "%s %d err %d\n",
  41. __func__, __LINE__, -EINVAL);
  42. return -EINVAL;
  43. }
  44. /* Device has been removed/deleted */
  45. if (vdev->dd->type == -1) {
  46. dev_dbg(vop_dev(vdev), "%s %d err %d\n",
  47. __func__, __LINE__, -ENODEV);
  48. return -ENODEV;
  49. }
  50. return 0;
  51. }
  52. static void _vop_notify(struct vringh *vrh)
  53. {
  54. struct vop_vringh *vvrh = container_of(vrh, struct vop_vringh, vrh);
  55. struct vop_vdev *vdev = vvrh->vdev;
  56. struct vop_device *vpdev = vdev->vpdev;
  57. s8 db = vdev->dc->h2c_vdev_db;
  58. if (db != -1)
  59. vpdev->hw_ops->send_intr(vpdev, db);
  60. }
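/*
 * _vop_notify() is installed as vrh->notify for every vring (see
 * vop_virtio_add_device() below), so vringh_notify() lands here whenever a
 * used-ring update needs to be signalled; the card side is then poked via
 * the h2c_vdev_db doorbell it advertised in the device ctrl area.
 */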
  61. static void vop_virtio_init_post(struct vop_vdev *vdev)
  62. {
  63. struct mic_vqconfig *vqconfig = mic_vq_config(vdev->dd);
  64. struct vop_device *vpdev = vdev->vpdev;
  65. int i, used_size;
  66. for (i = 0; i < vdev->dd->num_vq; i++) {
  67. used_size = PAGE_ALIGN(sizeof(u16) * 3 +
  68. sizeof(struct vring_used_elem) *
  69. le16_to_cpu(vqconfig->num));
  70. if (!le64_to_cpu(vqconfig[i].used_address)) {
  71. dev_warn(vop_dev(vdev), "used_address zero??\n");
  72. continue;
  73. }
  74. vdev->vvr[i].vrh.vring.used =
  75. (void __force *)vpdev->hw_ops->ioremap(
  76. vpdev,
  77. le64_to_cpu(vqconfig[i].used_address),
  78. used_size);
  79. }
  80. vdev->dc->used_address_updated = 0;
  81. dev_info(vop_dev(vdev), "%s: device type %d LINKUP\n",
  82. __func__, vdev->virtio_id);
  83. }
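/*
 * Worked example of the used_size computation above (illustrative numbers,
 * 4 KB pages): the used ring is three u16s (flags, idx, used_event) plus one
 * 8-byte struct vring_used_elem per descriptor, so for a 256-entry vring
 *
 *    used_size = PAGE_ALIGN(2 * 3 + 8 * 256) = PAGE_ALIGN(2054) = 4096
 *
 * and the card-side used ring ioremap'd here occupies a single page.
 */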
  84. static inline void vop_virtio_device_reset(struct vop_vdev *vdev)
  85. {
  86. int i;
  87. dev_dbg(vop_dev(vdev), "%s: status %d device type %d RESET\n",
  88. __func__, vdev->dd->status, vdev->virtio_id);
  89. for (i = 0; i < vdev->dd->num_vq; i++)
  90. /*
  91. * Avoid lockdep false positive. The + 1 is for the vop
  92. * mutex which is held in the reset devices code path.
  93. */
  94. mutex_lock_nested(&vdev->vvr[i].vr_mutex, i + 1);
  95. /* 0 status means "reset" */
  96. vdev->dd->status = 0;
  97. vdev->dc->vdev_reset = 0;
  98. vdev->dc->host_ack = 1;
  99. for (i = 0; i < vdev->dd->num_vq; i++) {
  100. struct vringh *vrh = &vdev->vvr[i].vrh;
  101. vdev->vvr[i].vring.info->avail_idx = 0;
  102. vrh->completed = 0;
  103. vrh->last_avail_idx = 0;
  104. vrh->last_used_idx = 0;
  105. }
  106. for (i = 0; i < vdev->dd->num_vq; i++)
  107. mutex_unlock(&vdev->vvr[i].vr_mutex);
  108. }
  109. static void vop_virtio_reset_devices(struct vop_info *vi)
  110. {
  111. struct list_head *pos, *tmp;
  112. struct vop_vdev *vdev;
  113. list_for_each_safe(pos, tmp, &vi->vdev_list) {
  114. vdev = list_entry(pos, struct vop_vdev, list);
  115. vop_virtio_device_reset(vdev);
  116. vdev->poll_wake = 1;
  117. wake_up(&vdev->waitq);
  118. }
  119. }
  120. static void vop_bh_handler(struct work_struct *work)
  121. {
  122. struct vop_vdev *vdev = container_of(work, struct vop_vdev,
  123. virtio_bh_work);
  124. if (vdev->dc->used_address_updated)
  125. vop_virtio_init_post(vdev);
  126. if (vdev->dc->vdev_reset)
  127. vop_virtio_device_reset(vdev);
  128. vdev->poll_wake = 1;
  129. wake_up(&vdev->waitq);
  130. }
  131. static irqreturn_t _vop_virtio_intr_handler(int irq, void *data)
  132. {
  133. struct vop_vdev *vdev = data;
  134. struct vop_device *vpdev = vdev->vpdev;
  135. vpdev->hw_ops->ack_interrupt(vpdev, vdev->virtio_db);
  136. schedule_work(&vdev->virtio_bh_work);
  137. return IRQ_HANDLED;
  138. }
  139. static int vop_virtio_config_change(struct vop_vdev *vdev, void *argp)
  140. {
  141. DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);
  142. int ret = 0, retry, i;
  143. struct vop_device *vpdev = vdev->vpdev;
  144. struct vop_info *vi = dev_get_drvdata(&vpdev->dev);
  145. struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev);
  146. s8 db = bootparam->h2c_config_db;
  147. mutex_lock(&vi->vop_mutex);
  148. for (i = 0; i < vdev->dd->num_vq; i++)
  149. mutex_lock_nested(&vdev->vvr[i].vr_mutex, i + 1);
  150. if (db == -1 || vdev->dd->type == -1) {
  151. ret = -EIO;
  152. goto exit;
  153. }
  154. memcpy(mic_vq_configspace(vdev->dd), argp, vdev->dd->config_len);
  155. vdev->dc->config_change = MIC_VIRTIO_PARAM_CONFIG_CHANGED;
  156. vpdev->hw_ops->send_intr(vpdev, db);
  157. for (retry = 100; retry--;) {
  158. ret = wait_event_timeout(wake, vdev->dc->guest_ack,
  159. msecs_to_jiffies(100));
  160. if (ret)
  161. break;
  162. }
  163. dev_dbg(vop_dev(vdev),
  164. "%s %d retry: %d\n", __func__, __LINE__, retry);
  165. vdev->dc->config_change = 0;
  166. vdev->dc->guest_ack = 0;
  167. exit:
  168. for (i = 0; i < vdev->dd->num_vq; i++)
  169. mutex_unlock(&vdev->vvr[i].vr_mutex);
  170. mutex_unlock(&vi->vop_mutex);
  171. return ret;
  172. }
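/*
 * The config-change handshake above, from the host's point of view:
 *
 *    1. copy the new contents into the descriptor's config space
 *    2. set dc->config_change and ring the card's h2c_config_db doorbell
 *    3. wait for dc->guest_ack, rechecking every 100ms for up to 100 tries
 *    4. clear config_change and guest_ack whether or not the card answered
 *
 * Since nothing ever wakes the on-stack waitqueue, the wait_event_timeout()
 * loop effectively polls guest_ack at 100ms granularity.
 */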
  173. static int vop_copy_dp_entry(struct vop_vdev *vdev,
  174. struct mic_device_desc *argp, __u8 *type,
  175. struct mic_device_desc **devpage)
  176. {
  177. struct vop_device *vpdev = vdev->vpdev;
  178. struct mic_device_desc *devp;
  179. struct mic_vqconfig *vqconfig;
  180. int ret = 0, i;
  181. bool slot_found = false;
  182. vqconfig = mic_vq_config(argp);
  183. for (i = 0; i < argp->num_vq; i++) {
  184. if (le16_to_cpu(vqconfig[i].num) > MIC_MAX_VRING_ENTRIES) {
  185. ret = -EINVAL;
  186. dev_err(vop_dev(vdev), "%s %d err %d\n",
  187. __func__, __LINE__, ret);
  188. goto exit;
  189. }
  190. }
  191. /* Find the first free device page entry */
  192. for (i = sizeof(struct mic_bootparam);
  193. i < MIC_DP_SIZE - mic_total_desc_size(argp);
  194. i += mic_total_desc_size(devp)) {
  195. devp = vpdev->hw_ops->get_dp(vpdev) + i;
  196. if (devp->type == 0 || devp->type == -1) {
  197. slot_found = true;
  198. break;
  199. }
  200. }
  201. if (!slot_found) {
  202. ret = -EINVAL;
  203. dev_err(vop_dev(vdev), "%s %d err %d\n",
  204. __func__, __LINE__, ret);
  205. goto exit;
  206. }
  207. /*
  208. * Save off the type before doing the memcpy. Type will be set in the
  209. * end after completing all initialization for the new device.
  210. */
  211. *type = argp->type;
  212. argp->type = 0;
  213. memcpy(devp, argp, mic_desc_size(argp));
  214. *devpage = devp;
  215. exit:
  216. return ret;
  217. }
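/*
 * Shape of the device page scanned above (MIC_DP_SIZE bytes total, slot
 * sizes given by mic_total_desc_size(), i.e. the aligned descriptor plus
 * its struct mic_device_ctrl, see vop_init_device_ctrl() below):
 *
 *    +--------------------------------+  offset 0
 *    | struct mic_bootparam           |
 *    +--------------------------------+  sizeof(struct mic_bootparam)
 *    | device desc 0 | device ctrl 0  |
 *    +--------------------------------+
 *    | device desc 1 | device ctrl 1  |
 *    +--------------------------------+
 *    | type == 0 or -1 -> free slot   |
 *    +--------------------------------+
 *
 * A slot whose type is 0 (never used) or -1 (device removed) is considered
 * free and is reused for the device being added.
 */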
  218. static void vop_init_device_ctrl(struct vop_vdev *vdev,
  219. struct mic_device_desc *devpage)
  220. {
  221. struct mic_device_ctrl *dc;
  222. dc = (void *)devpage + mic_aligned_desc_size(devpage);
  223. dc->config_change = 0;
  224. dc->guest_ack = 0;
  225. dc->vdev_reset = 0;
  226. dc->host_ack = 0;
  227. dc->used_address_updated = 0;
  228. dc->c2h_vdev_db = -1;
  229. dc->h2c_vdev_db = -1;
  230. vdev->dc = dc;
  231. }
  232. static int vop_virtio_add_device(struct vop_vdev *vdev,
  233. struct mic_device_desc *argp)
  234. {
  235. struct vop_info *vi = vdev->vi;
  236. struct vop_device *vpdev = vi->vpdev;
  237. struct mic_device_desc *dd = NULL;
  238. struct mic_vqconfig *vqconfig;
  239. int vr_size, i, j, ret;
  240. u8 type = 0;
  241. s8 db = -1;
  242. char irqname[16];
  243. struct mic_bootparam *bootparam;
  244. u16 num;
  245. dma_addr_t vr_addr;
  246. bootparam = vpdev->hw_ops->get_dp(vpdev);
  247. init_waitqueue_head(&vdev->waitq);
  248. INIT_LIST_HEAD(&vdev->list);
  249. vdev->vpdev = vpdev;
  250. ret = vop_copy_dp_entry(vdev, argp, &type, &dd);
  251. if (ret) {
  252. dev_err(vop_dev(vdev), "%s %d err %d\n",
  253. __func__, __LINE__, ret);
  254. kfree(vdev);
  255. return ret;
  256. }
  257. vop_init_device_ctrl(vdev, dd);
  258. vdev->dd = dd;
  259. vdev->virtio_id = type;
  260. vqconfig = mic_vq_config(dd);
  261. INIT_WORK(&vdev->virtio_bh_work, vop_bh_handler);
  262. for (i = 0; i < dd->num_vq; i++) {
  263. struct vop_vringh *vvr = &vdev->vvr[i];
  264. struct mic_vring *vr = &vdev->vvr[i].vring;
  265. num = le16_to_cpu(vqconfig[i].num);
  266. mutex_init(&vvr->vr_mutex);
  267. vr_size = PAGE_ALIGN(vring_size(num, MIC_VIRTIO_RING_ALIGN) +
  268. sizeof(struct _mic_vring_info));
  269. vr->va = (void *)
  270. __get_free_pages(GFP_KERNEL | __GFP_ZERO,
  271. get_order(vr_size));
  272. if (!vr->va) {
  273. ret = -ENOMEM;
  274. dev_err(vop_dev(vdev), "%s %d err %d\n",
  275. __func__, __LINE__, ret);
  276. goto err;
  277. }
  278. vr->len = vr_size;
  279. vr->info = vr->va + vring_size(num, MIC_VIRTIO_RING_ALIGN);
  280. vr->info->magic = cpu_to_le32(MIC_MAGIC + vdev->virtio_id + i);
  281. vr_addr = dma_map_single(&vpdev->dev, vr->va, vr_size,
  282. DMA_BIDIRECTIONAL);
  283. if (dma_mapping_error(&vpdev->dev, vr_addr)) {
  284. free_pages((unsigned long)vr->va, get_order(vr_size));
  285. ret = -ENOMEM;
  286. dev_err(vop_dev(vdev), "%s %d err %d\n",
  287. __func__, __LINE__, ret);
  288. goto err;
  289. }
  290. vqconfig[i].address = cpu_to_le64(vr_addr);
  291. vring_init(&vr->vr, num, vr->va, MIC_VIRTIO_RING_ALIGN);
  292. ret = vringh_init_kern(&vvr->vrh,
  293. *(u32 *)mic_vq_features(vdev->dd),
  294. num, false, vr->vr.desc, vr->vr.avail,
  295. vr->vr.used);
  296. if (ret) {
  297. dev_err(vop_dev(vdev), "%s %d err %d\n",
  298. __func__, __LINE__, ret);
  299. goto err;
  300. }
  301. vringh_kiov_init(&vvr->riov, NULL, 0);
  302. vringh_kiov_init(&vvr->wiov, NULL, 0);
  303. vvr->head = USHRT_MAX;
  304. vvr->vdev = vdev;
  305. vvr->vrh.notify = _vop_notify;
  306. dev_dbg(&vpdev->dev,
  307. "%s %d index %d va %p info %p vr_size 0x%x\n",
  308. __func__, __LINE__, i, vr->va, vr->info, vr_size);
  309. vvr->buf = (void *)__get_free_pages(GFP_KERNEL,
  310. get_order(VOP_INT_DMA_BUF_SIZE));
  311. vvr->buf_da = dma_map_single(&vpdev->dev,
  312. vvr->buf, VOP_INT_DMA_BUF_SIZE,
  313. DMA_BIDIRECTIONAL);
  314. }
  315. snprintf(irqname, sizeof(irqname), "vop%dvirtio%d", vpdev->index,
  316. vdev->virtio_id);
  317. vdev->virtio_db = vpdev->hw_ops->next_db(vpdev);
  318. vdev->virtio_cookie = vpdev->hw_ops->request_irq(vpdev,
  319. _vop_virtio_intr_handler, irqname, vdev,
  320. vdev->virtio_db);
  321. if (IS_ERR(vdev->virtio_cookie)) {
  322. ret = PTR_ERR(vdev->virtio_cookie);
  323. dev_dbg(&vpdev->dev, "request irq failed\n");
  324. goto err;
  325. }
  326. vdev->dc->c2h_vdev_db = vdev->virtio_db;
  327. /*
  328. * Order the type update with previous stores. This write barrier
  329. * is paired with the corresponding read barrier before the uncached
  330. * system memory read of the type, on the card while scanning the
  331. * device page.
  332. */
  333. smp_wmb();
  334. dd->type = type;
  335. argp->type = type;
  336. if (bootparam) {
  337. db = bootparam->h2c_config_db;
  338. if (db != -1)
  339. vpdev->hw_ops->send_intr(vpdev, db);
  340. }
  341. dev_dbg(&vpdev->dev, "Added virtio id %d db %d\n", dd->type, db);
  342. return 0;
  343. err:
  344. vqconfig = mic_vq_config(dd);
  345. for (j = 0; j < i; j++) {
  346. struct vop_vringh *vvr = &vdev->vvr[j];
  347. dma_unmap_single(&vpdev->dev, le64_to_cpu(vqconfig[j].address),
  348. vvr->vring.len, DMA_BIDIRECTIONAL);
  349. free_pages((unsigned long)vvr->vring.va,
  350. get_order(vvr->vring.len));
  351. }
  352. return ret;
  353. }
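/*
 * Each vring set up above is a single page-aligned, physically contiguous
 * host buffer; it is what gets DMA-published to the card and later mmap'd
 * read-only to user space:
 *
 *    vr->va                           vr->va + vring_size(num, align)
 *    +--------------------------------+--------------------------+
 *    | virtio vring (desc/avail/used) | struct _mic_vring_info   |
 *    +--------------------------------+--------------------------+
 *    |<---- vr->len = PAGE_ALIGN(vring_size + info size) ------->|
 *
 * The card learns the buffer's DMA address through vqconfig[i].address,
 * while _mic_vring_info carries the magic value and the avail_idx mirror
 * that user space reads through the mmap interface.
 */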
  354. static void vop_dev_remove(struct vop_info *pvi, struct mic_device_ctrl *devp,
  355. struct vop_device *vpdev)
  356. {
  357. struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev);
  358. s8 db;
  359. int ret, retry;
  360. DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);
  361. devp->config_change = MIC_VIRTIO_PARAM_DEV_REMOVE;
  362. db = bootparam->h2c_config_db;
  363. if (db != -1)
  364. vpdev->hw_ops->send_intr(vpdev, db);
  365. else
  366. goto done;
  367. for (retry = 15; retry--;) {
  368. ret = wait_event_timeout(wake, devp->guest_ack,
  369. msecs_to_jiffies(1000));
  370. if (ret)
  371. break;
  372. }
  373. done:
  374. devp->config_change = 0;
  375. devp->guest_ack = 0;
  376. }
  377. static void vop_virtio_del_device(struct vop_vdev *vdev)
  378. {
  379. struct vop_info *vi = vdev->vi;
  380. struct vop_device *vpdev = vdev->vpdev;
  381. int i;
  382. struct mic_vqconfig *vqconfig;
  383. struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev);
  384. if (!bootparam)
  385. goto skip_hot_remove;
  386. vop_dev_remove(vi, vdev->dc, vpdev);
  387. skip_hot_remove:
  388. vpdev->hw_ops->free_irq(vpdev, vdev->virtio_cookie, vdev);
  389. flush_work(&vdev->virtio_bh_work);
  390. vqconfig = mic_vq_config(vdev->dd);
  391. for (i = 0; i < vdev->dd->num_vq; i++) {
  392. struct vop_vringh *vvr = &vdev->vvr[i];
  393. dma_unmap_single(&vpdev->dev,
  394. vvr->buf_da, VOP_INT_DMA_BUF_SIZE,
  395. DMA_BIDIRECTIONAL);
  396. free_pages((unsigned long)vvr->buf,
  397. get_order(VOP_INT_DMA_BUF_SIZE));
  398. vringh_kiov_cleanup(&vvr->riov);
  399. vringh_kiov_cleanup(&vvr->wiov);
  400. dma_unmap_single(&vpdev->dev, le64_to_cpu(vqconfig[i].address),
  401. vvr->vring.len, DMA_BIDIRECTIONAL);
  402. free_pages((unsigned long)vvr->vring.va,
  403. get_order(vvr->vring.len));
  404. }
  405. /*
  406. * Order the type update with previous stores. This write barrier
  407. * is paired with the corresponding read barrier before the uncached
  408. * system memory read of the type, on the card while scanning the
  409. * device page.
  410. */
  411. smp_wmb();
  412. vdev->dd->type = -1;
  413. }
  414. /*
  415. * vop_sync_dma - Wrapper for synchronous DMAs.
  416. *
  417. * @vdev - The VOP virtio device instance; its DMA channel is used
  418. * for the transfer.
  419. * @dst - destination DMA address.
  420. * @src - source DMA address.
  421. * @len - size of the transfer.
  422. *
  423. * Return 0 on success, a negative error code on failure.
  424. */
  425. static int vop_sync_dma(struct vop_vdev *vdev, dma_addr_t dst, dma_addr_t src,
  426. size_t len)
  427. {
  428. int err = 0;
  429. struct dma_device *ddev;
  430. struct dma_async_tx_descriptor *tx;
  431. struct vop_info *vi = dev_get_drvdata(&vdev->vpdev->dev);
  432. struct dma_chan *vop_ch = vi->dma_ch;
  433. if (!vop_ch) {
  434. err = -EBUSY;
  435. goto error;
  436. }
  437. ddev = vop_ch->device;
  438. tx = ddev->device_prep_dma_memcpy(vop_ch, dst, src, len,
  439. DMA_PREP_FENCE);
  440. if (!tx) {
  441. err = -ENOMEM;
  442. goto error;
  443. } else {
  444. dma_cookie_t cookie;
  445. cookie = tx->tx_submit(tx);
  446. if (dma_submit_error(cookie)) {
  447. err = -ENOMEM;
  448. goto error;
  449. }
  450. dma_async_issue_pending(vop_ch);
  451. err = dma_sync_wait(vop_ch, cookie);
  452. }
  453. error:
  454. if (err)
  455. dev_err(&vi->vpdev->dev, "%s %d err %d\n",
  456. __func__, __LINE__, err);
  457. return err;
  458. }
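/*
 * vop_sync_dma() is only used below with the per-vring bounce buffer:
 * card memory is staged through vvr->buf (mapped once as vvr->buf_da at
 * add-device time) in chunks of at most VOP_INT_DMA_BUF_SIZE, roughly
 *
 *    err = vop_sync_dma(vdev, vvr->buf_da, card_daddr, chunk_len);
 *
 * where card_daddr stands for the card-side address of the current chunk.
 * Whether DMA is attempted at all is decided by the callers (VOP_USE_DMA
 * and the alignment checks); otherwise they copy through the ioremap'd
 * card window with copy_to_user()/copy_from_user().
 */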
  459. #define VOP_USE_DMA true
  460. /*
  461. * Initiates the copies across the PCIe bus from card memory to a user
  462. * space buffer. When transfers are done using DMA, source/destination
  463. * addresses and transfer length must follow the alignment requirements of
  464. * the MIC DMA engine.
  465. */
  466. static int vop_virtio_copy_to_user(struct vop_vdev *vdev, void __user *ubuf,
  467. size_t len, u64 daddr, size_t dlen,
  468. int vr_idx)
  469. {
  470. struct vop_device *vpdev = vdev->vpdev;
  471. void __iomem *dbuf = vpdev->hw_ops->ioremap(vpdev, daddr, len);
  472. struct vop_vringh *vvr = &vdev->vvr[vr_idx];
  473. struct vop_info *vi = dev_get_drvdata(&vpdev->dev);
  474. size_t dma_alignment = 1 << vi->dma_ch->device->copy_align;
  475. bool x200 = is_dma_copy_aligned(vi->dma_ch->device, 1, 1, 1);
  476. size_t dma_offset, partlen;
  477. int err;
  478. if (!VOP_USE_DMA) {
  479. if (copy_to_user(ubuf, (void __force *)dbuf, len)) {
  480. err = -EFAULT;
  481. dev_err(vop_dev(vdev), "%s %d err %d\n",
  482. __func__, __LINE__, err);
  483. goto err;
  484. }
  485. vdev->in_bytes += len;
  486. err = 0;
  487. goto err;
  488. }
  489. dma_offset = daddr - round_down(daddr, dma_alignment);
  490. daddr -= dma_offset;
  491. len += dma_offset;
  492. /*
  493. * X100 uses DMA addresses as seen by the card so adding
  494. * the aperture base is not required for DMA. However x200
  495. * requires DMA addresses to be an offset into the bar so
  496. * add the aperture base for x200.
  497. */
  498. if (x200)
  499. daddr += vpdev->aper->pa;
  500. while (len) {
  501. partlen = min_t(size_t, len, VOP_INT_DMA_BUF_SIZE);
  502. err = vop_sync_dma(vdev, vvr->buf_da, daddr,
  503. ALIGN(partlen, dma_alignment));
  504. if (err) {
  505. dev_err(vop_dev(vdev), "%s %d err %d\n",
  506. __func__, __LINE__, err);
  507. goto err;
  508. }
  509. if (copy_to_user(ubuf, vvr->buf + dma_offset,
  510. partlen - dma_offset)) {
  511. err = -EFAULT;
  512. dev_err(vop_dev(vdev), "%s %d err %d\n",
  513. __func__, __LINE__, err);
  514. goto err;
  515. }
  516. daddr += partlen;
  517. ubuf += partlen;
  518. dbuf += partlen;
  519. vdev->in_bytes_dma += partlen;
  520. vdev->in_bytes += partlen;
  521. len -= partlen;
  522. dma_offset = 0;
  523. }
  524. err = 0;
  525. err:
  526. vpdev->hw_ops->iounmap(vpdev, dbuf);
  527. dev_dbg(vop_dev(vdev),
  528. "%s: ubuf %p dbuf %p len 0x%lx vr_idx 0x%x\n",
  529. __func__, ubuf, dbuf, len, vr_idx);
  530. return err;
  531. }
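/*
 * Worked example of the alignment fixup above: with dma_alignment = 64,
 * daddr = 0x1003 and len = 100, daddr is rounded down to 0x1000,
 * dma_offset becomes 3 and len grows to 103; the DMA then moves
 * ALIGN(103, 64) = 128 bytes from 0x1000 into vvr->buf, and copy_to_user()
 * hands the caller the 100 payload bytes starting at vvr->buf + 3.  Later
 * chunks start aligned, so dma_offset is zeroed after the first pass.
 */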
  532. /*
  533. * Initiates copies across the PCIe bus from a user space buffer to card
  534. * memory. When transfers are done using DMA, source/destination addresses
  535. * and transfer length must follow the alignment requirements of the MIC
  536. * DMA engine.
  537. */
  538. static int vop_virtio_copy_from_user(struct vop_vdev *vdev, void __user *ubuf,
  539. size_t len, u64 daddr, size_t dlen,
  540. int vr_idx)
  541. {
  542. struct vop_device *vpdev = vdev->vpdev;
  543. void __iomem *dbuf = vpdev->hw_ops->ioremap(vpdev, daddr, len);
  544. struct vop_vringh *vvr = &vdev->vvr[vr_idx];
  545. struct vop_info *vi = dev_get_drvdata(&vdev->vpdev->dev);
  546. size_t dma_alignment = 1 << vi->dma_ch->device->copy_align;
  547. bool x200 = is_dma_copy_aligned(vi->dma_ch->device, 1, 1, 1);
  548. size_t partlen;
  549. bool dma = VOP_USE_DMA;
  550. int err = 0;
  551. if (daddr & (dma_alignment - 1)) {
  552. vdev->tx_dst_unaligned += len;
  553. dma = false;
  554. } else if (ALIGN(len, dma_alignment) > dlen) {
  555. vdev->tx_len_unaligned += len;
  556. dma = false;
  557. }
  558. if (!dma)
  559. goto memcpy;
  560. /*
  561. * X100 uses DMA addresses as seen by the card so adding
  562. * the aperture base is not required for DMA. However x200
  563. * requires DMA addresses to be an offset into the bar so
  564. * add the aperture base for x200.
  565. */
  566. if (x200)
  567. daddr += vpdev->aper->pa;
  568. while (len) {
  569. partlen = min_t(size_t, len, VOP_INT_DMA_BUF_SIZE);
  570. if (copy_from_user(vvr->buf, ubuf, partlen)) {
  571. err = -EFAULT;
  572. dev_err(vop_dev(vdev), "%s %d err %d\n",
  573. __func__, __LINE__, err);
  574. goto err;
  575. }
  576. err = vop_sync_dma(vdev, daddr, vvr->buf_da,
  577. ALIGN(partlen, dma_alignment));
  578. if (err) {
  579. dev_err(vop_dev(vdev), "%s %d err %d\n",
  580. __func__, __LINE__, err);
  581. goto err;
  582. }
  583. daddr += partlen;
  584. ubuf += partlen;
  585. dbuf += partlen;
  586. vdev->out_bytes_dma += partlen;
  587. vdev->out_bytes += partlen;
  588. len -= partlen;
  589. }
  590. memcpy:
  591. /*
  592. * We are copying to IO below and should ideally use something
  593. * like copy_from_user_toio(..) if it existed.
  594. */
  595. if (copy_from_user((void __force *)dbuf, ubuf, len)) {
  596. err = -EFAULT;
  597. dev_err(vop_dev(vdev), "%s %d err %d\n",
  598. __func__, __LINE__, err);
  599. goto err;
  600. }
  601. vdev->out_bytes += len;
  602. err = 0;
  603. err:
  604. vpdev->hw_ops->iounmap(vpdev, dbuf);
  605. dev_dbg(vop_dev(vdev),
  606. "%s: ubuf %p dbuf %p len 0x%lx vr_idx 0x%x\n",
  607. __func__, ubuf, dbuf, len, vr_idx);
  608. return err;
  609. }
  610. #define MIC_VRINGH_READ true
  611. /* Determine the total number of bytes consumed in a VRINGH KIOV */
  612. static inline u32 vop_vringh_iov_consumed(struct vringh_kiov *iov)
  613. {
  614. int i;
  615. u32 total = iov->consumed;
  616. for (i = 0; i < iov->i; i++)
  617. total += iov->iov[i].iov_len;
  618. return total;
  619. }
  620. /*
  621. * Traverse the VRINGH KIOV and issue the APIs to trigger the copies.
  622. * This API is heavily based on the vringh_iov_xfer(..) implementation
  623. * in vringh.c. The reason we cannot reuse vringh_iov_pull_kern(..)
  624. * and vringh_iov_push_kern(..) directly is because there is no
  625. * way to override the VRINGH xfer(..) routines as of v3.10.
  626. */
  627. static int vop_vringh_copy(struct vop_vdev *vdev, struct vringh_kiov *iov,
  628. void __user *ubuf, size_t len, bool read, int vr_idx,
  629. size_t *out_len)
  630. {
  631. int ret = 0;
  632. size_t partlen, tot_len = 0;
  633. while (len && iov->i < iov->used) {
  634. struct kvec *kiov = &iov->iov[iov->i];
  635. partlen = min(kiov->iov_len, len);
  636. if (read)
  637. ret = vop_virtio_copy_to_user(vdev, ubuf, partlen,
  638. (u64)kiov->iov_base,
  639. kiov->iov_len,
  640. vr_idx);
  641. else
  642. ret = vop_virtio_copy_from_user(vdev, ubuf, partlen,
  643. (u64)kiov->iov_base,
  644. kiov->iov_len,
  645. vr_idx);
  646. if (ret) {
  647. dev_err(vop_dev(vdev), "%s %d err %d\n",
  648. __func__, __LINE__, ret);
  649. break;
  650. }
  651. len -= partlen;
  652. ubuf += partlen;
  653. tot_len += partlen;
  654. iov->consumed += partlen;
  655. kiov->iov_len -= partlen;
  656. kiov->iov_base += partlen;
  657. if (!kiov->iov_len) {
  658. /* Fix up old iov element then increment. */
  659. kiov->iov_len = iov->consumed;
  660. kiov->iov_base -= iov->consumed;
  661. iov->consumed = 0;
  662. iov->i++;
  663. }
  664. }
  665. *out_len = tot_len;
  666. return ret;
  667. }
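/*
 * Note on the "fix up" step above: while an element is being drained,
 * iov->consumed accumulates the bytes taken from it and iov_base/iov_len
 * only describe the remainder.  Once the element is fully consumed, its
 * original iov_base/iov_len are restored from iov->consumed before iov->i
 * is advanced, which is what allows vop_vringh_iov_consumed() to compute
 * the grand total as the completed elements plus the partially consumed
 * current one.
 */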
  668. /*
  669. * Use the standard VRINGH infrastructure in the kernel to fetch new
  670. * descriptors, initiate the copies and update the used ring.
  671. */
  672. static int _vop_virtio_copy(struct vop_vdev *vdev, struct mic_copy_desc *copy)
  673. {
  674. int ret = 0;
  675. u32 iovcnt = copy->iovcnt;
  676. struct iovec iov;
  677. struct iovec __user *u_iov = copy->iov;
  678. void __user *ubuf = NULL;
  679. struct vop_vringh *vvr = &vdev->vvr[copy->vr_idx];
  680. struct vringh_kiov *riov = &vvr->riov;
  681. struct vringh_kiov *wiov = &vvr->wiov;
  682. struct vringh *vrh = &vvr->vrh;
  683. u16 *head = &vvr->head;
  684. struct mic_vring *vr = &vvr->vring;
  685. size_t len = 0, out_len;
  686. copy->out_len = 0;
  687. /* Fetch a new IOVEC if all previous elements have been processed */
  688. if (riov->i == riov->used && wiov->i == wiov->used) {
  689. ret = vringh_getdesc_kern(vrh, riov, wiov,
  690. head, GFP_KERNEL);
  691. /* Check if there are available descriptors */
  692. if (ret <= 0)
  693. return ret;
  694. }
  695. while (iovcnt) {
  696. if (!len) {
  697. /* Copy over a new iovec from user space. */
  698. ret = copy_from_user(&iov, u_iov, sizeof(*u_iov));
  699. if (ret) {
  700. ret = -EINVAL;
  701. dev_err(vop_dev(vdev), "%s %d err %d\n",
  702. __func__, __LINE__, ret);
  703. break;
  704. }
  705. len = iov.iov_len;
  706. ubuf = iov.iov_base;
  707. }
  708. /* Issue all the read descriptors first */
  709. ret = vop_vringh_copy(vdev, riov, ubuf, len,
  710. MIC_VRINGH_READ, copy->vr_idx, &out_len);
  711. if (ret) {
  712. dev_err(vop_dev(vdev), "%s %d err %d\n",
  713. __func__, __LINE__, ret);
  714. break;
  715. }
  716. len -= out_len;
  717. ubuf += out_len;
  718. copy->out_len += out_len;
  719. /* Issue the write descriptors next */
  720. ret = vop_vringh_copy(vdev, wiov, ubuf, len,
  721. !MIC_VRINGH_READ, copy->vr_idx, &out_len);
  722. if (ret) {
  723. dev_err(vop_dev(vdev), "%s %d err %d\n",
  724. __func__, __LINE__, ret);
  725. break;
  726. }
  727. len -= out_len;
  728. ubuf += out_len;
  729. copy->out_len += out_len;
  730. if (!len) {
  731. /* One user space iovec is now completed */
  732. iovcnt--;
  733. u_iov++;
  734. }
  735. /* Exit loop if all elements in KIOVs have been processed. */
  736. if (riov->i == riov->used && wiov->i == wiov->used)
  737. break;
  738. }
  739. /*
  740. * Update the used ring if a descriptor was available and some data was
  741. * copied in/out and the user asked for a used ring update.
  742. */
  743. if (*head != USHRT_MAX && copy->out_len && copy->update_used) {
  744. u32 total = 0;
  745. /* Determine the total data consumed */
  746. total += vop_vringh_iov_consumed(riov);
  747. total += vop_vringh_iov_consumed(wiov);
  748. vringh_complete_kern(vrh, *head, total);
  749. *head = USHRT_MAX;
  750. if (vringh_need_notify_kern(vrh) > 0)
  751. vringh_notify(vrh);
  752. vringh_kiov_cleanup(riov);
  753. vringh_kiov_cleanup(wiov);
  754. /* Update avail idx for user space */
  755. vr->info->avail_idx = vrh->last_avail_idx;
  756. }
  757. return ret;
  758. }
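/*
 * A minimal user-space sketch of driving this path (illustrative only;
 * fd is an open /dev/vop_virtio<N> descriptor, buf/len the payload, and
 * error handling is omitted):
 *
 *    struct iovec iov = { .iov_base = buf, .iov_len = len };
 *    struct mic_copy_desc copy = {
 *        .iov = &iov,
 *        .iovcnt = 1,
 *        .vr_idx = 0,
 *        .update_used = 1,
 *    };
 *    ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
 *
 * On return copy.out_len reports how many bytes were actually moved in
 * and/or out through the descriptor chain.
 */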
  759. static inline int vop_verify_copy_args(struct vop_vdev *vdev,
  760. struct mic_copy_desc *copy)
  761. {
  762. if (!vdev || copy->vr_idx >= vdev->dd->num_vq)
  763. return -EINVAL;
  764. return 0;
  765. }
  766. /* Copy a specified number of virtio descriptors in a chain */
  767. static int vop_virtio_copy_desc(struct vop_vdev *vdev,
  768. struct mic_copy_desc *copy)
  769. {
  770. int err;
  771. struct vop_vringh *vvr;
  772. err = vop_verify_copy_args(vdev, copy);
  773. if (err)
  774. return err;
  775. vvr = &vdev->vvr[copy->vr_idx];
  776. mutex_lock(&vvr->vr_mutex);
  777. if (!vop_vdevup(vdev)) {
  778. err = -ENODEV;
  779. dev_err(vop_dev(vdev), "%s %d err %d\n",
  780. __func__, __LINE__, err);
  781. goto err;
  782. }
  783. err = _vop_virtio_copy(vdev, copy);
  784. if (err) {
  785. dev_err(vop_dev(vdev), "%s %d err %d\n",
  786. __func__, __LINE__, err);
  787. }
  788. err:
  789. mutex_unlock(&vvr->vr_mutex);
  790. return err;
  791. }
  792. static int vop_open(struct inode *inode, struct file *f)
  793. {
  794. struct vop_vdev *vdev;
  795. struct vop_info *vi = container_of(f->private_data,
  796. struct vop_info, miscdev);
  797. vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
  798. if (!vdev)
  799. return -ENOMEM;
  800. vdev->vi = vi;
  801. mutex_init(&vdev->vdev_mutex);
  802. f->private_data = vdev;
  803. init_completion(&vdev->destroy);
  804. complete(&vdev->destroy);
  805. return 0;
  806. }
  807. static int vop_release(struct inode *inode, struct file *f)
  808. {
  809. struct vop_vdev *vdev = f->private_data, *vdev_tmp;
  810. struct vop_info *vi = vdev->vi;
  811. struct list_head *pos, *tmp;
  812. bool found = false;
  813. mutex_lock(&vdev->vdev_mutex);
  814. if (vdev->deleted)
  815. goto unlock;
  816. mutex_lock(&vi->vop_mutex);
  817. list_for_each_safe(pos, tmp, &vi->vdev_list) {
  818. vdev_tmp = list_entry(pos, struct vop_vdev, list);
  819. if (vdev == vdev_tmp) {
  820. vop_virtio_del_device(vdev);
  821. list_del(pos);
  822. found = true;
  823. break;
  824. }
  825. }
  826. mutex_unlock(&vi->vop_mutex);
  827. unlock:
  828. mutex_unlock(&vdev->vdev_mutex);
  829. if (!found)
  830. wait_for_completion(&vdev->destroy);
  831. f->private_data = NULL;
  832. kfree(vdev);
  833. return 0;
  834. }
  835. static long vop_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
  836. {
  837. struct vop_vdev *vdev = f->private_data;
  838. struct vop_info *vi = vdev->vi;
  839. void __user *argp = (void __user *)arg;
  840. int ret;
  841. switch (cmd) {
  842. case MIC_VIRTIO_ADD_DEVICE:
  843. {
  844. struct mic_device_desc dd, *dd_config;
  845. if (copy_from_user(&dd, argp, sizeof(dd)))
  846. return -EFAULT;
  847. if (mic_aligned_desc_size(&dd) > MIC_MAX_DESC_BLK_SIZE ||
  848. dd.num_vq > MIC_MAX_VRINGS)
  849. return -EINVAL;
  850. dd_config = kzalloc(mic_desc_size(&dd), GFP_KERNEL);
  851. if (!dd_config)
  852. return -ENOMEM;
  853. if (copy_from_user(dd_config, argp, mic_desc_size(&dd))) {
  854. ret = -EFAULT;
  855. goto free_ret;
  856. }
  857. /* Ensure desc has not changed between the two reads */
  858. if (memcmp(&dd, dd_config, sizeof(dd))) {
  859. ret = -EINVAL;
  860. goto free_ret;
  861. }
  862. mutex_lock(&vdev->vdev_mutex);
  863. mutex_lock(&vi->vop_mutex);
  864. ret = vop_virtio_add_device(vdev, dd_config);
  865. if (ret)
  866. goto unlock_ret;
  867. list_add_tail(&vdev->list, &vi->vdev_list);
  868. unlock_ret:
  869. mutex_unlock(&vi->vop_mutex);
  870. mutex_unlock(&vdev->vdev_mutex);
  871. free_ret:
  872. kfree(dd_config);
  873. return ret;
  874. }
  875. case MIC_VIRTIO_COPY_DESC:
  876. {
  877. struct mic_copy_desc copy;
  878. mutex_lock(&vdev->vdev_mutex);
  879. ret = vop_vdev_inited(vdev);
  880. if (ret)
  881. goto _unlock_ret;
  882. if (copy_from_user(&copy, argp, sizeof(copy))) {
  883. ret = -EFAULT;
  884. goto _unlock_ret;
  885. }
  886. ret = vop_virtio_copy_desc(vdev, &copy);
  887. if (ret < 0)
  888. goto _unlock_ret;
  889. if (copy_to_user(
  890. &((struct mic_copy_desc __user *)argp)->out_len,
  891. &copy.out_len, sizeof(copy.out_len)))
  892. ret = -EFAULT;
  893. _unlock_ret:
  894. mutex_unlock(&vdev->vdev_mutex);
  895. return ret;
  896. }
  897. case MIC_VIRTIO_CONFIG_CHANGE:
  898. {
  899. void *buf;
  900. mutex_lock(&vdev->vdev_mutex);
  901. ret = vop_vdev_inited(vdev);
  902. if (ret)
  903. goto __unlock_ret;
  904. buf = kzalloc(vdev->dd->config_len, GFP_KERNEL);
  905. if (!buf) {
  906. ret = -ENOMEM;
  907. goto __unlock_ret;
  908. }
  909. if (copy_from_user(buf, argp, vdev->dd->config_len)) {
  910. ret = -EFAULT;
  911. goto done;
  912. }
  913. ret = vop_virtio_config_change(vdev, buf);
  914. done:
  915. kfree(buf);
  916. __unlock_ret:
  917. mutex_unlock(&vdev->vdev_mutex);
  918. return ret;
  919. }
  920. default:
  921. return -ENOIOCTLCMD;
  922. };
  923. return 0;
  924. }
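/*
 * Putting the ioctls together, a user-space virtio backend for one device
 * typically follows this sequence (sketch only; desc must be a complete
 * mic_device_desc blob, i.e. header, vring configs, feature bits and
 * config space, exactly mic_desc_size() bytes long):
 *
 *    int fd = open("/dev/vop_virtio0", O_RDWR);
 *    ioctl(fd, MIC_VIRTIO_ADD_DEVICE, desc);
 *    for (;;) {
 *        struct pollfd pfd = { .fd = fd, .events = POLLIN };
 *        poll(&pfd, 1, -1);
 *        ... issue MIC_VIRTIO_COPY_DESC until no descriptors remain ...
 *    }
 *
 * Draining fully on each wakeup matters because of the poll semantics
 * documented below.
 */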
  925. /*
  926. * We return POLLIN | POLLOUT from poll when new buffers are enqueued, and
  927. * not when previously enqueued buffers may be available. This means that
  928. * in the card->host (TX) path, when userspace is unblocked by poll it
  929. * must drain all available descriptors or it can stall.
  930. */
  931. static unsigned int vop_poll(struct file *f, poll_table *wait)
  932. {
  933. struct vop_vdev *vdev = f->private_data;
  934. int mask = 0;
  935. mutex_lock(&vdev->vdev_mutex);
  936. if (vop_vdev_inited(vdev)) {
  937. mask = POLLERR;
  938. goto done;
  939. }
  940. poll_wait(f, &vdev->waitq, wait);
  941. if (vop_vdev_inited(vdev)) {
  942. mask = POLLERR;
  943. } else if (vdev->poll_wake) {
  944. vdev->poll_wake = 0;
  945. mask = POLLIN | POLLOUT;
  946. }
  947. done:
  948. mutex_unlock(&vdev->vdev_mutex);
  949. return mask;
  950. }
  951. static inline int
  952. vop_query_offset(struct vop_vdev *vdev, unsigned long offset,
  953. unsigned long *size, unsigned long *pa)
  954. {
  955. struct vop_device *vpdev = vdev->vpdev;
  956. unsigned long start = MIC_DP_SIZE;
  957. int i;
  958. /*
  959. * MMAP interface is as follows:
  960. * offset region
  961. * 0x0 virtio device_page
  962. * 0x1000 first vring
  963. * 0x1000 + size of 1st vring second vring
  964. * ....
  965. */
  966. if (!offset) {
  967. *pa = virt_to_phys(vpdev->hw_ops->get_dp(vpdev));
  968. *size = MIC_DP_SIZE;
  969. return 0;
  970. }
  971. for (i = 0; i < vdev->dd->num_vq; i++) {
  972. struct vop_vringh *vvr = &vdev->vvr[i];
  973. if (offset == start) {
  974. *pa = virt_to_phys(vvr->vring.va);
  975. *size = vvr->vring.len;
  976. return 0;
  977. }
  978. start += vvr->vring.len;
  979. }
  980. return -1;
  981. }
  982. /*
  983. * Maps the device page and virtio rings to user space for readonly access.
  984. */
  985. static int vop_mmap(struct file *f, struct vm_area_struct *vma)
  986. {
  987. struct vop_vdev *vdev = f->private_data;
  988. unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
  989. unsigned long pa, size = vma->vm_end - vma->vm_start, size_rem = size;
  990. int i, err;
  991. err = vop_vdev_inited(vdev);
  992. if (err)
  993. goto ret;
  994. if (vma->vm_flags & VM_WRITE) {
  995. err = -EACCES;
  996. goto ret;
  997. }
  998. while (size_rem) {
  999. i = vop_query_offset(vdev, offset, &size, &pa);
  1000. if (i < 0) {
  1001. err = -EINVAL;
  1002. goto ret;
  1003. }
  1004. err = remap_pfn_range(vma, vma->vm_start + offset,
  1005. pa >> PAGE_SHIFT, size,
  1006. vma->vm_page_prot);
  1007. if (err)
  1008. goto ret;
  1009. size_rem -= size;
  1010. offset += size;
  1011. }
  1012. ret:
  1013. return err;
  1014. }
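/*
 * User-space sketch of consuming this mapping (illustrative; the vring
 * lengths can be computed from the ring sizes the caller itself supplied
 * at MIC_VIRTIO_ADD_DEVICE time):
 *
 *    size_t total = MIC_DP_SIZE + vr0_len + vr1_len;
 *    void *base = mmap(NULL, total, PROT_READ, MAP_SHARED, fd, 0);
 *    void *dp = base;                     device page
 *    void *vring0 = base + MIC_DP_SIZE;   first vring + _mic_vring_info
 *    void *vring1 = vring0 + vr0_len;     second vring + _mic_vring_info
 *
 * The mapping is strictly read-only (VM_WRITE is refused above); all
 * updates travel back through the MIC_VIRTIO_COPY_DESC ioctl instead.
 */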
  1015. static const struct file_operations vop_fops = {
  1016. .open = vop_open,
  1017. .release = vop_release,
  1018. .unlocked_ioctl = vop_ioctl,
  1019. .poll = vop_poll,
  1020. .mmap = vop_mmap,
  1021. .owner = THIS_MODULE,
  1022. };
  1023. int vop_host_init(struct vop_info *vi)
  1024. {
  1025. int rc;
  1026. struct miscdevice *mdev;
  1027. struct vop_device *vpdev = vi->vpdev;
  1028. INIT_LIST_HEAD(&vi->vdev_list);
  1029. vi->dma_ch = vpdev->dma_ch;
  1030. mdev = &vi->miscdev;
  1031. mdev->minor = MISC_DYNAMIC_MINOR;
  1032. snprintf(vi->name, sizeof(vi->name), "vop_virtio%d", vpdev->index);
  1033. mdev->name = vi->name;
  1034. mdev->fops = &vop_fops;
  1035. mdev->parent = &vpdev->dev;
  1036. rc = misc_register(mdev);
  1037. if (rc)
  1038. dev_err(&vpdev->dev, "%s failed rc %d\n", __func__, rc);
  1039. return rc;
  1040. }
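/*
 * misc_register() above is what creates the character device served by
 * vop_fops, i.e. /dev/vop_virtio<index> for each VOP PCIe device; every
 * open of that node allocates its own struct vop_vdev in vop_open().
 */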
  1041. void vop_host_uninit(struct vop_info *vi)
  1042. {
  1043. struct list_head *pos, *tmp;
  1044. struct vop_vdev *vdev;
  1045. mutex_lock(&vi->vop_mutex);
  1046. vop_virtio_reset_devices(vi);
  1047. list_for_each_safe(pos, tmp, &vi->vdev_list) {
  1048. vdev = list_entry(pos, struct vop_vdev, list);
  1049. list_del(pos);
  1050. reinit_completion(&vdev->destroy);
  1051. mutex_unlock(&vi->vop_mutex);
  1052. mutex_lock(&vdev->vdev_mutex);
  1053. vop_virtio_del_device(vdev);
  1054. vdev->deleted = true;
  1055. mutex_unlock(&vdev->vdev_mutex);
  1056. complete(&vdev->destroy);
  1057. mutex_lock(&vi->vop_mutex);
  1058. }
  1059. mutex_unlock(&vi->vop_mutex);
  1060. misc_deregister(&vi->miscdev);
  1061. }