pvrdma_qp.c

/*
 * Copyright (c) 2012-2016 VMware, Inc. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of EITHER the GNU General Public License
 * version 2 as published by the Free Software Foundation or the BSD
 * 2-Clause License. This program is distributed in the hope that it
 * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
 * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU General Public License version 2 for more details at
 * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program available in the file COPYING in the main
 * directory of this source tree.
 *
 * The BSD 2-Clause License
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer.
 *
 * - Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer in the documentation and/or other materials
 *   provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <asm/page.h>
#include <linux/io.h>
#include <linux/wait.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_user_verbs.h>

#include "pvrdma.h"

static inline void get_cqs(struct pvrdma_qp *qp, struct pvrdma_cq **send_cq,
			   struct pvrdma_cq **recv_cq)
{
	*send_cq = to_vcq(qp->ibqp.send_cq);
	*recv_cq = to_vcq(qp->ibqp.recv_cq);
}
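
/*
 * Take/release both CQ locks of a QP. When the send and recv CQs differ, the
 * locks are always acquired in ascending cq_handle order (the inner lock is
 * annotated SINGLE_DEPTH_NESTING for lockdep), so two callers seeing the CQs
 * in opposite roles cannot deadlock. When both point at the same CQ, only one
 * lock is taken and the sparse __acquire()/__release() annotations keep the
 * acquire/release counts balanced.
 */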
static void pvrdma_lock_cqs(struct pvrdma_cq *scq, struct pvrdma_cq *rcq,
			    unsigned long *scq_flags,
			    unsigned long *rcq_flags)
	__acquires(scq->cq_lock) __acquires(rcq->cq_lock)
{
	if (scq == rcq) {
		spin_lock_irqsave(&scq->cq_lock, *scq_flags);
		__acquire(rcq->cq_lock);
	} else if (scq->cq_handle < rcq->cq_handle) {
		spin_lock_irqsave(&scq->cq_lock, *scq_flags);
		spin_lock_irqsave_nested(&rcq->cq_lock, *rcq_flags,
					 SINGLE_DEPTH_NESTING);
	} else {
		spin_lock_irqsave(&rcq->cq_lock, *rcq_flags);
		spin_lock_irqsave_nested(&scq->cq_lock, *scq_flags,
					 SINGLE_DEPTH_NESTING);
	}
}

static void pvrdma_unlock_cqs(struct pvrdma_cq *scq, struct pvrdma_cq *rcq,
			      unsigned long *scq_flags,
			      unsigned long *rcq_flags)
	__releases(scq->cq_lock) __releases(rcq->cq_lock)
{
	if (scq == rcq) {
		__release(rcq->cq_lock);
		spin_unlock_irqrestore(&scq->cq_lock, *scq_flags);
	} else if (scq->cq_handle < rcq->cq_handle) {
		spin_unlock_irqrestore(&rcq->cq_lock, *rcq_flags);
		spin_unlock_irqrestore(&scq->cq_lock, *scq_flags);
	} else {
		spin_unlock_irqrestore(&scq->cq_lock, *scq_flags);
		spin_unlock_irqrestore(&rcq->cq_lock, *rcq_flags);
	}
}

static void pvrdma_reset_qp(struct pvrdma_qp *qp)
{
	struct pvrdma_cq *scq, *rcq;
	unsigned long scq_flags, rcq_flags;

	/* Clean up cqes */
	get_cqs(qp, &scq, &rcq);
	pvrdma_lock_cqs(scq, rcq, &scq_flags, &rcq_flags);

	_pvrdma_flush_cqe(qp, scq);
	if (scq != rcq)
		_pvrdma_flush_cqe(qp, rcq);

	pvrdma_unlock_cqs(scq, rcq, &scq_flags, &rcq_flags);

	/*
	 * Reset queuepair. The checks are because usermode queuepairs won't
	 * have kernel ringstates.
	 */
	if (qp->rq.ring) {
		atomic_set(&qp->rq.ring->cons_head, 0);
		atomic_set(&qp->rq.ring->prod_tail, 0);
	}
	if (qp->sq.ring) {
		atomic_set(&qp->sq.ring->cons_head, 0);
		atomic_set(&qp->sq.ring->prod_tail, 0);
	}
}
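
/*
 * Queue sizing for kernel QPs: the requested WQE and SGE counts are checked
 * against the device caps, rounded up to powers of two, and written back into
 * req_cap so the caller sees what was actually allocated. The WQE size is
 * also rounded to a power of two, so a WQE that fits in a page never crosses
 * a page boundary in the page directory. The send queue accounts one extra
 * page for the shared ring-state header (see pvrdma_create_qp()).
 */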
static int pvrdma_set_rq_size(struct pvrdma_dev *dev,
			      struct ib_qp_cap *req_cap,
			      struct pvrdma_qp *qp)
{
	if (req_cap->max_recv_wr > dev->dsr->caps.max_qp_wr ||
	    req_cap->max_recv_sge > dev->dsr->caps.max_sge) {
		dev_warn(&dev->pdev->dev, "recv queue size invalid\n");
		return -EINVAL;
	}

	qp->rq.wqe_cnt = roundup_pow_of_two(max(1U, req_cap->max_recv_wr));
	qp->rq.max_sg = roundup_pow_of_two(max(1U, req_cap->max_recv_sge));

	/* Write back */
	req_cap->max_recv_wr = qp->rq.wqe_cnt;
	req_cap->max_recv_sge = qp->rq.max_sg;

	qp->rq.wqe_size = roundup_pow_of_two(sizeof(struct pvrdma_rq_wqe_hdr) +
					     sizeof(struct pvrdma_sge) *
					     qp->rq.max_sg);
	qp->npages_recv = (qp->rq.wqe_cnt * qp->rq.wqe_size + PAGE_SIZE - 1) /
			  PAGE_SIZE;

	return 0;
}

static int pvrdma_set_sq_size(struct pvrdma_dev *dev, struct ib_qp_cap *req_cap,
			      enum ib_qp_type type, struct pvrdma_qp *qp)
{
	if (req_cap->max_send_wr > dev->dsr->caps.max_qp_wr ||
	    req_cap->max_send_sge > dev->dsr->caps.max_sge) {
		dev_warn(&dev->pdev->dev, "send queue size invalid\n");
		return -EINVAL;
	}

	qp->sq.wqe_cnt = roundup_pow_of_two(max(1U, req_cap->max_send_wr));
	qp->sq.max_sg = roundup_pow_of_two(max(1U, req_cap->max_send_sge));

	/* Write back */
	req_cap->max_send_wr = qp->sq.wqe_cnt;
	req_cap->max_send_sge = qp->sq.max_sg;

	qp->sq.wqe_size = roundup_pow_of_two(sizeof(struct pvrdma_sq_wqe_hdr) +
					     sizeof(struct pvrdma_sge) *
					     qp->sq.max_sg);
	/* Note: one extra page for the header. */
	qp->npages_send = 1 + (qp->sq.wqe_cnt * qp->sq.wqe_size +
			       PAGE_SIZE - 1) / PAGE_SIZE;

	return 0;
}

/**
 * pvrdma_create_qp - create queue pair
 * @pd: protection domain
 * @init_attr: queue pair attributes
 * @udata: user data
 *
 * @return: the ib_qp pointer on success, otherwise returns an errno.
 */
struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
			       struct ib_qp_init_attr *init_attr,
			       struct ib_udata *udata)
{
	struct pvrdma_qp *qp = NULL;
	struct pvrdma_dev *dev = to_vdev(pd->device);
	union pvrdma_cmd_req req;
	union pvrdma_cmd_resp rsp;
	struct pvrdma_cmd_create_qp *cmd = &req.create_qp;
	struct pvrdma_cmd_create_qp_resp *resp = &rsp.create_qp_resp;
	struct pvrdma_create_qp ucmd;
	unsigned long flags;
	int ret;

	if (init_attr->create_flags) {
		dev_warn(&dev->pdev->dev,
			 "invalid create queuepair flags %#x\n",
			 init_attr->create_flags);
		return ERR_PTR(-EINVAL);
	}

	if (init_attr->qp_type != IB_QPT_RC &&
	    init_attr->qp_type != IB_QPT_UD &&
	    init_attr->qp_type != IB_QPT_GSI) {
		dev_warn(&dev->pdev->dev, "queuepair type %d not supported\n",
			 init_attr->qp_type);
		return ERR_PTR(-EINVAL);
	}

	if (!atomic_add_unless(&dev->num_qps, 1, dev->dsr->caps.max_qp))
		return ERR_PTR(-ENOMEM);

	switch (init_attr->qp_type) {
	case IB_QPT_GSI:
		if (init_attr->port_num == 0 ||
		    init_attr->port_num > pd->device->phys_port_cnt ||
		    udata) {
			dev_warn(&dev->pdev->dev, "invalid queuepair attrs\n");
			ret = -EINVAL;
			goto err_qp;
		}
		/* fall through */
	case IB_QPT_RC:
	case IB_QPT_UD:
		qp = kzalloc(sizeof(*qp), GFP_KERNEL);
		if (!qp) {
			ret = -ENOMEM;
			goto err_qp;
		}

		spin_lock_init(&qp->sq.lock);
		spin_lock_init(&qp->rq.lock);
		mutex_init(&qp->mutex);
		atomic_set(&qp->refcnt, 1);
		init_waitqueue_head(&qp->wait);

		qp->state = IB_QPS_RESET;

		if (pd->uobject && udata) {
			dev_dbg(&dev->pdev->dev,
				"create queuepair from user space\n");

			if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
				ret = -EFAULT;
				goto err_qp;
			}

			/* set qp->sq.wqe_cnt, shift, buf_size.. */
			qp->rumem = ib_umem_get(pd->uobject->context,
						ucmd.rbuf_addr,
						ucmd.rbuf_size, 0, 0);
			if (IS_ERR(qp->rumem)) {
				ret = PTR_ERR(qp->rumem);
				goto err_qp;
			}

			qp->sumem = ib_umem_get(pd->uobject->context,
						ucmd.sbuf_addr,
						ucmd.sbuf_size, 0, 0);
			if (IS_ERR(qp->sumem)) {
				ib_umem_release(qp->rumem);
				ret = PTR_ERR(qp->sumem);
				goto err_qp;
			}

			qp->npages_send = ib_umem_page_count(qp->sumem);
			qp->npages_recv = ib_umem_page_count(qp->rumem);
			qp->npages = qp->npages_send + qp->npages_recv;
		} else {
			qp->is_kernel = true;

			ret = pvrdma_set_sq_size(to_vdev(pd->device),
						 &init_attr->cap,
						 init_attr->qp_type, qp);
			if (ret)
				goto err_qp;

			ret = pvrdma_set_rq_size(to_vdev(pd->device),
						 &init_attr->cap, qp);
			if (ret)
				goto err_qp;

			qp->npages = qp->npages_send + qp->npages_recv;

			/* Skip header page. */
			qp->sq.offset = PAGE_SIZE;

			/* Recv queue pages are after send pages. */
			qp->rq.offset = qp->npages_send * PAGE_SIZE;
		}

		if (qp->npages < 0 || qp->npages > PVRDMA_PAGE_DIR_MAX_PAGES) {
			dev_warn(&dev->pdev->dev,
				 "overflow pages in queuepair\n");
			ret = -EINVAL;
			goto err_umem;
		}

		ret = pvrdma_page_dir_init(dev, &qp->pdir, qp->npages,
					   qp->is_kernel);
		if (ret) {
			dev_warn(&dev->pdev->dev,
				 "could not allocate page directory\n");
			goto err_umem;
		}

		if (!qp->is_kernel) {
			pvrdma_page_dir_insert_umem(&qp->pdir, qp->sumem, 0);
			pvrdma_page_dir_insert_umem(&qp->pdir, qp->rumem,
						    qp->npages_send);
		} else {
			/* Ring state is always the first page. */
			qp->sq.ring = qp->pdir.pages[0];
			qp->rq.ring = &qp->sq.ring[1];
		}
		break;
	default:
		ret = -EINVAL;
		goto err_qp;
	}

	/* Not supported */
	init_attr->cap.max_inline_data = 0;

	memset(cmd, 0, sizeof(*cmd));
	cmd->hdr.cmd = PVRDMA_CMD_CREATE_QP;
	cmd->pd_handle = to_vpd(pd)->pd_handle;
	cmd->send_cq_handle = to_vcq(init_attr->send_cq)->cq_handle;
	cmd->recv_cq_handle = to_vcq(init_attr->recv_cq)->cq_handle;
	cmd->max_send_wr = init_attr->cap.max_send_wr;
	cmd->max_recv_wr = init_attr->cap.max_recv_wr;
	cmd->max_send_sge = init_attr->cap.max_send_sge;
	cmd->max_recv_sge = init_attr->cap.max_recv_sge;
	cmd->max_inline_data = init_attr->cap.max_inline_data;
	cmd->sq_sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) ? 1 : 0;
	cmd->qp_type = ib_qp_type_to_pvrdma(init_attr->qp_type);
	cmd->access_flags = IB_ACCESS_LOCAL_WRITE;
	cmd->total_chunks = qp->npages;
	cmd->send_chunks = qp->npages_send - 1;
	cmd->pdir_dma = qp->pdir.dir_dma;

	dev_dbg(&dev->pdev->dev, "create queuepair with %d, %d, %d, %d\n",
		cmd->max_send_wr, cmd->max_recv_wr, cmd->max_send_sge,
		cmd->max_recv_sge);

	ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_QP_RESP);
	if (ret < 0) {
		dev_warn(&dev->pdev->dev,
			 "could not create queuepair, error: %d\n", ret);
		goto err_pdir;
	}

	/* max_send_wr/_recv_wr/_send_sge/_recv_sge/_inline_data */
	qp->qp_handle = resp->qpn;
	qp->port = init_attr->port_num;
	qp->ibqp.qp_num = resp->qpn;
	spin_lock_irqsave(&dev->qp_tbl_lock, flags);
	dev->qp_tbl[qp->qp_handle % dev->dsr->caps.max_qp] = qp;
	spin_unlock_irqrestore(&dev->qp_tbl_lock, flags);

	return &qp->ibqp;

err_pdir:
	pvrdma_page_dir_cleanup(dev, &qp->pdir);
err_umem:
	if (pd->uobject && udata) {
		if (qp->rumem)
			ib_umem_release(qp->rumem);
		if (qp->sumem)
			ib_umem_release(qp->sumem);
	}
err_qp:
	kfree(qp);
	atomic_dec(&dev->num_qps);

	return ERR_PTR(ret);
}
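
/*
 * Final QP teardown, called from pvrdma_destroy_qp(). With both CQ locks
 * held, completions still queued for this QP are flushed and the QP is
 * removed from the device's qp_tbl so the interrupt path can no longer look
 * it up. The creation reference is then dropped and we wait for any remaining
 * references to go away before releasing the page directory and the QP
 * itself.
 */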
static void pvrdma_free_qp(struct pvrdma_qp *qp)
{
	struct pvrdma_dev *dev = to_vdev(qp->ibqp.device);
	struct pvrdma_cq *scq;
	struct pvrdma_cq *rcq;
	unsigned long flags, scq_flags, rcq_flags;

	/* In case cq is polling */
	get_cqs(qp, &scq, &rcq);
	pvrdma_lock_cqs(scq, rcq, &scq_flags, &rcq_flags);

	_pvrdma_flush_cqe(qp, scq);
	if (scq != rcq)
		_pvrdma_flush_cqe(qp, rcq);

	spin_lock_irqsave(&dev->qp_tbl_lock, flags);
	dev->qp_tbl[qp->qp_handle] = NULL;
	spin_unlock_irqrestore(&dev->qp_tbl_lock, flags);

	pvrdma_unlock_cqs(scq, rcq, &scq_flags, &rcq_flags);

	atomic_dec(&qp->refcnt);
	wait_event(qp->wait, !atomic_read(&qp->refcnt));

	pvrdma_page_dir_cleanup(dev, &qp->pdir);

	kfree(qp);

	atomic_dec(&dev->num_qps);
}

/**
 * pvrdma_destroy_qp - destroy a queue pair
 * @qp: the queue pair to destroy
 *
 * @return: 0 on success.
 */
int pvrdma_destroy_qp(struct ib_qp *qp)
{
	struct pvrdma_qp *vqp = to_vqp(qp);
	union pvrdma_cmd_req req;
	struct pvrdma_cmd_destroy_qp *cmd = &req.destroy_qp;
	int ret;

	memset(cmd, 0, sizeof(*cmd));
	cmd->hdr.cmd = PVRDMA_CMD_DESTROY_QP;
	cmd->qp_handle = vqp->qp_handle;

	ret = pvrdma_cmd_post(to_vdev(qp->device), &req, NULL, 0);
	if (ret < 0)
		dev_warn(&to_vdev(qp->device)->pdev->dev,
			 "destroy queuepair failed, error: %d\n", ret);

	pvrdma_free_qp(vqp);

	return 0;
}

/**
 * pvrdma_modify_qp - modify queue pair attributes
 * @ibqp: the queue pair
 * @attr: the new queue pair's attributes
 * @attr_mask: attributes mask
 * @udata: user data
 *
 * @returns 0 on success, otherwise returns an errno.
 */
int pvrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
		     int attr_mask, struct ib_udata *udata)
{
	struct pvrdma_dev *dev = to_vdev(ibqp->device);
	struct pvrdma_qp *qp = to_vqp(ibqp);
	union pvrdma_cmd_req req;
	union pvrdma_cmd_resp rsp;
	struct pvrdma_cmd_modify_qp *cmd = &req.modify_qp;
	int cur_state, next_state;
	int ret;

	/* Sanity checking. Should need lock here */
	mutex_lock(&qp->mutex);
	cur_state = (attr_mask & IB_QP_CUR_STATE) ? attr->cur_qp_state :
		qp->state;
	next_state = (attr_mask & IB_QP_STATE) ? attr->qp_state : cur_state;

	if (!ib_modify_qp_is_ok(cur_state, next_state, ibqp->qp_type,
				attr_mask, IB_LINK_LAYER_ETHERNET)) {
		ret = -EINVAL;
		goto out;
	}

	if (attr_mask & IB_QP_PORT) {
		if (attr->port_num == 0 ||
		    attr->port_num > ibqp->device->phys_port_cnt) {
			ret = -EINVAL;
			goto out;
		}
	}

	if (attr_mask & IB_QP_MIN_RNR_TIMER) {
		if (attr->min_rnr_timer > 31) {
			ret = -EINVAL;
			goto out;
		}
	}

	if (attr_mask & IB_QP_PKEY_INDEX) {
		if (attr->pkey_index >= dev->dsr->caps.max_pkeys) {
			ret = -EINVAL;
			goto out;
		}
	}

	if (attr_mask & IB_QP_QKEY)
		qp->qkey = attr->qkey;

	if (cur_state == next_state && cur_state == IB_QPS_RESET) {
		ret = 0;
		goto out;
	}

	qp->state = next_state;
	memset(cmd, 0, sizeof(*cmd));
	cmd->hdr.cmd = PVRDMA_CMD_MODIFY_QP;
	cmd->qp_handle = qp->qp_handle;
	cmd->attr_mask = ib_qp_attr_mask_to_pvrdma(attr_mask);
	cmd->attrs.qp_state = ib_qp_state_to_pvrdma(attr->qp_state);
	cmd->attrs.cur_qp_state =
		ib_qp_state_to_pvrdma(attr->cur_qp_state);
	cmd->attrs.path_mtu = ib_mtu_to_pvrdma(attr->path_mtu);
	cmd->attrs.path_mig_state =
		ib_mig_state_to_pvrdma(attr->path_mig_state);
	cmd->attrs.qkey = attr->qkey;
	cmd->attrs.rq_psn = attr->rq_psn;
	cmd->attrs.sq_psn = attr->sq_psn;
	cmd->attrs.dest_qp_num = attr->dest_qp_num;
	cmd->attrs.qp_access_flags =
		ib_access_flags_to_pvrdma(attr->qp_access_flags);
	cmd->attrs.pkey_index = attr->pkey_index;
	cmd->attrs.alt_pkey_index = attr->alt_pkey_index;
	cmd->attrs.en_sqd_async_notify = attr->en_sqd_async_notify;
	cmd->attrs.sq_draining = attr->sq_draining;
	cmd->attrs.max_rd_atomic = attr->max_rd_atomic;
	cmd->attrs.max_dest_rd_atomic = attr->max_dest_rd_atomic;
	cmd->attrs.min_rnr_timer = attr->min_rnr_timer;
	cmd->attrs.port_num = attr->port_num;
	cmd->attrs.timeout = attr->timeout;
	cmd->attrs.retry_cnt = attr->retry_cnt;
	cmd->attrs.rnr_retry = attr->rnr_retry;
	cmd->attrs.alt_port_num = attr->alt_port_num;
	cmd->attrs.alt_timeout = attr->alt_timeout;
	ib_qp_cap_to_pvrdma(&cmd->attrs.cap, &attr->cap);
	ib_ah_attr_to_pvrdma(&cmd->attrs.ah_attr, &attr->ah_attr);
	ib_ah_attr_to_pvrdma(&cmd->attrs.alt_ah_attr, &attr->alt_ah_attr);

	ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_MODIFY_QP_RESP);
	if (ret < 0) {
		dev_warn(&dev->pdev->dev,
			 "could not modify queuepair, error: %d\n", ret);
	} else if (rsp.hdr.err > 0) {
		dev_warn(&dev->pdev->dev,
			 "cannot modify queuepair, error: %d\n", rsp.hdr.err);
		ret = -EINVAL;
	}

	if (ret == 0 && next_state == IB_QPS_RESET)
		pvrdma_reset_qp(qp);

out:
	mutex_unlock(&qp->mutex);

	return ret;
}
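
/*
 * WQE slots live in the QP's page directory: sq.offset and rq.offset are the
 * byte offsets of the first send/recv WQE (for kernel QPs the send queue
 * starts after the ring-state page and the recv queue after the send pages),
 * and pvrdma_page_dir_get_ptr() maps a byte offset to a pointer within the
 * directory's pages.
 */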
static inline void *get_sq_wqe(struct pvrdma_qp *qp, int n)
{
	return pvrdma_page_dir_get_ptr(&qp->pdir,
				       qp->sq.offset + n * qp->sq.wqe_size);
}

static inline void *get_rq_wqe(struct pvrdma_qp *qp, int n)
{
	return pvrdma_page_dir_get_ptr(&qp->pdir,
				       qp->rq.offset + n * qp->rq.wqe_size);
}

static int set_reg_seg(struct pvrdma_sq_wqe_hdr *wqe_hdr, struct ib_reg_wr *wr)
{
	struct pvrdma_user_mr *mr = to_vmr(wr->mr);

	wqe_hdr->wr.fast_reg.iova_start = mr->ibmr.iova;
	wqe_hdr->wr.fast_reg.pl_pdir_dma = mr->pdir.dir_dma;
	wqe_hdr->wr.fast_reg.page_shift = mr->page_shift;
	wqe_hdr->wr.fast_reg.page_list_len = mr->npages;
	wqe_hdr->wr.fast_reg.length = mr->ibmr.length;
	wqe_hdr->wr.fast_reg.access_flags = wr->access;
	wqe_hdr->wr.fast_reg.rkey = wr->key;

	return pvrdma_page_dir_insert_page_list(&mr->pdir, mr->pages,
						mr->npages);
}
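
/*
 * Posting protocol shared by the send and receive paths below: "index" is a
 * local snapshot of the producer tail, the WQE header and its SGE list are
 * written into the shared ring first, smp_wmb() orders those stores before
 * the producer index update, and the doorbell (UAR write) is rung once after
 * the whole WR chain has been posted rather than once per WQE.
 */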
/**
 * pvrdma_post_send - post send work request entries on a QP
 * @ibqp: the QP
 * @wr: work request list to post
 * @bad_wr: the first bad WR returned
 *
 * @return: 0 on success, otherwise errno returned.
 */
int pvrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
		     struct ib_send_wr **bad_wr)
{
	struct pvrdma_qp *qp = to_vqp(ibqp);
	struct pvrdma_dev *dev = to_vdev(ibqp->device);
	unsigned long flags;
	struct pvrdma_sq_wqe_hdr *wqe_hdr;
	struct pvrdma_sge *sge;
	int i, index;
	int nreq;
	int ret;

	/*
	 * In states lower than RTS, we can fail immediately. In other states,
	 * just post and let the device figure it out.
	 */
	if (qp->state < IB_QPS_RTS) {
		*bad_wr = wr;
		return -EINVAL;
	}

	spin_lock_irqsave(&qp->sq.lock, flags);

	index = pvrdma_idx(&qp->sq.ring->prod_tail, qp->sq.wqe_cnt);
	for (nreq = 0; wr; nreq++, wr = wr->next) {
		unsigned int tail;

		if (unlikely(!pvrdma_idx_ring_has_space(
				qp->sq.ring, qp->sq.wqe_cnt, &tail))) {
			dev_warn_ratelimited(&dev->pdev->dev,
					     "send queue is full\n");
			*bad_wr = wr;
			ret = -ENOMEM;
			goto out;
		}

		if (unlikely(wr->num_sge > qp->sq.max_sg || wr->num_sge < 0)) {
			dev_warn_ratelimited(&dev->pdev->dev,
					     "send SGE overflow\n");
			*bad_wr = wr;
			ret = -EINVAL;
			goto out;
		}

		if (unlikely(wr->opcode < 0)) {
			dev_warn_ratelimited(&dev->pdev->dev,
					     "invalid send opcode\n");
			*bad_wr = wr;
			ret = -EINVAL;
			goto out;
		}

		/*
		 * Only support UD, RC.
		 * Need to check opcode table for thorough checking.
		 * opcode                      _UD  _UC  _RC
		 * _SEND                        x    x    x
		 * _SEND_WITH_IMM               x    x    x
		 * _RDMA_WRITE                       x    x
		 * _RDMA_WRITE_WITH_IMM              x    x
		 * _LOCAL_INV                        x    x
		 * _SEND_WITH_INV                    x    x
		 * _RDMA_READ                             x
		 * _ATOMIC_CMP_AND_SWP                    x
		 * _ATOMIC_FETCH_AND_ADD                  x
		 * _MASK_ATOMIC_CMP_AND_SWP               x
		 * _MASK_ATOMIC_FETCH_AND_ADD             x
		 * _REG_MR                                x
		 *
		 */
		if (qp->ibqp.qp_type != IB_QPT_UD &&
		    qp->ibqp.qp_type != IB_QPT_RC &&
		    wr->opcode != IB_WR_SEND) {
			dev_warn_ratelimited(&dev->pdev->dev,
					     "unsupported queuepair type\n");
			*bad_wr = wr;
			ret = -EINVAL;
			goto out;
		} else if (qp->ibqp.qp_type == IB_QPT_UD ||
			   qp->ibqp.qp_type == IB_QPT_GSI) {
			if (wr->opcode != IB_WR_SEND &&
			    wr->opcode != IB_WR_SEND_WITH_IMM) {
				dev_warn_ratelimited(&dev->pdev->dev,
						     "invalid send opcode\n");
				*bad_wr = wr;
				ret = -EINVAL;
				goto out;
			}
		}

		wqe_hdr = (struct pvrdma_sq_wqe_hdr *)get_sq_wqe(qp, index);
		memset(wqe_hdr, 0, sizeof(*wqe_hdr));
		wqe_hdr->wr_id = wr->wr_id;
		wqe_hdr->num_sge = wr->num_sge;
		wqe_hdr->opcode = ib_wr_opcode_to_pvrdma(wr->opcode);
		wqe_hdr->send_flags = ib_send_flags_to_pvrdma(wr->send_flags);
		if (wr->opcode == IB_WR_SEND_WITH_IMM ||
		    wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
			wqe_hdr->ex.imm_data = wr->ex.imm_data;

		switch (qp->ibqp.qp_type) {
		case IB_QPT_GSI:
		case IB_QPT_UD:
			if (unlikely(!ud_wr(wr)->ah)) {
				dev_warn_ratelimited(&dev->pdev->dev,
						     "invalid address handle\n");
				*bad_wr = wr;
				ret = -EINVAL;
				goto out;
			}

			/*
			 * Use qkey from qp context if high order bit set,
			 * otherwise from work request.
			 */
			wqe_hdr->wr.ud.remote_qpn = ud_wr(wr)->remote_qpn;
			wqe_hdr->wr.ud.remote_qkey =
				ud_wr(wr)->remote_qkey & 0x80000000 ?
				qp->qkey : ud_wr(wr)->remote_qkey;
			wqe_hdr->wr.ud.av = to_vah(ud_wr(wr)->ah)->av;

			break;
		case IB_QPT_RC:
			switch (wr->opcode) {
			case IB_WR_RDMA_READ:
			case IB_WR_RDMA_WRITE:
			case IB_WR_RDMA_WRITE_WITH_IMM:
				wqe_hdr->wr.rdma.remote_addr =
					rdma_wr(wr)->remote_addr;
				wqe_hdr->wr.rdma.rkey = rdma_wr(wr)->rkey;
				break;
			case IB_WR_LOCAL_INV:
			case IB_WR_SEND_WITH_INV:
				wqe_hdr->ex.invalidate_rkey =
					wr->ex.invalidate_rkey;
				break;
			case IB_WR_ATOMIC_CMP_AND_SWP:
			case IB_WR_ATOMIC_FETCH_AND_ADD:
				wqe_hdr->wr.atomic.remote_addr =
					atomic_wr(wr)->remote_addr;
				wqe_hdr->wr.atomic.rkey = atomic_wr(wr)->rkey;
				wqe_hdr->wr.atomic.compare_add =
					atomic_wr(wr)->compare_add;
				if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP)
					wqe_hdr->wr.atomic.swap =
						atomic_wr(wr)->swap;
				break;
			case IB_WR_REG_MR:
				ret = set_reg_seg(wqe_hdr, reg_wr(wr));
				if (ret < 0) {
					dev_warn_ratelimited(&dev->pdev->dev,
							     "Failed to set fast register work request\n");
					*bad_wr = wr;
					goto out;
				}
				break;
			default:
				break;
			}

			break;
		default:
			dev_warn_ratelimited(&dev->pdev->dev,
					     "invalid queuepair type\n");
			ret = -EINVAL;
			*bad_wr = wr;
			goto out;
		}

		sge = (struct pvrdma_sge *)(wqe_hdr + 1);
		for (i = 0; i < wr->num_sge; i++) {
			/* Need to check wqe_size 0 or max size */
			sge->addr = wr->sg_list[i].addr;
			sge->length = wr->sg_list[i].length;
			sge->lkey = wr->sg_list[i].lkey;
			sge++;
		}

		/* Make sure wqe is written before index update */
		smp_wmb();

		index++;
		if (unlikely(index >= qp->sq.wqe_cnt))
			index = 0;
		/* Update shared sq ring */
		pvrdma_idx_ring_inc(&qp->sq.ring->prod_tail,
				    qp->sq.wqe_cnt);
	}

	ret = 0;

out:
	spin_unlock_irqrestore(&qp->sq.lock, flags);

	if (!ret)
		pvrdma_write_uar_qp(dev, PVRDMA_UAR_QP_SEND | qp->qp_handle);

	return ret;
}

/**
 * pvrdma_post_recv - post receive work request entries on a QP
 * @ibqp: the QP
 * @wr: the work request list to post
 * @bad_wr: the first bad WR returned
 *
 * @return: 0 on success, otherwise errno returned.
 */
int pvrdma_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
		     struct ib_recv_wr **bad_wr)
{
	struct pvrdma_dev *dev = to_vdev(ibqp->device);
	unsigned long flags;
	struct pvrdma_qp *qp = to_vqp(ibqp);
	struct pvrdma_rq_wqe_hdr *wqe_hdr;
	struct pvrdma_sge *sge;
	int index, nreq;
	int ret = 0;
	int i;

	/*
	 * In the RESET state, we can fail immediately. For other states,
	 * just post and let the device figure it out.
	 */
	if (qp->state == IB_QPS_RESET) {
		*bad_wr = wr;
		return -EINVAL;
	}

	spin_lock_irqsave(&qp->rq.lock, flags);

	index = pvrdma_idx(&qp->rq.ring->prod_tail, qp->rq.wqe_cnt);
	for (nreq = 0; wr; nreq++, wr = wr->next) {
		unsigned int tail;

		if (unlikely(wr->num_sge > qp->rq.max_sg ||
			     wr->num_sge < 0)) {
			ret = -EINVAL;
			*bad_wr = wr;
			dev_warn_ratelimited(&dev->pdev->dev,
					     "recv SGE overflow\n");
			goto out;
		}

		if (unlikely(!pvrdma_idx_ring_has_space(
				qp->rq.ring, qp->rq.wqe_cnt, &tail))) {
			ret = -ENOMEM;
			*bad_wr = wr;
			dev_warn_ratelimited(&dev->pdev->dev,
					     "recv queue full\n");
			goto out;
		}

		wqe_hdr = (struct pvrdma_rq_wqe_hdr *)get_rq_wqe(qp, index);
		wqe_hdr->wr_id = wr->wr_id;
		wqe_hdr->num_sge = wr->num_sge;
		wqe_hdr->total_len = 0;

		sge = (struct pvrdma_sge *)(wqe_hdr + 1);
		for (i = 0; i < wr->num_sge; i++) {
			sge->addr = wr->sg_list[i].addr;
			sge->length = wr->sg_list[i].length;
			sge->lkey = wr->sg_list[i].lkey;
			sge++;
		}

		/* Make sure wqe is written before index update */
		smp_wmb();

		index++;
		if (unlikely(index >= qp->rq.wqe_cnt))
			index = 0;
		/* Update shared rq ring */
		pvrdma_idx_ring_inc(&qp->rq.ring->prod_tail,
				    qp->rq.wqe_cnt);
	}

	spin_unlock_irqrestore(&qp->rq.lock, flags);

	pvrdma_write_uar_qp(dev, PVRDMA_UAR_QP_RECV | qp->qp_handle);

	return ret;

out:
	spin_unlock_irqrestore(&qp->rq.lock, flags);

	return ret;
}
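
/*
 * pvrdma_query_qp() below issues a device command and copies the returned
 * attributes into the caller's ib_qp_attr; the cached qp->state is refreshed
 * from the device's answer, while QPs still in RESET are answered locally
 * without a device round trip.
 */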
/**
 * pvrdma_query_qp - query a queue pair's attributes
 * @ibqp: the queue pair to query
 * @attr: the queue pair's attributes
 * @attr_mask: attributes mask
 * @init_attr: initial queue pair attributes
 *
 * @returns 0 on success, otherwise returns an errno.
 */
int pvrdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
		    int attr_mask, struct ib_qp_init_attr *init_attr)
{
	struct pvrdma_dev *dev = to_vdev(ibqp->device);
	struct pvrdma_qp *qp = to_vqp(ibqp);
	union pvrdma_cmd_req req;
	union pvrdma_cmd_resp rsp;
	struct pvrdma_cmd_query_qp *cmd = &req.query_qp;
	struct pvrdma_cmd_query_qp_resp *resp = &rsp.query_qp_resp;
	int ret = 0;

	mutex_lock(&qp->mutex);

	if (qp->state == IB_QPS_RESET) {
		attr->qp_state = IB_QPS_RESET;
		goto out;
	}

	memset(cmd, 0, sizeof(*cmd));
	cmd->hdr.cmd = PVRDMA_CMD_QUERY_QP;
	cmd->qp_handle = qp->qp_handle;
	cmd->attr_mask = ib_qp_attr_mask_to_pvrdma(attr_mask);

	ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_QUERY_QP_RESP);
	if (ret < 0) {
		dev_warn(&dev->pdev->dev,
			 "could not query queuepair, error: %d\n", ret);
		goto out;
	}

	attr->qp_state = pvrdma_qp_state_to_ib(resp->attrs.qp_state);
	attr->cur_qp_state =
		pvrdma_qp_state_to_ib(resp->attrs.cur_qp_state);
	attr->path_mtu = pvrdma_mtu_to_ib(resp->attrs.path_mtu);
	attr->path_mig_state =
		pvrdma_mig_state_to_ib(resp->attrs.path_mig_state);
	attr->qkey = resp->attrs.qkey;
	attr->rq_psn = resp->attrs.rq_psn;
	attr->sq_psn = resp->attrs.sq_psn;
	attr->dest_qp_num = resp->attrs.dest_qp_num;
	attr->qp_access_flags =
		pvrdma_access_flags_to_ib(resp->attrs.qp_access_flags);
	attr->pkey_index = resp->attrs.pkey_index;
	attr->alt_pkey_index = resp->attrs.alt_pkey_index;
	attr->en_sqd_async_notify = resp->attrs.en_sqd_async_notify;
	attr->sq_draining = resp->attrs.sq_draining;
	attr->max_rd_atomic = resp->attrs.max_rd_atomic;
	attr->max_dest_rd_atomic = resp->attrs.max_dest_rd_atomic;
	attr->min_rnr_timer = resp->attrs.min_rnr_timer;
	attr->port_num = resp->attrs.port_num;
	attr->timeout = resp->attrs.timeout;
	attr->retry_cnt = resp->attrs.retry_cnt;
	attr->rnr_retry = resp->attrs.rnr_retry;
	attr->alt_port_num = resp->attrs.alt_port_num;
	attr->alt_timeout = resp->attrs.alt_timeout;
	pvrdma_qp_cap_to_ib(&attr->cap, &resp->attrs.cap);
	pvrdma_ah_attr_to_ib(&attr->ah_attr, &resp->attrs.ah_attr);
	pvrdma_ah_attr_to_ib(&attr->alt_ah_attr, &resp->attrs.alt_ah_attr);

	qp->state = attr->qp_state;

	ret = 0;

out:
	attr->cur_qp_state = attr->qp_state;

	init_attr->event_handler = qp->ibqp.event_handler;
	init_attr->qp_context = qp->ibqp.qp_context;
	init_attr->send_cq = qp->ibqp.send_cq;
	init_attr->recv_cq = qp->ibqp.recv_cq;
	init_attr->srq = qp->ibqp.srq;
	init_attr->xrcd = NULL;
	init_attr->cap = attr->cap;
	init_attr->sq_sig_type = 0;
	init_attr->qp_type = qp->ibqp.qp_type;
	init_attr->create_flags = 0;
	init_attr->port_num = qp->port;

	mutex_unlock(&qp->mutex);
	return ret;
}