/*
 * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "iw_cxgb4.h"
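
/*
 * Post a FW_RI_RES_WR (op RESET) on the control queue to tear down the
 * hardware CQ, wait for the firmware reply, then release the host-side
 * resources: software shadow queue, DMA-coherent queue memory and CQID.
 */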
static int destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
		      struct c4iw_dev_ucontext *uctx, struct sk_buff *skb,
		      struct c4iw_wr_wait *wr_waitp)
{
	struct fw_ri_res_wr *res_wr;
	struct fw_ri_res *res;
	int wr_len;
	int ret;

	wr_len = sizeof *res_wr + sizeof *res;
	set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);

	res_wr = __skb_put_zero(skb, wr_len);
	res_wr->op_nres = cpu_to_be32(
			FW_WR_OP_V(FW_RI_RES_WR) |
			FW_RI_RES_WR_NRES_V(1) |
			FW_WR_COMPL_F);
	res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
	res_wr->cookie = (uintptr_t)wr_waitp;
	res = res_wr->res;
	res->u.cq.restype = FW_RI_RES_TYPE_CQ;
	res->u.cq.op = FW_RI_RES_OP_RESET;
	res->u.cq.iqid = cpu_to_be32(cq->cqid);

	c4iw_init_wr_wait(wr_waitp);
	ret = c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, 0, __func__);

	kfree(cq->sw_queue);
	dma_free_coherent(&(rdev->lldi.pdev->dev),
			  cq->memsize, cq->queue,
			  dma_unmap_addr(cq, mapping));
	c4iw_put_cqid(rdev, cq->cqid, uctx);
	return ret;
}
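
/*
 * Allocate a CQID and the host memory for the CQ (a software shadow queue
 * for kernel users plus a DMA-coherent ring for the hardware), post a
 * FW_RI_RES_WR (op WRITE) describing the ingress queue to the firmware, and
 * map the CQ's BAR2 doorbell region.
 */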
static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
		     struct c4iw_dev_ucontext *uctx,
		     struct c4iw_wr_wait *wr_waitp)
{
	struct fw_ri_res_wr *res_wr;
	struct fw_ri_res *res;
	int wr_len;
	int user = (uctx != &rdev->uctx);
	int ret;
	struct sk_buff *skb;

	cq->cqid = c4iw_get_cqid(rdev, uctx);
	if (!cq->cqid) {
		ret = -ENOMEM;
		goto err1;
	}

	if (!user) {
		cq->sw_queue = kzalloc(cq->memsize, GFP_KERNEL);
		if (!cq->sw_queue) {
			ret = -ENOMEM;
			goto err2;
		}
	}
	cq->queue = dma_alloc_coherent(&rdev->lldi.pdev->dev, cq->memsize,
				       &cq->dma_addr, GFP_KERNEL);
	if (!cq->queue) {
		ret = -ENOMEM;
		goto err3;
	}
	dma_unmap_addr_set(cq, mapping, cq->dma_addr);
	memset(cq->queue, 0, cq->memsize);

	/* build fw_ri_res_wr */
	wr_len = sizeof *res_wr + sizeof *res;

	skb = alloc_skb(wr_len, GFP_KERNEL);
	if (!skb) {
		ret = -ENOMEM;
		goto err4;
	}
	set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);

	res_wr = __skb_put_zero(skb, wr_len);
	res_wr->op_nres = cpu_to_be32(
			FW_WR_OP_V(FW_RI_RES_WR) |
			FW_RI_RES_WR_NRES_V(1) |
			FW_WR_COMPL_F);
	res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
	res_wr->cookie = (uintptr_t)wr_waitp;
	res = res_wr->res;
	res->u.cq.restype = FW_RI_RES_TYPE_CQ;
	res->u.cq.op = FW_RI_RES_OP_WRITE;
	res->u.cq.iqid = cpu_to_be32(cq->cqid);
	res->u.cq.iqandst_to_iqandstindex = cpu_to_be32(
			FW_RI_RES_WR_IQANUS_V(0) |
			FW_RI_RES_WR_IQANUD_V(1) |
			FW_RI_RES_WR_IQANDST_F |
			FW_RI_RES_WR_IQANDSTINDEX_V(
				rdev->lldi.ciq_ids[cq->vector]));
	res->u.cq.iqdroprss_to_iqesize = cpu_to_be16(
			FW_RI_RES_WR_IQDROPRSS_F |
			FW_RI_RES_WR_IQPCIECH_V(2) |
			FW_RI_RES_WR_IQINTCNTTHRESH_V(0) |
			FW_RI_RES_WR_IQO_F |
			FW_RI_RES_WR_IQESIZE_V(1));
	res->u.cq.iqsize = cpu_to_be16(cq->size);
	res->u.cq.iqaddr = cpu_to_be64(cq->dma_addr);

	c4iw_init_wr_wait(wr_waitp);
	ret = c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, 0, __func__);
	if (ret)
		goto err4;

	cq->gen = 1;
	cq->gts = rdev->lldi.gts_reg;
	cq->rdev = rdev;

	cq->bar2_va = c4iw_bar2_addrs(rdev, cq->cqid, T4_BAR2_QTYPE_INGRESS,
				      &cq->bar2_qid,
				      user ? &cq->bar2_pa : NULL);
	if (user && !cq->bar2_pa) {
		pr_warn("%s: cqid %u not in BAR2 range\n",
			pci_name(rdev->lldi.pdev), cq->cqid);
		ret = -EINVAL;
		goto err4;
	}
	return 0;
err4:
	dma_free_coherent(&rdev->lldi.pdev->dev, cq->memsize, cq->queue,
			  dma_unmap_addr(cq, mapping));
err3:
	kfree(cq->sw_queue);
err2:
	c4iw_put_cqid(rdev, cq->cqid, uctx);
err1:
	return ret;
}
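
/*
 * Synthesize a flush-status (T4_ERR_SWFLUSH) receive CQE for the QP and
 * push it onto the software CQ.
 */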
static void insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq)
{
	struct t4_cqe cqe;

	pr_debug("wq %p cq %p sw_cidx %u sw_pidx %u\n",
		 wq, cq, cq->sw_cidx, cq->sw_pidx);
	memset(&cqe, 0, sizeof(cqe));
	cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) |
				 CQE_OPCODE_V(FW_RI_SEND) |
				 CQE_TYPE_V(0) |
				 CQE_SWCQE_V(1) |
				 CQE_QPID_V(wq->sq.qid));
	cqe.bits_type_ts = cpu_to_be64(CQE_GENBIT_V((u64)cq->gen));
	cq->sw_queue[cq->sw_pidx] = cqe;
	t4_swcq_produce(cq);
}
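
/*
 * Flush every RQ work request still in use, minus the ones the caller has
 * already counted as completed, by inserting software flush CQEs; return
 * how many were flushed.
 */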
int c4iw_flush_rq(struct t4_wq *wq, struct t4_cq *cq, int count)
{
	int flushed = 0;
	int in_use = wq->rq.in_use - count;

	pr_debug("wq %p cq %p rq.in_use %u skip count %u\n",
		 wq, cq, wq->rq.in_use, count);
	while (in_use--) {
		insert_recv_cqe(wq, cq);
		flushed++;
	}
	return flushed;
}
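
/*
 * Synthesize a flush-status send CQE for the given software SQ entry and
 * push it onto the software CQ.
 */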
static void insert_sq_cqe(struct t4_wq *wq, struct t4_cq *cq,
			  struct t4_swsqe *swcqe)
{
	struct t4_cqe cqe;

	pr_debug("wq %p cq %p sw_cidx %u sw_pidx %u\n",
		 wq, cq, cq->sw_cidx, cq->sw_pidx);
	memset(&cqe, 0, sizeof(cqe));
	cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) |
				 CQE_OPCODE_V(swcqe->opcode) |
				 CQE_TYPE_V(1) |
				 CQE_SWCQE_V(1) |
				 CQE_QPID_V(wq->sq.qid));
	CQE_WRID_SQ_IDX(&cqe) = swcqe->idx;
	cqe.bits_type_ts = cpu_to_be64(CQE_GENBIT_V((u64)cq->gen));
	cq->sw_queue[cq->sw_pidx] = cqe;
	t4_swcq_produce(cq);
}

static void advance_oldest_read(struct t4_wq *wq);
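
/*
 * Walk the SQ from flush_cidx to pidx, mark each pending work request as
 * flushed and insert a software flush CQE for it, advancing the oldest-read
 * pointer past any flushed READ request.
 */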
int c4iw_flush_sq(struct c4iw_qp *qhp)
{
	int flushed = 0;
	struct t4_wq *wq = &qhp->wq;
	struct c4iw_cq *chp = to_c4iw_cq(qhp->ibqp.send_cq);
	struct t4_cq *cq = &chp->cq;
	int idx;
	struct t4_swsqe *swsqe;

	if (wq->sq.flush_cidx == -1)
		wq->sq.flush_cidx = wq->sq.cidx;
	idx = wq->sq.flush_cidx;
	while (idx != wq->sq.pidx) {
		swsqe = &wq->sq.sw_sq[idx];
		swsqe->flushed = 1;
		insert_sq_cqe(wq, cq, swsqe);
		if (wq->sq.oldest_read == swsqe) {
			advance_oldest_read(wq);
		}
		flushed++;
		if (++idx == wq->sq.size)
			idx = 0;
	}
	wq->sq.flush_cidx += flushed;
	if (wq->sq.flush_cidx >= wq->sq.size)
		wq->sq.flush_cidx -= wq->sq.size;
	return flushed;
}
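
/*
 * Starting at flush_cidx, move completed and signaled SQ CQEs that are now
 * in order into the software CQ; stop at the first signaled entry that has
 * not completed yet.
 */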
static void flush_completed_wrs(struct t4_wq *wq, struct t4_cq *cq)
{
	struct t4_swsqe *swsqe;
	int cidx;

	if (wq->sq.flush_cidx == -1)
		wq->sq.flush_cidx = wq->sq.cidx;
	cidx = wq->sq.flush_cidx;

	while (cidx != wq->sq.pidx) {
		swsqe = &wq->sq.sw_sq[cidx];
		if (!swsqe->signaled) {
			if (++cidx == wq->sq.size)
				cidx = 0;
		} else if (swsqe->complete) {

			/*
			 * Insert this completed cqe into the swcq.
			 */
			pr_debug("moving cqe into swcq sq idx %u cq idx %u\n",
				 cidx, cq->sw_pidx);
			swsqe->cqe.header |= htonl(CQE_SWCQE_V(1));
			cq->sw_queue[cq->sw_pidx] = swsqe->cqe;
			t4_swcq_produce(cq);
			swsqe->flushed = 1;
			if (++cidx == wq->sq.size)
				cidx = 0;
			wq->sq.flush_cidx = cidx;
		} else
			break;
	}
}
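
/*
 * Build a READ_REQ completion in local memory from the hardware READ_RESP
 * CQE and the oldest outstanding read WR, so it can be reported against the
 * SQ instead of the RQ.
 */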
static void create_read_req_cqe(struct t4_wq *wq, struct t4_cqe *hw_cqe,
				struct t4_cqe *read_cqe)
{
	read_cqe->u.scqe.cidx = wq->sq.oldest_read->idx;
	read_cqe->len = htonl(wq->sq.oldest_read->read_len);
	read_cqe->header = htonl(CQE_QPID_V(CQE_QPID(hw_cqe)) |
				 CQE_SWCQE_V(SW_CQE(hw_cqe)) |
				 CQE_OPCODE_V(FW_RI_READ_REQ) |
				 CQE_TYPE_V(1));
	read_cqe->bits_type_ts = hw_cqe->bits_type_ts;
}
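
/*
 * Advance wq->sq.oldest_read to the next outstanding READ_REQ in the SQ, or
 * set it to NULL if there is none.
 */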
static void advance_oldest_read(struct t4_wq *wq)
{
	u32 rptr = wq->sq.oldest_read - wq->sq.sw_sq + 1;

	if (rptr == wq->sq.size)
		rptr = 0;
	while (rptr != wq->sq.pidx) {
		wq->sq.oldest_read = &wq->sq.sw_sq[rptr];

		if (wq->sq.oldest_read->opcode == FW_RI_READ_REQ)
			return;
		if (++rptr == wq->sq.size)
			rptr = 0;
	}
	wq->sq.oldest_read = NULL;
}

/*
 * Move all CQEs from the HWCQ into the SWCQ.
 * Deal with out-of-order and/or completions that complete
 * prior unsignalled WRs.
 */
void c4iw_flush_hw_cq(struct c4iw_cq *chp, struct c4iw_qp *flush_qhp)
{
	struct t4_cqe *hw_cqe, *swcqe, read_cqe;
	struct c4iw_qp *qhp;
	struct t4_swsqe *swsqe;
	int ret;

	pr_debug("cqid 0x%x\n", chp->cq.cqid);
	ret = t4_next_hw_cqe(&chp->cq, &hw_cqe);

	/*
	 * This logic is similar to poll_cq(), but not quite the same
	 * unfortunately.  Need to move pertinent HW CQEs to the SW CQ but
	 * also do any translation magic that poll_cq() normally does.
	 */
	while (!ret) {
		qhp = get_qhp(chp->rhp, CQE_QPID(hw_cqe));

		/*
		 * drop CQEs with no associated QP
		 */
		if (qhp == NULL)
			goto next_cqe;

		if (flush_qhp != qhp) {
			spin_lock(&qhp->lock);

			if (qhp->wq.flushed == 1)
				goto next_cqe;
		}

		if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE)
			goto next_cqe;

		if (CQE_OPCODE(hw_cqe) == FW_RI_READ_RESP) {

			/* If we have reached here because of async
			 * event or other error, and have egress error
			 * then drop
			 */
			if (CQE_TYPE(hw_cqe) == 1)
				goto next_cqe;

			/* drop peer2peer RTR reads.
			 */
			if (CQE_WRID_STAG(hw_cqe) == 1)
				goto next_cqe;

			/*
			 * Eat completions for unsignaled read WRs.
			 */
			if (!qhp->wq.sq.oldest_read->signaled) {
				advance_oldest_read(&qhp->wq);
				goto next_cqe;
			}

			/*
			 * Don't write to the HWCQ, create a new read req CQE
			 * in local memory and move it into the swcq.
			 */
			create_read_req_cqe(&qhp->wq, hw_cqe, &read_cqe);
			hw_cqe = &read_cqe;
			advance_oldest_read(&qhp->wq);
		}

		/* if it's an SQ completion, then do the magic to move all the
		 * unsignaled and now in-order completions into the swcq.
		 */
		if (SQ_TYPE(hw_cqe)) {
			swsqe = &qhp->wq.sq.sw_sq[CQE_WRID_SQ_IDX(hw_cqe)];
			swsqe->cqe = *hw_cqe;
			swsqe->complete = 1;
			flush_completed_wrs(&qhp->wq, &chp->cq);
		} else {
			swcqe = &chp->cq.sw_queue[chp->cq.sw_pidx];
			*swcqe = *hw_cqe;
			swcqe->header |= cpu_to_be32(CQE_SWCQE_V(1));
			t4_swcq_produce(&chp->cq);
		}
next_cqe:
		t4_hwcq_consume(&chp->cq);
		ret = t4_next_hw_cqe(&chp->cq, &hw_cqe);
		if (qhp && flush_qhp != qhp)
			spin_unlock(&qhp->lock);
	}
}
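
/*
 * Return 1 if this CQE should count as completing a receive WR on the given
 * WQ; return 0 for CQEs that carry no RQ work request (drain CQEs,
 * terminates, RDMA write completions, read responses, or sends arriving on
 * an empty RQ).
 */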
static int cqe_completes_wr(struct t4_cqe *cqe, struct t4_wq *wq)
{
	if (DRAIN_CQE(cqe)) {
		WARN_ONCE(1, "Unexpected DRAIN CQE qp id %u!\n", wq->sq.qid);
		return 0;
	}

	if (CQE_OPCODE(cqe) == FW_RI_TERMINATE)
		return 0;

	if ((CQE_OPCODE(cqe) == FW_RI_RDMA_WRITE) && RQ_TYPE(cqe))
		return 0;

	if ((CQE_OPCODE(cqe) == FW_RI_READ_RESP) && SQ_TYPE(cqe))
		return 0;

	if (CQE_SEND_OPCODE(cqe) && RQ_TYPE(cqe) && t4_rq_empty(wq))
		return 0;
	return 1;
}
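
/*
 * Count the software CQEs belonging to this QP that complete receive work
 * requests; used when deciding how many RQ entries still need flushing.
 */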
void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count)
{
	struct t4_cqe *cqe;
	u32 ptr;

	*count = 0;
	pr_debug("count zero %d\n", *count);
	ptr = cq->sw_cidx;
	while (ptr != cq->sw_pidx) {
		cqe = &cq->sw_queue[ptr];
		if (RQ_TYPE(cqe) && (CQE_OPCODE(cqe) != FW_RI_READ_RESP) &&
		    (CQE_QPID(cqe) == wq->sq.qid) && cqe_completes_wr(cqe, wq))
			(*count)++;
		if (++ptr == cq->size)
			ptr = 0;
	}
	pr_debug("cq %p count %d\n", cq, *count);
}

/*
 * poll_cq
 *
 * Caller must:
 *     check the validity of the first CQE,
 *     supply the wq associated with the qpid.
 *
 * credit: cq credit to return to sge.
 * cqe_flushed: 1 iff the CQE is flushed.
 * cqe: copy of the polled CQE.
 *
 * return value:
 *     0            CQE returned ok.
 *     -EAGAIN      CQE skipped, try again.
 *     -EOVERFLOW   CQ overflow detected.
 */
static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
		   u8 *cqe_flushed, u64 *cookie, u32 *credit)
{
	int ret = 0;
	struct t4_cqe *hw_cqe, read_cqe;

	*cqe_flushed = 0;
	*credit = 0;
	ret = t4_next_cqe(cq, &hw_cqe);
	if (ret)
		return ret;

	pr_debug("CQE OVF %u qpid 0x%0x genbit %u type %u status 0x%0x opcode 0x%0x len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x\n",
		 CQE_OVFBIT(hw_cqe), CQE_QPID(hw_cqe),
		 CQE_GENBIT(hw_cqe), CQE_TYPE(hw_cqe), CQE_STATUS(hw_cqe),
		 CQE_OPCODE(hw_cqe), CQE_LEN(hw_cqe), CQE_WRID_HI(hw_cqe),
		 CQE_WRID_LOW(hw_cqe));

	/*
	 * skip cqe's not affiliated with a QP.
	 */
	if (wq == NULL) {
		ret = -EAGAIN;
		goto skip_cqe;
	}

	/*
	 * skip hw cqe's if the wq is flushed.
	 */
	if (wq->flushed && !SW_CQE(hw_cqe)) {
		ret = -EAGAIN;
		goto skip_cqe;
	}

	/*
	 * skip TERMINATE cqes...
	 */
	if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE) {
		ret = -EAGAIN;
		goto skip_cqe;
	}

	/*
	 * Special cqe for drain WR completions...
	 */
	if (DRAIN_CQE(hw_cqe)) {
		*cookie = CQE_DRAIN_COOKIE(hw_cqe);
		*cqe = *hw_cqe;
		goto skip_cqe;
	}

	/*
	 * Gotta tweak READ completions:
	 *	1) the cqe doesn't contain the sq_wptr from the wr.
	 *	2) opcode not reflected from the wr.
	 *	3) read_len not reflected from the wr.
	 *	4) cq_type is RQ_TYPE not SQ_TYPE.
	 */
	if (RQ_TYPE(hw_cqe) && (CQE_OPCODE(hw_cqe) == FW_RI_READ_RESP)) {

		/* If we have reached here because of async
		 * event or other error, and have egress error
		 * then drop
		 */
		if (CQE_TYPE(hw_cqe) == 1) {
			if (CQE_STATUS(hw_cqe))
				t4_set_wq_in_error(wq);
			ret = -EAGAIN;
			goto skip_cqe;
		}

		/* If this is an unsolicited read response, then the read
		 * was generated by the kernel driver as part of peer-2-peer
		 * connection setup.  So ignore the completion.
		 */
		if (CQE_WRID_STAG(hw_cqe) == 1) {
			if (CQE_STATUS(hw_cqe))
				t4_set_wq_in_error(wq);
			ret = -EAGAIN;
			goto skip_cqe;
		}

		/*
		 * Eat completions for unsignaled read WRs.
		 */
		if (!wq->sq.oldest_read->signaled) {
			advance_oldest_read(wq);
			ret = -EAGAIN;
			goto skip_cqe;
		}

		/*
		 * Don't write to the HWCQ, so create a new read req CQE
		 * in local memory.
		 */
		create_read_req_cqe(wq, hw_cqe, &read_cqe);
		hw_cqe = &read_cqe;
		advance_oldest_read(wq);
	}

	if (CQE_STATUS(hw_cqe) || t4_wq_in_error(wq)) {
		*cqe_flushed = (CQE_STATUS(hw_cqe) == T4_ERR_SWFLUSH);
		t4_set_wq_in_error(wq);
	}

	/*
	 * RECV completion.
	 */
	if (RQ_TYPE(hw_cqe)) {

		/*
		 * HW only validates 4 bits of MSN.  So we must validate that
		 * the MSN in the SEND is the next expected MSN.  If it's not,
		 * then we complete this with T4_ERR_MSN and mark the wq in
		 * error.
		 */
		if (t4_rq_empty(wq)) {
			t4_set_wq_in_error(wq);
			ret = -EAGAIN;
			goto skip_cqe;
		}
		if (unlikely(!CQE_STATUS(hw_cqe) &&
			     CQE_WRID_MSN(hw_cqe) != wq->rq.msn)) {
			t4_set_wq_in_error(wq);
			hw_cqe->header |= cpu_to_be32(CQE_STATUS_V(T4_ERR_MSN));
		}
		goto proc_cqe;
	}

	/*
	 * If we get here it's a send completion.
	 *
	 * Handle out of order completion. These get stuffed
	 * in the SW SQ. Then the SW SQ is walked to move any
	 * now in-order completions into the SW CQ.  This handles
	 * 2 cases:
	 *	1) reaping unsignaled WRs when the first subsequent
	 *	   signaled WR is completed.
	 *	2) out of order read completions.
	 */
	if (!SW_CQE(hw_cqe) && (CQE_WRID_SQ_IDX(hw_cqe) != wq->sq.cidx)) {
		struct t4_swsqe *swsqe;

		pr_debug("out of order completion going in sw_sq at idx %u\n",
			 CQE_WRID_SQ_IDX(hw_cqe));
		swsqe = &wq->sq.sw_sq[CQE_WRID_SQ_IDX(hw_cqe)];
		swsqe->cqe = *hw_cqe;
		swsqe->complete = 1;
		ret = -EAGAIN;
		goto flush_wq;
	}

proc_cqe:
	*cqe = *hw_cqe;

	/*
	 * Reap the associated WR(s) that are freed up with this
	 * completion.
	 */
	if (SQ_TYPE(hw_cqe)) {
		int idx = CQE_WRID_SQ_IDX(hw_cqe);

		/*
		 * Account for any unsignaled completions completed by
		 * this signaled completion.  In this case, cidx points
		 * to the first unsignaled one, and idx points to the
		 * signaled one.  So adjust in_use based on this delta.
		 * If this is not completing any unsignaled WRs, then the
		 * delta will be 0.  Handle wrapping also!
		 */
		if (idx < wq->sq.cidx)
			wq->sq.in_use -= wq->sq.size + idx - wq->sq.cidx;
		else
			wq->sq.in_use -= idx - wq->sq.cidx;

		wq->sq.cidx = (uint16_t)idx;
		pr_debug("completing sq idx %u\n", wq->sq.cidx);
		*cookie = wq->sq.sw_sq[wq->sq.cidx].wr_id;
		if (c4iw_wr_log)
			c4iw_log_wr_stats(wq, hw_cqe);
		t4_sq_consume(wq);
	} else {
		pr_debug("completing rq idx %u\n", wq->rq.cidx);
		*cookie = wq->rq.sw_rq[wq->rq.cidx].wr_id;
		if (c4iw_wr_log)
			c4iw_log_wr_stats(wq, hw_cqe);
		t4_rq_consume(wq);
		goto skip_cqe;
	}

flush_wq:
	/*
	 * Flush any completed cqes that are now in-order.
	 */
	flush_completed_wrs(wq, cq);

skip_cqe:
	if (SW_CQE(hw_cqe)) {
		pr_debug("cq %p cqid 0x%x skip sw cqe cidx %u\n",
			 cq, cq->cqid, cq->sw_cidx);
		t4_swcq_consume(cq);
	} else {
		pr_debug("cq %p cqid 0x%x skip hw cqe cidx %u\n",
			 cq, cq->cqid, cq->cidx);
		t4_hwcq_consume(cq);
	}
	return ret;
}

/*
 * Get one cq entry from c4iw and map it to openib.
 *
 * Returns:
 *	0			cqe returned
 *	-ENODATA		EMPTY;
 *	-EAGAIN			caller must try again
 *	any other -errno	fatal error
 */
static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
{
	struct c4iw_qp *qhp = NULL;
	struct t4_cqe uninitialized_var(cqe), *rd_cqe;
	struct t4_wq *wq;
	u32 credit = 0;
	u8 cqe_flushed;
	u64 cookie = 0;
	int ret;

	ret = t4_next_cqe(&chp->cq, &rd_cqe);

	if (ret)
		return ret;

	qhp = get_qhp(chp->rhp, CQE_QPID(rd_cqe));
	if (!qhp)
		wq = NULL;
	else {
		spin_lock(&qhp->lock);
		wq = &(qhp->wq);
	}
	ret = poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie, &credit);
	if (ret)
		goto out;

	wc->wr_id = cookie;
	wc->qp = &qhp->ibqp;
	wc->vendor_err = CQE_STATUS(&cqe);
	wc->wc_flags = 0;

	pr_debug("qpid 0x%x type %d opcode %d status 0x%x len %u wrid hi 0x%x lo 0x%x cookie 0x%llx\n",
		 CQE_QPID(&cqe),
		 CQE_TYPE(&cqe), CQE_OPCODE(&cqe),
		 CQE_STATUS(&cqe), CQE_LEN(&cqe),
		 CQE_WRID_HI(&cqe), CQE_WRID_LOW(&cqe),
		 (unsigned long long)cookie);

	if (CQE_TYPE(&cqe) == 0) {
		if (!CQE_STATUS(&cqe))
			wc->byte_len = CQE_LEN(&cqe);
		else
			wc->byte_len = 0;
		wc->opcode = IB_WC_RECV;
		if (CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_INV ||
		    CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_SE_INV) {
			wc->ex.invalidate_rkey = CQE_WRID_STAG(&cqe);
			wc->wc_flags |= IB_WC_WITH_INVALIDATE;
			c4iw_invalidate_mr(qhp->rhp, wc->ex.invalidate_rkey);
		}
	} else {
		switch (CQE_OPCODE(&cqe)) {
		case FW_RI_RDMA_WRITE:
			wc->opcode = IB_WC_RDMA_WRITE;
			break;
		case FW_RI_READ_REQ:
			wc->opcode = IB_WC_RDMA_READ;
			wc->byte_len = CQE_LEN(&cqe);
			break;
		case FW_RI_SEND_WITH_INV:
		case FW_RI_SEND_WITH_SE_INV:
			wc->opcode = IB_WC_SEND;
			wc->wc_flags |= IB_WC_WITH_INVALIDATE;
			break;
		case FW_RI_SEND:
		case FW_RI_SEND_WITH_SE:
			wc->opcode = IB_WC_SEND;
			break;

		case FW_RI_LOCAL_INV:
			wc->opcode = IB_WC_LOCAL_INV;
			break;
		case FW_RI_FAST_REGISTER:
			wc->opcode = IB_WC_REG_MR;

			/* Invalidate the MR if the fastreg failed */
			if (CQE_STATUS(&cqe) != T4_ERR_SUCCESS)
				c4iw_invalidate_mr(qhp->rhp,
						   CQE_WRID_FR_STAG(&cqe));
			break;
		default:
			pr_err("Unexpected opcode %d in the CQE received for QPID=0x%0x\n",
			       CQE_OPCODE(&cqe), CQE_QPID(&cqe));
			ret = -EINVAL;
			goto out;
		}
	}

	if (cqe_flushed)
		wc->status = IB_WC_WR_FLUSH_ERR;
	else {

		switch (CQE_STATUS(&cqe)) {
		case T4_ERR_SUCCESS:
			wc->status = IB_WC_SUCCESS;
			break;
		case T4_ERR_STAG:
			wc->status = IB_WC_LOC_ACCESS_ERR;
			break;
		case T4_ERR_PDID:
			wc->status = IB_WC_LOC_PROT_ERR;
			break;
		case T4_ERR_QPID:
		case T4_ERR_ACCESS:
			wc->status = IB_WC_LOC_ACCESS_ERR;
			break;
		case T4_ERR_WRAP:
			wc->status = IB_WC_GENERAL_ERR;
			break;
		case T4_ERR_BOUND:
			wc->status = IB_WC_LOC_LEN_ERR;
			break;
		case T4_ERR_INVALIDATE_SHARED_MR:
		case T4_ERR_INVALIDATE_MR_WITH_MW_BOUND:
			wc->status = IB_WC_MW_BIND_ERR;
			break;
		case T4_ERR_CRC:
		case T4_ERR_MARKER:
		case T4_ERR_PDU_LEN_ERR:
		case T4_ERR_OUT_OF_RQE:
		case T4_ERR_DDP_VERSION:
		case T4_ERR_RDMA_VERSION:
		case T4_ERR_DDP_QUEUE_NUM:
		case T4_ERR_MSN:
		case T4_ERR_TBIT:
		case T4_ERR_MO:
		case T4_ERR_MSN_RANGE:
		case T4_ERR_IRD_OVERFLOW:
		case T4_ERR_OPCODE:
		case T4_ERR_INTERNAL_ERR:
			wc->status = IB_WC_FATAL_ERR;
			break;
		case T4_ERR_SWFLUSH:
			wc->status = IB_WC_WR_FLUSH_ERR;
			break;
		default:
			pr_err("Unexpected cqe_status 0x%x for QPID=0x%0x\n",
			       CQE_STATUS(&cqe), CQE_QPID(&cqe));
			wc->status = IB_WC_FATAL_ERR;
		}
	}
out:
	if (wq)
		spin_unlock(&qhp->lock);
	return ret;
}
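
/*
 * Poll up to num_entries completions off the CQ under the CQ lock, retrying
 * internally while poll_cq() asks for a CQE to be skipped (-EAGAIN).
 */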
int c4iw_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
	struct c4iw_cq *chp;
	unsigned long flags;
	int npolled;
	int err = 0;

	chp = to_c4iw_cq(ibcq);

	spin_lock_irqsave(&chp->lock, flags);
	for (npolled = 0; npolled < num_entries; ++npolled) {
		do {
			err = c4iw_poll_cq_one(chp, wc + npolled);
		} while (err == -EAGAIN);
		if (err)
			break;
	}
	spin_unlock_irqrestore(&chp->lock, flags);
	return !err || err == -ENODATA ? npolled : err;
}
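
/*
 * Remove the CQ from the cqid table, wait for its reference count to drop
 * to zero, then tear down the hardware queue and free the c4iw_cq.
 */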
int c4iw_destroy_cq(struct ib_cq *ib_cq)
{
	struct c4iw_cq *chp;
	struct c4iw_ucontext *ucontext;

	pr_debug("ib_cq %p\n", ib_cq);
	chp = to_c4iw_cq(ib_cq);

	remove_handle(chp->rhp, &chp->rhp->cqidr, chp->cq.cqid);
	atomic_dec(&chp->refcnt);
	wait_event(chp->wait, !atomic_read(&chp->refcnt));

	ucontext = ib_cq->uobject ? to_c4iw_ucontext(ib_cq->uobject->context)
				  : NULL;
	destroy_cq(&chp->rhp->rdev, &chp->cq,
		   ucontext ? &ucontext->uctx : &chp->cq.rdev->uctx,
		   chp->destroy_skb, chp->wr_waitp);
	c4iw_put_wr_wait(chp->wr_waitp);
	kfree(chp);
	return 0;
}
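
/*
 * Allocate and initialize a c4iw_cq: size the hardware queue (status page,
 * full/empty slack, multiple-of-16 and minimum-of-64 rounding), create the
 * HW CQ, and for user contexts hand back mmap keys for the queue memory and
 * the BAR2 doorbell page via udata.
 */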
struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
			     const struct ib_cq_init_attr *attr,
			     struct ib_ucontext *ib_context,
			     struct ib_udata *udata)
{
	int entries = attr->cqe;
	int vector = attr->comp_vector;
	struct c4iw_dev *rhp;
	struct c4iw_cq *chp;
	struct c4iw_create_cq_resp uresp;
	struct c4iw_ucontext *ucontext = NULL;
	int ret, wr_len;
	size_t memsize, hwentries;
	struct c4iw_mm_entry *mm, *mm2;

	pr_debug("ib_dev %p entries %d\n", ibdev, entries);
	if (attr->flags)
		return ERR_PTR(-EINVAL);

	rhp = to_c4iw_dev(ibdev);

	if (vector >= rhp->rdev.lldi.nciq)
		return ERR_PTR(-EINVAL);

	chp = kzalloc(sizeof(*chp), GFP_KERNEL);
	if (!chp)
		return ERR_PTR(-ENOMEM);
	chp->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL);
	if (!chp->wr_waitp) {
		ret = -ENOMEM;
		goto err_free_chp;
	}
	c4iw_init_wr_wait(chp->wr_waitp);

	wr_len = sizeof(struct fw_ri_res_wr) + sizeof(struct fw_ri_res);
	chp->destroy_skb = alloc_skb(wr_len, GFP_KERNEL);
	if (!chp->destroy_skb) {
		ret = -ENOMEM;
		goto err_free_wr_wait;
	}

	if (ib_context)
		ucontext = to_c4iw_ucontext(ib_context);

	/* account for the status page. */
	entries++;

	/* IQ needs one extra entry to differentiate full vs empty. */
	entries++;

	/*
	 * entries must be multiple of 16 for HW.
	 */
	entries = roundup(entries, 16);

	/*
	 * Make actual HW queue 2x to avoid cidx_inc overflows.
	 */
	hwentries = min(entries * 2, rhp->rdev.hw_queue.t4_max_iq_size);

	/*
	 * Make HW queue at least 64 entries so GTS updates aren't too
	 * frequent.
	 */
	if (hwentries < 64)
		hwentries = 64;

	memsize = hwentries * sizeof *chp->cq.queue;

	/*
	 * memsize must be a multiple of the page size if it's a user cq.
	 */
	if (ucontext)
		memsize = roundup(memsize, PAGE_SIZE);
	chp->cq.size = hwentries;
	chp->cq.memsize = memsize;
	chp->cq.vector = vector;

	ret = create_cq(&rhp->rdev, &chp->cq,
			ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
			chp->wr_waitp);
	if (ret)
		goto err_free_skb;

	chp->rhp = rhp;
	chp->cq.size--;				/* status page */
	chp->ibcq.cqe = entries - 2;
	spin_lock_init(&chp->lock);
	spin_lock_init(&chp->comp_handler_lock);
	atomic_set(&chp->refcnt, 1);
	init_waitqueue_head(&chp->wait);
	ret = insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid);
	if (ret)
		goto err_destroy_cq;

	if (ucontext) {
		ret = -ENOMEM;
		mm = kmalloc(sizeof *mm, GFP_KERNEL);
		if (!mm)
			goto err_remove_handle;
		mm2 = kmalloc(sizeof *mm2, GFP_KERNEL);
		if (!mm2)
			goto err_free_mm;

		uresp.qid_mask = rhp->rdev.cqmask;
		uresp.cqid = chp->cq.cqid;
		uresp.size = chp->cq.size;
		uresp.memsize = chp->cq.memsize;
		spin_lock(&ucontext->mmap_lock);
		uresp.key = ucontext->key;
		ucontext->key += PAGE_SIZE;
		uresp.gts_key = ucontext->key;
		ucontext->key += PAGE_SIZE;
		spin_unlock(&ucontext->mmap_lock);
		ret = ib_copy_to_udata(udata, &uresp,
				       sizeof(uresp) - sizeof(uresp.reserved));
		if (ret)
			goto err_free_mm2;

		mm->key = uresp.key;
		mm->addr = virt_to_phys(chp->cq.queue);
		mm->len = chp->cq.memsize;
		insert_mmap(ucontext, mm);

		mm2->key = uresp.gts_key;
		mm2->addr = chp->cq.bar2_pa;
		mm2->len = PAGE_SIZE;
		insert_mmap(ucontext, mm2);
	}
	pr_debug("cqid 0x%0x chp %p size %u memsize %zu, dma_addr 0x%0llx\n",
		 chp->cq.cqid, chp, chp->cq.size,
		 chp->cq.memsize, (unsigned long long)chp->cq.dma_addr);
	return &chp->ibcq;
err_free_mm2:
	kfree(mm2);
err_free_mm:
	kfree(mm);
err_remove_handle:
	remove_handle(rhp, &rhp->cqidr, chp->cq.cqid);
err_destroy_cq:
	destroy_cq(&chp->rhp->rdev, &chp->cq,
		   ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
		   chp->destroy_skb, chp->wr_waitp);
err_free_skb:
	kfree_skb(chp->destroy_skb);
err_free_wr_wait:
	c4iw_put_wr_wait(chp->wr_waitp);
err_free_chp:
	kfree(chp);
	return ERR_PTR(ret);
}

int c4iw_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata)
{
	return -ENOSYS;
}
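
/*
 * Arm the CQ for the next completion event (solicited-only or any); with
 * IB_CQ_REPORT_MISSED_EVENTS, return nonzero if completions are already
 * pending on the CQ.
 */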
int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
	struct c4iw_cq *chp;
	int ret = 0;
	unsigned long flag;

	chp = to_c4iw_cq(ibcq);
	spin_lock_irqsave(&chp->lock, flag);
	t4_arm_cq(&chp->cq,
		  (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED);
	if (flags & IB_CQ_REPORT_MISSED_EVENTS)
		ret = t4_cq_notempty(&chp->cq);
	spin_unlock_irqrestore(&chp->lock, flag);
	return ret;
}