/*
 * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved.
 * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the BSD-type
 * license below:
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *      Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *
 *      Redistributions in binary form must reproduce the above
 *      copyright notice, this list of conditions and the following
 *      disclaimer in the documentation and/or other materials provided
 *      with the distribution.
 *
 *      Neither the name of the Network Appliance, Inc. nor the names of
 *      its contributors may be used to endorse or promote products
 *      derived from this software without specific prior written
 *      permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Author: Tom Tucker <tom@opengridcomputing.com>
 */

#include <linux/sunrpc/debug.h>
#include <linux/sunrpc/rpc_rdma.h>
#include <linux/spinlock.h>
#include <linux/string.h>
#include <asm/unaligned.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
#include <linux/sunrpc/svc_rdma.h>

#define RPCDBG_FACILITY	RPCDBG_SVCXPRT
  50. /*
  51. * Replace the pages in the rq_argpages array with the pages from the SGE in
  52. * the RDMA_RECV completion. The SGL should contain full pages up until the
  53. * last one.
  54. */
  55. static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
  56. struct svc_rdma_op_ctxt *ctxt,
  57. u32 byte_count)
  58. {
  59. struct rpcrdma_msg *rmsgp;
  60. struct page *page;
  61. u32 bc;
  62. int sge_no;
  63. /* Swap the page in the SGE with the page in argpages */
  64. page = ctxt->pages[0];
  65. put_page(rqstp->rq_pages[0]);
  66. rqstp->rq_pages[0] = page;
  67. /* Set up the XDR head */
  68. rqstp->rq_arg.head[0].iov_base = page_address(page);
  69. rqstp->rq_arg.head[0].iov_len =
  70. min_t(size_t, byte_count, ctxt->sge[0].length);
  71. rqstp->rq_arg.len = byte_count;
  72. rqstp->rq_arg.buflen = byte_count;
  73. /* Compute bytes past head in the SGL */
  74. bc = byte_count - rqstp->rq_arg.head[0].iov_len;
  75. /* If data remains, store it in the pagelist */
  76. rqstp->rq_arg.page_len = bc;
  77. rqstp->rq_arg.page_base = 0;
  78. /* RDMA_NOMSG: RDMA READ data should land just after RDMA RECV data */
  79. rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;
  80. if (rmsgp->rm_type == rdma_nomsg)
  81. rqstp->rq_arg.pages = &rqstp->rq_pages[0];
  82. else
  83. rqstp->rq_arg.pages = &rqstp->rq_pages[1];
  84. sge_no = 1;
  85. while (bc && sge_no < ctxt->count) {
  86. page = ctxt->pages[sge_no];
  87. put_page(rqstp->rq_pages[sge_no]);
  88. rqstp->rq_pages[sge_no] = page;
  89. bc -= min_t(u32, bc, ctxt->sge[sge_no].length);
  90. rqstp->rq_arg.buflen += ctxt->sge[sge_no].length;
  91. sge_no++;
  92. }
  93. rqstp->rq_respages = &rqstp->rq_pages[sge_no];
  94. rqstp->rq_next_page = rqstp->rq_respages + 1;
  95. /* If not all pages were used from the SGL, free the remaining ones */
  96. bc = sge_no;
  97. while (sge_no < ctxt->count) {
  98. page = ctxt->pages[sge_no++];
  99. put_page(page);
  100. }
  101. ctxt->count = bc;
  102. /* Set up tail */
  103. rqstp->rq_arg.tail[0].iov_base = NULL;
  104. rqstp->rq_arg.tail[0].iov_len = 0;
  105. }
  106. /* Issue an RDMA_READ using the local lkey to map the data sink */
  107. int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
  108. struct svc_rqst *rqstp,
  109. struct svc_rdma_op_ctxt *head,
  110. int *page_no,
  111. u32 *page_offset,
  112. u32 rs_handle,
  113. u32 rs_length,
  114. u64 rs_offset,
  115. bool last)
  116. {
  117. struct ib_rdma_wr read_wr;
  118. int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT;
  119. struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt);
  120. int ret, read, pno;
  121. u32 pg_off = *page_offset;
  122. u32 pg_no = *page_no;
  123. ctxt->direction = DMA_FROM_DEVICE;
  124. ctxt->read_hdr = head;
  125. pages_needed = min_t(int, pages_needed, xprt->sc_max_sge_rd);
  126. read = min_t(int, (pages_needed << PAGE_SHIFT) - *page_offset,
  127. rs_length);
  128. for (pno = 0; pno < pages_needed; pno++) {
  129. int len = min_t(int, rs_length, PAGE_SIZE - pg_off);
  130. head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no];
  131. head->arg.page_len += len;
  132. head->arg.len += len;
  133. if (!pg_off)
  134. head->count++;
  135. rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1];
  136. rqstp->rq_next_page = rqstp->rq_respages + 1;
  137. ctxt->sge[pno].addr =
  138. ib_dma_map_page(xprt->sc_cm_id->device,
  139. head->arg.pages[pg_no], pg_off,
  140. PAGE_SIZE - pg_off,
  141. DMA_FROM_DEVICE);
  142. ret = ib_dma_mapping_error(xprt->sc_cm_id->device,
  143. ctxt->sge[pno].addr);
  144. if (ret)
  145. goto err;
  146. atomic_inc(&xprt->sc_dma_used);
  147. ctxt->sge[pno].lkey = xprt->sc_pd->local_dma_lkey;
  148. ctxt->sge[pno].length = len;
  149. ctxt->count++;
  150. /* adjust offset and wrap to next page if needed */
  151. pg_off += len;
  152. if (pg_off == PAGE_SIZE) {
  153. pg_off = 0;
  154. pg_no++;
  155. }
  156. rs_length -= len;
  157. }
  158. if (last && rs_length == 0)
  159. set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
  160. else
  161. clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
  162. memset(&read_wr, 0, sizeof(read_wr));
  163. read_wr.wr.wr_id = (unsigned long)ctxt;
  164. read_wr.wr.opcode = IB_WR_RDMA_READ;
  165. ctxt->wr_op = read_wr.wr.opcode;
  166. read_wr.wr.send_flags = IB_SEND_SIGNALED;
  167. read_wr.rkey = rs_handle;
  168. read_wr.remote_addr = rs_offset;
  169. read_wr.wr.sg_list = ctxt->sge;
  170. read_wr.wr.num_sge = pages_needed;
  171. ret = svc_rdma_send(xprt, &read_wr.wr);
  172. if (ret) {
  173. pr_err("svcrdma: Error %d posting RDMA_READ\n", ret);
  174. set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
  175. goto err;
  176. }
  177. /* return current location in page array */
  178. *page_no = pg_no;
  179. *page_offset = pg_off;
  180. ret = read;
  181. atomic_inc(&rdma_stat_read);
  182. return ret;
  183. err:
  184. svc_rdma_unmap_dma(ctxt);
  185. svc_rdma_put_context(ctxt, 0);
  186. return ret;
  187. }
  188. /* Issue an RDMA_READ using an FRMR to map the data sink */
  189. int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
  190. struct svc_rqst *rqstp,
  191. struct svc_rdma_op_ctxt *head,
  192. int *page_no,
  193. u32 *page_offset,
  194. u32 rs_handle,
  195. u32 rs_length,
  196. u64 rs_offset,
  197. bool last)
  198. {
  199. struct ib_rdma_wr read_wr;
  200. struct ib_send_wr inv_wr;
  201. struct ib_reg_wr reg_wr;
  202. u8 key;
  203. int nents = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT;
  204. struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt);
  205. struct svc_rdma_fastreg_mr *frmr = svc_rdma_get_frmr(xprt);
  206. int ret, read, pno, dma_nents, n;
  207. u32 pg_off = *page_offset;
  208. u32 pg_no = *page_no;
  209. if (IS_ERR(frmr))
  210. return -ENOMEM;
  211. ctxt->direction = DMA_FROM_DEVICE;
  212. ctxt->frmr = frmr;
  213. nents = min_t(unsigned int, nents, xprt->sc_frmr_pg_list_len);
  214. read = min_t(int, (nents << PAGE_SHIFT) - *page_offset, rs_length);
  215. frmr->direction = DMA_FROM_DEVICE;
  216. frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE);
  217. frmr->sg_nents = nents;
  218. for (pno = 0; pno < nents; pno++) {
  219. int len = min_t(int, rs_length, PAGE_SIZE - pg_off);
  220. head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no];
  221. head->arg.page_len += len;
  222. head->arg.len += len;
  223. if (!pg_off)
  224. head->count++;
  225. sg_set_page(&frmr->sg[pno], rqstp->rq_arg.pages[pg_no],
  226. len, pg_off);
  227. rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1];
  228. rqstp->rq_next_page = rqstp->rq_respages + 1;
  229. /* adjust offset and wrap to next page if needed */
  230. pg_off += len;
  231. if (pg_off == PAGE_SIZE) {
  232. pg_off = 0;
  233. pg_no++;
  234. }
  235. rs_length -= len;
  236. }
  237. if (last && rs_length == 0)
  238. set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
  239. else
  240. clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
  241. dma_nents = ib_dma_map_sg(xprt->sc_cm_id->device,
  242. frmr->sg, frmr->sg_nents,
  243. frmr->direction);
  244. if (!dma_nents) {
  245. pr_err("svcrdma: failed to dma map sg %p\n",
  246. frmr->sg);
  247. return -ENOMEM;
  248. }
  249. atomic_inc(&xprt->sc_dma_used);
  250. n = ib_map_mr_sg(frmr->mr, frmr->sg, frmr->sg_nents, PAGE_SIZE);
  251. if (unlikely(n != frmr->sg_nents)) {
  252. pr_err("svcrdma: failed to map mr %p (%d/%d elements)\n",
  253. frmr->mr, n, frmr->sg_nents);
  254. return n < 0 ? n : -EINVAL;
  255. }
  256. /* Bump the key */
  257. key = (u8)(frmr->mr->lkey & 0x000000FF);
  258. ib_update_fast_reg_key(frmr->mr, ++key);
  259. ctxt->sge[0].addr = frmr->mr->iova;
  260. ctxt->sge[0].lkey = frmr->mr->lkey;
  261. ctxt->sge[0].length = frmr->mr->length;
  262. ctxt->count = 1;
  263. ctxt->read_hdr = head;
  264. /* Prepare REG WR */
  265. reg_wr.wr.opcode = IB_WR_REG_MR;
  266. reg_wr.wr.wr_id = 0;
  267. reg_wr.wr.send_flags = IB_SEND_SIGNALED;
  268. reg_wr.wr.num_sge = 0;
  269. reg_wr.mr = frmr->mr;
  270. reg_wr.key = frmr->mr->lkey;
  271. reg_wr.access = frmr->access_flags;
  272. reg_wr.wr.next = &read_wr.wr;
  273. /* Prepare RDMA_READ */
  274. memset(&read_wr, 0, sizeof(read_wr));
  275. read_wr.wr.send_flags = IB_SEND_SIGNALED;
  276. read_wr.rkey = rs_handle;
  277. read_wr.remote_addr = rs_offset;
  278. read_wr.wr.sg_list = ctxt->sge;
  279. read_wr.wr.num_sge = 1;
  280. if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_READ_W_INV) {
  281. read_wr.wr.opcode = IB_WR_RDMA_READ_WITH_INV;
  282. read_wr.wr.wr_id = (unsigned long)ctxt;
  283. read_wr.wr.ex.invalidate_rkey = ctxt->frmr->mr->lkey;
  284. } else {
  285. read_wr.wr.opcode = IB_WR_RDMA_READ;
  286. read_wr.wr.next = &inv_wr;
  287. /* Prepare invalidate */
  288. memset(&inv_wr, 0, sizeof(inv_wr));
  289. inv_wr.wr_id = (unsigned long)ctxt;
  290. inv_wr.opcode = IB_WR_LOCAL_INV;
  291. inv_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_FENCE;
  292. inv_wr.ex.invalidate_rkey = frmr->mr->lkey;
  293. }
  294. ctxt->wr_op = read_wr.wr.opcode;
  295. /* Post the chain */
  296. ret = svc_rdma_send(xprt, &reg_wr.wr);
  297. if (ret) {
  298. pr_err("svcrdma: Error %d posting RDMA_READ\n", ret);
  299. set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
  300. goto err;
  301. }
  302. /* return current location in page array */
  303. *page_no = pg_no;
  304. *page_offset = pg_off;
  305. ret = read;
  306. atomic_inc(&rdma_stat_read);
  307. return ret;
  308. err:
  309. ib_dma_unmap_sg(xprt->sc_cm_id->device,
  310. frmr->sg, frmr->sg_nents, frmr->direction);
  311. svc_rdma_put_context(ctxt, 0);
  312. svc_rdma_put_frmr(xprt, frmr);
  313. return ret;
  314. }
  315. static unsigned int
  316. rdma_rcl_chunk_count(struct rpcrdma_read_chunk *ch)
  317. {
  318. unsigned int count;
  319. for (count = 0; ch->rc_discrim != xdr_zero; ch++)
  320. count++;
  321. return count;
  322. }
  323. /* If there was additional inline content, append it to the end of arg.pages.
  324. * Tail copy has to be done after the reader function has determined how many
  325. * pages are needed for RDMA READ.
  326. */
  327. static int
  328. rdma_copy_tail(struct svc_rqst *rqstp, struct svc_rdma_op_ctxt *head,
  329. u32 position, u32 byte_count, u32 page_offset, int page_no)
  330. {
  331. char *srcp, *destp;
  332. int ret;
  333. ret = 0;
  334. srcp = head->arg.head[0].iov_base + position;
  335. byte_count = head->arg.head[0].iov_len - position;
  336. if (byte_count > PAGE_SIZE) {
  337. dprintk("svcrdma: large tail unsupported\n");
  338. return 0;
  339. }
  340. /* Fit as much of the tail on the current page as possible */
  341. if (page_offset != PAGE_SIZE) {
  342. destp = page_address(rqstp->rq_arg.pages[page_no]);
  343. destp += page_offset;
  344. while (byte_count--) {
  345. *destp++ = *srcp++;
  346. page_offset++;
  347. if (page_offset == PAGE_SIZE && byte_count)
  348. goto more;
  349. }
  350. goto done;
  351. }
  352. more:
  353. /* Fit the rest on the next page */
  354. page_no++;
  355. destp = page_address(rqstp->rq_arg.pages[page_no]);
  356. while (byte_count--)
  357. *destp++ = *srcp++;
  358. rqstp->rq_respages = &rqstp->rq_arg.pages[page_no+1];
  359. rqstp->rq_next_page = rqstp->rq_respages + 1;
  360. done:
  361. byte_count = head->arg.head[0].iov_len - position;
  362. head->arg.page_len += byte_count;
  363. head->arg.len += byte_count;
  364. head->arg.buflen += byte_count;
  365. return 1;
  366. }
  367. static int rdma_read_chunks(struct svcxprt_rdma *xprt,
  368. struct rpcrdma_msg *rmsgp,
  369. struct svc_rqst *rqstp,
  370. struct svc_rdma_op_ctxt *head)
  371. {
  372. int page_no, ret;
  373. struct rpcrdma_read_chunk *ch;
  374. u32 handle, page_offset, byte_count;
  375. u32 position;
  376. u64 rs_offset;
  377. bool last;
  378. /* If no read list is present, return 0 */
  379. ch = svc_rdma_get_read_chunk(rmsgp);
  380. if (!ch)
  381. return 0;
  382. if (rdma_rcl_chunk_count(ch) > RPCSVC_MAXPAGES)
  383. return -EINVAL;
  384. /* The request is completed when the RDMA_READs complete. The
  385. * head context keeps all the pages that comprise the
  386. * request.
  387. */
  388. head->arg.head[0] = rqstp->rq_arg.head[0];
  389. head->arg.tail[0] = rqstp->rq_arg.tail[0];
  390. head->hdr_count = head->count;
  391. head->arg.page_base = 0;
  392. head->arg.page_len = 0;
  393. head->arg.len = rqstp->rq_arg.len;
  394. head->arg.buflen = rqstp->rq_arg.buflen;
  395. ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
  396. position = be32_to_cpu(ch->rc_position);
  397. /* RDMA_NOMSG: RDMA READ data should land just after RDMA RECV data */
  398. if (position == 0) {
  399. head->arg.pages = &head->pages[0];
  400. page_offset = head->byte_len;
  401. } else {
  402. head->arg.pages = &head->pages[head->count];
  403. page_offset = 0;
  404. }
  405. ret = 0;
  406. page_no = 0;
  407. for (; ch->rc_discrim != xdr_zero; ch++) {
  408. if (be32_to_cpu(ch->rc_position) != position)
  409. goto err;
  410. handle = be32_to_cpu(ch->rc_target.rs_handle),
  411. byte_count = be32_to_cpu(ch->rc_target.rs_length);
  412. xdr_decode_hyper((__be32 *)&ch->rc_target.rs_offset,
  413. &rs_offset);
  414. while (byte_count > 0) {
  415. last = (ch + 1)->rc_discrim == xdr_zero;
  416. ret = xprt->sc_reader(xprt, rqstp, head,
  417. &page_no, &page_offset,
  418. handle, byte_count,
  419. rs_offset, last);
  420. if (ret < 0)
  421. goto err;
  422. byte_count -= ret;
  423. rs_offset += ret;
  424. head->arg.buflen += ret;
  425. }
  426. }
  427. /* Read list may need XDR round-up (see RFC 5666, s. 3.7) */
  428. if (page_offset & 3) {
  429. u32 pad = 4 - (page_offset & 3);
  430. head->arg.page_len += pad;
  431. head->arg.len += pad;
  432. head->arg.buflen += pad;
  433. page_offset += pad;
  434. }
  435. ret = 1;
  436. if (position && position < head->arg.head[0].iov_len)
  437. ret = rdma_copy_tail(rqstp, head, position,
  438. byte_count, page_offset, page_no);
  439. head->arg.head[0].iov_len = position;
  440. head->position = position;
  441. err:
  442. /* Detach arg pages. svc_recv will replenish them */
  443. for (page_no = 0;
  444. &rqstp->rq_pages[page_no] < rqstp->rq_respages; page_no++)
  445. rqstp->rq_pages[page_no] = NULL;
  446. return ret;
  447. }
  448. static int rdma_read_complete(struct svc_rqst *rqstp,
  449. struct svc_rdma_op_ctxt *head)
  450. {
  451. int page_no;
  452. int ret;
  453. /* Copy RPC pages */
  454. for (page_no = 0; page_no < head->count; page_no++) {
  455. put_page(rqstp->rq_pages[page_no]);
  456. rqstp->rq_pages[page_no] = head->pages[page_no];
  457. }
  458. /* Adjustments made for RDMA_NOMSG type requests */
  459. if (head->position == 0) {
  460. if (head->arg.len <= head->sge[0].length) {
  461. head->arg.head[0].iov_len = head->arg.len -
  462. head->byte_len;
  463. head->arg.page_len = 0;
  464. } else {
  465. head->arg.head[0].iov_len = head->sge[0].length -
  466. head->byte_len;
  467. head->arg.page_len = head->arg.len -
  468. head->sge[0].length;
  469. }
  470. }
  471. /* Point rq_arg.pages past header */
  472. rqstp->rq_arg.pages = &rqstp->rq_pages[head->hdr_count];
  473. rqstp->rq_arg.page_len = head->arg.page_len;
  474. rqstp->rq_arg.page_base = head->arg.page_base;
  475. /* rq_respages starts after the last arg page */
  476. rqstp->rq_respages = &rqstp->rq_pages[page_no];
  477. rqstp->rq_next_page = rqstp->rq_respages + 1;
  478. /* Rebuild rq_arg head and tail. */
  479. rqstp->rq_arg.head[0] = head->arg.head[0];
  480. rqstp->rq_arg.tail[0] = head->arg.tail[0];
  481. rqstp->rq_arg.len = head->arg.len;
  482. rqstp->rq_arg.buflen = head->arg.buflen;
  483. /* Free the context */
  484. svc_rdma_put_context(head, 0);
  485. /* XXX: What should this be? */
  486. rqstp->rq_prot = IPPROTO_MAX;
  487. svc_xprt_copy_addrs(rqstp, rqstp->rq_xprt);
  488. ret = rqstp->rq_arg.head[0].iov_len
  489. + rqstp->rq_arg.page_len
  490. + rqstp->rq_arg.tail[0].iov_len;
  491. dprintk("svcrdma: deferred read ret=%d, rq_arg.len=%u, "
  492. "rq_arg.head[0].iov_base=%p, rq_arg.head[0].iov_len=%zu\n",
  493. ret, rqstp->rq_arg.len, rqstp->rq_arg.head[0].iov_base,
  494. rqstp->rq_arg.head[0].iov_len);
  495. return ret;
  496. }
  497. /* By convention, backchannel calls arrive via rdma_msg type
  498. * messages, and never populate the chunk lists. This makes
  499. * the RPC/RDMA header small and fixed in size, so it is
  500. * straightforward to check the RPC header's direction field.
  501. */
  502. static bool
  503. svc_rdma_is_backchannel_reply(struct svc_xprt *xprt, struct rpcrdma_msg *rmsgp)
  504. {
  505. __be32 *p = (__be32 *)rmsgp;
  506. if (!xprt->xpt_bc_xprt)
  507. return false;
  508. if (rmsgp->rm_type != rdma_msg)
  509. return false;
  510. if (rmsgp->rm_body.rm_chunks[0] != xdr_zero)
  511. return false;
  512. if (rmsgp->rm_body.rm_chunks[1] != xdr_zero)
  513. return false;
  514. if (rmsgp->rm_body.rm_chunks[2] != xdr_zero)
  515. return false;
  516. /* sanity */
  517. if (p[7] != rmsgp->rm_xid)
  518. return false;
  519. /* call direction */
  520. if (p[8] == cpu_to_be32(RPC_CALL))
  521. return false;
  522. return true;
  523. }
  524. /*
  525. * Set up the rqstp thread context to point to the RQ buffer. If
  526. * necessary, pull additional data from the client with an RDMA_READ
  527. * request.
  528. */
  529. int svc_rdma_recvfrom(struct svc_rqst *rqstp)
  530. {
  531. struct svc_xprt *xprt = rqstp->rq_xprt;
  532. struct svcxprt_rdma *rdma_xprt =
  533. container_of(xprt, struct svcxprt_rdma, sc_xprt);
  534. struct svc_rdma_op_ctxt *ctxt = NULL;
  535. struct rpcrdma_msg *rmsgp;
  536. int ret = 0;
  537. int len;
  538. dprintk("svcrdma: rqstp=%p\n", rqstp);
  539. spin_lock_bh(&rdma_xprt->sc_rq_dto_lock);
  540. if (!list_empty(&rdma_xprt->sc_read_complete_q)) {
  541. ctxt = list_entry(rdma_xprt->sc_read_complete_q.next,
  542. struct svc_rdma_op_ctxt,
  543. dto_q);
  544. list_del_init(&ctxt->dto_q);
  545. spin_unlock_bh(&rdma_xprt->sc_rq_dto_lock);
  546. return rdma_read_complete(rqstp, ctxt);
  547. } else if (!list_empty(&rdma_xprt->sc_rq_dto_q)) {
  548. ctxt = list_entry(rdma_xprt->sc_rq_dto_q.next,
  549. struct svc_rdma_op_ctxt,
  550. dto_q);
  551. list_del_init(&ctxt->dto_q);
  552. } else {
  553. atomic_inc(&rdma_stat_rq_starve);
  554. clear_bit(XPT_DATA, &xprt->xpt_flags);
  555. ctxt = NULL;
  556. }
  557. spin_unlock_bh(&rdma_xprt->sc_rq_dto_lock);
  558. if (!ctxt) {
  559. /* This is the EAGAIN path. The svc_recv routine will
  560. * return -EAGAIN, the nfsd thread will go to call into
  561. * svc_recv again and we shouldn't be on the active
  562. * transport list
  563. */
  564. if (test_bit(XPT_CLOSE, &xprt->xpt_flags))
  565. goto close_out;
  566. goto out;
  567. }
  568. dprintk("svcrdma: processing ctxt=%p on xprt=%p, rqstp=%p, status=%d\n",
  569. ctxt, rdma_xprt, rqstp, ctxt->wc_status);
  570. atomic_inc(&rdma_stat_recv);
  571. /* Build up the XDR from the receive buffers. */
  572. rdma_build_arg_xdr(rqstp, ctxt, ctxt->byte_len);
  573. /* Decode the RDMA header. */
  574. len = svc_rdma_xdr_decode_req(&rmsgp, rqstp);
  575. rqstp->rq_xprt_hlen = len;
  576. /* If the request is invalid, reply with an error */
  577. if (len < 0) {
  578. if (len == -ENOSYS)
  579. svc_rdma_send_error(rdma_xprt, rmsgp, ERR_VERS);
  580. goto close_out;
  581. }
  582. if (svc_rdma_is_backchannel_reply(xprt, rmsgp)) {
  583. ret = svc_rdma_handle_bc_reply(xprt->xpt_bc_xprt, rmsgp,
  584. &rqstp->rq_arg);
  585. svc_rdma_put_context(ctxt, 0);
  586. if (ret)
  587. goto repost;
  588. return ret;
  589. }
  590. /* Read read-list data. */
  591. ret = rdma_read_chunks(rdma_xprt, rmsgp, rqstp, ctxt);
  592. if (ret > 0) {
  593. /* read-list posted, defer until data received from client. */
  594. goto defer;
  595. } else if (ret < 0) {
  596. /* Post of read-list failed, free context. */
  597. svc_rdma_put_context(ctxt, 1);
  598. return 0;
  599. }
  600. ret = rqstp->rq_arg.head[0].iov_len
  601. + rqstp->rq_arg.page_len
  602. + rqstp->rq_arg.tail[0].iov_len;
  603. svc_rdma_put_context(ctxt, 0);
  604. out:
  605. dprintk("svcrdma: ret=%d, rq_arg.len=%u, "
  606. "rq_arg.head[0].iov_base=%p, rq_arg.head[0].iov_len=%zd\n",
  607. ret, rqstp->rq_arg.len,
  608. rqstp->rq_arg.head[0].iov_base,
  609. rqstp->rq_arg.head[0].iov_len);
  610. rqstp->rq_prot = IPPROTO_MAX;
  611. svc_xprt_copy_addrs(rqstp, xprt);
  612. return ret;
  613. close_out:
  614. if (ctxt)
  615. svc_rdma_put_context(ctxt, 1);
  616. dprintk("svcrdma: transport %p is closing\n", xprt);
  617. /*
  618. * Set the close bit and enqueue it. svc_recv will see the
  619. * close bit and call svc_xprt_delete
  620. */
  621. set_bit(XPT_CLOSE, &xprt->xpt_flags);
  622. defer:
  623. return 0;
  624. repost:
  625. ret = svc_rdma_post_recv(rdma_xprt, GFP_KERNEL);
  626. if (ret) {
  627. pr_err("svcrdma: could not post a receive buffer, err=%d.\n",
  628. ret);
  629. pr_err("svcrdma: closing transport %p.\n", rdma_xprt);
  630. set_bit(XPT_CLOSE, &rdma_xprt->sc_xprt.xpt_flags);
  631. ret = -ENOTCONN;
  632. }
  633. return ret;
  634. }