fmr_ops.c

/*
 * Copyright (c) 2015 Oracle. All rights reserved.
 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
 */

/* Lightweight memory registration using Fast Memory Regions (FMR).
 * Sometimes referred to as MTHCAFMR mode.
 *
 * FMR uses synchronous memory registration and deregistration.
 * FMR registration is known to be fast, but FMR deregistration
 * can take tens of usecs to complete.
 */

/* Normal operation
 *
 * A Memory Region is prepared for RDMA READ or WRITE using the
 * ib_map_phys_fmr verb (fmr_op_map). When the RDMA operation is
 * finished, the Memory Region is unmapped using the ib_unmap_fmr
 * verb (fmr_op_unmap).
 */

/* Transport recovery
 *
 * After a transport reconnect, fmr_op_map re-uses the MR already
 * allocated for the RPC, but generates a fresh rkey then maps the
 * MR again. This process is synchronous.
 */

#include "xprt_rdma.h"

#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY	RPCDBG_TRANS
#endif

/* Maximum scatter/gather per FMR */
#define RPCRDMA_MAX_FMR_SGES	(64)
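
/* FMR registration requires no additional connection parameters,
 * so fmr_op_open has nothing to adjust and simply reports success.
 */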
static int
fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
	    struct rpcrdma_create_data_internal *cdata)
{
	return 0;
}

/* FMR mode conveys up to 64 pages of payload per chunk segment.
 */
static size_t
fmr_op_maxpages(struct rpcrdma_xprt *r_xprt)
{
	return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
		     rpcrdma_max_segments(r_xprt) * RPCRDMA_MAX_FMR_SGES);
}
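
/* Pre-allocate the pool of FMRs used by this transport: enough MWs
 * for the data payload, plus head and tail chunks, for every RPC
 * slot. Each MW gets a physaddrs array sized for RPCRDMA_MAX_FMR_SGES
 * pages and an FMR allocated from the transport's protection domain.
 */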
static int
fmr_op_init(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	int mr_access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ;
	struct ib_fmr_attr fmr_attr = {
		.max_pages	= RPCRDMA_MAX_FMR_SGES,
		.max_maps	= 1,
		.page_shift	= PAGE_SHIFT
	};
	struct ib_pd *pd = r_xprt->rx_ia.ri_pd;
	struct rpcrdma_mw *r;
	int i, rc;

	spin_lock_init(&buf->rb_mwlock);
	INIT_LIST_HEAD(&buf->rb_mws);
	INIT_LIST_HEAD(&buf->rb_all);

	i = max_t(int, RPCRDMA_MAX_DATA_SEGS / RPCRDMA_MAX_FMR_SGES, 1);
	i += 2;				/* head + tail */
	i *= buf->rb_max_requests;	/* one set for each RPC slot */
	dprintk("RPC: %s: initializing %d FMRs\n", __func__, i);

	rc = -ENOMEM;
	while (i--) {
		r = kzalloc(sizeof(*r), GFP_KERNEL);
		if (!r)
			goto out;

		r->r.fmr.physaddrs = kmalloc(RPCRDMA_MAX_FMR_SGES *
					     sizeof(u64), GFP_KERNEL);
		if (!r->r.fmr.physaddrs)
			goto out_free;

		r->r.fmr.fmr = ib_alloc_fmr(pd, mr_access_flags, &fmr_attr);
		if (IS_ERR(r->r.fmr.fmr))
			goto out_fmr_err;

		list_add(&r->mw_list, &buf->rb_mws);
		list_add(&r->mw_all, &buf->rb_all);
	}
	return 0;

out_fmr_err:
	rc = PTR_ERR(r->r.fmr.fmr);
	dprintk("RPC: %s: ib_alloc_fmr status %i\n", __func__, rc);
	kfree(r->r.fmr.physaddrs);
out_free:
	kfree(r);
out:
	return rc;
}
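
/* ib_unmap_fmr() takes a list of FMRs, so build a one-entry list to
 * invalidate a single MW's FMR synchronously.
 */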
static int
__fmr_unmap(struct rpcrdma_mw *r)
{
	LIST_HEAD(l);

	list_add(&r->r.fmr.fmr->list, &l);
	return ib_unmap_fmr(&l);
}

/* Use the ib_map_phys_fmr() verb to register a memory region
 * for remote access via RDMA READ or RDMA WRITE.
 */
static int
fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
	   int nsegs, bool writing)
{
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	struct ib_device *device = ia->ri_device;
	enum dma_data_direction direction = rpcrdma_data_dir(writing);
	struct rpcrdma_mr_seg *seg1 = seg;
	int len, pageoff, i, rc;
	struct rpcrdma_mw *mw;

	mw = seg1->rl_mw;
	seg1->rl_mw = NULL;
	if (!mw) {
		mw = rpcrdma_get_mw(r_xprt);
		if (!mw)
			return -ENOMEM;
	} else {
		/* this is a retransmit; generate a fresh rkey */
		rc = __fmr_unmap(mw);
		if (rc)
			return rc;
	}

	pageoff = offset_in_page(seg1->mr_offset);
	seg1->mr_offset -= pageoff;	/* start of page */
	seg1->mr_len += pageoff;
	len = -pageoff;
	if (nsegs > RPCRDMA_MAX_FMR_SGES)
		nsegs = RPCRDMA_MAX_FMR_SGES;
	for (i = 0; i < nsegs;) {
		rpcrdma_map_one(device, seg, direction);
		mw->r.fmr.physaddrs[i] = seg->mr_dma;
		len += seg->mr_len;
		++seg;
		++i;

		/* Check for holes */
		if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
			break;
	}

	rc = ib_map_phys_fmr(mw->r.fmr.fmr, mw->r.fmr.physaddrs,
			     i, seg1->mr_dma);
	if (rc)
		goto out_maperr;

	seg1->rl_mw = mw;
	seg1->mr_rkey = mw->r.fmr.fmr->rkey;
	seg1->mr_base = seg1->mr_dma + pageoff;
	seg1->mr_nsegs = i;
	seg1->mr_len = len;
	return i;

out_maperr:
	dprintk("RPC: %s: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n",
		__func__, len, (unsigned long long)seg1->mr_dma,
		pageoff, i, rc);
	while (i--)
		rpcrdma_unmap_one(device, --seg);
	return rc;
}
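
/* DMA unmap every segment backed by this MW, then return the MW to
 * the transport's free list.
 */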
static void
__fmr_dma_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
{
	struct ib_device *device = r_xprt->rx_ia.ri_device;
	struct rpcrdma_mw *mw = seg->rl_mw;
	int nsegs = seg->mr_nsegs;

	seg->rl_mw = NULL;

	while (nsegs--)
		rpcrdma_unmap_one(device, seg++);

	rpcrdma_put_mw(r_xprt, mw);
}

/* Invalidate all memory regions that were registered for "req".
 *
 * Sleeps until it is safe for the host CPU to access the
 * previously mapped memory regions.
 */
static void
fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
{
	struct rpcrdma_mr_seg *seg;
	unsigned int i, nchunks;
	struct rpcrdma_mw *mw;
	LIST_HEAD(unmap_list);
	int rc;

	dprintk("RPC: %s: req %p\n", __func__, req);

	/* ORDER: Invalidate all of the req's MRs first
	 *
	 * ib_unmap_fmr() is slow, so use a single call instead
	 * of one call per mapped MR.
	 */
	for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) {
		seg = &req->rl_segments[i];
		mw = seg->rl_mw;

		list_add(&mw->r.fmr.fmr->list, &unmap_list);

		i += seg->mr_nsegs;
	}
	rc = ib_unmap_fmr(&unmap_list);
	if (rc)
		pr_warn("%s: ib_unmap_fmr failed (%i)\n", __func__, rc);

	/* ORDER: Now DMA unmap all of the req's MRs, and return
	 * them to the free MW list.
	 */
	for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) {
		seg = &req->rl_segments[i];

		__fmr_dma_unmap(r_xprt, seg);

		i += seg->mr_nsegs;
		seg->mr_nsegs = 0;
	}

	req->rl_nchunks = 0;
}

/* Use the ib_unmap_fmr() verb to prevent further remote
 * access via RDMA READ or RDMA WRITE.
 */
static int
fmr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
{
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	struct rpcrdma_mr_seg *seg1 = seg;
	struct rpcrdma_mw *mw = seg1->rl_mw;
	int rc, nsegs = seg->mr_nsegs;

	dprintk("RPC: %s: FMR %p\n", __func__, mw);

	seg1->rl_mw = NULL;
	while (seg1->mr_nsegs--)
		rpcrdma_unmap_one(ia->ri_device, seg++);
	rc = __fmr_unmap(mw);
	if (rc)
		goto out_err;
	rpcrdma_put_mw(r_xprt, mw);
	return nsegs;

out_err:
	/* The FMR is abandoned, but remains in rb_all. fmr_op_destroy
	 * will attempt to release it when the transport is destroyed.
	 */
	dprintk("RPC: %s: ib_unmap_fmr status %i\n", __func__, rc);
	return nsegs;
}
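
/* Release every MW on rb_all when the transport's buffers are torn
 * down: free each physaddrs array, deallocate the FMR, then free
 * the MW itself.
 */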
static void
fmr_op_destroy(struct rpcrdma_buffer *buf)
{
	struct rpcrdma_mw *r;
	int rc;

	while (!list_empty(&buf->rb_all)) {
		r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
		list_del(&r->mw_all);
		kfree(r->r.fmr.physaddrs);

		rc = ib_dealloc_fmr(r->r.fmr.fmr);
		if (rc)
			dprintk("RPC: %s: ib_dealloc_fmr failed %i\n",
				__func__, rc);

		kfree(r);
	}
}
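
/* Vector of memory registration operations used when the transport
 * runs in FMR (MTHCAFMR) mode.
 */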
const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
	.ro_map			= fmr_op_map,
	.ro_unmap_sync		= fmr_op_unmap_sync,
	.ro_unmap		= fmr_op_unmap,
	.ro_open		= fmr_op_open,
	.ro_maxpages		= fmr_op_maxpages,
	.ro_init		= fmr_op_init,
	.ro_destroy		= fmr_op_destroy,
	.ro_displayname		= "fmr",
};