
xprtrdma: Support for SG_GAP devices

Some devices (such as the Mellanox CX-4) can register, under a
single R_key, a set of memory regions that are not contiguous. When
this is done, all the segments in a Reply list, say, can then be
invalidated in a single LocalInv Work Request (or via Remote
Invalidation, which can invalidate exactly one R_key when completing
a Receive).

This means a single FastReg WR is used to register, and one or zero
LocalInv WRs can invalidate, the memory involved with RDMA transfers
on behalf of an RPC.
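
As a rough illustration of that one-WR register / one-WR invalidate pattern,
here is a minimal, hypothetical sketch against the verbs API of this era.
The helper names, the PAGE_SIZE granularity, and the access flags below are
illustrative only and are not part of this patch; xprtrdma's real paths live
in frwr_op_map() and frwr_op_unmap_sync().

    #include <linux/scatterlist.h>
    #include <rdma/ib_verbs.h>

    /* With an SG_GAPS MR, every (possibly discontiguous) segment sits
     * behind one R_key, so one REG_MR Work Request registers the lot...
     */
    static int sketch_register(struct ib_qp *qp, struct ib_mr *mr,
    			   struct scatterlist *sgl, int nents)
    {
    	struct ib_reg_wr reg_wr = { };
    	struct ib_send_wr *bad_wr;
    	int n;

    	/* Map the scatterlist, gaps and all, into the MR. */
    	n = ib_map_mr_sg(mr, sgl, nents, NULL, PAGE_SIZE);
    	if (n != nents)
    		return n < 0 ? n : -EIO;

    	reg_wr.wr.opcode = IB_WR_REG_MR;
    	reg_wr.mr = mr;
    	reg_wr.key = mr->rkey;
    	reg_wr.access = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE;

    	/* A single FastReg WR covers the whole transfer. */
    	return ib_post_send(qp, &reg_wr.wr, &bad_wr);
    }

    /* ...and one LocalInv WR retires that single R_key afterwards. */
    static int sketch_invalidate(struct ib_qp *qp, struct ib_mr *mr)
    {
    	struct ib_send_wr inv_wr = { };
    	struct ib_send_wr *bad_wr;

    	inv_wr.opcode = IB_WR_LOCAL_INV;
    	inv_wr.ex.invalidate_rkey = mr->rkey;

    	return ib_post_send(qp, &inv_wr, &bad_wr);
    }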

In addition, xprtrdma constructs some Reply chunks from three or
more segments. By registering them with SG_GAP, only one segment
is needed for the Reply chunk, allowing the whole chunk to be
invalidated remotely.
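
The capability check itself is small. Condensed from the hunks below (a
sketch, not a verbatim excerpt), frwr_op_open() now records the strongest
MR type the device supports, and every later ib_alloc_mr() call in
frwr_ops.c uses it:

    ia->ri_mrtype = IB_MR_TYPE_MEM_REG;	/* default: no gaps allowed */
    if (ia->ri_device->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)
    	ia->ri_mrtype = IB_MR_TYPE_SG_GAPS;	/* device tolerates gaps */

    /* later, when provisioning or resetting an MR: */
    f->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, ia->ri_max_frmr_depth);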

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Chuck Lever, 8 years ago
Commit 5e9fc6a06b
2 files changed, 14 insertions(+), 7 deletions(-)
  1. net/sunrpc/xprtrdma/frwr_ops.c (+13, -7)
  2. net/sunrpc/xprtrdma/xprt_rdma.h (+1, -0)

net/sunrpc/xprtrdma/frwr_ops.c (+13, -7)

@@ -101,7 +101,7 @@ frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r)
 	struct rpcrdma_frmr *f = &r->frmr;
 	int rc;
 
-	f->fr_mr = ib_alloc_mr(ia->ri_pd, IB_MR_TYPE_MEM_REG, depth);
+	f->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, depth);
 	if (IS_ERR(f->fr_mr))
 		goto out_mr_err;
 
@@ -157,7 +157,7 @@ __frwr_reset_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r)
 		return rc;
 	}
 
-	f->fr_mr = ib_alloc_mr(ia->ri_pd, IB_MR_TYPE_MEM_REG,
+	f->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype,
 			       ia->ri_max_frmr_depth);
 	if (IS_ERR(f->fr_mr)) {
 		pr_warn("rpcrdma: ib_alloc_mr status %ld, frwr %p orphaned\n",
@@ -210,11 +210,16 @@ static int
 frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
 	     struct rpcrdma_create_data_internal *cdata)
 {
+	struct ib_device_attr *attrs = &ia->ri_device->attrs;
 	int depth, delta;
 
+	ia->ri_mrtype = IB_MR_TYPE_MEM_REG;
+	if (attrs->device_cap_flags & IB_DEVICE_SG_GAPS_REG)
+		ia->ri_mrtype = IB_MR_TYPE_SG_GAPS;
+
 	ia->ri_max_frmr_depth =
 			min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
-			      ia->ri_device->attrs.max_fast_reg_page_list_len);
+			      attrs->max_fast_reg_page_list_len);
 	dprintk("RPC:       %s: device's max FR page list len = %u\n",
 	dprintk("RPC:       %s: device's max FR page list len = %u\n",
 		__func__, ia->ri_max_frmr_depth);
 		__func__, ia->ri_max_frmr_depth);
 
 
@@ -241,8 +246,8 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
 	}
 
 	ep->rep_attr.cap.max_send_wr *= depth;
-	if (ep->rep_attr.cap.max_send_wr > ia->ri_device->attrs.max_qp_wr) {
-		cdata->max_requests = ia->ri_device->attrs.max_qp_wr / depth;
+	if (ep->rep_attr.cap.max_send_wr > attrs->max_qp_wr) {
+		cdata->max_requests = attrs->max_qp_wr / depth;
 		if (!cdata->max_requests)
 			return -EINVAL;
 		ep->rep_attr.cap.max_send_wr = cdata->max_requests *
@@ -348,6 +353,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
 	    int nsegs, bool writing, struct rpcrdma_mw **out)
 {
 	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+	bool holes_ok = ia->ri_mrtype == IB_MR_TYPE_SG_GAPS;
 	struct rpcrdma_mw *mw;
 	struct rpcrdma_frmr *frmr;
 	struct ib_mr *mr;
@@ -383,8 +389,8 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
 
 		++seg;
 		++i;
-
-		/* Check for holes */
+		if (holes_ok)
+			continue;
 		if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
 		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
 			break;

net/sunrpc/xprtrdma/xprt_rdma.h (+1, -0)

@@ -75,6 +75,7 @@ struct rpcrdma_ia {
 	unsigned int		ri_max_inline_write;
 	unsigned int		ri_max_inline_read;
 	bool			ri_reminv_expected;
+	enum ib_mr_type		ri_mrtype;
 	struct ib_qp_attr	ri_qp_attr;
 	struct ib_qp_init_attr	ri_qp_init_attr;
 };