|
@@ -52,6 +52,7 @@
|
|
|
#include <linux/prefetch.h>
|
|
|
#include <linux/sunrpc/addr.h>
|
|
|
#include <asm/bitops.h>
|
|
|
+#include <linux/module.h> /* try_module_get()/module_put() */
|
|
|
|
|
|
#include "xprt_rdma.h"
|
|
|
|
|
@@ -414,6 +415,14 @@ connected:
|
|
|
return 0;
|
|
|
}
|
|
|
|
|
|
+static void rpcrdma_destroy_id(struct rdma_cm_id *id) /* release a cm_id together with the provider module reference held for it */
|
|
|
+{
|
|
|
+ if (id) { /* a NULL id is silently ignored */
|
|
|
+ module_put(id->device->owner); /* pairs with try_module_get() in rpcrdma_create_id() */
|
|
|
+ rdma_destroy_id(id); /* destroyed last: the module_put() above still dereferences id */
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
static struct rdma_cm_id *
|
|
|
rpcrdma_create_id(struct rpcrdma_xprt *xprt,
|
|
|
struct rpcrdma_ia *ia, struct sockaddr *addr)
|
|
@@ -440,6 +449,17 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
|
|
|
}
|
|
|
wait_for_completion_interruptible_timeout(&ia->ri_done,
|
|
|
msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
|
|
|
+
|
|
|
+ /* FIXME:
|
|
|
+ * Until xprtrdma supports DEVICE_REMOVAL, the provider must
|
|
|
+ * be pinned while there are active NFS/RDMA mounts to prevent
|
|
|
+ * hangs and crashes at umount time.
|
|
|
+ */
|
|
|
+ if (!ia->ri_async_rc && !try_module_get(id->device->owner)) {
|
|
|
+ dprintk("RPC: %s: Failed to get device module\n",
|
|
|
+ __func__);
|
|
|
+ ia->ri_async_rc = -ENODEV;
|
|
|
+ }
|
|
|
rc = ia->ri_async_rc;
|
|
|
if (rc)
|
|
|
goto out;
|
|
@@ -449,16 +469,17 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
|
|
|
if (rc) {
|
|
|
dprintk("RPC: %s: rdma_resolve_route() failed %i\n",
|
|
|
__func__, rc);
|
|
|
- goto out;
|
|
|
+ goto put;
|
|
|
}
|
|
|
wait_for_completion_interruptible_timeout(&ia->ri_done,
|
|
|
msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
|
|
|
rc = ia->ri_async_rc;
|
|
|
if (rc)
|
|
|
- goto out;
|
|
|
+ goto put;
|
|
|
|
|
|
return id;
|
|
|
-
|
|
|
+put:
|
|
|
+ module_put(id->device->owner);
|
|
|
out:
|
|
|
rdma_destroy_id(id);
|
|
|
return ERR_PTR(rc);
|
|
@@ -493,9 +514,11 @@ rpcrdma_clean_cq(struct ib_cq *cq)
|
|
|
int
|
|
|
rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
|
|
|
{
|
|
|
- int rc, mem_priv;
|
|
|
struct rpcrdma_ia *ia = &xprt->rx_ia;
|
|
|
struct ib_device_attr *devattr = &ia->ri_devattr;
|
|
|
+ int rc;
|
|
|
+
|
|
|
+ ia->ri_dma_mr = NULL;
|
|
|
|
|
|
ia->ri_id = rpcrdma_create_id(xprt, ia, addr);
|
|
|
if (IS_ERR(ia->ri_id)) {
|
|
@@ -519,11 +542,6 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
|
|
|
goto out3;
|
|
|
}
|
|
|
|
|
|
- if (devattr->device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) {
|
|
|
- ia->ri_have_dma_lkey = 1;
|
|
|
- ia->ri_dma_lkey = ia->ri_device->local_dma_lkey;
|
|
|
- }
|
|
|
-
|
|
|
if (memreg == RPCRDMA_FRMR) {
|
|
|
/* Requires both frmr reg and local dma lkey */
|
|
|
if (((devattr->device_cap_flags &
|
|
@@ -539,42 +557,19 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
|
|
|
if (!ia->ri_device->alloc_fmr) {
|
|
|
dprintk("RPC: %s: MTHCAFMR registration "
|
|
|
"not supported by HCA\n", __func__);
|
|
|
- memreg = RPCRDMA_ALLPHYSICAL;
|
|
|
+ goto out3;
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- /*
|
|
|
- * Optionally obtain an underlying physical identity mapping in
|
|
|
- * order to do a memory window-based bind. This base registration
|
|
|
- * is protected from remote access - that is enabled only by binding
|
|
|
- * for the specific bytes targeted during each RPC operation, and
|
|
|
- * revoked after the corresponding completion similar to a storage
|
|
|
- * adapter.
|
|
|
- */
|
|
|
switch (memreg) {
|
|
|
case RPCRDMA_FRMR:
|
|
|
ia->ri_ops = &rpcrdma_frwr_memreg_ops;
|
|
|
break;
|
|
|
case RPCRDMA_ALLPHYSICAL:
|
|
|
ia->ri_ops = &rpcrdma_physical_memreg_ops;
|
|
|
- mem_priv = IB_ACCESS_LOCAL_WRITE |
|
|
|
- IB_ACCESS_REMOTE_WRITE |
|
|
|
- IB_ACCESS_REMOTE_READ;
|
|
|
- goto register_setup;
|
|
|
+ break;
|
|
|
case RPCRDMA_MTHCAFMR:
|
|
|
ia->ri_ops = &rpcrdma_fmr_memreg_ops;
|
|
|
- if (ia->ri_have_dma_lkey)
|
|
|
- break;
|
|
|
- mem_priv = IB_ACCESS_LOCAL_WRITE;
|
|
|
- register_setup:
|
|
|
- ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv);
|
|
|
- if (IS_ERR(ia->ri_bind_mem)) {
|
|
|
- printk(KERN_ALERT "%s: ib_get_dma_mr for "
|
|
|
- "phys register failed with %lX\n",
|
|
|
- __func__, PTR_ERR(ia->ri_bind_mem));
|
|
|
- rc = -ENOMEM;
|
|
|
- goto out3;
|
|
|
- }
|
|
|
break;
|
|
|
default:
|
|
|
printk(KERN_ERR "RPC: Unsupported memory "
|
|
@@ -592,7 +587,7 @@ out3:
|
|
|
ib_dealloc_pd(ia->ri_pd);
|
|
|
ia->ri_pd = NULL;
|
|
|
out2:
|
|
|
- rdma_destroy_id(ia->ri_id);
|
|
|
+ rpcrdma_destroy_id(ia->ri_id);
|
|
|
ia->ri_id = NULL;
|
|
|
out1:
|
|
|
return rc;
|
|
@@ -606,19 +601,11 @@ out1:
|
|
|
void
|
|
|
rpcrdma_ia_close(struct rpcrdma_ia *ia)
|
|
|
{
|
|
|
- int rc;
|
|
|
-
|
|
|
dprintk("RPC: %s: entering\n", __func__);
|
|
|
- if (ia->ri_bind_mem != NULL) {
|
|
|
- rc = ib_dereg_mr(ia->ri_bind_mem);
|
|
|
- dprintk("RPC: %s: ib_dereg_mr returned %i\n",
|
|
|
- __func__, rc);
|
|
|
- }
|
|
|
-
|
|
|
if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) {
|
|
|
if (ia->ri_id->qp)
|
|
|
rdma_destroy_qp(ia->ri_id);
|
|
|
- rdma_destroy_id(ia->ri_id);
|
|
|
+ rpcrdma_destroy_id(ia->ri_id);
|
|
|
ia->ri_id = NULL;
|
|
|
}
|
|
|
|
|
@@ -639,6 +626,12 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
|
|
|
struct ib_cq_init_attr cq_attr = {};
|
|
|
int rc, err;
|
|
|
|
|
|
+ if (devattr->max_sge < RPCRDMA_MAX_IOVS) {
|
|
|
+ dprintk("RPC: %s: insufficient sge's available\n",
|
|
|
+ __func__);
|
|
|
+ return -ENOMEM;
|
|
|
+ }
|
|
|
+
|
|
|
/* check provider's send/recv wr limits */
|
|
|
if (cdata->max_requests > devattr->max_qp_wr)
|
|
|
cdata->max_requests = devattr->max_qp_wr;
|
|
@@ -651,21 +644,13 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
|
|
|
if (rc)
|
|
|
return rc;
|
|
|
ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
|
|
|
- ep->rep_attr.cap.max_send_sge = (cdata->padding ? 4 : 2);
|
|
|
+ ep->rep_attr.cap.max_send_sge = RPCRDMA_MAX_IOVS;
|
|
|
ep->rep_attr.cap.max_recv_sge = 1;
|
|
|
ep->rep_attr.cap.max_inline_data = 0;
|
|
|
ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
|
|
|
ep->rep_attr.qp_type = IB_QPT_RC;
|
|
|
ep->rep_attr.port_num = ~0;
|
|
|
|
|
|
- if (cdata->padding) {
|
|
|
- ep->rep_padbuf = rpcrdma_alloc_regbuf(ia, cdata->padding,
|
|
|
- GFP_KERNEL);
|
|
|
- if (IS_ERR(ep->rep_padbuf))
|
|
|
- return PTR_ERR(ep->rep_padbuf);
|
|
|
- } else
|
|
|
- ep->rep_padbuf = NULL;
|
|
|
-
|
|
|
dprintk("RPC: %s: requested max: dtos: send %d recv %d; "
|
|
|
"iovs: send %d recv %d\n",
|
|
|
__func__,
|
|
@@ -748,7 +733,8 @@ out2:
|
|
|
dprintk("RPC: %s: ib_destroy_cq returned %i\n",
|
|
|
__func__, err);
|
|
|
out1:
|
|
|
- rpcrdma_free_regbuf(ia, ep->rep_padbuf);
|
|
|
+ if (ia->ri_dma_mr)
|
|
|
+ ib_dereg_mr(ia->ri_dma_mr);
|
|
|
return rc;
|
|
|
}
|
|
|
|
|
@@ -775,8 +761,6 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
|
|
|
ia->ri_id->qp = NULL;
|
|
|
}
|
|
|
|
|
|
- rpcrdma_free_regbuf(ia, ep->rep_padbuf);
|
|
|
-
|
|
|
rpcrdma_clean_cq(ep->rep_attr.recv_cq);
|
|
|
rc = ib_destroy_cq(ep->rep_attr.recv_cq);
|
|
|
if (rc)
|
|
@@ -788,6 +772,12 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
|
|
|
if (rc)
|
|
|
dprintk("RPC: %s: ib_destroy_cq returned %i\n",
|
|
|
__func__, rc);
|
|
|
+
|
|
|
+ if (ia->ri_dma_mr) {
|
|
|
+ rc = ib_dereg_mr(ia->ri_dma_mr);
|
|
|
+ dprintk("RPC: %s: ib_dereg_mr returned %i\n",
|
|
|
+ __func__, rc);
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
/*
|
|
@@ -825,7 +815,7 @@ retry:
|
|
|
if (ia->ri_device != id->device) {
|
|
|
printk("RPC: %s: can't reconnect on "
|
|
|
"different device!\n", __func__);
|
|
|
- rdma_destroy_id(id);
|
|
|
+ rpcrdma_destroy_id(id);
|
|
|
rc = -ENETUNREACH;
|
|
|
goto out;
|
|
|
}
|
|
@@ -834,7 +824,7 @@ retry:
|
|
|
if (rc) {
|
|
|
dprintk("RPC: %s: rdma_create_qp failed %i\n",
|
|
|
__func__, rc);
|
|
|
- rdma_destroy_id(id);
|
|
|
+ rpcrdma_destroy_id(id);
|
|
|
rc = -ENETUNREACH;
|
|
|
goto out;
|
|
|
}
|
|
@@ -845,7 +835,7 @@ retry:
|
|
|
write_unlock(&ia->ri_qplock);
|
|
|
|
|
|
rdma_destroy_qp(old);
|
|
|
- rdma_destroy_id(old);
|
|
|
+ rpcrdma_destroy_id(old);
|
|
|
} else {
|
|
|
dprintk("RPC: %s: connecting...\n", __func__);
|
|
|
rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
|
|
@@ -1229,75 +1219,6 @@ rpcrdma_mapping_error(struct rpcrdma_mr_seg *seg)
|
|
|
(unsigned long long)seg->mr_dma, seg->mr_dmalen);
|
|
|
}
|
|
|
|
|
|
-static int
|
|
|
-rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len,
|
|
|
- struct ib_mr **mrp, struct ib_sge *iov)
|
|
|
-{
|
|
|
- struct ib_phys_buf ipb;
|
|
|
- struct ib_mr *mr;
|
|
|
- int rc;
|
|
|
-
|
|
|
- /*
|
|
|
- * All memory passed here was kmalloc'ed, therefore phys-contiguous.
|
|
|
- */
|
|
|
- iov->addr = ib_dma_map_single(ia->ri_device,
|
|
|
- va, len, DMA_BIDIRECTIONAL);
|
|
|
- if (ib_dma_mapping_error(ia->ri_device, iov->addr))
|
|
|
- return -ENOMEM;
|
|
|
-
|
|
|
- iov->length = len;
|
|
|
-
|
|
|
- if (ia->ri_have_dma_lkey) {
|
|
|
- *mrp = NULL;
|
|
|
- iov->lkey = ia->ri_dma_lkey;
|
|
|
- return 0;
|
|
|
- } else if (ia->ri_bind_mem != NULL) {
|
|
|
- *mrp = NULL;
|
|
|
- iov->lkey = ia->ri_bind_mem->lkey;
|
|
|
- return 0;
|
|
|
- }
|
|
|
-
|
|
|
- ipb.addr = iov->addr;
|
|
|
- ipb.size = iov->length;
|
|
|
- mr = ib_reg_phys_mr(ia->ri_pd, &ipb, 1,
|
|
|
- IB_ACCESS_LOCAL_WRITE, &iov->addr);
|
|
|
-
|
|
|
- dprintk("RPC: %s: phys convert: 0x%llx "
|
|
|
- "registered 0x%llx length %d\n",
|
|
|
- __func__, (unsigned long long)ipb.addr,
|
|
|
- (unsigned long long)iov->addr, len);
|
|
|
-
|
|
|
- if (IS_ERR(mr)) {
|
|
|
- *mrp = NULL;
|
|
|
- rc = PTR_ERR(mr);
|
|
|
- dprintk("RPC: %s: failed with %i\n", __func__, rc);
|
|
|
- } else {
|
|
|
- *mrp = mr;
|
|
|
- iov->lkey = mr->lkey;
|
|
|
- rc = 0;
|
|
|
- }
|
|
|
-
|
|
|
- return rc;
|
|
|
-}
|
|
|
-
|
|
|
-static int
|
|
|
-rpcrdma_deregister_internal(struct rpcrdma_ia *ia,
|
|
|
- struct ib_mr *mr, struct ib_sge *iov)
|
|
|
-{
|
|
|
- int rc;
|
|
|
-
|
|
|
- ib_dma_unmap_single(ia->ri_device,
|
|
|
- iov->addr, iov->length, DMA_BIDIRECTIONAL);
|
|
|
-
|
|
|
- if (NULL == mr)
|
|
|
- return 0;
|
|
|
-
|
|
|
- rc = ib_dereg_mr(mr);
|
|
|
- if (rc)
|
|
|
- dprintk("RPC: %s: ib_dereg_mr failed %i\n", __func__, rc);
|
|
|
- return rc;
|
|
|
-}
|
|
|
-
|
|
|
/**
|
|
|
* rpcrdma_alloc_regbuf - kmalloc and register memory for SEND/RECV buffers
|
|
|
* @ia: controlling rpcrdma_ia
|
|
@@ -1317,26 +1238,29 @@ struct rpcrdma_regbuf *
|
|
|
rpcrdma_alloc_regbuf(struct rpcrdma_ia *ia, size_t size, gfp_t flags)
|
|
|
{
|
|
|
struct rpcrdma_regbuf *rb;
|
|
|
- int rc;
|
|
|
+ struct ib_sge *iov;
|
|
|
|
|
|
- rc = -ENOMEM;
|
|
|
rb = kmalloc(sizeof(*rb) + size, flags);
|
|
|
if (rb == NULL)
|
|
|
goto out;
|
|
|
|
|
|
- rb->rg_size = size;
|
|
|
- rb->rg_owner = NULL;
|
|
|
- rc = rpcrdma_register_internal(ia, rb->rg_base, size,
|
|
|
- &rb->rg_mr, &rb->rg_iov);
|
|
|
- if (rc)
|
|
|
+ iov = &rb->rg_iov;
|
|
|
+ iov->addr = ib_dma_map_single(ia->ri_device,
|
|
|
+ (void *)rb->rg_base, size,
|
|
|
+ DMA_BIDIRECTIONAL);
|
|
|
+ if (ib_dma_mapping_error(ia->ri_device, iov->addr))
|
|
|
goto out_free;
|
|
|
|
|
|
+ iov->length = size;
|
|
|
+ iov->lkey = ia->ri_dma_lkey;
|
|
|
+ rb->rg_size = size;
|
|
|
+ rb->rg_owner = NULL;
|
|
|
return rb;
|
|
|
|
|
|
out_free:
|
|
|
kfree(rb);
|
|
|
out:
|
|
|
- return ERR_PTR(rc);
|
|
|
+ return ERR_PTR(-ENOMEM);
|
|
|
}
|
|
|
|
|
|
/**
|
|
@@ -1347,10 +1271,15 @@ out:
|
|
|
void
|
|
|
rpcrdma_free_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb)
|
|
|
{
|
|
|
- if (rb) {
|
|
|
- rpcrdma_deregister_internal(ia, rb->rg_mr, &rb->rg_iov);
|
|
|
- kfree(rb);
|
|
|
- }
|
|
|
+ struct ib_sge *iov;
|
|
|
+
|
|
|
+ if (!rb)
|
|
|
+ return; /* NULL regbuf: nothing to unmap or free */
|
|
|
+
|
|
|
+ iov = &rb->rg_iov; /* holds the DMA address and length recorded by rpcrdma_alloc_regbuf() */
|
|
|
+ ib_dma_unmap_single(ia->ri_device,
|
|
|
+ iov->addr, iov->length, DMA_BIDIRECTIONAL); /* same direction the buffer was mapped with */
|
|
|
+ kfree(rb); /* header and payload came from one kmalloc, so one kfree releases both */
|
|
|
}
|
|
|
|
|
|
/*
|
|
@@ -1363,9 +1292,11 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
|
|
|
struct rpcrdma_ep *ep,
|
|
|
struct rpcrdma_req *req)
|
|
|
{
|
|
|
+ struct ib_device *device = ia->ri_device;
|
|
|
struct ib_send_wr send_wr, *send_wr_fail;
|
|
|
struct rpcrdma_rep *rep = req->rl_reply;
|
|
|
- int rc;
|
|
|
+ struct ib_sge *iov = req->rl_send_iov;
|
|
|
+ int i, rc;
|
|
|
|
|
|
if (rep) {
|
|
|
rc = rpcrdma_ep_post_recv(ia, ep, rep);
|
|
@@ -1376,22 +1307,15 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
|
|
|
|
|
|
send_wr.next = NULL;
|
|
|
send_wr.wr_id = RPCRDMA_IGNORE_COMPLETION;
|
|
|
- send_wr.sg_list = req->rl_send_iov;
|
|
|
+ send_wr.sg_list = iov;
|
|
|
send_wr.num_sge = req->rl_niovs;
|
|
|
send_wr.opcode = IB_WR_SEND;
|
|
|
- if (send_wr.num_sge == 4) /* no need to sync any pad (constant) */
|
|
|
- ib_dma_sync_single_for_device(ia->ri_device,
|
|
|
- req->rl_send_iov[3].addr,
|
|
|
- req->rl_send_iov[3].length,
|
|
|
- DMA_TO_DEVICE);
|
|
|
- ib_dma_sync_single_for_device(ia->ri_device,
|
|
|
- req->rl_send_iov[1].addr,
|
|
|
- req->rl_send_iov[1].length,
|
|
|
- DMA_TO_DEVICE);
|
|
|
- ib_dma_sync_single_for_device(ia->ri_device,
|
|
|
- req->rl_send_iov[0].addr,
|
|
|
- req->rl_send_iov[0].length,
|
|
|
- DMA_TO_DEVICE);
|
|
|
+
|
|
|
+ for (i = 0; i < send_wr.num_sge; i++)
|
|
|
+ ib_dma_sync_single_for_device(device, iov[i].addr,
|
|
|
+ iov[i].length, DMA_TO_DEVICE);
|
|
|
+ dprintk("RPC: %s: posting %d s/g entries\n",
|
|
|
+ __func__, send_wr.num_sge);
|
|
|
|
|
|
if (DECR_CQCOUNT(ep) > 0)
|
|
|
send_wr.send_flags = 0;
|