@@ -15,6 +15,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/slab.h>
+#include <rdma/mr_pool.h>
 #include <linux/err.h>
 #include <linux/string.h>
 #include <linux/atomic.h>
@@ -59,6 +60,9 @@ struct nvme_rdma_request {
 	struct nvme_request	req;
 	struct ib_mr		*mr;
 	struct nvme_rdma_qe	sqe;
+	union nvme_result	result;
+	__le16			status;
+	refcount_t		ref;
 	struct ib_sge		sge[1 + NVME_RDMA_MAX_INLINE_SEGMENTS];
 	u32			num_sge;
 	int			nents;
@@ -73,11 +77,11 @@ struct nvme_rdma_request {
 enum nvme_rdma_queue_flags {
 	NVME_RDMA_Q_ALLOCATED		= 0,
 	NVME_RDMA_Q_LIVE		= 1,
+	NVME_RDMA_Q_TR_READY		= 2,
 };
 
 struct nvme_rdma_queue {
 	struct nvme_rdma_qe	*rsp_ring;
-	atomic_t		sig_count;
 	int			queue_size;
 	size_t			cmnd_capsule_len;
 	struct nvme_rdma_ctrl	*ctrl;
@@ -258,32 +262,6 @@ static int nvme_rdma_create_qp(struct nvme_rdma_queue *queue, const int factor)
 	return ret;
 }
 
-static int nvme_rdma_reinit_request(void *data, struct request *rq)
-{
-	struct nvme_rdma_ctrl *ctrl = data;
-	struct nvme_rdma_device *dev = ctrl->device;
-	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
-	int ret = 0;
-
-	if (WARN_ON_ONCE(!req->mr))
-		return 0;
-
-	ib_dereg_mr(req->mr);
-
-	req->mr = ib_alloc_mr(dev->pd, IB_MR_TYPE_MEM_REG,
-			ctrl->max_fr_pages);
-	if (IS_ERR(req->mr)) {
-		ret = PTR_ERR(req->mr);
-		req->mr = NULL;
-		goto out;
-	}
-
-	req->mr->need_inval = false;
-
-out:
-	return ret;
-}
-
 static void nvme_rdma_exit_request(struct blk_mq_tag_set *set,
 		struct request *rq, unsigned int hctx_idx)
 {
@@ -293,9 +271,6 @@ static void nvme_rdma_exit_request(struct blk_mq_tag_set *set,
 	struct nvme_rdma_queue *queue = &ctrl->queues[queue_idx];
 	struct nvme_rdma_device *dev = queue->device;
 
-	if (req->mr)
-		ib_dereg_mr(req->mr);
-
 	nvme_rdma_free_qe(dev->dev, &req->sqe, sizeof(struct nvme_command),
 			DMA_TO_DEVICE);
 }
@@ -317,21 +292,9 @@ static int nvme_rdma_init_request(struct blk_mq_tag_set *set,
 	if (ret)
 		return ret;
 
-	req->mr = ib_alloc_mr(dev->pd, IB_MR_TYPE_MEM_REG,
-			ctrl->max_fr_pages);
-	if (IS_ERR(req->mr)) {
-		ret = PTR_ERR(req->mr);
-		goto out_free_qe;
-	}
-
 	req->queue = queue;
 
 	return 0;
-
-out_free_qe:
-	nvme_rdma_free_qe(dev->dev, &req->sqe, sizeof(struct nvme_command),
-			DMA_TO_DEVICE);
-	return -ENOMEM;
 }
 
 static int nvme_rdma_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
@@ -428,10 +391,23 @@ out_err:
 
 static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
 {
-	struct nvme_rdma_device *dev = queue->device;
-	struct ib_device *ibdev = dev->dev;
+	struct nvme_rdma_device *dev;
+	struct ib_device *ibdev;
 
-	rdma_destroy_qp(queue->cm_id);
+	if (!test_and_clear_bit(NVME_RDMA_Q_TR_READY, &queue->flags))
+		return;
+
+	dev = queue->device;
+	ibdev = dev->dev;
+
+	ib_mr_pool_destroy(queue->qp, &queue->qp->rdma_mrs);
+
+	/*
+	 * The cm_id object might have been destroyed during RDMA connection
+	 * establishment error flow to avoid getting other cma events, thus
+	 * the destruction of the QP shouldn't use rdma_cm API.
+	 */
+	ib_destroy_qp(queue->qp);
 	ib_free_cq(queue->ib_cq);
 
 	nvme_rdma_free_ring(ibdev, queue->rsp_ring, queue->queue_size,
@@ -440,6 +416,12 @@ static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
 	nvme_rdma_dev_put(dev);
 }
 
+static int nvme_rdma_get_max_fr_pages(struct ib_device *ibdev)
+{
+	return min_t(u32, NVME_RDMA_MAX_SEGMENTS,
+		     ibdev->attrs.max_fast_reg_page_list_len);
+}
+
 static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
 {
 	struct ib_device *ibdev;
@@ -482,8 +464,24 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
 		goto out_destroy_qp;
 	}
 
+	ret = ib_mr_pool_init(queue->qp, &queue->qp->rdma_mrs,
+			      queue->queue_size,
+			      IB_MR_TYPE_MEM_REG,
+			      nvme_rdma_get_max_fr_pages(ibdev));
+	if (ret) {
+		dev_err(queue->ctrl->ctrl.device,
+			"failed to initialize MR pool sized %d for QID %d\n",
+			queue->queue_size, idx);
+		goto out_destroy_ring;
+	}
+
+	set_bit(NVME_RDMA_Q_TR_READY, &queue->flags);
+
 	return 0;
 
+out_destroy_ring:
+	nvme_rdma_free_ring(ibdev, queue->rsp_ring, queue->queue_size,
+			    sizeof(struct nvme_completion), DMA_FROM_DEVICE);
 out_destroy_qp:
 	rdma_destroy_qp(queue->cm_id);
 out_destroy_ib_cq:
@@ -510,7 +508,6 @@ static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl,
 		queue->cmnd_capsule_len = sizeof(struct nvme_command);
 
 	queue->queue_size = queue_size;
-	atomic_set(&queue->sig_count, 0);
 
 	queue->cm_id = rdma_create_id(&init_net, nvme_rdma_cm_handler, queue,
 			RDMA_PS_TCP, IB_QPT_RC);
@@ -546,6 +543,7 @@ static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl,
 
 out_destroy_cm_id:
 	rdma_destroy_id(queue->cm_id);
+	nvme_rdma_destroy_queue_ib(queue);
 	return ret;
 }
 
@@ -756,8 +754,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
 
 	ctrl->device = ctrl->queues[0].device;
 
-	ctrl->max_fr_pages = min_t(u32, NVME_RDMA_MAX_SEGMENTS,
-		ctrl->device->dev->attrs.max_fast_reg_page_list_len);
+	ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev);
 
 	if (new) {
 		ctrl->ctrl.admin_tagset = nvme_rdma_alloc_tagset(&ctrl->ctrl, true);
@@ -771,10 +768,6 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
 			error = PTR_ERR(ctrl->ctrl.admin_q);
 			goto out_free_tagset;
 		}
-	} else {
-		error = nvme_reinit_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset);
-		if (error)
-			goto out_free_queue;
 	}
 
 	error = nvme_rdma_start_queue(ctrl, 0);
@@ -854,10 +847,6 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new)
 			goto out_free_tag_set;
 		}
 	} else {
-		ret = nvme_reinit_tagset(&ctrl->ctrl, ctrl->ctrl.tagset);
-		if (ret)
-			goto out_free_io_queues;
-
 		blk_mq_update_nr_hw_queues(&ctrl->tag_set,
 			ctrl->ctrl.queue_count - 1);
 	}
@@ -1018,8 +1007,18 @@ static void nvme_rdma_memreg_done(struct ib_cq *cq, struct ib_wc *wc)
 
 static void nvme_rdma_inv_rkey_done(struct ib_cq *cq, struct ib_wc *wc)
 {
-	if (unlikely(wc->status != IB_WC_SUCCESS))
+	struct nvme_rdma_request *req =
+		container_of(wc->wr_cqe, struct nvme_rdma_request, reg_cqe);
+	struct request *rq = blk_mq_rq_from_pdu(req);
+
+	if (unlikely(wc->status != IB_WC_SUCCESS)) {
 		nvme_rdma_wr_error(cq, wc, "LOCAL_INV");
+		return;
+	}
+
+	if (refcount_dec_and_test(&req->ref))
+		nvme_end_request(rq, req->status, req->result);
+
 }
 
 static int nvme_rdma_inv_rkey(struct nvme_rdma_queue *queue,
@@ -1030,7 +1029,7 @@ static int nvme_rdma_inv_rkey(struct nvme_rdma_queue *queue,
 		.opcode		    = IB_WR_LOCAL_INV,
 		.next		    = NULL,
 		.num_sge	    = 0,
-		.send_flags	    = 0,
+		.send_flags	    = IB_SEND_SIGNALED,
 		.ex.invalidate_rkey = req->mr->rkey,
 	};
 
@@ -1044,22 +1043,15 @@ static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue,
 		struct request *rq)
 {
 	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
-	struct nvme_rdma_ctrl *ctrl = queue->ctrl;
 	struct nvme_rdma_device *dev = queue->device;
 	struct ib_device *ibdev = dev->dev;
-	int res;
 
 	if (!blk_rq_bytes(rq))
 		return;
 
-	if (req->mr->need_inval && test_bit(NVME_RDMA_Q_LIVE, &req->queue->flags)) {
-		res = nvme_rdma_inv_rkey(queue, req);
-		if (unlikely(res < 0)) {
-			dev_err(ctrl->ctrl.device,
-				"Queueing INV WR for rkey %#x failed (%d)\n",
-				req->mr->rkey, res);
-			nvme_rdma_error_recovery(queue->ctrl);
-		}
+	if (req->mr) {
+		ib_mr_pool_put(queue->qp, &queue->qp->rdma_mrs, req->mr);
+		req->mr = NULL;
 	}
 
 	ib_dma_unmap_sg(ibdev, req->sg_table.sgl,
|
|
|
struct nvme_keyed_sgl_desc *sg = &c->common.dptr.ksgl;
|
|
struct nvme_keyed_sgl_desc *sg = &c->common.dptr.ksgl;
|
|
|
int nr;
|
|
int nr;
|
|
|
|
|
|
|
|
|
|
+ req->mr = ib_mr_pool_get(queue->qp, &queue->qp->rdma_mrs);
|
|
|
|
|
+ if (WARN_ON_ONCE(!req->mr))
|
|
|
|
|
+ return -EAGAIN;
|
|
|
|
|
+
|
|
|
/*
|
|
/*
|
|
|
* Align the MR to a 4K page size to match the ctrl page size and
|
|
* Align the MR to a 4K page size to match the ctrl page size and
|
|
|
* the block virtual boundary.
|
|
* the block virtual boundary.
|
|
|
*/
|
|
*/
|
|
|
nr = ib_map_mr_sg(req->mr, req->sg_table.sgl, count, NULL, SZ_4K);
|
|
nr = ib_map_mr_sg(req->mr, req->sg_table.sgl, count, NULL, SZ_4K);
|
|
|
if (unlikely(nr < count)) {
|
|
if (unlikely(nr < count)) {
|
|
|
|
|
+ ib_mr_pool_put(queue->qp, &queue->qp->rdma_mrs, req->mr);
|
|
|
|
|
+ req->mr = NULL;
|
|
|
if (nr < 0)
|
|
if (nr < 0)
|
|
|
return nr;
|
|
return nr;
|
|
|
return -EINVAL;
|
|
return -EINVAL;
|
|
@@ -1142,8 +1140,6 @@ static int nvme_rdma_map_sg_fr(struct nvme_rdma_queue *queue,
|
|
|
IB_ACCESS_REMOTE_READ |
|
|
IB_ACCESS_REMOTE_READ |
|
|
|
IB_ACCESS_REMOTE_WRITE;
|
|
IB_ACCESS_REMOTE_WRITE;
|
|
|
|
|
|
|
|
- req->mr->need_inval = true;
|
|
|
|
|
-
|
|
|
|
|
sg->addr = cpu_to_le64(req->mr->iova);
|
|
sg->addr = cpu_to_le64(req->mr->iova);
|
|
|
put_unaligned_le24(req->mr->length, sg->length);
|
|
put_unaligned_le24(req->mr->length, sg->length);
|
|
|
put_unaligned_le32(req->mr->rkey, sg->key);
|
|
put_unaligned_le32(req->mr->rkey, sg->key);
|
|
@@ -1163,7 +1159,7 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
 
 	req->num_sge = 1;
 	req->inline_data = false;
-	req->mr->need_inval = false;
+	refcount_set(&req->ref, 2); /* send and recv completions */
 
 	c->common.flags |= NVME_CMD_SGL_METABUF;
 
@@ -1200,25 +1196,24 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
 
 static void nvme_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc)
 {
-	if (unlikely(wc->status != IB_WC_SUCCESS))
-		nvme_rdma_wr_error(cq, wc, "SEND");
-}
+	struct nvme_rdma_qe *qe =
+		container_of(wc->wr_cqe, struct nvme_rdma_qe, cqe);
+	struct nvme_rdma_request *req =
+		container_of(qe, struct nvme_rdma_request, sqe);
+	struct request *rq = blk_mq_rq_from_pdu(req);
 
-/*
- * We want to signal completion at least every queue depth/2. This returns the
- * largest power of two that is not above half of (queue size + 1) to optimize
- * (avoid divisions).
- */
-static inline bool nvme_rdma_queue_sig_limit(struct nvme_rdma_queue *queue)
-{
-	int limit = 1 << ilog2((queue->queue_size + 1) / 2);
+	if (unlikely(wc->status != IB_WC_SUCCESS)) {
+		nvme_rdma_wr_error(cq, wc, "SEND");
+		return;
+	}
 
-	return (atomic_inc_return(&queue->sig_count) & (limit - 1)) == 0;
+	if (refcount_dec_and_test(&req->ref))
+		nvme_end_request(rq, req->status, req->result);
 }
 
 static int nvme_rdma_post_send(struct nvme_rdma_queue *queue,
 		struct nvme_rdma_qe *qe, struct ib_sge *sge, u32 num_sge,
-		struct ib_send_wr *first, bool flush)
+		struct ib_send_wr *first)
 {
 	struct ib_send_wr wr, *bad_wr;
 	int ret;
@@ -1227,31 +1222,12 @@ static int nvme_rdma_post_send(struct nvme_rdma_queue *queue,
 	sge->length = sizeof(struct nvme_command),
 	sge->lkey   = queue->device->pd->local_dma_lkey;
 
-	qe->cqe.done = nvme_rdma_send_done;
-
 	wr.next       = NULL;
 	wr.wr_cqe     = &qe->cqe;
 	wr.sg_list    = sge;
 	wr.num_sge    = num_sge;
 	wr.opcode     = IB_WR_SEND;
-	wr.send_flags = 0;
-
-	/*
-	 * Unsignalled send completions are another giant desaster in the
-	 * IB Verbs spec: If we don't regularly post signalled sends
-	 * the send queue will fill up and only a QP reset will rescue us.
-	 * Would have been way to obvious to handle this in hardware or
-	 * at least the RDMA stack..
-	 *
-	 * Always signal the flushes. The magic request used for the flush
-	 * sequencer is not allocated in our driver's tagset and it's
-	 * triggered to be freed by blk_cleanup_queue(). So we need to
-	 * always mark it as signaled to ensure that the "wr_cqe", which is
-	 * embedded in request's payload, is not freed when __ib_process_cq()
-	 * calls wr_cqe->done().
-	 */
-	if (nvme_rdma_queue_sig_limit(queue) || flush)
-		wr.send_flags |= IB_SEND_SIGNALED;
+	wr.send_flags = IB_SEND_SIGNALED;
 
 	if (first)
 		first->next = &wr;
|
|
|
return queue->ctrl->tag_set.tags[queue_idx - 1];
|
|
return queue->ctrl->tag_set.tags[queue_idx - 1];
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+static void nvme_rdma_async_done(struct ib_cq *cq, struct ib_wc *wc)
|
|
|
|
|
+{
|
|
|
|
|
+ if (unlikely(wc->status != IB_WC_SUCCESS))
|
|
|
|
|
+ nvme_rdma_wr_error(cq, wc, "ASYNC");
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
static void nvme_rdma_submit_async_event(struct nvme_ctrl *arg)
|
|
static void nvme_rdma_submit_async_event(struct nvme_ctrl *arg)
|
|
|
{
|
|
{
|
|
|
struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(arg);
|
|
struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(arg);
|
|
@@ -1319,10 +1301,12 @@ static void nvme_rdma_submit_async_event(struct nvme_ctrl *arg)
 	cmd->common.flags |= NVME_CMD_SGL_METABUF;
 	nvme_rdma_set_sg_null(cmd);
 
+	sqe->cqe.done = nvme_rdma_async_done;
+
 	ib_dma_sync_single_for_device(dev, sqe->dma, sizeof(*cmd),
 			DMA_TO_DEVICE);
 
-	ret = nvme_rdma_post_send(queue, sqe, &sge, 1, NULL, false);
+	ret = nvme_rdma_post_send(queue, sqe, &sge, 1, NULL);
 	WARN_ON_ONCE(ret);
 }
 
@@ -1343,14 +1327,34 @@ static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
 	}
 	req = blk_mq_rq_to_pdu(rq);
 
-	if (rq->tag == tag)
-		ret = 1;
+	req->status = cqe->status;
+	req->result = cqe->result;
+
+	if (wc->wc_flags & IB_WC_WITH_INVALIDATE) {
+		if (unlikely(wc->ex.invalidate_rkey != req->mr->rkey)) {
+			dev_err(queue->ctrl->ctrl.device,
+				"Bogus remote invalidation for rkey %#x\n",
+				req->mr->rkey);
+			nvme_rdma_error_recovery(queue->ctrl);
+		}
+	} else if (req->mr) {
+		ret = nvme_rdma_inv_rkey(queue, req);
+		if (unlikely(ret < 0)) {
+			dev_err(queue->ctrl->ctrl.device,
+				"Queueing INV WR for rkey %#x failed (%d)\n",
+				req->mr->rkey, ret);
+			nvme_rdma_error_recovery(queue->ctrl);
+		}
+		/* the local invalidation completion will end the request */
+		return 0;
+	}
 
-	if ((wc->wc_flags & IB_WC_WITH_INVALIDATE) &&
-	    wc->ex.invalidate_rkey == req->mr->rkey)
-		req->mr->need_inval = false;
+	if (refcount_dec_and_test(&req->ref)) {
+		if (rq->tag == tag)
+			ret = 1;
+		nvme_end_request(rq, req->status, req->result);
+	}
 
-	nvme_end_request(rq, cqe->status, cqe->result);
 	return ret;
 }
 
@@ -1607,7 +1611,6 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
 	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
 	struct nvme_rdma_qe *sqe = &req->sqe;
 	struct nvme_command *c = sqe->data;
-	bool flush = false;
 	struct ib_device *dev;
 	blk_status_t ret;
 	int err;
|
|
|
goto err;
|
|
goto err;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+ sqe->cqe.done = nvme_rdma_send_done;
|
|
|
|
|
+
|
|
|
ib_dma_sync_single_for_device(dev, sqe->dma,
|
|
ib_dma_sync_single_for_device(dev, sqe->dma,
|
|
|
sizeof(struct nvme_command), DMA_TO_DEVICE);
|
|
sizeof(struct nvme_command), DMA_TO_DEVICE);
|
|
|
|
|
|
|
|
- if (req_op(rq) == REQ_OP_FLUSH)
|
|
|
|
|
- flush = true;
|
|
|
|
|
err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
|
|
err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
|
|
|
- req->mr->need_inval ? &req->reg_wr.wr : NULL, flush);
|
|
|
|
|
|
|
+ req->mr ? &req->reg_wr.wr : NULL);
|
|
|
if (unlikely(err)) {
|
|
if (unlikely(err)) {
|
|
|
nvme_rdma_unmap_data(queue, rq);
|
|
nvme_rdma_unmap_data(queue, rq);
|
|
|
goto err;
|
|
goto err;
|
|
@@ -1790,7 +1793,6 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
 	.submit_async_event	= nvme_rdma_submit_async_event,
 	.delete_ctrl		= nvme_rdma_delete_ctrl,
 	.get_address		= nvmf_get_address,
-	.reinit_request		= nvme_rdma_reinit_request,
 };
 
 static inline bool