@@ -40,13 +40,14 @@
 
 #define NVME_RDMA_MAX_SEGMENTS		256
 
-#define NVME_RDMA_MAX_INLINE_SEGMENTS	1
+#define NVME_RDMA_MAX_INLINE_SEGMENTS	4
 
 struct nvme_rdma_device {
 	struct ib_device	*dev;
 	struct ib_pd		*pd;
 	struct kref		ref;
 	struct list_head	entry;
+	unsigned int		num_inline_segments;
 };
 
 struct nvme_rdma_qe {
@@ -117,6 +118,7 @@ struct nvme_rdma_ctrl {
 	struct sockaddr_storage src_addr;
 
 	struct nvme_ctrl	ctrl;
+	bool			use_inline_data;
 };
 
 static inline struct nvme_rdma_ctrl *to_rdma_ctrl(struct nvme_ctrl *ctrl)
@@ -249,7 +251,7 @@ static int nvme_rdma_create_qp(struct nvme_rdma_queue *queue, const int factor)
 	/* +1 for drain */
 	init_attr.cap.max_recv_wr = queue->queue_size + 1;
 	init_attr.cap.max_recv_sge = 1;
-	init_attr.cap.max_send_sge = 1 + NVME_RDMA_MAX_INLINE_SEGMENTS;
+	init_attr.cap.max_send_sge = 1 + dev->num_inline_segments;
 	init_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
 	init_attr.qp_type = IB_QPT_RC;
 	init_attr.send_cq = queue->ib_cq;
@@ -286,6 +288,7 @@ static int nvme_rdma_init_request(struct blk_mq_tag_set *set,
 	struct ib_device *ibdev = dev->dev;
 	int ret;
 
+	nvme_req(rq)->ctrl = &ctrl->ctrl;
 	ret = nvme_rdma_alloc_qe(ibdev, &req->sqe, sizeof(struct nvme_command),
 			DMA_TO_DEVICE);
 	if (ret)
@@ -374,6 +377,8 @@ nvme_rdma_find_get_device(struct rdma_cm_id *cm_id)
 		goto out_free_pd;
 	}
 
+	ndev->num_inline_segments = min(NVME_RDMA_MAX_INLINE_SEGMENTS,
+					ndev->dev->attrs.max_sge - 1);
 	list_add(&ndev->entry, &device_list);
 out_unlock:
 	mutex_unlock(&device_list_mutex);
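
The min() above is the negotiation point for the new inline support: one send SGE is always reserved for the NVMe command capsule, so only max_sge - 1 device SGEs can carry payload, capped at the driver's limit of 4. A minimal user-space sketch of that clamp (the hca_max_sge values are hypothetical, not from the patch):

/* user-space illustration only, not driver code */
#include <stdio.h>

#define NVME_RDMA_MAX_INLINE_SEGMENTS	4

static int clamp_inline_segments(int device_max_sge)
{
	int avail = device_max_sge - 1;	/* reserve one SGE for the command */

	return avail < NVME_RDMA_MAX_INLINE_SEGMENTS ?
			avail : NVME_RDMA_MAX_INLINE_SEGMENTS;
}

int main(void)
{
	int hca_max_sge[] = { 2, 4, 32 };	/* hypothetical HCA capabilities */
	int i;

	for (i = 0; i < 3; i++)
		printf("max_sge=%d -> %d inline segments\n",
		       hca_max_sge[i], clamp_inline_segments(hca_max_sge[i]));
	return 0;
}
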
@@ -868,6 +873,31 @@ out_free_io_queues:
 	return ret;
 }
 
+static void nvme_rdma_teardown_admin_queue(struct nvme_rdma_ctrl *ctrl,
+		bool remove)
+{
+	blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
+	nvme_rdma_stop_queue(&ctrl->queues[0]);
+	blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, nvme_cancel_request,
+			&ctrl->ctrl);
+	blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
+	nvme_rdma_destroy_admin_queue(ctrl, remove);
+}
+
+static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl,
+		bool remove)
+{
+	if (ctrl->ctrl.queue_count > 1) {
+		nvme_stop_queues(&ctrl->ctrl);
+		nvme_rdma_stop_io_queues(ctrl);
+		blk_mq_tagset_busy_iter(&ctrl->tag_set, nvme_cancel_request,
+				&ctrl->ctrl);
+		if (remove)
+			nvme_start_queues(&ctrl->ctrl);
+		nvme_rdma_destroy_io_queues(ctrl, remove);
+	}
+}
+
 static void nvme_rdma_stop_ctrl(struct nvme_ctrl *nctrl)
 {
 	struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
@@ -912,21 +942,44 @@ static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl)
 	}
 }
 
-static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
+static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new)
 {
-	struct nvme_rdma_ctrl *ctrl = container_of(to_delayed_work(work),
-			struct nvme_rdma_ctrl, reconnect_work);
+	int ret = -EINVAL;
 	bool changed;
-	int ret;
 
-	++ctrl->ctrl.nr_reconnects;
-
-	ret = nvme_rdma_configure_admin_queue(ctrl, false);
+	ret = nvme_rdma_configure_admin_queue(ctrl, new);
 	if (ret)
-		goto requeue;
+		return ret;
+
+	if (ctrl->ctrl.icdoff) {
+		dev_err(ctrl->ctrl.device, "icdoff is not supported!\n");
+		goto destroy_admin;
+	}
+
+	if (!(ctrl->ctrl.sgls & (1 << 2))) {
+		dev_err(ctrl->ctrl.device,
+			"Mandatory keyed sgls are not supported!\n");
+		goto destroy_admin;
+	}
+
+	if (ctrl->ctrl.opts->queue_size > ctrl->ctrl.sqsize + 1) {
+		dev_warn(ctrl->ctrl.device,
+			"queue_size %zu > ctrl sqsize %u, clamping down\n",
+			ctrl->ctrl.opts->queue_size, ctrl->ctrl.sqsize + 1);
+	}
+
+	if (ctrl->ctrl.sqsize + 1 > ctrl->ctrl.maxcmd) {
+		dev_warn(ctrl->ctrl.device,
+			"sqsize %u > ctrl maxcmd %u, clamping down\n",
+			ctrl->ctrl.sqsize + 1, ctrl->ctrl.maxcmd);
+		ctrl->ctrl.sqsize = ctrl->ctrl.maxcmd - 1;
+	}
+
+	if (ctrl->ctrl.sgls & (1 << 20))
+		ctrl->use_inline_data = true;
 
 	if (ctrl->ctrl.queue_count > 1) {
-		ret = nvme_rdma_configure_io_queues(ctrl, false);
+		ret = nvme_rdma_configure_io_queues(ctrl, new);
 		if (ret)
 			goto destroy_admin;
 	}
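
nvme_rdma_setup_ctrl() now owns the post-connect sanity checks that previously lived in nvme_rdma_create_ctrl() (see the hunk at -1959 below). sqsize is a 0's based value, hence the repeated + 1. A stand-alone sketch of the clamping arithmetic, with hypothetical values:

/* user-space illustration only, not driver code */
#include <stdio.h>

int main(void)
{
	unsigned queue_size = 128;	/* user-requested (opts->queue_size) */
	unsigned sqsize = 63;		/* controller queue depth, 0's based */
	unsigned maxcmd = 32;		/* max commands the controller accepts */

	if (queue_size > sqsize + 1)	/* warn only; clamped elsewhere */
		printf("queue_size %u > ctrl sqsize %u, clamping down\n",
		       queue_size, sqsize + 1);

	if (sqsize + 1 > maxcmd) {	/* clamp sqsize to maxcmd */
		printf("sqsize %u > ctrl maxcmd %u, clamping down\n",
		       sqsize + 1, maxcmd);
		sqsize = maxcmd - 1;
	}

	printf("effective sqsize: %u (%u usable entries)\n",
	       sqsize, sqsize + 1);
	return 0;
}
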
@@ -935,10 +988,31 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 	if (!changed) {
 		/* state change failure is ok if we're in DELETING state */
 		WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING);
-		return;
+		ret = -EINVAL;
+		goto destroy_io;
 	}
 
 	nvme_start_ctrl(&ctrl->ctrl);
+	return 0;
+
+destroy_io:
+	if (ctrl->ctrl.queue_count > 1)
+		nvme_rdma_destroy_io_queues(ctrl, new);
+destroy_admin:
+	nvme_rdma_stop_queue(&ctrl->queues[0]);
+	nvme_rdma_destroy_admin_queue(ctrl, new);
+	return ret;
+}
+
+static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
+{
+	struct nvme_rdma_ctrl *ctrl = container_of(to_delayed_work(work),
+			struct nvme_rdma_ctrl, reconnect_work);
+
+	++ctrl->ctrl.nr_reconnects;
+
+	if (nvme_rdma_setup_ctrl(ctrl, false))
+		goto requeue;
 
 	dev_info(ctrl->ctrl.device, "Successfully reconnected (%d attempts)\n",
 			ctrl->ctrl.nr_reconnects);
@@ -947,9 +1021,6 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 
 	return;
 
-destroy_admin:
-	nvme_rdma_stop_queue(&ctrl->queues[0]);
-	nvme_rdma_destroy_admin_queue(ctrl, false);
 requeue:
 	dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n",
 			ctrl->ctrl.nr_reconnects);
@@ -962,27 +1033,9 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
 			struct nvme_rdma_ctrl, err_work);
 
 	nvme_stop_keep_alive(&ctrl->ctrl);
-
-	if (ctrl->ctrl.queue_count > 1) {
-		nvme_stop_queues(&ctrl->ctrl);
-		nvme_rdma_stop_io_queues(ctrl);
-		blk_mq_tagset_busy_iter(&ctrl->tag_set,
-					nvme_cancel_request, &ctrl->ctrl);
-		nvme_rdma_destroy_io_queues(ctrl, false);
-	}
-
-	blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
-	nvme_rdma_stop_queue(&ctrl->queues[0]);
-	blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
-				nvme_cancel_request, &ctrl->ctrl);
-	nvme_rdma_destroy_admin_queue(ctrl, false);
-
-	/*
-	 * queues are not a live anymore, so restart the queues to fail fast
-	 * new IO
-	 */
-	blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
+	nvme_rdma_teardown_io_queues(ctrl, false);
 	nvme_start_queues(&ctrl->ctrl);
+	nvme_rdma_teardown_admin_queue(ctrl, false);
 
 	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
 		/* state change failure is ok if we're in DELETING state */
@@ -1090,19 +1143,27 @@ static int nvme_rdma_set_sg_null(struct nvme_command *c)
 }
 
 static int nvme_rdma_map_sg_inline(struct nvme_rdma_queue *queue,
-		struct nvme_rdma_request *req, struct nvme_command *c)
+		struct nvme_rdma_request *req, struct nvme_command *c,
+		int count)
 {
 	struct nvme_sgl_desc *sg = &c->common.dptr.sgl;
+	struct scatterlist *sgl = req->sg_table.sgl;
+	struct ib_sge *sge = &req->sge[1];
+	u32 len = 0;
+	int i;
 
-	req->sge[1].addr = sg_dma_address(req->sg_table.sgl);
-	req->sge[1].length = sg_dma_len(req->sg_table.sgl);
-	req->sge[1].lkey = queue->device->pd->local_dma_lkey;
+	for (i = 0; i < count; i++, sgl++, sge++) {
+		sge->addr = sg_dma_address(sgl);
+		sge->length = sg_dma_len(sgl);
+		sge->lkey = queue->device->pd->local_dma_lkey;
+		len += sge->length;
+	}
 
 	sg->addr = cpu_to_le64(queue->ctrl->ctrl.icdoff);
-	sg->length = cpu_to_le32(sg_dma_len(req->sg_table.sgl));
+	sg->length = cpu_to_le32(len);
 	sg->type = (NVME_SGL_FMT_DATA_DESC << 4) | NVME_SGL_FMT_OFFSET;
 
-	req->num_sge++;
+	req->num_sge += count;
 	return 0;
 }
 
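
The rewritten nvme_rdma_map_sg_inline() posts one send SGE per mapped segment but still advertises a single contiguous inline region to the target, whose length is the sum of the segment lengths. A user-space sketch of that accumulation (the struct and values are hypothetical stand-ins for DMA-mapped scatterlist entries):

/* user-space illustration only, not driver code */
#include <stdio.h>
#include <stdint.h>

struct seg {
	uint64_t addr;	/* stand-in for sg_dma_address() */
	uint32_t len;	/* stand-in for sg_dma_len() */
};

int main(void)
{
	struct seg sgl[] = {
		{ 0x1000, 4096 }, { 0x8000, 4096 }, { 0xc000, 2048 },
	};
	int count = 3;
	uint32_t len = 0;
	int i;

	for (i = 0; i < count; i++)
		len += sgl[i].len;	/* mirrors len += sge->length */

	printf("inline descriptor covers %u bytes over %d SGEs\n", len, count);
	return 0;
}
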
@@ -1195,15 +1256,16 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
 		goto out_free_table;
 	}
 
-	if (count == 1) {
+	if (count <= dev->num_inline_segments) {
 		if (rq_data_dir(rq) == WRITE && nvme_rdma_queue_idx(queue) &&
+		    queue->ctrl->use_inline_data &&
 		    blk_rq_payload_bytes(rq) <=
 				nvme_rdma_inline_data_size(queue)) {
-			ret = nvme_rdma_map_sg_inline(queue, req, c);
+			ret = nvme_rdma_map_sg_inline(queue, req, c, count);
 			goto out;
 		}
 
-		if (dev->pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) {
+		if (count == 1 && dev->pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) {
 			ret = nvme_rdma_map_sg_single(queue, req, c);
 			goto out;
 		}
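
With the count == 1 restriction lifted, the mapping policy above becomes: inline for small-enough writes on I/O queues when the controller accepts inline data, the IB_PD_UNSAFE_GLOBAL_RKEY shortcut only for single segments, and MR registration otherwise. A compilable sketch of that decision tree (choose_path() is a hypothetical stand-in, not a driver function):

/* user-space illustration only, not driver code */
#include <stdbool.h>
#include <stdio.h>

enum map_path { MAP_INLINE, MAP_SINGLE_RKEY, MAP_REG_MR };

static enum map_path choose_path(int count, int num_inline_segments,
		bool write_on_io_queue, bool use_inline_data,
		size_t payload, size_t inline_size, bool unsafe_rkey)
{
	if (count <= num_inline_segments) {
		if (write_on_io_queue && use_inline_data &&
		    payload <= inline_size)
			return MAP_INLINE;
		if (count == 1 && unsafe_rkey)
			return MAP_SINGLE_RKEY;
	}
	return MAP_REG_MR;	/* memory registration fallback */
}

int main(void)
{
	/* a 2-segment 8K write against a 16K inline budget goes inline */
	printf("path=%d\n", choose_path(2, 4, true, true, 8192, 16384, false));
	return 0;
}
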
@@ -1574,6 +1636,7 @@ static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id,
 	case RDMA_CM_EVENT_CONNECT_ERROR:
 	case RDMA_CM_EVENT_UNREACHABLE:
 		nvme_rdma_destroy_queue_ib(queue);
+		/* fall through */
 	case RDMA_CM_EVENT_ADDR_ERROR:
 		dev_dbg(queue->ctrl->ctrl.device,
 			"CM error event %d\n", ev->event);
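
The added comment is more than documentation: GCC's -Wimplicit-fallthrough recognizes a comment of this form as an annotation that the missing break is intentional, so the deliberate fallthrough from the destroy cases into RDMA_CM_EVENT_ADDR_ERROR no longer warns. A stand-alone illustration:

/* user-space illustration only, not driver code */
#include <stdio.h>

static void handle(int ev)
{
	switch (ev) {
	case 1:
		printf("destroy queue resources\n");
		/* fall through */
	case 2:
		printf("report CM error\n");
		break;
	default:
		break;
	}
}

int main(void)
{
	handle(1);	/* prints both lines: case 1 falls into case 2 */
	return 0;
}
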
@@ -1736,25 +1799,12 @@ static const struct blk_mq_ops nvme_rdma_admin_mq_ops = {
 
 static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown)
 {
-	if (ctrl->ctrl.queue_count > 1) {
-		nvme_stop_queues(&ctrl->ctrl);
-		nvme_rdma_stop_io_queues(ctrl);
-		blk_mq_tagset_busy_iter(&ctrl->tag_set,
-				nvme_cancel_request, &ctrl->ctrl);
-		nvme_rdma_destroy_io_queues(ctrl, shutdown);
-	}
-
+	nvme_rdma_teardown_io_queues(ctrl, shutdown);
 	if (shutdown)
 		nvme_shutdown_ctrl(&ctrl->ctrl);
 	else
 		nvme_disable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap);
-
-	blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
-	nvme_rdma_stop_queue(&ctrl->queues[0]);
-	blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
-			nvme_cancel_request, &ctrl->ctrl);
-	blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
-	nvme_rdma_destroy_admin_queue(ctrl, shutdown);
+	nvme_rdma_teardown_admin_queue(ctrl, shutdown);
 }
 
 static void nvme_rdma_delete_ctrl(struct nvme_ctrl *ctrl)
@@ -1766,8 +1816,6 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
 {
 	struct nvme_rdma_ctrl *ctrl =
 		container_of(work, struct nvme_rdma_ctrl, ctrl.reset_work);
-	int ret;
-	bool changed;
 
 	nvme_stop_ctrl(&ctrl->ctrl);
 	nvme_rdma_shutdown_ctrl(ctrl, false);
@@ -1778,25 +1826,9 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
 		return;
 	}
 
-	ret = nvme_rdma_configure_admin_queue(ctrl, false);
-	if (ret)
+	if (nvme_rdma_setup_ctrl(ctrl, false))
 		goto out_fail;
 
-	if (ctrl->ctrl.queue_count > 1) {
-		ret = nvme_rdma_configure_io_queues(ctrl, false);
-		if (ret)
-			goto out_fail;
-	}
-
-	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
-	if (!changed) {
-		/* state change failure is ok if we're in DELETING state */
-		WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING);
-		return;
-	}
-
-	nvme_start_ctrl(&ctrl->ctrl);
-
 	return;
 
 out_fail:
@@ -1959,49 +1991,10 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
 	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING);
 	WARN_ON_ONCE(!changed);
 
-	ret = nvme_rdma_configure_admin_queue(ctrl, true);
+	ret = nvme_rdma_setup_ctrl(ctrl, true);
 	if (ret)
 		goto out_uninit_ctrl;
 
-	/* sanity check icdoff */
-	if (ctrl->ctrl.icdoff) {
-		dev_err(ctrl->ctrl.device, "icdoff is not supported!\n");
-		ret = -EINVAL;
-		goto out_remove_admin_queue;
-	}
-
-	/* sanity check keyed sgls */
-	if (!(ctrl->ctrl.sgls & (1 << 2))) {
-		dev_err(ctrl->ctrl.device,
-			"Mandatory keyed sgls are not supported!\n");
-		ret = -EINVAL;
-		goto out_remove_admin_queue;
-	}
-
-	/* only warn if argument is too large here, will clamp later */
-	if (opts->queue_size > ctrl->ctrl.sqsize + 1) {
-		dev_warn(ctrl->ctrl.device,
-			"queue_size %zu > ctrl sqsize %u, clamping down\n",
-			opts->queue_size, ctrl->ctrl.sqsize + 1);
-	}
-
-	/* warn if maxcmd is lower than sqsize+1 */
-	if (ctrl->ctrl.sqsize + 1 > ctrl->ctrl.maxcmd) {
-		dev_warn(ctrl->ctrl.device,
-			"sqsize %u > ctrl maxcmd %u, clamping down\n",
-			ctrl->ctrl.sqsize + 1, ctrl->ctrl.maxcmd);
-		ctrl->ctrl.sqsize = ctrl->ctrl.maxcmd - 1;
-	}
-
-	if (opts->nr_io_queues) {
-		ret = nvme_rdma_configure_io_queues(ctrl, true);
-		if (ret)
-			goto out_remove_admin_queue;
-	}
-
-	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
-	WARN_ON_ONCE(!changed);
-
 	dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISpcs\n",
 		ctrl->ctrl.opts->subsysnqn, &ctrl->addr);
 
@@ -2011,13 +2004,8 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
 	list_add_tail(&ctrl->list, &nvme_rdma_ctrl_list);
 	mutex_unlock(&nvme_rdma_ctrl_mutex);
 
-	nvme_start_ctrl(&ctrl->ctrl);
-
 	return &ctrl->ctrl;
 
-out_remove_admin_queue:
-	nvme_rdma_stop_queue(&ctrl->queues[0]);
-	nvme_rdma_destroy_admin_queue(ctrl, true);
 out_uninit_ctrl:
 	nvme_uninit_ctrl(&ctrl->ctrl);
 	nvme_put_ctrl(&ctrl->ctrl);