|
@@ -39,14 +39,14 @@
|
|
|
#include "iscsi_iser.h"
|
|
|
|
|
|
#define ISCSI_ISER_MAX_CONN 8
|
|
|
-#define ISER_MAX_RX_CQ_LEN (ISER_QP_MAX_RECV_DTOS * ISCSI_ISER_MAX_CONN)
|
|
|
-#define ISER_MAX_TX_CQ_LEN (ISER_QP_MAX_REQ_DTOS * ISCSI_ISER_MAX_CONN)
|
|
|
+#define ISER_MAX_RX_LEN (ISER_QP_MAX_RECV_DTOS * ISCSI_ISER_MAX_CONN)
|
|
|
+#define ISER_MAX_TX_LEN (ISER_QP_MAX_REQ_DTOS * ISCSI_ISER_MAX_CONN)
|
|
|
+#define ISER_MAX_CQ_LEN (ISER_MAX_RX_LEN + ISER_MAX_TX_LEN)
|
|
|
|
|
|
static int iser_cq_poll_limit = 512;
|
|
|
|
|
|
static void iser_cq_tasklet_fn(unsigned long data);
|
|
|
static void iser_cq_callback(struct ib_cq *cq, void *cq_context);
|
|
|
-static int iser_drain_tx_cq(struct iser_comp *comp);
|
|
|
|
|
|
static void iser_cq_event_callback(struct ib_event *cause, void *context)
|
|
|
{
|
|
@@ -117,26 +117,17 @@ static int iser_create_device_ib_res(struct iser_device *device)
|
|
|
struct iser_comp *comp = &device->comps[i];
|
|
|
|
|
|
comp->device = device;
|
|
|
- comp->rx_cq = ib_create_cq(device->ib_device,
|
|
|
- iser_cq_callback,
|
|
|
- iser_cq_event_callback,
|
|
|
- (void *)comp,
|
|
|
- ISER_MAX_RX_CQ_LEN, i);
|
|
|
- if (IS_ERR(comp->rx_cq)) {
|
|
|
- comp->rx_cq = NULL;
|
|
|
+ comp->cq = ib_create_cq(device->ib_device,
|
|
|
+ iser_cq_callback,
|
|
|
+ iser_cq_event_callback,
|
|
|
+ (void *)comp,
|
|
|
+ ISER_MAX_CQ_LEN, i);
|
|
|
+ if (IS_ERR(comp->cq)) {
|
|
|
+ comp->cq = NULL;
|
|
|
goto cq_err;
|
|
|
}
|
|
|
|
|
|
- comp->tx_cq = ib_create_cq(device->ib_device, NULL,
|
|
|
- iser_cq_event_callback,
|
|
|
- (void *)comp,
|
|
|
- ISER_MAX_TX_CQ_LEN, i);
|
|
|
- if (IS_ERR(comp->tx_cq)) {
|
|
|
- comp->tx_cq = NULL;
|
|
|
- goto cq_err;
|
|
|
- }
|
|
|
-
|
|
|
- if (ib_req_notify_cq(comp->rx_cq, IB_CQ_NEXT_COMP))
|
|
|
+ if (ib_req_notify_cq(comp->cq, IB_CQ_NEXT_COMP))
|
|
|
goto cq_err;
|
|
|
|
|
|
tasklet_init(&comp->tasklet, iser_cq_tasklet_fn,
|
|
@@ -165,10 +156,8 @@ cq_err:
|
|
|
for (i = 0; i < device->comps_used; i++) {
|
|
|
struct iser_comp *comp = &device->comps[i];
|
|
|
|
|
|
- if (comp->tx_cq)
|
|
|
- ib_destroy_cq(comp->tx_cq);
|
|
|
- if (comp->rx_cq)
|
|
|
- ib_destroy_cq(comp->rx_cq);
|
|
|
+ if (comp->cq)
|
|
|
+ ib_destroy_cq(comp->cq);
|
|
|
}
|
|
|
ib_dealloc_pd(device->pd);
|
|
|
pd_err:
|
|
@@ -189,10 +178,8 @@ static void iser_free_device_ib_res(struct iser_device *device)
|
|
|
struct iser_comp *comp = &device->comps[i];
|
|
|
|
|
|
tasklet_kill(&comp->tasklet);
|
|
|
- ib_destroy_cq(comp->tx_cq);
|
|
|
- ib_destroy_cq(comp->rx_cq);
|
|
|
- comp->tx_cq = NULL;
|
|
|
- comp->rx_cq = NULL;
|
|
|
+ ib_destroy_cq(comp->cq);
|
|
|
+ comp->cq = NULL;
|
|
|
}
|
|
|
|
|
|
(void)ib_unregister_event_handler(&device->event_handler);
|
|
@@ -462,8 +449,8 @@ static int iser_create_ib_conn_res(struct ib_conn *ib_conn)
|
|
|
|
|
|
init_attr.event_handler = iser_qp_event_callback;
|
|
|
init_attr.qp_context = (void *)ib_conn;
|
|
|
- init_attr.send_cq = ib_conn->comp->tx_cq;
|
|
|
- init_attr.recv_cq = ib_conn->comp->rx_cq;
|
|
|
+ init_attr.send_cq = ib_conn->comp->cq;
|
|
|
+ init_attr.recv_cq = ib_conn->comp->cq;
|
|
|
init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS;
|
|
|
init_attr.cap.max_send_sge = 2;
|
|
|
init_attr.cap.max_recv_sge = 1;
|
|
@@ -640,33 +627,6 @@ void iser_conn_release(struct iser_conn *iser_conn)
|
|
|
kfree(iser_conn);
|
|
|
}
|
|
|
|
|
|
-/**
|
|
|
- * iser_poll_for_flush_errors - Don't settle for less than all.
|
|
|
- * @struct ib_conn: IB context of the connection
|
|
|
- *
|
|
|
- * This routine is called when the QP is in error state
|
|
|
- * It polls the send CQ until all flush errors are consumed and
|
|
|
- * returns when all flush errors were processed.
|
|
|
- */
|
|
|
-static void iser_poll_for_flush_errors(struct ib_conn *ib_conn)
|
|
|
-{
|
|
|
- int count = 0;
|
|
|
-
|
|
|
- while (ib_conn->post_recv_buf_count > 0 ||
|
|
|
- atomic_read(&ib_conn->post_send_buf_count) > 0) {
|
|
|
- msleep(100);
|
|
|
- if (atomic_read(&ib_conn->post_send_buf_count) > 0)
|
|
|
- iser_drain_tx_cq(ib_conn->comp);
|
|
|
-
|
|
|
- count++;
|
|
|
- /* Don't flood with prints */
|
|
|
- if (count % 30 == 0)
|
|
|
- iser_dbg("post_recv %d post_send %d",
|
|
|
- ib_conn->post_recv_buf_count,
|
|
|
- atomic_read(&ib_conn->post_send_buf_count));
|
|
|
- }
|
|
|
-}
|
|
|
-
|
|
|
/**
|
|
|
* triggers start of the disconnect procedures and wait for them to be done
|
|
|
* Called with state mutex held
|
|
@@ -698,7 +658,7 @@ int iser_conn_terminate(struct iser_conn *iser_conn)
|
|
|
iser_err("Failed to disconnect, conn: 0x%p err %d\n",
|
|
|
iser_conn, err);
|
|
|
|
|
|
- iser_poll_for_flush_errors(ib_conn);
|
|
|
+ wait_for_completion(&ib_conn->flush_comp);
|
|
|
}
|
|
|
|
|
|
return 1;
|
|
@@ -908,6 +868,7 @@ void iser_conn_init(struct iser_conn *iser_conn)
|
|
|
iser_conn->state = ISER_CONN_INIT;
|
|
|
iser_conn->ib_conn.post_recv_buf_count = 0;
|
|
|
atomic_set(&iser_conn->ib_conn.post_send_buf_count, 0);
|
|
|
+ init_completion(&iser_conn->ib_conn.flush_comp);
|
|
|
init_completion(&iser_conn->stop_completion);
|
|
|
init_completion(&iser_conn->ib_completion);
|
|
|
init_completion(&iser_conn->up_completion);
|
|
@@ -1155,9 +1116,31 @@ int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc)
|
|
|
return ib_ret;
|
|
|
}
|
|
|
|
|
|
+/**
|
|
|
+ * is_iser_tx_desc - Indicate if the completion wr_id
|
|
|
+ * is a TX descriptor or not.
|
|
|
+ * @iser_conn: iser connection
|
|
|
+ * @wr_id: completion WR identifier
|
|
|
+ *
|
|
|
+ * Since we cannot rely on wc opcode in FLUSH errors
|
|
|
+ * we must work around it by checking if the wr_id address
|
|
|
+ * falls in the iser connection rx_descs buffer. If so
|
|
|
+ * it is an RX descriptor, otherwize it is a TX.
|
|
|
+ */
|
|
|
+static inline bool
|
|
|
+is_iser_tx_desc(struct iser_conn *iser_conn, void *wr_id)
|
|
|
+{
|
|
|
+ void *start = iser_conn->rx_descs;
|
|
|
+ int len = iser_conn->num_rx_descs * sizeof(*iser_conn->rx_descs);
|
|
|
+
|
|
|
+ if (wr_id >= start && wr_id < start + len)
|
|
|
+ return false;
|
|
|
+
|
|
|
+ return true;
|
|
|
+}
|
|
|
+
|
|
|
/**
|
|
|
* iser_handle_comp_error() - Handle error completion
|
|
|
- * @desc: iser TX descriptor
|
|
|
* @ib_conn: connection RDMA resources
|
|
|
* @wc: work completion
|
|
|
*
|
|
@@ -1167,8 +1150,7 @@ int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc)
|
|
|
* connection is failed (in case we passed bind stage).
|
|
|
*/
|
|
|
static void
|
|
|
-iser_handle_comp_error(struct iser_tx_desc *desc,
|
|
|
- struct ib_conn *ib_conn,
|
|
|
+iser_handle_comp_error(struct ib_conn *ib_conn,
|
|
|
struct ib_wc *wc)
|
|
|
{
|
|
|
struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
|
|
@@ -1179,85 +1161,90 @@ iser_handle_comp_error(struct iser_tx_desc *desc,
|
|
|
iscsi_conn_failure(iser_conn->iscsi_conn,
|
|
|
ISCSI_ERR_CONN_FAILED);
|
|
|
|
|
|
- if (desc && desc->type == ISCSI_TX_DATAOUT)
|
|
|
- kmem_cache_free(ig.desc_cache, desc);
|
|
|
+ if (is_iser_tx_desc(iser_conn, (void *)wc->wr_id)) {
|
|
|
+ struct iser_tx_desc *desc = (struct iser_tx_desc *)wc->wr_id;
|
|
|
+
|
|
|
+ atomic_dec(&ib_conn->post_send_buf_count);
|
|
|
+ if (desc->type == ISCSI_TX_DATAOUT)
|
|
|
+ kmem_cache_free(ig.desc_cache, desc);
|
|
|
+ } else {
|
|
|
+ ib_conn->post_recv_buf_count--;
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
-static int iser_drain_tx_cq(struct iser_comp *comp)
|
|
|
+/**
|
|
|
+ * iser_handle_wc - handle a single work completion
|
|
|
+ * @wc: work completion
|
|
|
+ *
|
|
|
+ * Soft-IRQ context, work completion can be either
|
|
|
+ * SEND or RECV, and can turn out successful or
|
|
|
+ * with error (or flush error).
|
|
|
+ */
|
|
|
+static void iser_handle_wc(struct ib_wc *wc)
|
|
|
{
|
|
|
- struct ib_cq *cq = comp->tx_cq;
|
|
|
- struct ib_wc wc;
|
|
|
- struct iser_tx_desc *tx_desc;
|
|
|
struct ib_conn *ib_conn;
|
|
|
- int completed_tx = 0;
|
|
|
+ struct iser_tx_desc *tx_desc;
|
|
|
+ struct iser_rx_desc *rx_desc;
|
|
|
|
|
|
- while (ib_poll_cq(cq, 1, &wc) == 1) {
|
|
|
- tx_desc = (struct iser_tx_desc *) (unsigned long) wc.wr_id;
|
|
|
- ib_conn = wc.qp->qp_context;
|
|
|
- if (wc.status == IB_WC_SUCCESS) {
|
|
|
- if (wc.opcode == IB_WC_SEND)
|
|
|
- iser_snd_completion(tx_desc, ib_conn);
|
|
|
- else
|
|
|
- iser_err("expected opcode %d got %d\n",
|
|
|
- IB_WC_SEND, wc.opcode);
|
|
|
+ ib_conn = wc->qp->qp_context;
|
|
|
+ if (wc->status == IB_WC_SUCCESS) {
|
|
|
+ if (wc->opcode == IB_WC_RECV) {
|
|
|
+ rx_desc = (struct iser_rx_desc *)wc->wr_id;
|
|
|
+ iser_rcv_completion(rx_desc, wc->byte_len,
|
|
|
+ ib_conn);
|
|
|
+ } else
|
|
|
+ if (wc->opcode == IB_WC_SEND) {
|
|
|
+ tx_desc = (struct iser_tx_desc *)wc->wr_id;
|
|
|
+ iser_snd_completion(tx_desc, ib_conn);
|
|
|
+ atomic_dec(&ib_conn->post_send_buf_count);
|
|
|
} else {
|
|
|
- iser_err("tx id %llx status %d vend_err %x\n",
|
|
|
- wc.wr_id, wc.status, wc.vendor_err);
|
|
|
- if (wc.wr_id != ISER_FASTREG_LI_WRID) {
|
|
|
- atomic_dec(&ib_conn->post_send_buf_count);
|
|
|
- iser_handle_comp_error(tx_desc, ib_conn, &wc);
|
|
|
- }
|
|
|
+ iser_err("Unknown wc opcode %d\n", wc->opcode);
|
|
|
}
|
|
|
- completed_tx++;
|
|
|
+ } else {
|
|
|
+ if (wc->status != IB_WC_WR_FLUSH_ERR)
|
|
|
+ iser_err("wr id %llx status %d vend_err %x\n",
|
|
|
+ wc->wr_id, wc->status, wc->vendor_err);
|
|
|
+ else
|
|
|
+ iser_dbg("flush error: wr id %llx\n", wc->wr_id);
|
|
|
+
|
|
|
+ if (wc->wr_id != ISER_FASTREG_LI_WRID)
|
|
|
+ iser_handle_comp_error(ib_conn, wc);
|
|
|
+
|
|
|
+ /* complete in case all flush errors were consumed */
|
|
|
+ if (ib_conn->post_recv_buf_count == 0 &&
|
|
|
+ atomic_read(&ib_conn->post_send_buf_count) == 0)
|
|
|
+ complete(&ib_conn->flush_comp);
|
|
|
}
|
|
|
- return completed_tx;
|
|
|
}
|
|
|
|
|
|
-
|
|
|
+/**
|
|
|
+ * iser_cq_tasklet_fn - iSER completion polling loop
|
|
|
+ * @data: iSER completion context
|
|
|
+ *
|
|
|
+ * Soft-IRQ context, polling connection CQ until
|
|
|
+ * either CQ was empty or we exausted polling budget
|
|
|
+ */
|
|
|
static void iser_cq_tasklet_fn(unsigned long data)
|
|
|
{
|
|
|
struct iser_comp *comp = (struct iser_comp *)data;
|
|
|
- struct ib_cq *cq = comp->rx_cq;
|
|
|
+ struct ib_cq *cq = comp->cq;
|
|
|
struct ib_wc wc;
|
|
|
- struct iser_rx_desc *desc;
|
|
|
- unsigned long xfer_len;
|
|
|
- struct ib_conn *ib_conn;
|
|
|
- int completed_tx, completed_rx = 0;
|
|
|
-
|
|
|
- /* First do tx drain, so in a case where we have rx flushes and a successful
|
|
|
- * tx completion we will still go through completion error handling.
|
|
|
- */
|
|
|
- completed_tx = iser_drain_tx_cq(comp);
|
|
|
+ int completed = 0;
|
|
|
|
|
|
while (ib_poll_cq(cq, 1, &wc) == 1) {
|
|
|
- desc = (struct iser_rx_desc *) (unsigned long) wc.wr_id;
|
|
|
- BUG_ON(desc == NULL);
|
|
|
- ib_conn = wc.qp->qp_context;
|
|
|
- if (wc.status == IB_WC_SUCCESS) {
|
|
|
- if (wc.opcode == IB_WC_RECV) {
|
|
|
- xfer_len = (unsigned long)wc.byte_len;
|
|
|
- iser_rcv_completion(desc, xfer_len, ib_conn);
|
|
|
- } else
|
|
|
- iser_err("expected opcode %d got %d\n",
|
|
|
- IB_WC_RECV, wc.opcode);
|
|
|
- } else {
|
|
|
- if (wc.status != IB_WC_WR_FLUSH_ERR)
|
|
|
- iser_err("rx id %llx status %d vend_err %x\n",
|
|
|
- wc.wr_id, wc.status, wc.vendor_err);
|
|
|
- ib_conn->post_recv_buf_count--;
|
|
|
- iser_handle_comp_error(NULL, ib_conn, &wc);
|
|
|
- }
|
|
|
- completed_rx++;
|
|
|
- if (!(completed_rx & 63))
|
|
|
- completed_tx += iser_drain_tx_cq(comp);
|
|
|
- if (completed_rx >= iser_cq_poll_limit)
|
|
|
+ iser_handle_wc(&wc);
|
|
|
+
|
|
|
+ if (++completed >= iser_cq_poll_limit)
|
|
|
break;
|
|
|
}
|
|
|
- /* #warning "it is assumed here that arming CQ only once its empty" *
|
|
|
- * " would not cause interrupts to be missed" */
|
|
|
+
|
|
|
+ /*
|
|
|
+ * It is assumed here that arming CQ only once its empty
|
|
|
+ * would not cause interrupts to be missed.
|
|
|
+ */
|
|
|
ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
|
|
|
|
|
|
- iser_dbg("got %d rx %d tx completions\n", completed_rx, completed_tx);
|
|
|
+ iser_dbg("got %d completions\n", completed);
|
|
|
}
|
|
|
|
|
|
static void iser_cq_callback(struct ib_cq *cq, void *cq_context)
|