@@ -113,24 +113,26 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
 	}
 
 	if (conn->c_version < RDS_PROTOCOL(3, 1)) {
-		printk(KERN_NOTICE "RDS/IB: Connection to %pI4 version %u.%u failed,"
-		       " no longer supported\n",
-		       &conn->c_faddr,
-		       RDS_PROTOCOL_MAJOR(conn->c_version),
-		       RDS_PROTOCOL_MINOR(conn->c_version));
+		pr_notice("RDS/IB: Connection <%pI4,%pI4> version %u.%u no longer supported\n",
+			  &conn->c_laddr, &conn->c_faddr,
+			  RDS_PROTOCOL_MAJOR(conn->c_version),
+			  RDS_PROTOCOL_MINOR(conn->c_version));
 		rds_conn_destroy(conn);
 		return;
 	} else {
-		printk(KERN_NOTICE "RDS/IB: connected to %pI4 version %u.%u%s\n",
-		       &conn->c_faddr,
-		       RDS_PROTOCOL_MAJOR(conn->c_version),
-		       RDS_PROTOCOL_MINOR(conn->c_version),
-		       ic->i_flowctl ? ", flow control" : "");
+		pr_notice("RDS/IB: %s conn connected <%pI4,%pI4> version %u.%u%s\n",
+			  ic->i_active_side ? "Active" : "Passive",
+			  &conn->c_laddr, &conn->c_faddr,
+			  RDS_PROTOCOL_MAJOR(conn->c_version),
+			  RDS_PROTOCOL_MINOR(conn->c_version),
+			  ic->i_flowctl ? ", flow control" : "");
 	}
 
-	/*
-	 * Init rings and fill recv. this needs to wait until protocol negotiation
-	 * is complete, since ring layout is different from 3.0 to 3.1.
+	atomic_set(&ic->i_cq_quiesce, 0);
+
+	/* Init rings and fill recv. this needs to wait until protocol
+	 * negotiation is complete, since ring layout is different
+	 * from 3.1 to 4.1.
 	 */
 	rds_ib_send_init_ring(ic);
 	rds_ib_recv_init_ring(ic);
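
The `conn->c_version < RDS_PROTOCOL(3, 1)` test above works because negotiated versions are packed with the major number in the high byte and the minor in the low byte. A minimal userspace sketch of that encoding, with the macro bodies reproduced on the assumption that they match the definitions in net/rds/rds.h:

#include <stdio.h>

#define RDS_PROTOCOL(maj, min)	(((maj) << 8) | (min))
#define RDS_PROTOCOL_MAJOR(v)	((v) >> 8)
#define RDS_PROTOCOL_MINOR(v)	((v) & 255)

int main(void)
{
	unsigned int v = RDS_PROTOCOL(3, 0);	/* peer negotiated 3.0 */

	if (v < RDS_PROTOCOL(3, 1))		/* 0x0300 < 0x0301 */
		printf("version %u.%u no longer supported\n",
		       RDS_PROTOCOL_MAJOR(v), RDS_PROTOCOL_MINOR(v));
	return 0;
}
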
@@ -267,6 +269,10 @@ static void rds_ib_tasklet_fn_send(unsigned long data)
 
 	rds_ib_stats_inc(s_ib_tasklet_call);
 
+	/* if cq has been already reaped, ignore incoming cq event */
+	if (atomic_read(&ic->i_cq_quiesce))
+		return;
+
 	poll_scq(ic, ic->i_send_cq, ic->i_send_wc);
 	ib_req_notify_cq(ic->i_send_cq, IB_CQ_NEXT_COMP);
 	poll_scq(ic, ic->i_send_cq, ic->i_send_wc);
@@ -308,6 +314,10 @@ static void rds_ib_tasklet_fn_recv(unsigned long data)
 
 	rds_ib_stats_inc(s_ib_tasklet_call);
 
+	/* if cq has been already reaped, ignore incoming cq event */
+	if (atomic_read(&ic->i_cq_quiesce))
+		return;
+
 	memset(&state, 0, sizeof(state));
 	poll_rcq(ic, ic->i_recv_cq, ic->i_recv_wc, &state);
 	ib_req_notify_cq(ic->i_recv_cq, IB_CQ_SOLICITED);
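
Both tasklets bail out when `i_cq_quiesce` is set; the flag is raised during shutdown (see the `rds_ib_conn_path_shutdown` hunk below) only after `tasklet_kill()`, and cleared again on connect, so a completion event that races with teardown never polls a CQ that is about to be destroyed. A standalone sketch of the handshake, with hypothetical names and C11 atomics standing in for the kernel's `atomic_t`:

#include <stdatomic.h>
#include <stdbool.h>

struct conn_state {
	atomic_int cq_quiesce;		/* 0 = live, 1 = CQs reaped */
};

/* tasklet side: drop events once the connection is quiescing */
static bool cq_event(struct conn_state *st)
{
	if (atomic_load(&st->cq_quiesce))
		return false;		/* ignore, CQ may be gone */
	/* ... poll_scq()/poll_rcq() equivalents run here ... */
	return true;
}

/* shutdown side: stop the tasklets, then raise the flag */
static void shutdown_cqs(struct conn_state *st)
{
	/* tasklet_kill() equivalents run first */
	atomic_store(&st->cq_quiesce, 1);
	/* only now destroy the CQs */
}
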
@@ -358,6 +368,28 @@ static void rds_ib_cq_comp_handler_send(struct ib_cq *cq, void *context)
 	tasklet_schedule(&ic->i_send_tasklet);
 }
 
+static inline int ibdev_get_unused_vector(struct rds_ib_device *rds_ibdev)
+{
+	int min = rds_ibdev->vector_load[rds_ibdev->dev->num_comp_vectors - 1];
+	int index = rds_ibdev->dev->num_comp_vectors - 1;
+	int i;
+
+	for (i = rds_ibdev->dev->num_comp_vectors - 1; i >= 0; i--) {
+		if (rds_ibdev->vector_load[i] < min) {
+			index = i;
+			min = rds_ibdev->vector_load[i];
+		}
+	}
+
+	rds_ibdev->vector_load[index]++;
+	return index;
+}
+
+static inline void ibdev_put_vector(struct rds_ib_device *rds_ibdev, int index)
+{
+	rds_ibdev->vector_load[index]--;
+}
+
 /*
  * This needs to be very careful to not leave IS_ERR pointers around for
  * cleanup to trip over.
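
The helper pair above implements least-loaded completion-vector selection: every connection bumps the load count of the emptiest vector and drops it again on teardown. A self-contained sketch of the same scan, with a hypothetical constant and a file-scope array standing in for `num_comp_vectors` and the per-device `vector_load[]`:

#include <stdio.h>

#define NUM_COMP_VECTORS 4

static int vector_load[NUM_COMP_VECTORS];

static int get_unused_vector(void)
{
	int index = NUM_COMP_VECTORS - 1;
	int min = vector_load[index];
	int i;

	for (i = NUM_COMP_VECTORS - 1; i >= 0; i--) {
		if (vector_load[i] < min) {	/* strict <: ties keep the
						 * highest-numbered vector */
			index = i;
			min = vector_load[i];
		}
	}
	vector_load[index]++;
	return index;
}

static void put_vector(int index)
{
	vector_load[index]--;
}

int main(void)
{
	int a = get_unused_vector();	/* 3: all loads equal */
	int b = get_unused_vector();	/* 2: vector 3 is now busier */

	printf("a=%d b=%d\n", a, b);
	put_vector(b);
	put_vector(a);
	return 0;
}

In the kernel helpers the array lives in struct rds_ib_device, so the counts persist across connections sharing the same HCA.
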
@@ -383,7 +415,10 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
 	 * completion queue and send queue. This extra space is used for FRMR
 	 * registration and invalidation work requests
 	 */
-	fr_queue_space = (rds_ibdev->use_fastreg ? RDS_IB_DEFAULT_FR_WR : 0);
+	fr_queue_space = rds_ibdev->use_fastreg ?
+			 (RDS_IB_DEFAULT_FR_WR + 1) +
+			 (RDS_IB_DEFAULT_FR_INV_WR + 1)
+			 : 0;
 
 	/* add the conn now so that connection establishment has the dev */
 	rds_ib_add_conn(rds_ibdev, conn);
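
With the registration and invalidation paths split, the send queue must reserve space for both WR pools, which is why `fr_queue_space` grows here. A sketch of the resulting CQE budget; `send_cqe_budget()` is a hypothetical helper, and the two pool sizes are assumed values standing in for the defaults in net/rds/ib.h:

#include <stdbool.h>
#include <stdint.h>

/* assumed stand-ins; the real defaults live in net/rds/ib.h */
#define RDS_IB_DEFAULT_FR_WR		256
#define RDS_IB_DEFAULT_FR_INV_WR	256

static uint32_t send_cqe_budget(uint32_t send_ring_nr, bool use_fastreg)
{
	uint32_t fr_queue_space = 0;

	if (use_fastreg)		/* one slot of headroom per pool */
		fr_queue_space = (RDS_IB_DEFAULT_FR_WR + 1) +
				 (RDS_IB_DEFAULT_FR_INV_WR + 1);

	/* mirrors cq_attr.cqe = i_send_ring.w_nr + fr_queue_space + 1 below */
	return send_ring_nr + fr_queue_space + 1;
}
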
@@ -396,25 +431,30 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
 	/* Protection domain and memory range */
 	ic->i_pd = rds_ibdev->pd;
 
+	ic->i_scq_vector = ibdev_get_unused_vector(rds_ibdev);
 	cq_attr.cqe = ic->i_send_ring.w_nr + fr_queue_space + 1;
-
+	cq_attr.comp_vector = ic->i_scq_vector;
 	ic->i_send_cq = ib_create_cq(dev, rds_ib_cq_comp_handler_send,
 				     rds_ib_cq_event_handler, conn,
 				     &cq_attr);
 	if (IS_ERR(ic->i_send_cq)) {
 		ret = PTR_ERR(ic->i_send_cq);
 		ic->i_send_cq = NULL;
+		ibdev_put_vector(rds_ibdev, ic->i_scq_vector);
 		rdsdebug("ib_create_cq send failed: %d\n", ret);
 		goto out;
 	}
 
+	ic->i_rcq_vector = ibdev_get_unused_vector(rds_ibdev);
 	cq_attr.cqe = ic->i_recv_ring.w_nr;
+	cq_attr.comp_vector = ic->i_rcq_vector;
 	ic->i_recv_cq = ib_create_cq(dev, rds_ib_cq_comp_handler_recv,
 				     rds_ib_cq_event_handler, conn,
 				     &cq_attr);
 	if (IS_ERR(ic->i_recv_cq)) {
 		ret = PTR_ERR(ic->i_recv_cq);
 		ic->i_recv_cq = NULL;
+		ibdev_put_vector(rds_ibdev, ic->i_rcq_vector);
 		rdsdebug("ib_create_cq recv failed: %d\n", ret);
 		goto out;
 	}
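
`comp_vector` must fall in `[0, num_comp_vectors)`; before this change both CQs used the zero-initialized default of vector 0, so all RDS completion interrupts landed on one CPU. For comparison, a userspace libibverbs sketch that spreads CQs the simple round-robin way (`create_spread_cq` and its `next` counter are hypothetical; the kernel code above uses the least-loaded scan instead):

#include <infiniband/verbs.h>

/* round-robin spread across the device's completion vectors */
static struct ibv_cq *create_spread_cq(struct ibv_context *ctx,
				       int cqe, int *next)
{
	int vec = (*next)++ % ctx->num_comp_vectors;

	return ibv_create_cq(ctx, cqe, NULL, NULL, vec);
}

Note also that each error path returns the vector reference it took, keeping `ibdev_get_unused_vector()` and `ibdev_put_vector()` strictly paired.
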
@@ -445,6 +485,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
 	attr.send_cq = ic->i_send_cq;
 	attr.recv_cq = ic->i_recv_cq;
 	atomic_set(&ic->i_fastreg_wrs, RDS_IB_DEFAULT_FR_WR);
+	atomic_set(&ic->i_fastunreg_wrs, RDS_IB_DEFAULT_FR_INV_WR);
 
 	/*
 	 * XXX this can fail if max_*_wr is too large? Are we supposed
@@ -682,6 +723,7 @@ out:
 		if (ic->i_cm_id == cm_id)
 			ret = 0;
 	}
+	ic->i_active_side = true;
 	return ret;
 }
 
@@ -767,17 +809,27 @@ void rds_ib_conn_path_shutdown(struct rds_conn_path *cp)
 		wait_event(rds_ib_ring_empty_wait,
 			   rds_ib_ring_empty(&ic->i_recv_ring) &&
 			   (atomic_read(&ic->i_signaled_sends) == 0) &&
-			   (atomic_read(&ic->i_fastreg_wrs) == RDS_IB_DEFAULT_FR_WR));
+			   (atomic_read(&ic->i_fastreg_wrs) == RDS_IB_DEFAULT_FR_WR) &&
+			   (atomic_read(&ic->i_fastunreg_wrs) == RDS_IB_DEFAULT_FR_INV_WR));
 		tasklet_kill(&ic->i_send_tasklet);
 		tasklet_kill(&ic->i_recv_tasklet);
 
+		atomic_set(&ic->i_cq_quiesce, 1);
+
 		/* first destroy the ib state that generates callbacks */
 		if (ic->i_cm_id->qp)
 			rdma_destroy_qp(ic->i_cm_id);
-		if (ic->i_send_cq)
+		if (ic->i_send_cq) {
+			if (ic->rds_ibdev)
+				ibdev_put_vector(ic->rds_ibdev, ic->i_scq_vector);
 			ib_destroy_cq(ic->i_send_cq);
-		if (ic->i_recv_cq)
+		}
+
+		if (ic->i_recv_cq) {
+			if (ic->rds_ibdev)
+				ibdev_put_vector(ic->rds_ibdev, ic->i_rcq_vector);
 			ib_destroy_cq(ic->i_recv_cq);
+		}
 
 		/* then free the resources that ib callbacks use */
 		if (ic->i_send_hdrs)
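
The widened `wait_event()` condition encodes a drain invariant: each fastreg or fastunreg WR takes a slot from its counter when posted and gives it back on completion, so the counters equal their pool sizes exactly when nothing is in flight. A standalone sketch of that accounting (hypothetical names, assumed pool sizes, C11 atomics in place of `atomic_t`):

#include <stdatomic.h>
#include <stdbool.h>

#define FR_WR_POOL	256	/* assumed stand-in for RDS_IB_DEFAULT_FR_WR */
#define FR_INV_WR_POOL	256	/* assumed stand-in for RDS_IB_DEFAULT_FR_INV_WR */

static atomic_int fastreg_wrs = FR_WR_POOL;
static atomic_int fastunreg_wrs = FR_INV_WR_POOL;

/* posting a WR takes a slot; its completion returns it */
static void post_fastreg(void)     { atomic_fetch_sub(&fastreg_wrs, 1); }
static void complete_fastreg(void) { atomic_fetch_add(&fastreg_wrs, 1); }

/* true exactly when every posted WR has completed */
static bool frwr_pools_drained(void)
{
	return atomic_load(&fastreg_wrs) == FR_WR_POOL &&
	       atomic_load(&fastunreg_wrs) == FR_INV_WR_POOL;
}
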
@@ -855,6 +907,7 @@ void rds_ib_conn_path_shutdown(struct rds_conn_path *cp)
 	ic->i_sends = NULL;
 	vfree(ic->i_recvs);
 	ic->i_recvs = NULL;
+	ic->i_active_side = false;
 }
 
 int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp)