@@ -596,8 +596,7 @@ void rds_ib_recv_init_ack(struct rds_ib_connection *ic)
  * wr_id and avoids working with the ring in that case.
  */
 #ifndef KERNEL_HAS_ATOMIC64
-static void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq,
-				int ack_required)
+void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq, int ack_required)
 {
 	unsigned long flags;
 
@@ -622,8 +621,7 @@ static u64 rds_ib_get_ack(struct rds_ib_connection *ic)
 	return seq;
 }
 #else
-static void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq,
-				int ack_required)
+void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq, int ack_required)
 {
 	atomic64_set(&ic->i_ack_next, seq);
 	if (ack_required) {
@@ -830,20 +828,6 @@ static void rds_ib_cong_recv(struct rds_connection *conn,
 	rds_cong_map_updated(map, uncongested);
 }
 
-/*
- * Rings are posted with all the allocations they'll need to queue the
- * incoming message to the receiving socket so this can't fail.
- * All fragments start with a header, so we can make sure we're not receiving
- * garbage, and we can tell a small 8 byte fragment from an ACK frame.
- */
-struct rds_ib_ack_state {
-	u64		ack_next;
-	u64		ack_recv;
-	unsigned int	ack_required:1;
-	unsigned int	ack_next_valid:1;
-	unsigned int	ack_recv_valid:1;
-};
-
 static void rds_ib_process_recv(struct rds_connection *conn,
 				struct rds_ib_recv_work *recv, u32 data_len,
 				struct rds_ib_ack_state *state)
@@ -969,96 +953,50 @@ static void rds_ib_process_recv(struct rds_connection *conn,
 	}
 }
 
-/*
- * Plucking the oldest entry from the ring can be done concurrently with
- * the thread refilling the ring. Each ring operation is protected by
- * spinlocks and the transient state of refilling doesn't change the
- * recording of which entry is oldest.
- *
- * This relies on IB only calling one cq comp_handler for each cq so that
- * there will only be one caller of rds_recv_incoming() per RDS connection.
- */
-void rds_ib_recv_cq_comp_handler(struct ib_cq *cq, void *context)
-{
-	struct rds_connection *conn = context;
-	struct rds_ib_connection *ic = conn->c_transport_data;
-
-	rdsdebug("conn %p cq %p\n", conn, cq);
-
-	rds_ib_stats_inc(s_ib_rx_cq_call);
-
-	tasklet_schedule(&ic->i_recv_tasklet);
-}
-
-static inline void rds_poll_cq(struct rds_ib_connection *ic,
-			       struct rds_ib_ack_state *state)
+void rds_ib_recv_cqe_handler(struct rds_ib_connection *ic,
+			     struct ib_wc *wc,
+			     struct rds_ib_ack_state *state)
 {
 	struct rds_connection *conn = ic->conn;
-	struct ib_wc wc;
 	struct rds_ib_recv_work *recv;
 
-	while (ib_poll_cq(ic->i_recv_cq, 1, &wc) > 0) {
-		rdsdebug("wc wr_id 0x%llx status %u (%s) byte_len %u imm_data %u\n",
-			 (unsigned long long)wc.wr_id, wc.status,
-			 ib_wc_status_msg(wc.status), wc.byte_len,
-			 be32_to_cpu(wc.ex.imm_data));
-		rds_ib_stats_inc(s_ib_rx_cq_event);
+	rdsdebug("wc wr_id 0x%llx status %u (%s) byte_len %u imm_data %u\n",
+		 (unsigned long long)wc->wr_id, wc->status,
+		 ib_wc_status_msg(wc->status), wc->byte_len,
+		 be32_to_cpu(wc->ex.imm_data));
 
-		recv = &ic->i_recvs[rds_ib_ring_oldest(&ic->i_recv_ring)];
-
-		ib_dma_unmap_sg(ic->i_cm_id->device, &recv->r_frag->f_sg, 1, DMA_FROM_DEVICE);
-
-		/*
-		 * Also process recvs in connecting state because it is possible
-		 * to get a recv completion _before_ the rdmacm ESTABLISHED
-		 * event is processed.
-		 */
-		if (wc.status == IB_WC_SUCCESS) {
-			rds_ib_process_recv(conn, recv, wc.byte_len, state);
-		} else {
-			/* We expect errors as the qp is drained during shutdown */
-			if (rds_conn_up(conn) || rds_conn_connecting(conn))
-				rds_ib_conn_error(conn, "recv completion on %pI4 had "
-						  "status %u (%s), disconnecting and "
-						  "reconnecting\n", &conn->c_faddr,
-						  wc.status,
-						  ib_wc_status_msg(wc.status));
-		}
+	rds_ib_stats_inc(s_ib_rx_cq_event);
+	recv = &ic->i_recvs[rds_ib_ring_oldest(&ic->i_recv_ring)];
+	ib_dma_unmap_sg(ic->i_cm_id->device, &recv->r_frag->f_sg, 1,
+			DMA_FROM_DEVICE);
 
-		/*
-		 * rds_ib_process_recv() doesn't always consume the frag, and
-		 * we might not have called it at all if the wc didn't indicate
-		 * success. We already unmapped the frag's pages, though, and
-		 * the following rds_ib_ring_free() call tells the refill path
-		 * that it will not find an allocated frag here. Make sure we
-		 * keep that promise by freeing a frag that's still on the ring.
-		 */
-		if (recv->r_frag) {
-			rds_ib_frag_free(ic, recv->r_frag);
-			recv->r_frag = NULL;
-		}
-		rds_ib_ring_free(&ic->i_recv_ring, 1);
+	/* Also process recvs in connecting state because it is possible
+	 * to get a recv completion _before_ the rdmacm ESTABLISHED
+	 * event is processed.
+	 */
+	if (wc->status == IB_WC_SUCCESS) {
+		rds_ib_process_recv(conn, recv, wc->byte_len, state);
+	} else {
+		/* We expect errors as the qp is drained during shutdown */
+		if (rds_conn_up(conn) || rds_conn_connecting(conn))
+			rds_ib_conn_error(conn, "recv completion on %pI4 had status %u (%s), disconnecting and reconnecting\n",
+					  &conn->c_faddr,
+					  wc->status,
+					  ib_wc_status_msg(wc->status));
 	}
-}
 
-void rds_ib_recv_tasklet_fn(unsigned long data)
-{
-	struct rds_ib_connection *ic = (struct rds_ib_connection *) data;
-	struct rds_connection *conn = ic->conn;
-	struct rds_ib_ack_state state = { 0, };
-
-	rds_poll_cq(ic, &state);
-	ib_req_notify_cq(ic->i_recv_cq, IB_CQ_SOLICITED);
-	rds_poll_cq(ic, &state);
-
-	if (state.ack_next_valid)
-		rds_ib_set_ack(ic, state.ack_next, state.ack_required);
-	if (state.ack_recv_valid && state.ack_recv > ic->i_ack_recv) {
-		rds_send_drop_acked(conn, state.ack_recv, NULL);
-		ic->i_ack_recv = state.ack_recv;
+	/* rds_ib_process_recv() doesn't always consume the frag, and
+	 * we might not have called it at all if the wc didn't indicate
+	 * success. We already unmapped the frag's pages, though, and
+	 * the following rds_ib_ring_free() call tells the refill path
+	 * that it will not find an allocated frag here. Make sure we
+	 * keep that promise by freeing a frag that's still on the ring.
+	 */
+	if (recv->r_frag) {
+		rds_ib_frag_free(ic, recv->r_frag);
+		recv->r_frag = NULL;
 	}
-	if (rds_conn_up(conn))
-		rds_ib_attempt_ack(ic);
+	rds_ib_ring_free(&ic->i_recv_ring, 1);
 
 	/* If we ever end up with a really empty receive ring, we're
 	 * in deep trouble, as the sender will definitely see RNR