Browse Source

Merge branch 'rds-packet-assembly-fixes'

Sowmini Varadhan says:

====================
RDS: TCP: socket locking RDS packet assembly fixes

This three part patchset fixes bugs in synchronization between
rds_tcp_accept_one() and the rds-tcp send/recv path.

Patch 1 ensures that the lock_sock() is taken appropriately
and the RDS datagram reassembly state is reset  to synchronize
with the receive path.

Patch 2 ensures that partially sent RDS datagrams will get
retransmitted after rds_tcp_accept_one() switches sockets.

Patch 3 fixes a race window which would prematurely re-enable
rds_send_xmit() before the rds_tcp_connection setup has been
completed in rds_tcp_accept_one().
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
David S. Miller 9 years ago
parent
commit
29a36611e9
7 changed files with 95 additions and 19 deletions
  1. 2 0
      net/rds/rds.h
  2. 1 0
      net/rds/send.c
  3. 75 3
      net/rds/tcp.c
  4. 1 0
      net/rds/tcp.h
  5. 1 1
      net/rds/tcp_connect.c
  6. 7 13
      net/rds/tcp_listen.c
  7. 8 2
      net/rds/threads.c

+ 2 - 0
net/rds/rds.h

@@ -74,6 +74,7 @@ enum {
 	RDS_CONN_CONNECTING,
 	RDS_CONN_DISCONNECTING,
 	RDS_CONN_UP,
+	RDS_CONN_RESETTING,
 	RDS_CONN_ERROR,
 };
 
@@ -813,6 +814,7 @@ void rds_connect_worker(struct work_struct *);
 void rds_shutdown_worker(struct work_struct *);
 void rds_send_worker(struct work_struct *);
 void rds_recv_worker(struct work_struct *);
+void rds_connect_path_complete(struct rds_connection *conn, int curr);
 void rds_connect_complete(struct rds_connection *conn);
 
 /* transport.c */

+ 1 - 0
net/rds/send.c

@@ -99,6 +99,7 @@ void rds_send_reset(struct rds_connection *conn)
 	list_splice_init(&conn->c_retrans, &conn->c_send_queue);
 	spin_unlock_irqrestore(&conn->c_lock, flags);
 }
+EXPORT_SYMBOL_GPL(rds_send_reset);
 
 static int acquire_in_xmit(struct rds_connection *conn)
 {

+ 75 - 3
net/rds/tcp.c

@@ -126,9 +126,81 @@ void rds_tcp_restore_callbacks(struct socket *sock,
 }
 
 /*
- * This is the only path that sets tc->t_sock.  Send and receive trust that
- * it is set.  The RDS_CONN_UP bit protects those paths from being
- * called while it isn't set.
+ * rds_tcp_reset_callbacks() switches the to the new sock and
+ * returns the existing tc->t_sock.
+ *
+ * The only functions that set tc->t_sock are rds_tcp_set_callbacks
+ * and rds_tcp_reset_callbacks.  Send and receive trust that
+ * it is set.  The absence of RDS_CONN_UP bit protects those paths
+ * from being called while it isn't set.
+ */
+void rds_tcp_reset_callbacks(struct socket *sock,
+			     struct rds_connection *conn)
+{
+	struct rds_tcp_connection *tc = conn->c_transport_data;
+	struct socket *osock = tc->t_sock;
+
+	if (!osock)
+		goto newsock;
+
+	/* Need to resolve a duelling SYN between peers.
+	 * We have an outstanding SYN to this peer, which may
+	 * potentially have transitioned to the RDS_CONN_UP state,
+	 * so we must quiesce any send threads before resetting
+	 * c_transport_data. We quiesce these threads by setting
+	 * c_state to something other than RDS_CONN_UP, and then
+	 * waiting for any existing threads in rds_send_xmit to
+	 * complete release_in_xmit(). (Subsequent threads entering
+	 * rds_send_xmit() will bail on !rds_conn_up().
+	 *
+	 * However an incoming syn-ack at this point would end up
+	 * marking the conn as RDS_CONN_UP, and would again permit
+	 * rds_send_xmi() threads through, so ideally we would
+	 * synchronize on RDS_CONN_UP after lock_sock(), but cannot
+	 * do that: waiting on !RDS_IN_XMIT after lock_sock() may
+	 * end up deadlocking with tcp_sendmsg(), and the RDS_IN_XMIT
+	 * would not get set. As a result, we set c_state to
+	 * RDS_CONN_RESETTTING, to ensure that rds_tcp_state_change
+	 * cannot mark rds_conn_path_up() in the window before lock_sock()
+	 */
+	atomic_set(&conn->c_state, RDS_CONN_RESETTING);
+	wait_event(conn->c_waitq, !test_bit(RDS_IN_XMIT, &conn->c_flags));
+	lock_sock(osock->sk);
+	/* reset receive side state for rds_tcp_data_recv() for osock  */
+	if (tc->t_tinc) {
+		rds_inc_put(&tc->t_tinc->ti_inc);
+		tc->t_tinc = NULL;
+	}
+	tc->t_tinc_hdr_rem = sizeof(struct rds_header);
+	tc->t_tinc_data_rem = 0;
+	tc->t_sock = NULL;
+
+	write_lock_bh(&osock->sk->sk_callback_lock);
+
+	osock->sk->sk_user_data = NULL;
+	osock->sk->sk_data_ready = tc->t_orig_data_ready;
+	osock->sk->sk_write_space = tc->t_orig_write_space;
+	osock->sk->sk_state_change = tc->t_orig_state_change;
+	write_unlock_bh(&osock->sk->sk_callback_lock);
+	release_sock(osock->sk);
+	sock_release(osock);
+newsock:
+	rds_send_reset(conn);
+	lock_sock(sock->sk);
+	write_lock_bh(&sock->sk->sk_callback_lock);
+	tc->t_sock = sock;
+	sock->sk->sk_user_data = conn;
+	sock->sk->sk_data_ready = rds_tcp_data_ready;
+	sock->sk->sk_write_space = rds_tcp_write_space;
+	sock->sk->sk_state_change = rds_tcp_state_change;
+
+	write_unlock_bh(&sock->sk->sk_callback_lock);
+	release_sock(sock->sk);
+}
+
+/* Add tc to rds_tcp_tc_list and set tc->t_sock. See comments
+ * above rds_tcp_reset_callbacks for notes about synchronization
+ * with data path
  */
 void rds_tcp_set_callbacks(struct socket *sock, struct rds_connection *conn)
 {

+ 1 - 0
net/rds/tcp.h

@@ -50,6 +50,7 @@ struct rds_tcp_statistics {
 void rds_tcp_tune(struct socket *sock);
 void rds_tcp_nonagle(struct socket *sock);
 void rds_tcp_set_callbacks(struct socket *sock, struct rds_connection *conn);
+void rds_tcp_reset_callbacks(struct socket *sock, struct rds_connection *conn);
 void rds_tcp_restore_callbacks(struct socket *sock,
 			       struct rds_tcp_connection *tc);
 u32 rds_tcp_snd_nxt(struct rds_tcp_connection *tc);

+ 1 - 1
net/rds/tcp_connect.c

@@ -60,7 +60,7 @@ void rds_tcp_state_change(struct sock *sk)
 		case TCP_SYN_RECV:
 			break;
 		case TCP_ESTABLISHED:
-			rds_connect_complete(conn);
+			rds_connect_path_complete(conn, RDS_CONN_CONNECTING);
 			break;
 		case TCP_CLOSE_WAIT:
 		case TCP_CLOSE:

+ 7 - 13
net/rds/tcp_listen.c

@@ -78,7 +78,6 @@ int rds_tcp_accept_one(struct socket *sock)
 	struct inet_sock *inet;
 	struct rds_tcp_connection *rs_tcp = NULL;
 	int conn_state;
-	struct sock *nsk;
 
 	if (!sock) /* module unload or netns delete in progress */
 		return -ENETUNREACH;
@@ -136,26 +135,21 @@ int rds_tcp_accept_one(struct socket *sock)
 		    !conn->c_outgoing) {
 			goto rst_nsk;
 		} else {
-			atomic_set(&conn->c_state, RDS_CONN_CONNECTING);
-			wait_event(conn->c_waitq,
-				   !test_bit(RDS_IN_XMIT, &conn->c_flags));
-			rds_tcp_restore_callbacks(rs_tcp->t_sock, rs_tcp);
+			rds_tcp_reset_callbacks(new_sock, conn);
 			conn->c_outgoing = 0;
+			/* rds_connect_path_complete() marks RDS_CONN_UP */
+			rds_connect_path_complete(conn, RDS_CONN_DISCONNECTING);
 		}
+	} else {
+		rds_tcp_set_callbacks(new_sock, conn);
+		rds_connect_path_complete(conn, RDS_CONN_CONNECTING);
 	}
-	rds_tcp_set_callbacks(new_sock, conn);
-	rds_connect_complete(conn); /* marks RDS_CONN_UP */
 	new_sock = NULL;
 	ret = 0;
 	goto out;
 rst_nsk:
 	/* reset the newly returned accept sock and bail */
-	nsk = new_sock->sk;
-	rds_tcp_stats_inc(s_tcp_listen_closed_stale);
-	nsk->sk_user_data = NULL;
-	nsk->sk_prot->disconnect(nsk, 0);
-	tcp_done(nsk);
-	new_sock = NULL;
+	kernel_sock_shutdown(new_sock, SHUT_RDWR);
 	ret = 0;
 out:
 	if (rs_tcp)

+ 8 - 2
net/rds/threads.c

@@ -71,9 +71,9 @@
 struct workqueue_struct *rds_wq;
 EXPORT_SYMBOL_GPL(rds_wq);
 
-void rds_connect_complete(struct rds_connection *conn)
+void rds_connect_path_complete(struct rds_connection *conn, int curr)
 {
-	if (!rds_conn_transition(conn, RDS_CONN_CONNECTING, RDS_CONN_UP)) {
+	if (!rds_conn_transition(conn, curr, RDS_CONN_UP)) {
 		printk(KERN_WARNING "%s: Cannot transition to state UP, "
 				"current state is %d\n",
 				__func__,
@@ -90,6 +90,12 @@ void rds_connect_complete(struct rds_connection *conn)
 	queue_delayed_work(rds_wq, &conn->c_send_w, 0);
 	queue_delayed_work(rds_wq, &conn->c_recv_w, 0);
 }
+EXPORT_SYMBOL_GPL(rds_connect_path_complete);
+
+void rds_connect_complete(struct rds_connection *conn)
+{
+	rds_connect_path_complete(conn, RDS_CONN_CONNECTING);
+}
 EXPORT_SYMBOL_GPL(rds_connect_complete);
 
 /*