Quellcode durchsuchen

Merge branch 'rds-tcp-misc-bug-fixes'

Sowmini Varadhan says:

====================
rds: tcp: misc bug fixes

This series contains 2 bug fixes (patch2, patch3) and one bit of
code cleanup (patch1) identified during database testing
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
David S. Miller vor 8 Jahren
Ursprung
Commit
34a95132e0
7 geänderte Dateien mit 43 neuen und 37 gelöschten Zeilen
  1. 0 5
      net/rds/connection.c
  2. 2 2
      net/rds/rds.h
  3. 7 5
      net/rds/recv.c
  4. 7 4
      net/rds/send.c
  5. 1 2
      net/rds/tcp_connect.c
  6. 23 17
      net/rds/tcp_listen.c
  7. 3 2
      net/rds/threads.c

+ 0 - 5
net/rds/connection.c

@@ -124,11 +124,6 @@ static void __rds_conn_path_init(struct rds_connection *conn,
 	cp->cp_conn = conn;
 	atomic_set(&cp->cp_state, RDS_CONN_DOWN);
 	cp->cp_send_gen = 0;
-	/* cp_outgoing is per-path. So we can only set it here
-	 * for the single-path transports.
-	 */
-	if (!conn->c_trans->t_mp_capable)
-		cp->cp_outgoing = (is_outgoing ? 1 : 0);
 	cp->cp_reconnect_jiffies = 0;
 	INIT_DELAYED_WORK(&cp->cp_send_w, rds_send_worker);
 	INIT_DELAYED_WORK(&cp->cp_recv_w, rds_recv_worker);

+ 2 - 2
net/rds/rds.h

@@ -92,6 +92,8 @@ enum {
 #define	RDS_MPATH_HASH(rs, n) (jhash_1word((rs)->rs_bound_port, \
 			       (rs)->rs_hash_initval) & ((n) - 1))
 
+#define IS_CANONICAL(laddr, faddr) (htonl(laddr) < htonl(faddr))
+
 /* Per mpath connection state */
 struct rds_conn_path {
 	struct rds_connection	*cp_conn;
@@ -125,8 +127,6 @@ struct rds_conn_path {
 
 	unsigned int		cp_unacked_packets;
 	unsigned int		cp_unacked_bytes;
-	unsigned int		cp_outgoing:1,
-				cp_pad_to_32:31;
 	unsigned int		cp_index;
 };
 

+ 7 - 5
net/rds/recv.c

@@ -215,10 +215,10 @@ static void rds_recv_hs_exthdrs(struct rds_header *hdr,
 		switch (type) {
 		case RDS_EXTHDR_NPATHS:
 			conn->c_npaths = min_t(int, RDS_MPATH_WORKERS,
-					       buffer.rds_npaths);
+					       be16_to_cpu(buffer.rds_npaths));
 			break;
 		case RDS_EXTHDR_GEN_NUM:
-			new_peer_gen_num = buffer.rds_gen_num;
+			new_peer_gen_num = be32_to_cpu(buffer.rds_gen_num);
 			break;
 		default:
 			pr_warn_ratelimited("ignoring unknown exthdr type "
@@ -254,7 +254,8 @@ static void rds_start_mprds(struct rds_connection *conn)
 	int i;
 	struct rds_conn_path *cp;
 
-	if (conn->c_npaths > 1 && conn->c_laddr < conn->c_faddr) {
+	if (conn->c_npaths > 1 &&
+	    IS_CANONICAL(conn->c_laddr, conn->c_faddr)) {
 		for (i = 1; i < conn->c_npaths; i++) {
 			cp = &conn->c_path[i];
 			rds_conn_path_connect_if_down(cp);
@@ -339,14 +340,15 @@ void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr,
 		rds_stats_inc(s_recv_ping);
 		rds_send_pong(cp, inc->i_hdr.h_sport);
 		/* if this is a handshake ping, start multipath if necessary */
-		if (RDS_HS_PROBE(inc->i_hdr.h_sport, inc->i_hdr.h_dport)) {
+		if (RDS_HS_PROBE(be16_to_cpu(inc->i_hdr.h_sport),
+				 be16_to_cpu(inc->i_hdr.h_dport))) {
 			rds_recv_hs_exthdrs(&inc->i_hdr, cp->cp_conn);
 			rds_start_mprds(cp->cp_conn);
 		}
 		goto out;
 	}
 
-	if (inc->i_hdr.h_dport ==  RDS_FLAG_PROBE_PORT &&
+	if (be16_to_cpu(inc->i_hdr.h_dport) ==  RDS_FLAG_PROBE_PORT &&
 	    inc->i_hdr.h_sport == 0) {
 		rds_recv_hs_exthdrs(&inc->i_hdr, cp->cp_conn);
 		/* if this is a handshake pong, start multipath if necessary */

+ 7 - 4
net/rds/send.c

@@ -1246,15 +1246,17 @@ rds_send_probe(struct rds_conn_path *cp, __be16 sport,
 	rm->m_inc.i_hdr.h_flags |= h_flags;
 	cp->cp_next_tx_seq++;
 
-	if (RDS_HS_PROBE(sport, dport) && cp->cp_conn->c_trans->t_mp_capable) {
-		u16 npaths = RDS_MPATH_WORKERS;
+	if (RDS_HS_PROBE(be16_to_cpu(sport), be16_to_cpu(dport)) &&
+	    cp->cp_conn->c_trans->t_mp_capable) {
+		u16 npaths = cpu_to_be16(RDS_MPATH_WORKERS);
+		u32 my_gen_num = cpu_to_be32(cp->cp_conn->c_my_gen_num);
 
 		rds_message_add_extension(&rm->m_inc.i_hdr,
 					  RDS_EXTHDR_NPATHS, &npaths,
 					  sizeof(npaths));
 		rds_message_add_extension(&rm->m_inc.i_hdr,
 					  RDS_EXTHDR_GEN_NUM,
-					  &cp->cp_conn->c_my_gen_num,
+					  &my_gen_num,
 					  sizeof(u32));
 	}
 	spin_unlock_irqrestore(&cp->cp_lock, flags);
@@ -1293,5 +1295,6 @@ rds_send_ping(struct rds_connection *conn)
 	}
 	conn->c_ping_triggered = 1;
 	spin_unlock_irqrestore(&cp->cp_lock, flags);
-	rds_send_probe(&conn->c_path[0], RDS_FLAG_PROBE_PORT, 0, 0);
+	rds_send_probe(&conn->c_path[0], cpu_to_be16(RDS_FLAG_PROBE_PORT),
+		       0, 0);
 }

+ 1 - 2
net/rds/tcp_connect.c

@@ -66,7 +66,7 @@ void rds_tcp_state_change(struct sock *sk)
 		 * RDS connection as RDS_CONN_UP until the reconnect,
 		 * to avoid RDS datagram loss.
 		 */
-		if (cp->cp_conn->c_laddr > cp->cp_conn->c_faddr &&
+		if (!IS_CANONICAL(cp->cp_conn->c_laddr, cp->cp_conn->c_faddr) &&
 		    rds_conn_path_transition(cp, RDS_CONN_CONNECTING,
 					     RDS_CONN_ERROR)) {
 			rds_conn_path_drop(cp);
@@ -135,7 +135,6 @@ int rds_tcp_conn_path_connect(struct rds_conn_path *cp)
 	ret = sock->ops->connect(sock, (struct sockaddr *)&dest, sizeof(dest),
 				 O_NONBLOCK);
 
-	cp->cp_outgoing = 1;
 	rdsdebug("connect to address %pI4 returned %d\n", &conn->c_faddr, ret);
 	if (ret == -EINPROGRESS)
 		ret = 0;

+ 23 - 17
net/rds/tcp_listen.c

@@ -83,7 +83,7 @@ static
 struct rds_tcp_connection *rds_tcp_accept_one_path(struct rds_connection *conn)
 {
 	int i;
-	bool peer_is_smaller = (conn->c_faddr < conn->c_laddr);
+	bool peer_is_smaller = IS_CANONICAL(conn->c_faddr, conn->c_laddr);
 	int npaths = max_t(int, 1, conn->c_npaths);
 
 	/* for mprds, all paths MUST be initiated by the peer
@@ -112,6 +112,17 @@ struct rds_tcp_connection *rds_tcp_accept_one_path(struct rds_connection *conn)
 	return NULL;
 }
 
+static void rds_tcp_set_linger(struct socket *sock)
+{
+	struct linger no_linger = {
+		.l_onoff = 1,
+		.l_linger = 0,
+	};
+
+	kernel_setsockopt(sock, SOL_SOCKET, SO_LINGER,
+			  (char *)&no_linger, sizeof(no_linger));
+}
+
 int rds_tcp_accept_one(struct socket *sock)
 {
 	struct socket *new_sock = NULL;
@@ -171,21 +182,10 @@ int rds_tcp_accept_one(struct socket *sock)
 	if (conn_state != RDS_CONN_CONNECTING && conn_state != RDS_CONN_ERROR)
 		goto rst_nsk;
 	if (rs_tcp->t_sock) {
-		/* Need to resolve a duelling SYN between peers.
-		 * We have an outstanding SYN to this peer, which may
-		 * potentially have transitioned to the RDS_CONN_UP state,
-		 * so we must quiesce any send threads before resetting
-		 * c_transport_data.
-		 */
-		if (ntohl(inet->inet_saddr) < ntohl(inet->inet_daddr) ||
-		    !cp->cp_outgoing) {
-			goto rst_nsk;
-		} else {
-			rds_tcp_reset_callbacks(new_sock, cp);
-			cp->cp_outgoing = 0;
-			/* rds_connect_path_complete() marks RDS_CONN_UP */
-			rds_connect_path_complete(cp, RDS_CONN_RESETTING);
-		}
+		/* Duelling SYN has been handled in rds_tcp_accept_one() */
+		rds_tcp_reset_callbacks(new_sock, cp);
+		/* rds_connect_path_complete() marks RDS_CONN_UP */
+		rds_connect_path_complete(cp, RDS_CONN_RESETTING);
 	} else {
 		rds_tcp_set_callbacks(new_sock, cp);
 		rds_connect_path_complete(cp, RDS_CONN_CONNECTING);
@@ -194,7 +194,13 @@ int rds_tcp_accept_one(struct socket *sock)
 	ret = 0;
 	goto out;
 rst_nsk:
-	/* reset the newly returned accept sock and bail */
+	/* reset the newly returned accept sock and bail.
+	 * It is safe to set linger on new_sock because the RDS connection
+	 * has not been brought up on new_sock, so no RDS-level data could
+	 * be pending on it. By setting linger, we achieve the side-effect
+	 * of avoiding TIME_WAIT state on new_sock.
+	 */
+	rds_tcp_set_linger(new_sock);
 	kernel_sock_shutdown(new_sock, SHUT_RDWR);
 	ret = 0;
 out:

+ 3 - 2
net/rds/threads.c

@@ -127,7 +127,7 @@ void rds_queue_reconnect(struct rds_conn_path *cp)
 
 	/* let peer with smaller addr initiate reconnect, to avoid duels */
 	if (conn->c_trans->t_type == RDS_TRANS_TCP &&
-	    conn->c_laddr > conn->c_faddr)
+	    !IS_CANONICAL(conn->c_laddr, conn->c_faddr))
 		return;
 
 	set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags);
@@ -156,7 +156,8 @@ void rds_connect_worker(struct work_struct *work)
 	struct rds_connection *conn = cp->cp_conn;
 	int ret;
 
-	if (cp->cp_index > 0 && cp->cp_conn->c_laddr > cp->cp_conn->c_faddr)
+	if (cp->cp_index > 0 &&
+	    !IS_CANONICAL(cp->cp_conn->c_laddr, cp->cp_conn->c_faddr))
 		return;
 	clear_bit(RDS_RECONNECT_PENDING, &cp->cp_flags);
 	ret = rds_conn_path_transition(cp, RDS_CONN_DOWN, RDS_CONN_CONNECTING);