
Merge branch 'robust_listener'

Eric Dumazet says:

====================
tcp/dccp: make our listener code more robust

This patch series addresses request socket leaks and races in the
listener dismantle phase. It survives a stress test with listeners
being added and removed quite randomly.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
David S. Miller committed 9 years ago
Parent commit: a302afe980
7 changed files with 67 additions and 57 deletions
  1. include/net/inet_connection_sock.h  +3 -7
  2. include/net/request_sock.h  +0 -19
  3. net/dccp/ipv4.c  +2 -2
  4. net/dccp/ipv6.c  +1 -1
  5. net/ipv4/inet_connection_sock.c  +57 -24
  6. net/ipv4/tcp_ipv4.c  +3 -3
  7. net/ipv6/tcp_ipv6.c  +1 -1
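
The cover letter above mentions a stress test with listeners being added and removed quite randomly, but that test is not part of the commit. Below is a minimal userspace sketch of what such a harness could look like; the port, thread count, and run time are arbitrary assumptions, and this illustrates the shape of the test, not the actual test that was run:

/*
 * listener_churn.c - hypothetical stress harness: one thread keeps
 * creating and destroying a listener on a fixed port while several
 * threads connect to it.  Before this series, closing a listener
 * while children were still entering its accept queue could leak
 * request sockets; the harness just tries to hit that window.
 *
 * Build: cc -O2 -pthread listener_churn.c -o listener_churn
 */
#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <pthread.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#define PORT     12345		/* arbitrary test port */
#define NCONNECT 4		/* connector threads   */

static volatile int running = 1;

static void *churn_listener(void *arg)
{
	while (running) {
		struct sockaddr_in sa;
		int one = 1;
		int fd = socket(AF_INET, SOCK_STREAM, 0);

		if (fd < 0)
			continue;
		setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
		memset(&sa, 0, sizeof(sa));
		sa.sin_family = AF_INET;
		sa.sin_port = htons(PORT);
		sa.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
		if (bind(fd, (struct sockaddr *)&sa, sizeof(sa)) == 0)
			listen(fd, 8);
		/* Close while connections may still be entering the
		 * SYN or accept queue: the window the series hardens. */
		usleep(rand() % 2000);
		close(fd);
	}
	return arg;
}

static void *connector(void *arg)
{
	while (running) {
		struct sockaddr_in sa;
		int fd = socket(AF_INET, SOCK_STREAM, 0);

		if (fd < 0)
			continue;
		memset(&sa, 0, sizeof(sa));
		sa.sin_family = AF_INET;
		sa.sin_port = htons(PORT);
		sa.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
		connect(fd, (struct sockaddr *)&sa, sizeof(sa));
		close(fd);	/* failure is fine; keep hammering */
	}
	return arg;
}

int main(void)
{
	pthread_t lt, ct[NCONNECT];
	int i;

	pthread_create(&lt, NULL, churn_listener, NULL);
	for (i = 0; i < NCONNECT; i++)
		pthread_create(&ct[i], NULL, connector, NULL);
	sleep(30);		/* arbitrary run time */
	running = 0;
	pthread_join(lt, NULL);
	for (i = 0; i < NCONNECT; i++)
		pthread_join(ct[i], NULL);
	return 0;
}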

+ 3 - 7
include/net/inet_connection_sock.h

@@ -268,13 +268,8 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk,
 					    struct sock *newsk,
 					    const struct request_sock *req);
 
-static inline void inet_csk_reqsk_queue_add(struct sock *sk,
-					    struct request_sock *req,
-					    struct sock *child)
-{
-	reqsk_queue_add(&inet_csk(sk)->icsk_accept_queue, req, sk, child);
-}
-
+void inet_csk_reqsk_queue_add(struct sock *sk, struct request_sock *req,
+			      struct sock *child);
 void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
 				   unsigned long timeout);
 
@@ -299,6 +294,7 @@ static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk)
 }
 
 void inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req);
+void inet_csk_reqsk_queue_drop_and_put(struct sock *sk, struct request_sock *req);
 
 void inet_csk_destroy_sock(struct sock *sk);
 void inet_csk_prepare_forced_close(struct sock *sk);

+ 0 - 19
include/net/request_sock.h

@@ -186,25 +186,6 @@ static inline bool reqsk_queue_empty(const struct request_sock_queue *queue)
 	return queue->rskq_accept_head == NULL;
 }
 
-static inline void reqsk_queue_add(struct request_sock_queue *queue,
-				   struct request_sock *req,
-				   struct sock *parent,
-				   struct sock *child)
-{
-	spin_lock(&queue->rskq_lock);
-	req->sk = child;
-	sk_acceptq_added(parent);
-
-	if (queue->rskq_accept_head == NULL)
-		queue->rskq_accept_head = req;
-	else
-		queue->rskq_accept_tail->dl_next = req;
-
-	queue->rskq_accept_tail = req;
-	req->dl_next = NULL;
-	spin_unlock(&queue->rskq_lock);
-}
-
 static inline struct request_sock *reqsk_queue_remove(struct request_sock_queue *queue,
 						      struct sock *parent)
 {

+ 2 - 2
net/dccp/ipv4.c

@@ -208,7 +208,6 @@ void dccp_req_err(struct sock *sk, u64 seq)
 
 	if (!between48(seq, dccp_rsk(req)->dreq_iss, dccp_rsk(req)->dreq_gss)) {
 		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
-		reqsk_put(req);
 	} else {
 		/*
 		 * Still in RESPOND, just remove it silently.
@@ -218,6 +217,7 @@ void dccp_req_err(struct sock *sk, u64 seq)
 		 */
 		inet_csk_reqsk_queue_drop(req->rsk_listener, req);
 	}
+	reqsk_put(req);
 }
 EXPORT_SYMBOL(dccp_req_err);
 
@@ -828,7 +828,7 @@ lookup:
 		if (likely(sk->sk_state == DCCP_LISTEN)) {
 			nsk = dccp_check_req(sk, skb, req);
 		} else {
-			inet_csk_reqsk_queue_drop(sk, req);
+			inet_csk_reqsk_queue_drop_and_put(sk, req);
 			goto lookup;
 		}
 		if (!nsk) {
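
Both hunks above close the same kind of reference leak: dccp_req_err() and the receive path obtain `req` from a lookup that takes a reference, and every exit path must release that reference exactly once. Previously, the in-window branch of dccp_req_err() and the `goto lookup` retry dropped the request from the listener's queue without ever putting the lookup's reference. The standalone toy below (plain C with invented names, not kernel code) models that ownership rule before and after:

/* refleak_toy.c - toy model of the reference-counting bug fixed above. */
#include <stdio.h>

struct obj { int refs; };

static void get(struct obj *o) { o->refs++; }
static void put(struct obj *o) { o->refs--; }

/* Mimics the old dccp_req_err(): only the out-of-window branch
 * released the lookup's reference. */
static void err_old(struct obj *req, int out_of_window)
{
	if (out_of_window) {
		put(req);
	} else {
		/* inet_csk_reqsk_queue_drop() would run here; it does
		 * not touch the lookup's reference, which thus leaked. */
	}
}

/* Mimics the new code: the queue drop stays in its branch, and the
 * lookup's reference is released unconditionally afterwards. */
static void err_new(struct obj *req, int out_of_window)
{
	if (!out_of_window) {
		/* inet_csk_reqsk_queue_drop() would run here. */
	}
	put(req);
}

int main(void)
{
	struct obj req = { 0 };

	get(&req);		/* reference taken by the lookup  */
	err_old(&req, 0);	/* ICMP error, sequence in window */
	printf("old code: outstanding refs = %d (1 means a leak)\n", req.refs);

	req.refs = 0;
	get(&req);
	err_new(&req, 0);
	printf("new code: outstanding refs = %d\n", req.refs);
	return 0;
}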

+ 1 - 1
net/dccp/ipv6.c

@@ -686,7 +686,7 @@ lookup:
 		if (likely(sk->sk_state == DCCP_LISTEN)) {
 			nsk = dccp_check_req(sk, skb, req);
 		} else {
-			inet_csk_reqsk_queue_drop(sk, req);
+			inet_csk_reqsk_queue_drop_and_put(sk, req);
 			goto lookup;
 		}
 		if (!nsk) {

+ 57 - 24
net/ipv4/inet_connection_sock.c

@@ -546,6 +546,13 @@ void inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req)
 }
 EXPORT_SYMBOL(inet_csk_reqsk_queue_drop);
 
+void inet_csk_reqsk_queue_drop_and_put(struct sock *sk, struct request_sock *req)
+{
+	inet_csk_reqsk_queue_drop(sk, req);
+	reqsk_put(req);
+}
+EXPORT_SYMBOL(inet_csk_reqsk_queue_drop_and_put);
+
 static void reqsk_timer_handler(unsigned long data)
 {
 	struct request_sock *req = (struct request_sock *)data;
@@ -608,8 +615,7 @@ static void reqsk_timer_handler(unsigned long data)
 		return;
 	}
 drop:
-	inet_csk_reqsk_queue_drop(sk_listener, req);
-	reqsk_put(req);
+	inet_csk_reqsk_queue_drop_and_put(sk_listener, req);
 }
 
 static void reqsk_queue_hash_req(struct request_sock *req,
@@ -758,6 +764,53 @@ int inet_csk_listen_start(struct sock *sk, int backlog)
 }
 EXPORT_SYMBOL_GPL(inet_csk_listen_start);
 
+static void inet_child_forget(struct sock *sk, struct request_sock *req,
+			      struct sock *child)
+{
+	sk->sk_prot->disconnect(child, O_NONBLOCK);
+
+	sock_orphan(child);
+
+	percpu_counter_inc(sk->sk_prot->orphan_count);
+
+	if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->tfo_listener) {
+		BUG_ON(tcp_sk(child)->fastopen_rsk != req);
+		BUG_ON(sk != req->rsk_listener);
+
+		/* Paranoid, to prevent race condition if
+		 * an inbound pkt destined for child is
+		 * blocked by sock lock in tcp_v4_rcv().
+		 * Also to satisfy an assertion in
+		 * tcp_v4_destroy_sock().
+		 */
+		tcp_sk(child)->fastopen_rsk = NULL;
+	}
+	inet_csk_destroy_sock(child);
+	reqsk_put(req);
+}
+
+void inet_csk_reqsk_queue_add(struct sock *sk, struct request_sock *req,
+			      struct sock *child)
+{
+	struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
+
+	spin_lock(&queue->rskq_lock);
+	if (unlikely(sk->sk_state != TCP_LISTEN)) {
+		inet_child_forget(sk, req, child);
+	} else {
+		req->sk = child;
+		req->dl_next = NULL;
+		if (queue->rskq_accept_head == NULL)
+			queue->rskq_accept_head = req;
+		else
+			queue->rskq_accept_tail->dl_next = req;
+		queue->rskq_accept_tail = req;
+		sk_acceptq_added(sk);
+	}
+	spin_unlock(&queue->rskq_lock);
+}
+EXPORT_SYMBOL(inet_csk_reqsk_queue_add);
+
 /*
  *	This routine closes sockets which have been at least partially
  *	opened, but not yet accepted.
@@ -784,31 +837,11 @@ void inet_csk_listen_stop(struct sock *sk)
 		WARN_ON(sock_owned_by_user(child));
 		sock_hold(child);
 
-		sk->sk_prot->disconnect(child, O_NONBLOCK);
-
-		sock_orphan(child);
-
-		percpu_counter_inc(sk->sk_prot->orphan_count);
-
-		if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->tfo_listener) {
-			BUG_ON(tcp_sk(child)->fastopen_rsk != req);
-			BUG_ON(sk != req->rsk_listener);
-
-			/* Paranoid, to prevent race condition if
-			 * an inbound pkt destined for child is
-			 * blocked by sock lock in tcp_v4_rcv().
-			 * Also to satisfy an assertion in
-			 * tcp_v4_destroy_sock().
-			 */
-			tcp_sk(child)->fastopen_rsk = NULL;
-		}
-		inet_csk_destroy_sock(child);
-
+		inet_child_forget(sk, req, child);
 		bh_unlock_sock(child);
 		local_bh_enable();
 		sock_put(child);
 
-		reqsk_put(req);
 		cond_resched();
 	}
 	if (queue->fastopenq.rskq_rst_head) {
@@ -823,7 +856,7 @@ void inet_csk_listen_stop(struct sock *sk)
 			req = next;
 		}
 	}
-	WARN_ON(sk->sk_ack_backlog);
+	WARN_ON_ONCE(sk->sk_ack_backlog);
 }
 EXPORT_SYMBOL_GPL(inet_csk_listen_stop);
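
The effect of the new inet_child_forget() path is visible from userspace: a child connection that completed the handshake but was never accept()ed is disconnected when its listener is dismantled, so the peer sees a reset instead of a half-open socket. A small probe of that behavior (the port number is an arbitrary assumption; error handling is elided for brevity):

/* queued_child_reset.c - connect to a listener, close the listener
 * before accept(), and observe the queued child being reset. */
#include <arpa/inet.h>
#include <errno.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	struct sockaddr_in sa = {
		.sin_family	 = AF_INET,
		.sin_port	 = htons(12346),
		.sin_addr.s_addr = htonl(INADDR_LOOPBACK),
	};
	int one = 1, lfd, cfd;
	char buf[1];

	lfd = socket(AF_INET, SOCK_STREAM, 0);
	setsockopt(lfd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
	bind(lfd, (struct sockaddr *)&sa, sizeof(sa));
	listen(lfd, 8);

	/* Complete the 3WHS: the child now sits in lfd's accept queue. */
	cfd = socket(AF_INET, SOCK_STREAM, 0);
	connect(cfd, (struct sockaddr *)&sa, sizeof(sa));

	/* Dismantle the listener without ever calling accept(). */
	close(lfd);

	/* inet_csk_listen_stop() disconnected the queued child, so the
	 * peer's next read fails with a reset. */
	if (read(cfd, buf, sizeof(buf)) < 0)
		printf("read: %s (ECONNRESET expected)\n", strerror(errno));
	close(cfd);
	return 0;
}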
 

+ 3 - 3
net/ipv4/tcp_ipv4.c

@@ -324,7 +324,6 @@ void tcp_req_err(struct sock *sk, u32 seq)
 
 	if (seq != tcp_rsk(req)->snt_isn) {
 		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
-		reqsk_put(req);
 	} else {
 		/*
 		 * Still in SYN_RECV, just remove it silently.
@@ -332,9 +331,10 @@ void tcp_req_err(struct sock *sk, u32 seq)
 		 * created socket, and POSIX does not want network
 		 * errors returned from accept().
 		 */
-		NET_INC_STATS_BH(net, LINUX_MIB_LISTENDROPS);
 		inet_csk_reqsk_queue_drop(req->rsk_listener, req);
+		NET_INC_STATS_BH(net, LINUX_MIB_LISTENDROPS);
 	}
+	reqsk_put(req);
 }
 EXPORT_SYMBOL(tcp_req_err);
 
@@ -1591,7 +1591,7 @@ process:
 		if (likely(sk->sk_state == TCP_LISTEN)) {
 			nsk = tcp_check_req(sk, skb, req, false);
 		} else {
-			inet_csk_reqsk_queue_drop(sk, req);
+			inet_csk_reqsk_queue_drop_and_put(sk, req);
 			goto lookup;
 		}
 		if (!nsk) {

+ 1 - 1
net/ipv6/tcp_ipv6.c

@@ -1386,7 +1386,7 @@ process:
 		if (likely(sk->sk_state == TCP_LISTEN)) {
 			nsk = tcp_check_req(sk, skb, req, false);
 		} else {
-			inet_csk_reqsk_queue_drop(sk, req);
+			inet_csk_reqsk_queue_drop_and_put(sk, req);
 			goto lookup;
 		}
 		if (!nsk) {