
[TCP]: Appropriate Byte Count support

This is an updated version of the RFC3465 ABC patch originally
for Linux 2.6.11-rc4 by Yee-Ting Li. ABC is a way of counting
bytes ack'd rather than packets when updating congestion control.

The original ABC described in the RFC applies to a Reno-style
algorithm. For advanced congestion control there is little
change after leaving slow start.

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
commit 9772efb970
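For orientation only (not part of the patch): a minimal user-space sketch of the difference between per-ACK counting and RFC3465 byte counting when a single delayed ACK covers two full-sized segments. All names and values here are hypothetical.

/* Illustration only, not kernel code: per-ACK counting vs. RFC3465
 * byte counting when one delayed ACK covers two full-sized segments. */
#include <stdio.h>

#define MSS 1460u

int main(void)
{
	unsigned int cwnd_per_ack = 2;	/* grows by one for every ACK  */
	unsigned int cwnd_abc     = 2;	/* grows per MSS of ACKed data */
	unsigned int bytes_acked  = 0;
	unsigned int acked = 2 * MSS;	/* one delayed ACK, two segments */

	cwnd_per_ack += 1;		/* packet counting: +1 per ACK */

	bytes_acked += acked;		/* byte counting: credit the ACKed bytes */
	if (bytes_acked >= 2 * MSS) {		/* tcp_abc=2: allow +2 (L = 2*MSS) */
		cwnd_abc += 2;
		bytes_acked -= 2 * MSS;
	} else if (bytes_acked >= MSS) {	/* tcp_abc=1: at most +1 (L = 1*MSS) */
		cwnd_abc += 1;
		bytes_acked -= MSS;
	}

	printf("per-ACK counting: cwnd=%u, byte counting: cwnd=%u\n",
	       cwnd_per_ack, cwnd_abc);
	return 0;
}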

+ 5 - 0
Documentation/networking/ip-sysctl.txt

@@ -78,6 +78,11 @@ inet_peer_gc_maxtime - INTEGER
 
 TCP variables: 
 
+tcp_abc - INTEGER
+	Controls Appropriate Byte Count defined in RFC3465. If set to
+	0 then congestion avoidance is done once per ACK. 1 is the
+	conservative value, and 2 is more aggressive.
+
 tcp_syn_retries - INTEGER
 	Number of times initial SYNs for an active TCP connection attempt
 	will be retransmitted. Should not be higher than 255. Default value
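Once applied, the new setting is exposed as /proc/sys/net/ipv4/tcp_abc. A minimal sketch for reading the current value from user space (it assumes a kernel carrying this patch):

/* Minimal sketch: read the current tcp_abc setting from procfs.
 * Assumes a kernel that carries this patch. */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/net/ipv4/tcp_abc", "r");
	int val;

	if (!f) {
		perror("/proc/sys/net/ipv4/tcp_abc");
		return 1;
	}
	if (fscanf(f, "%d", &val) == 1)
		printf("tcp_abc = %d\n", val);
	fclose(f);
	return 0;
}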

+ 1 - 0
include/linux/sysctl.h

@@ -390,6 +390,7 @@ enum
 	NET_TCP_BIC_BETA=108,
 	NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR=109,
 	NET_TCP_CONG_CONTROL=110,
+	NET_TCP_ABC=111,
 };
 
 enum {

+ 1 - 0
include/linux/tcp.h

@@ -326,6 +326,7 @@ struct tcp_sock {
 	__u32	snd_up;		/* Urgent pointer		*/
 
 	__u32	total_retrans;	/* Total retransmits for entire connection */
+	__u32	bytes_acked;	/* Appropriate Byte Counting - RFC3465 */
 
 	unsigned int		keepalive_time;	  /* time before keep alive takes place */
 	unsigned int		keepalive_intvl;  /* time interval between keep alive probes */

+ 19 - 0
include/net/tcp.h

@@ -218,6 +218,7 @@ extern int sysctl_tcp_low_latency;
 extern int sysctl_tcp_nometrics_save;
 extern int sysctl_tcp_moderate_rcvbuf;
 extern int sysctl_tcp_tso_win_divisor;
+extern int sysctl_tcp_abc;
 
 extern atomic_t tcp_memory_allocated;
 extern atomic_t tcp_sockets_allocated;
@@ -770,6 +771,23 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk)
  */
 static inline void tcp_slow_start(struct tcp_sock *tp)
 {
+	if (sysctl_tcp_abc) {
+		/* RFC3465: Slow Start
+		 * TCP sender SHOULD increase cwnd by the number of
+		 * previously unacknowledged bytes ACKed by each incoming
+		 * acknowledgment, provided the increase is not more than L
+		 */
+		if (tp->bytes_acked < tp->mss_cache)
+			return;
+
+		/* We MAY increase by 2 if discovered delayed ack */
+		if (sysctl_tcp_abc > 1 && tp->bytes_acked > 2*tp->mss_cache) {
+			if (tp->snd_cwnd < tp->snd_cwnd_clamp)
+				tp->snd_cwnd++;
+		}
+	}
+	tp->bytes_acked = 0;
+
 	if (tp->snd_cwnd < tp->snd_cwnd_clamp)
 		tp->snd_cwnd++;
 }
@@ -804,6 +822,7 @@ static inline void tcp_enter_cwr(struct sock *sk)
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	tp->prior_ssthresh = 0;
+	tp->bytes_acked = 0;
 	if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
 		__tcp_enter_cwr(sk);
 		tcp_set_ca_state(sk, TCP_CA_CWR);
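A stand-alone restatement of the tcp_slow_start() change above, using a toy structure in place of struct tcp_sock, to make explicit when the window grows by 0, 1 or 2 segments per ACK. Illustrative only; the struct and function names are not the kernel's.

/* Toy restatement of the ABC slow-start logic above; the struct stands
 * in for the relevant struct tcp_sock fields. */
#include <stdio.h>

struct toy_tp {
	unsigned int snd_cwnd;
	unsigned int snd_cwnd_clamp;
	unsigned int mss_cache;
	unsigned int bytes_acked;
};

static void toy_slow_start(struct toy_tp *tp, int tcp_abc)
{
	if (tcp_abc) {
		/* Less than one full segment ACKed so far: keep
		 * accumulating, no growth yet. */
		if (tp->bytes_acked < tp->mss_cache)
			return;

		/* tcp_abc > 1: one extra increment when a delayed ACK
		 * covered more than two segments' worth of data. */
		if (tcp_abc > 1 && tp->bytes_acked > 2 * tp->mss_cache &&
		    tp->snd_cwnd < tp->snd_cwnd_clamp)
			tp->snd_cwnd++;
	}
	tp->bytes_acked = 0;

	if (tp->snd_cwnd < tp->snd_cwnd_clamp)
		tp->snd_cwnd++;
}

int main(void)
{
	struct toy_tp tp = { .snd_cwnd = 2, .snd_cwnd_clamp = 100,
			     .mss_cache = 1460, .bytes_acked = 2 * 1460 + 1 };

	toy_slow_start(&tp, 2);			/* aggressive mode: +2 */
	printf("cwnd after one ACK: %u\n", tp.snd_cwnd);
	return 0;
}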

+ 8 - 0
net/ipv4/sysctl_net_ipv4.c

@@ -645,6 +645,14 @@ ctl_table ipv4_table[] = {
 		.proc_handler	= &proc_tcp_congestion_control,
 		.strategy	= &sysctl_tcp_congestion_control,
 	},
+	{
+		.ctl_name	= NET_TCP_ABC,
+		.procname	= "tcp_abc",
+		.data		= &sysctl_tcp_abc,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
 
 	{ .ctl_name = 0 }
 };

+ 1 - 0
net/ipv4/tcp.c

@@ -1669,6 +1669,7 @@ int tcp_disconnect(struct sock *sk, int flags)
 	tp->packets_out = 0;
 	tp->snd_ssthresh = 0x7fffffff;
 	tp->snd_cwnd_cnt = 0;
+	tp->bytes_acked = 0;
 	tcp_set_ca_state(sk, TCP_CA_Open);
 	tcp_clear_retrans(tp);
 	inet_csk_delack_init(sk);

+ 20 - 11
net/ipv4/tcp_cong.c

@@ -192,17 +192,26 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight,
 	/* In "safe" area, increase. */
         if (tp->snd_cwnd <= tp->snd_ssthresh)
 		tcp_slow_start(tp);
-	else {
-		/* In dangerous area, increase slowly.
-		 * In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd
-		 */
-		if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
-			if (tp->snd_cwnd < tp->snd_cwnd_clamp)
-				tp->snd_cwnd++;
-			tp->snd_cwnd_cnt = 0;
-		} else
-			tp->snd_cwnd_cnt++;
-	}
+
+	/* In dangerous area, increase slowly. */
+	else if (sysctl_tcp_abc) {
+		/* RFC3465: Appropriate Byte Count
+		 * increase once for each full cwnd acked
+		 */
+		if (tp->bytes_acked >= tp->snd_cwnd*tp->mss_cache) {
+			tp->bytes_acked -= tp->snd_cwnd*tp->mss_cache;
+			if (tp->snd_cwnd < tp->snd_cwnd_clamp)
+				tp->snd_cwnd++;
+		}
+	} else {
+		/* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd */
+		if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
+			if (tp->snd_cwnd < tp->snd_cwnd_clamp)
+				tp->snd_cwnd++;
+			tp->snd_cwnd_cnt = 0;
+		} else
+			tp->snd_cwnd_cnt++;
+	}
 }
 EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid);
 

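For comparison, a toy version of the two congestion-avoidance paths in tcp_reno_cong_avoid() above: the classic counter credits one unit per ACK, while the ABC counter credits the ACKed bytes and opens the window once a full cwnd's worth has been acknowledged, so delayed ACKs no longer halve the growth rate. Names are illustrative, not the kernel's; clamping is omitted.

/* Toy comparison of the two congestion-avoidance counters above.
 * Both windows start at 10 segments and receive 20 delayed ACKs,
 * each acknowledging two full-sized segments. */
#include <stdio.h>

#define MSS 1460u

int main(void)
{
	unsigned int cwnd_reno = 10, cnt_reno = 0;	/* counts ACKs  */
	unsigned int cwnd_abc  = 10, bytes_acked = 0;	/* counts bytes */
	unsigned int i;

	for (i = 0; i < 20; i++) {
		/* Classic Reno: +1 segment after cwnd ACKs. */
		if (cnt_reno >= cwnd_reno) {
			cwnd_reno++;
			cnt_reno = 0;
		} else
			cnt_reno++;

		/* ABC: +1 segment after a full cwnd of bytes is ACKed,
		 * so a delayed ACK for two segments counts double. */
		bytes_acked += 2 * MSS;
		if (bytes_acked >= cwnd_abc * MSS) {
			bytes_acked -= cwnd_abc * MSS;
			cwnd_abc++;
		}
	}

	printf("reno cwnd=%u  abc cwnd=%u\n", cwnd_reno, cwnd_abc);
	return 0;
}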
+ 7 - 0
net/ipv4/tcp_input.c

@@ -89,6 +89,7 @@ int sysctl_tcp_frto;
 int sysctl_tcp_nometrics_save;
 
 int sysctl_tcp_moderate_rcvbuf = 1;
+int sysctl_tcp_abc = 1;
 
 #define FLAG_DATA		0x01 /* Incoming frame contained data.		*/
 #define FLAG_WIN_UPDATE		0x02 /* Incoming ACK was a window update.	*/
@@ -1247,6 +1248,7 @@ void tcp_enter_loss(struct sock *sk, int how)
 	tp->snd_cwnd_cnt   = 0;
 	tp->snd_cwnd_stamp = tcp_time_stamp;
 
+	tp->bytes_acked = 0;
 	tcp_clear_retrans(tp);
 
 	/* Push undo marker, if it was plain RTO and nothing
@@ -1904,6 +1906,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
 			TCP_ECN_queue_cwr(tp);
 		}
 
+		tp->bytes_acked = 0;
 		tp->snd_cwnd_cnt = 0;
 		tcp_set_ca_state(sk, TCP_CA_Recovery);
 	}
@@ -2310,6 +2313,9 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 	if (before(ack, prior_snd_una))
 		goto old_ack;
 
+	if (sysctl_tcp_abc && icsk->icsk_ca_state < TCP_CA_CWR)
+		tp->bytes_acked += ack - prior_snd_una;
+
 	if (!(flag&FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
 		/* Window is constant, pure forward advance.
 		 * No more checks are required.
@@ -4370,6 +4376,7 @@ discard:
 
 EXPORT_SYMBOL(sysctl_tcp_ecn);
 EXPORT_SYMBOL(sysctl_tcp_reordering);
+EXPORT_SYMBOL(sysctl_tcp_abc);
 EXPORT_SYMBOL(tcp_parse_options);
 EXPORT_SYMBOL(tcp_rcv_established);
 EXPORT_SYMBOL(tcp_rcv_state_process);

+ 1 - 0
net/ipv4/tcp_minisocks.c

@@ -380,6 +380,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 		 */
 		newtp->snd_cwnd = 2;
 		newtp->snd_cwnd_cnt = 0;
+		newtp->bytes_acked = 0;
 
 		newtp->frto_counter = 0;
 		newtp->frto_highmark = 0;