
tcp: add tcp_comp_sack_delay_ns sysctl

This per-netns sysctl allows fine-tuning of TCP SACK compression.

Its default value is 1,000,000 ns (1 ms), matching the TSO autosizing period.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
commit 6d82aa2420 (Eric Dumazet)
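
For context (an illustration, not part of the patch): like the other net.ipv4 sysctls, the new knob should appear as /proc/sys/net/ipv4/tcp_comp_sack_delay_ns in each network namespace. A minimal userspace sketch that reads the cap and, given sufficient privileges, halves it:

#include <stdio.h>

int main(void)
{
	const char *path = "/proc/sys/net/ipv4/tcp_comp_sack_delay_ns";
	unsigned long delay_ns;
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);
		return 1;
	}
	if (fscanf(f, "%lu", &delay_ns) != 1) {
		fclose(f);
		return 1;
	}
	fclose(f);
	printf("SACK compression delay cap: %lu ns\n", delay_ns);

	/* Illustration only: writing normally requires root. */
	f = fopen(path, "w");
	if (f) {
		fprintf(f, "%lu\n", delay_ns / 2);
		fclose(f);
	}
	return 0;
}

Since the field lives in struct netns_ipv4, a value written here only affects the writer's own network namespace.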

Documentation/networking/ip-sysctl.txt  +7 -0

@@ -525,6 +525,13 @@ tcp_rmem - vector of 3 INTEGERs: min, default, max
 tcp_sack - BOOLEAN
 	Enable select acknowledgments (SACKS).
 
+tcp_comp_sack_delay_ns - LONG INTEGER
+	TCP tries to reduce the number of SACKs sent, using a timer
+	based on 5% of SRTT, capped by this sysctl, in nanoseconds.
+	The default is 1 ms, based on the TSO autosizing period.
+
+	Default : 1,000,000 ns (1 ms)
+
 tcp_slow_start_after_idle - BOOLEAN
 	If set, provide RFC2861 behavior and time out the congestion
 	window after an idle period.  An idle period is defined at
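
To make the documented rule concrete, the sketch below mirrors it with plain C arithmetic: the delay is 5% of the RTT estimate, clamped to the sysctl value. The helper name and the plain microsecond/nanosecond units are illustrative; the kernel uses scaled fields (see the tcp_input.c hunk further down).

#include <stdio.h>

/* Hypothetical helper mirroring the documented rule:
 * delay = min(5% of the RTT estimate, tcp_comp_sack_delay_ns).
 */
static unsigned long sack_compress_delay_ns(unsigned long rtt_us,
					    unsigned long cap_ns)
{
	unsigned long five_pct_ns = rtt_us * 1000UL / 20UL; /* 5% of RTT, in ns */

	return five_pct_ns < cap_ns ? five_pct_ns : cap_ns;
}

int main(void)
{
	/* 40 ms RTT: 5% is 2 ms, so the default 1 ms cap wins. */
	printf("%lu ns\n", sack_compress_delay_ns(40000, 1000000)); /* 1000000 */
	/* 10 ms RTT: 5% is 0.5 ms, below the cap, so 0.5 ms is used. */
	printf("%lu ns\n", sack_compress_delay_ns(10000, 1000000)); /*  500000 */
	return 0;
}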

include/net/netns/ipv4.h  +1 -0

@@ -160,6 +160,7 @@ struct netns_ipv4 {
 	int sysctl_tcp_pacing_ca_ratio;
 	int sysctl_tcp_wmem[3];
 	int sysctl_tcp_rmem[3];
+	unsigned long sysctl_tcp_comp_sack_delay_ns;
 	struct inet_timewait_death_row tcp_death_row;
 	int sysctl_max_syn_backlog;
 	int sysctl_tcp_fastopen;

net/ipv4/sysctl_net_ipv4.c  +7 -0

@@ -1151,6 +1151,13 @@ static struct ctl_table ipv4_net_table[] = {
 		.proc_handler	= proc_dointvec_minmax,
 		.extra1		= &one,
 	},
+	{
+		.procname	= "tcp_comp_sack_delay_ns",
+		.data		= &init_net.ipv4.sysctl_tcp_comp_sack_delay_ns,
+		.maxlen		= sizeof(unsigned long),
+		.mode		= 0644,
+		.proc_handler	= proc_doulongvec_minmax,
+	},
 	{
 		.procname	= "udp_rmem_min",
 		.data		= &init_net.ipv4.sysctl_udp_rmem_min,

net/ipv4/tcp_input.c  +2 -2

@@ -5113,13 +5113,13 @@ send_now:
 	if (hrtimer_is_queued(&tp->compressed_ack_timer))
 		return;
 
-	/* compress ack timer : 5 % of rtt, but no more than 1 ms */
+	/* compress ack timer : 5 % of rtt, but no more than tcp_comp_sack_delay_ns */
 
 	rtt = tp->rcv_rtt_est.rtt_us;
 	if (tp->srtt_us && tp->srtt_us < rtt)
 		rtt = tp->srtt_us;
 
-	delay = min_t(unsigned long, NSEC_PER_MSEC,
+	delay = min_t(unsigned long, sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns,
 		      rtt * (NSEC_PER_USEC >> 3)/20);
 	sock_hold(sk);
 	hrtimer_start(&tp->compressed_ack_timer, ns_to_ktime(delay),
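
A note on the conversion factor above: tp->srtt_us and tp->rcv_rtt_est.rtt_us hold the RTT in microseconds scaled by 8 (left-shifted by 3), so multiplying by (NSEC_PER_USEC >> 3) undoes that scaling while converting to nanoseconds, and the division by 20 takes 5%. A standalone sketch of the arithmetic, assuming that <<3 scaling:

#include <stdio.h>

#define NSEC_PER_USEC 1000UL

int main(void)
{
	unsigned long rtt_us = 40000;    /* real RTT: 40 ms */
	unsigned long rtt = rtt_us << 3; /* stored form: RTT scaled by 8 */

	/* rtt * 125 / 20 == rtt_us * 1000 / 20 == 5% of the RTT in ns */
	unsigned long five_pct_ns = rtt * (NSEC_PER_USEC >> 3) / 20;

	printf("%lu ns\n", five_pct_ns); /* prints 2000000 ns (2 ms) */
	/* With the 1,000,000 ns default cap, the timer would be armed at 1 ms. */
	return 0;
}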

net/ipv4/tcp_ipv4.c  +1 -0

@@ -2572,6 +2572,7 @@ static int __net_init tcp_sk_init(struct net *net)
 		       init_net.ipv4.sysctl_tcp_wmem,
 		       sizeof(init_net.ipv4.sysctl_tcp_wmem));
 	}
+	net->ipv4.sysctl_tcp_comp_sack_delay_ns = NSEC_PER_MSEC;
 	net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
 	spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock);
 	net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 60 * 60;