浏览代码

tcp: add tcp_comp_sack_delay_ns sysctl

This per netns sysctl allows for TCP SACK compression fine-tuning.

Its default value is 1,000,000 ns (1 ms), chosen to match the TSO autosizing period.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Eric Dumazet 7 年之前
父节点
当前提交
6d82aa2420
共有 5 个文件被更改,包括 18 次插入和 2 次删除
  1. 7 0
      Documentation/networking/ip-sysctl.txt
  2. 1 0
      include/net/netns/ipv4.h
  3. 7 0
      net/ipv4/sysctl_net_ipv4.c
  4. 2 2
      net/ipv4/tcp_input.c
  5. 1 0
      net/ipv4/tcp_ipv4.c

+ 7 - 0
Documentation/networking/ip-sysctl.txt

@@ -525,6 +525,13 @@ tcp_rmem - vector of 3 INTEGERs: min, default, max
 tcp_sack - BOOLEAN
 tcp_sack - BOOLEAN
 	Enable selective acknowledgments (SACKS).
 	Enable selective acknowledgments (SACKS).
 
 
+tcp_comp_sack_delay_ns - LONG INTEGER
+	TCP tries to reduce the number of SACKs sent, using a timer
+	based on 5% of SRTT, capped by this sysctl, in nanoseconds.
+	The default is 1 ms, based on the TSO autosizing period.
+
+	Default : 1,000,000 ns (1 ms)
+
 tcp_slow_start_after_idle - BOOLEAN
 tcp_slow_start_after_idle - BOOLEAN
 	If set, provide RFC2861 behavior and time out the congestion
 	If set, provide RFC2861 behavior and time out the congestion
 	window after an idle period.  An idle period is defined at
 	window after an idle period.  An idle period is defined at

+ 1 - 0
include/net/netns/ipv4.h

@@ -160,6 +160,7 @@ struct netns_ipv4 {
 	int sysctl_tcp_pacing_ca_ratio;
 	int sysctl_tcp_pacing_ca_ratio;
 	int sysctl_tcp_wmem[3];
 	int sysctl_tcp_wmem[3];
 	int sysctl_tcp_rmem[3];
 	int sysctl_tcp_rmem[3];
+	unsigned long sysctl_tcp_comp_sack_delay_ns;
 	struct inet_timewait_death_row tcp_death_row;
 	struct inet_timewait_death_row tcp_death_row;
 	int sysctl_max_syn_backlog;
 	int sysctl_max_syn_backlog;
 	int sysctl_tcp_fastopen;
 	int sysctl_tcp_fastopen;

+ 7 - 0
net/ipv4/sysctl_net_ipv4.c

@@ -1151,6 +1151,13 @@ static struct ctl_table ipv4_net_table[] = {
 		.proc_handler	= proc_dointvec_minmax,
 		.proc_handler	= proc_dointvec_minmax,
 		.extra1		= &one,
 		.extra1		= &one,
 	},
 	},
+	{
+		.procname	= "tcp_comp_sack_delay_ns",
+		.data		= &init_net.ipv4.sysctl_tcp_comp_sack_delay_ns,
+		.maxlen		= sizeof(unsigned long),
+		.mode		= 0644,
+		.proc_handler	= proc_doulongvec_minmax,
+	},
 	{
 	{
 		.procname	= "udp_rmem_min",
 		.procname	= "udp_rmem_min",
 		.data		= &init_net.ipv4.sysctl_udp_rmem_min,
 		.data		= &init_net.ipv4.sysctl_udp_rmem_min,

+ 2 - 2
net/ipv4/tcp_input.c

@@ -5113,13 +5113,13 @@ send_now:
 	if (hrtimer_is_queued(&tp->compressed_ack_timer))
 	if (hrtimer_is_queued(&tp->compressed_ack_timer))
 		return;
 		return;
 
 
-	/* compress ack timer : 5 % of rtt, but no more than 1 ms */
+	/* compress ack timer : 5 % of rtt, but no more than tcp_comp_sack_delay_ns */
 
 
 	rtt = tp->rcv_rtt_est.rtt_us;
 	rtt = tp->rcv_rtt_est.rtt_us;
 	if (tp->srtt_us && tp->srtt_us < rtt)
 	if (tp->srtt_us && tp->srtt_us < rtt)
 		rtt = tp->srtt_us;
 		rtt = tp->srtt_us;
 
 
-	delay = min_t(unsigned long, NSEC_PER_MSEC,
+	delay = min_t(unsigned long, sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns,
 		      rtt * (NSEC_PER_USEC >> 3)/20);
 		      rtt * (NSEC_PER_USEC >> 3)/20);
 	sock_hold(sk);
 	sock_hold(sk);
 	hrtimer_start(&tp->compressed_ack_timer, ns_to_ktime(delay),
 	hrtimer_start(&tp->compressed_ack_timer, ns_to_ktime(delay),

+ 1 - 0
net/ipv4/tcp_ipv4.c

@@ -2572,6 +2572,7 @@ static int __net_init tcp_sk_init(struct net *net)
 		       init_net.ipv4.sysctl_tcp_wmem,
 		       init_net.ipv4.sysctl_tcp_wmem,
 		       sizeof(init_net.ipv4.sysctl_tcp_wmem));
 		       sizeof(init_net.ipv4.sysctl_tcp_wmem));
 	}
 	}
+	net->ipv4.sysctl_tcp_comp_sack_delay_ns = NSEC_PER_MSEC;
 	net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
 	net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
 	spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock);
 	spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock);
 	net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 60 * 60;
 	net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 60 * 60;