@@ -285,6 +285,8 @@ int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
 
 int sysctl_tcp_min_tso_segs __read_mostly = 2;
 
+int sysctl_tcp_autocorking __read_mostly = 1;
+
 struct percpu_counter tcp_orphan_count;
 EXPORT_SYMBOL_GPL(tcp_orphan_count);
 
@@ -619,19 +621,52 @@ static inline void tcp_mark_urg(struct tcp_sock *tp, int flags)
 		tp->snd_up = tp->write_seq;
 }
 
-static inline void tcp_push(struct sock *sk, int flags, int mss_now,
-			    int nonagle)
+/* If a not yet filled skb is pushed, do not send it if
+ * we have packets in Qdisc or NIC queues :
+ * Because TX completion will happen shortly, it gives a chance
+ * to coalesce future sendmsg() payload into this skb, without
+ * need for a timer, and with no latency trade off.
+ * As packets containing data payload have a bigger truesize
+ * than pure acks (dataless) packets, the last check prevents
+ * autocorking if we only have an ACK in Qdisc/NIC queues.
+ */
+static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb,
+				int size_goal)
 {
-	if (tcp_send_head(sk)) {
-		struct tcp_sock *tp = tcp_sk(sk);
+	return skb->len < size_goal &&
+	       sysctl_tcp_autocorking &&
+	       atomic_read(&sk->sk_wmem_alloc) > skb->truesize;
+}
+
+static void tcp_push(struct sock *sk, int flags, int mss_now,
+		     int nonagle, int size_goal)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct sk_buff *skb;
 
-		if (!(flags & MSG_MORE) || forced_push(tp))
-			tcp_mark_push(tp, tcp_write_queue_tail(sk));
+	if (!tcp_send_head(sk))
+		return;
+
+	skb = tcp_write_queue_tail(sk);
+	if (!(flags & MSG_MORE) || forced_push(tp))
+		tcp_mark_push(tp, skb);
+
+	tcp_mark_urg(tp, flags);
+
+	if (tcp_should_autocork(sk, skb, size_goal)) {
 
-		tcp_mark_urg(tp, flags);
-		__tcp_push_pending_frames(sk, mss_now,
-					  (flags & MSG_MORE) ? TCP_NAGLE_CORK : nonagle);
+		/* avoid atomic op if TSQ_THROTTLED bit is already set */
+		if (!test_bit(TSQ_THROTTLED, &tp->tsq_flags)) {
+			NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAUTOCORKING);
+			set_bit(TSQ_THROTTLED, &tp->tsq_flags);
+		}
+		return;
 	}
+
+	if (flags & MSG_MORE)
+		nonagle = TCP_NAGLE_CORK;
+
+	__tcp_push_pending_frames(sk, mss_now, nonagle);
 }
 
 static int tcp_splice_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
@@ -934,7 +969,8 @@ new_segment:
 wait_for_sndbuf:
 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
 wait_for_memory:
-		tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
+		tcp_push(sk, flags & ~MSG_MORE, mss_now,
+			 TCP_NAGLE_PUSH, size_goal);
 
 		if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
 			goto do_error;
@@ -944,7 +980,7 @@ wait_for_memory:
 
 out:
 	if (copied && !(flags & MSG_SENDPAGE_NOTLAST))
-		tcp_push(sk, flags, mss_now, tp->nonagle);
+		tcp_push(sk, flags, mss_now, tp->nonagle, size_goal);
 	return copied;
 
 do_error:
@@ -1225,7 +1261,8 @@ wait_for_sndbuf:
 			set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
 wait_for_memory:
 			if (copied)
-				tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
+				tcp_push(sk, flags & ~MSG_MORE, mss_now,
+					 TCP_NAGLE_PUSH, size_goal);
 
 			if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
 				goto do_error;
@@ -1236,7 +1273,7 @@ wait_for_memory:
 
 out:
 	if (copied)
-		tcp_push(sk, flags, mss_now, tp->nonagle, size_goal);
+		tcp_push(sk, flags, mss_now, tp->nonagle, size_goal);
 	release_sock(sk);
 	return copied + copied_syn;
 
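For context, a minimal userspace sketch of the behaviour the hunks above introduce; it is illustrative only and not part of the patch. It assumes the companion sysctl hunk (not shown in this file) exposes the new knob as /proc/sys/net/ipv4/tcp_autocorking, and that a listener is running on the hypothetical 127.0.0.1:12345 endpoint. With autocorking enabled, back-to-back small writes can be merged into skbs closer to size_goal whenever tcp_should_autocork() sees earlier data still queued in the Qdisc/NIC; each deferred push bumps the LINUX_MIB_TCPAUTOCORKING counter added above.

/* Hypothetical demo client: 100 back-to-back 100-byte writes on one
 * TCP socket.  With autocorking on (assumed net.ipv4.tcp_autocorking = 1),
 * many of these small writes may be coalesced into fewer, larger segments
 * while a previous skb still sits below the stack; the effect can be
 * observed with a packet capture or the autocorking SNMP counter.
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>

int main(void)
{
	struct sockaddr_in dst = {
		.sin_family = AF_INET,
		.sin_port   = htons(12345),	/* hypothetical test port */
	};
	char buf[100];
	int fd, i;

	inet_pton(AF_INET, "127.0.0.1", &dst.sin_addr);

	fd = socket(AF_INET, SOCK_STREAM, 0);
	if (fd < 0 || connect(fd, (struct sockaddr *)&dst, sizeof(dst)) < 0) {
		perror("socket/connect");
		return 1;
	}

	memset(buf, 'x', sizeof(buf));
	for (i = 0; i < 100; i++) {
		if (write(fd, buf, sizeof(buf)) < 0) {
			perror("write");
			break;
		}
	}

	close(fd);
	return 0;
}

Whether any given write is actually corked still depends on the conditions in tcp_should_autocork(): the tail skb must be below size_goal and more than that skb's truesize must still be charged to sk_wmem_alloc, so an otherwise idle connection keeps the old push-immediately behaviour.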