@@ -1514,6 +1514,18 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
 		if (sysctl_tcp_slow_start_after_idle &&
 		    (s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto)
 			tcp_cwnd_application_limited(sk);
+
+		/* The following conditions together indicate the starvation
+		 * is caused by insufficient sender buffer:
+		 * 1) just sent some data (see tcp_write_xmit)
+		 * 2) not cwnd limited (this else condition)
+		 * 3) no more data to send (null tcp_send_head)
+		 * 4) application is hitting buffer limit (SOCK_NOSPACE)
+		 */
+		if (!tcp_send_head(sk) && sk->sk_socket &&
+		    test_bit(SOCK_NOSPACE, &sk->sk_socket->flags) &&
+		    (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))
+			tcp_chrono_start(sk, TCP_CHRONO_SNDBUF_LIMITED);
 	}
 }
 
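The four conditions above single out stalls caused by the sender's own buffer: the write queue has fully drained, yet the application has signalled via SOCK_NOSPACE (set by sk_stream_wait_memory() when a send cannot obtain buffer space) that it wanted to queue more. The following user-space sketch reproduces exactly that workload; it is illustrative only, and the address, port and buffer sizes are made up:

/* Made-up demo: drive a TCP socket into the sndbuf-limited state by
 * writing through a deliberately small SO_SNDBUF until EAGAIN.
 */
#include <arpa/inet.h>
#include <fcntl.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	struct sockaddr_in addr = { .sin_family = AF_INET,
				    .sin_port = htons(9000) };
	char buf[4096] = { 0 };
	int sndbuf = 8 * 1024;	/* small buffer to force the stall */
	int fd = socket(AF_INET, SOCK_STREAM, 0);

	inet_pton(AF_INET, "192.0.2.1", &addr.sin_addr);
	setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &sndbuf, sizeof(sndbuf));
	if (connect(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0)
		return 1;
	fcntl(fd, F_SETFL, O_NONBLOCK);

	/* Once write() fails with EAGAIN the send buffer is full: the
	 * kernel sets SOCK_NOSPACE, and as soon as the queued data has
	 * been transmitted, conditions 1)-4) above all hold and the
	 * SNDBUF_LIMITED chrono starts.
	 */
	while (write(fd, buf, sizeof(buf)) > 0)
		;
	close(fd);
	return 0;
}
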
@@ -2081,6 +2093,47 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
 	return false;
 }
 
+static void tcp_chrono_set(struct tcp_sock *tp, const enum tcp_chrono new)
+{
+	const u32 now = tcp_time_stamp;
+
+	if (tp->chrono_type > TCP_CHRONO_UNSPEC)
+		tp->chrono_stat[tp->chrono_type - 1] += now - tp->chrono_start;
+	tp->chrono_start = now;
+	tp->chrono_type = new;
+}
+
+void tcp_chrono_start(struct sock *sk, const enum tcp_chrono type)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	/* If there are multiple conditions worthy of tracking in a
+	 * chronograph, then the highest priority enum takes precedence
+	 * over the other conditions: when something "more interesting"
+	 * starts happening, stop the previous chrono and start a new one.
+	 */
+	if (type > tp->chrono_type)
+		tcp_chrono_set(tp, type);
+}
+
+void tcp_chrono_stop(struct sock *sk, const enum tcp_chrono type)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+
+	/* There are multiple conditions worthy of tracking in a
+	 * chronograph, so the highest priority enum takes precedence
+	 * over the other conditions (see tcp_chrono_start). When a
+	 * condition stops, we stop all tracking if the write queue
+	 * has drained; otherwise we only stop the current ("most
+	 * interesting") chrono, falling back to the busy chrono.
+	 */
+	if (tcp_write_queue_empty(sk))
+		tcp_chrono_set(tp, TCP_CHRONO_UNSPEC);
+	else if (type == tp->chrono_type)
+		tcp_chrono_set(tp, TCP_CHRONO_BUSY);
+}
+
 /* This routine writes packets to the network. It advances the
  * send_head. This happens as incoming acks open up the remote
  * window for us.
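
Both helpers encode priority in the enum values themselves: tcp_chrono_start() promotes only when type > tp->chrono_type, and tcp_chrono_set() charges elapsed time to chrono_stat[chrono_type - 1], which requires TCP_CHRONO_UNSPEC to be the zero value that is never accumulated. A sketch of the header-side declaration this ordering implies (the actual enum lives in the include/net/tcp.h part of the series, not in this file; the comments are mine):

enum tcp_chrono {
	TCP_CHRONO_UNSPEC,		/* not tracked, never accumulated */
	TCP_CHRONO_BUSY,		/* data sitting in the write queue */
	TCP_CHRONO_RWND_LIMITED,	/* stalled by insufficient receive window */
	TCP_CHRONO_SNDBUF_LIMITED,	/* stalled by insufficient send buffer */
	__TCP_CHRONO_MAX,
};

With this ordering a sndbuf-limited stall preempts an rwnd-limited one, both preempt plain busy time, and chrono_stat[] needs only __TCP_CHRONO_MAX - 1 slots.
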
@@ -2103,7 +2156,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 	unsigned int tso_segs, sent_pkts;
 	int cwnd_quota;
 	int result;
-	bool is_cwnd_limited = false;
+	bool is_cwnd_limited = false, is_rwnd_limited = false;
 	u32 max_segs;
 
 	sent_pkts = 0;
@@ -2140,8 +2193,10 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 			break;
 		}
 
-		if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now)))
+		if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now))) {
+			is_rwnd_limited = true;
 			break;
+		}
 
 		if (tso_segs == 1) {
 			if (unlikely(!tcp_nagle_test(tp, skb, mss_now,
@@ -2186,6 +2241,11 @@ repair:
 			break;
 	}
 
+	if (is_rwnd_limited)
+		tcp_chrono_start(sk, TCP_CHRONO_RWND_LIMITED);
+	else
+		tcp_chrono_stop(sk, TCP_CHRONO_RWND_LIMITED);
+
 	if (likely(sent_pkts)) {
 		if (tcp_in_cwnd_reduction(sk))
 			tp->prr_out += sent_pkts;
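
Note that is_rwnd_limited is latched inside the send loop but acted on exactly once, after the loop, so the chrono flips at most once per tcp_write_xmit() call. A later patch in this series exports the accumulated chronos through TCP_INFO; assuming the field names that export adds (tcpi_busy_time, tcpi_rwnd_limited and tcpi_sndbuf_limited, all in microseconds), a user-space consumer could look like:

/* Sketch of a TCP_INFO consumer. The three fields assume the export
 * patch later in this series; they are not defined by this hunk.
 */
#include <linux/tcp.h>
#include <netinet/in.h>
#include <stdio.h>
#include <sys/socket.h>

static void print_chronos(int fd)
{
	struct tcp_info ti;
	socklen_t len = sizeof(ti);

	if (getsockopt(fd, IPPROTO_TCP, TCP_INFO, &ti, &len) < 0)
		return;
	printf("busy %llu us, rwnd-limited %llu us, sndbuf-limited %llu us\n",
	       (unsigned long long)ti.tcpi_busy_time,
	       (unsigned long long)ti.tcpi_rwnd_limited,
	       (unsigned long long)ti.tcpi_sndbuf_limited);
}

Once the full series landed, iproute2's ss -ti eventually learned to print the same three counters (busy/rwnd_limited/sndbuf_limited).
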
@@ -3298,6 +3358,8 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
 	fo->copied = space;
 
 	tcp_connect_queue_skb(sk, syn_data);
+	if (syn_data->len)
+		tcp_chrono_start(sk, TCP_CHRONO_BUSY);
 
 	err = tcp_transmit_skb(sk, syn_data, 1, sk->sk_allocation);
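
The tcp_send_syn_data() hook covers TCP Fast Open: when the SYN itself carries payload, the connection is busy from its very first segment, before any ACK has opened the window, so the busy chrono has to start here rather than in tcp_write_xmit(); the syn_data->len guard skips SYNs that ended up carrying no data. User space reaches this path with sendto(MSG_FASTOPEN), given client-side net.ipv4.tcp_fastopen support; a made-up client sketch:

/* Illustrative only: connect and send in one call so the payload can
 * ride on the SYN, reaching tcp_send_syn_data() with a non-empty
 * syn_data. The address and port are arbitrary.
 */
#include <arpa/inet.h>
#include <netinet/in.h>
#include <string.h>
#include <sys/socket.h>

int tfo_send(const char *msg)
{
	struct sockaddr_in addr = { .sin_family = AF_INET,
				    .sin_port = htons(443) };
	int fd = socket(AF_INET, SOCK_STREAM, 0);

	inet_pton(AF_INET, "192.0.2.1", &addr.sin_addr);
	/* implicit connect + first write in one call */
	return sendto(fd, msg, strlen(msg), MSG_FASTOPEN,
		      (struct sockaddr *)&addr, sizeof(addr));
}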