@@ -45,6 +45,22 @@
 
 #include <trace/events/tcp.h>
 
+/* Refresh clocks of a TCP socket,
+ * ensuring monotonically increasing values.
+ */
+void tcp_mstamp_refresh(struct tcp_sock *tp)
+{
+	u64 val = tcp_clock_ns();
+
+	/* departure time for next data packet */
+	if (val > tp->tcp_wstamp_ns)
+		tp->tcp_wstamp_ns = val;
+
+	val = div_u64(val, NSEC_PER_USEC);
+	if (val > tp->tcp_mstamp)
+		tp->tcp_mstamp = val;
+}
+
 static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 			   int push_one, gfp_t gfp);
 
@@ -977,28 +993,34 @@ enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer)
 	return HRTIMER_NORESTART;
 }
 
-static void tcp_internal_pacing(struct sock *sk, const struct sk_buff *skb)
+static void tcp_internal_pacing(struct sock *sk)
 {
-	u64 len_ns;
-	u32 rate;
-
 	if (!tcp_needs_internal_pacing(sk))
 		return;
-	rate = sk->sk_pacing_rate;
-	if (!rate || rate == ~0U)
-		return;
-
-	len_ns = (u64)skb->len * NSEC_PER_SEC;
-	do_div(len_ns, rate);
 	hrtimer_start(&tcp_sk(sk)->pacing_timer,
-		      ktime_add_ns(ktime_get(), len_ns),
+		      ns_to_ktime(tcp_sk(sk)->tcp_wstamp_ns),
 		      HRTIMER_MODE_ABS_PINNED_SOFT);
 	sock_hold(sk);
 }
 
-static void tcp_update_skb_after_send(struct tcp_sock *tp, struct sk_buff *skb)
+static void tcp_update_skb_after_send(struct sock *sk, struct sk_buff *skb)
 {
-	skb->skb_mstamp = tp->tcp_mstamp;
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	skb->skb_mstamp_ns = tp->tcp_wstamp_ns;
+	if (sk->sk_pacing_status != SK_PACING_NONE) {
+		u32 rate = sk->sk_pacing_rate;
+
+		/* Original sch_fq does not pace first 10 MSS
+		 * Note that tp->data_segs_out overflows after 2^32 packets,
+		 * this is a minor annoyance.
+		 */
+		if (rate != ~0U && rate && tp->data_segs_out >= 10) {
+			tp->tcp_wstamp_ns += div_u64((u64)skb->len * NSEC_PER_SEC, rate);
+
+			tcp_internal_pacing(sk);
+		}
+	}
 	list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue);
 }
 
@@ -1045,7 +1067,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
 		if (unlikely(!skb))
 			return -ENOBUFS;
 	}
-	skb->skb_mstamp = tp->tcp_mstamp;
+	skb->skb_mstamp_ns = tp->tcp_wstamp_ns;
 
 	inet = inet_sk(sk);
 	tcb = TCP_SKB_CB(skb);
@@ -1137,7 +1159,6 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
 		tcp_event_data_sent(tp, sk);
 		tp->data_segs_out += tcp_skb_pcount(skb);
 		tp->bytes_sent += skb->len - tcp_header_size;
-		tcp_internal_pacing(sk, skb);
 	}
 
 	if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
@@ -1149,8 +1170,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
 	skb_shinfo(skb)->gso_segs = tcp_skb_pcount(skb);
 	skb_shinfo(skb)->gso_size = tcp_skb_mss(skb);
 
-	/* Our usage of tstamp should remain private */
-	skb->tstamp = 0;
+	/* Leave earliest departure time in skb->tstamp (skb->skb_mstamp_ns) */
 
 	/* Cleanup our debris for IP stacks */
 	memset(skb->cb, 0, max(sizeof(struct inet_skb_parm),
@@ -1163,7 +1183,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
 		err = net_xmit_eval(err);
 	}
 	if (!err && oskb) {
-		tcp_update_skb_after_send(tp, oskb);
+		tcp_update_skb_after_send(sk, oskb);
 		tcp_rate_skb_sent(sk, oskb);
 	}
 	return err;
@@ -1966,7 +1986,7 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
 	head = tcp_rtx_queue_head(sk);
 	if (!head)
 		goto send_now;
-	age = tcp_stamp_us_delta(tp->tcp_mstamp, head->skb_mstamp);
+	age = tcp_stamp_us_delta(tp->tcp_mstamp, tcp_skb_timestamp_us(head));
 	/* If next ACK is likely to come too late (half srtt), do not defer */
 	if (age < (tp->srtt_us >> 4))
 		goto send_now;
@@ -2312,7 +2332,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 
 		if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) {
 			/* "skb_mstamp" is used as a start point for the retransmit timer */
-			tcp_update_skb_after_send(tp, skb);
+			tcp_update_skb_after_send(sk, skb);
 			goto repair; /* Skip network transmission */
 		}
 
@@ -2887,7 +2907,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
 		} tcp_skb_tsorted_restore(skb);
 
 		if (!err) {
-			tcp_update_skb_after_send(tp, skb);
+			tcp_update_skb_after_send(sk, skb);
 			tcp_rate_skb_sent(sk, skb);
 		}
 	} else {
@@ -3205,10 +3225,10 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
 	memset(&opts, 0, sizeof(opts));
 #ifdef CONFIG_SYN_COOKIES
 	if (unlikely(req->cookie_ts))
-		skb->skb_mstamp = cookie_init_timestamp(req);
+		skb->skb_mstamp_ns = cookie_init_timestamp(req);
 	else
 #endif
-		skb->skb_mstamp = tcp_clock_us();
+		skb->skb_mstamp_ns = tcp_clock_ns();
 
 #ifdef CONFIG_TCP_MD5SIG
 	rcu_read_lock();
@@ -3424,7 +3444,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
 
 	err = tcp_transmit_skb(sk, syn_data, 1, sk->sk_allocation);
 
-	syn->skb_mstamp = syn_data->skb_mstamp;
+	syn->skb_mstamp_ns = syn_data->skb_mstamp_ns;
 
 	/* Now full SYN+DATA was cloned and sent (or not),
 	 * remove the SYN from the original skb (syn_data)