@@ -103,7 +103,6 @@ int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2;
 #define FLAG_DATA_SACKED	0x20 /* New SACK. */
 #define FLAG_ECE		0x40 /* ECE in this ACK */
 #define FLAG_LOST_RETRANS	0x80 /* This ACK marks some retransmission lost */
-#define FLAG_SLOWPATH		0x100 /* Do not skip RFC checks for window update.*/
 #define FLAG_ORIG_SACK_ACKED	0x200 /* Never retransmitted data are (s)acked */
 #define FLAG_SND_UNA_ADVANCED	0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */
 #define FLAG_DSACKING_ACK	0x800 /* SACK blocks contained D-SACK info */
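This first hunk retires the FLAG_SLOWPATH bit: with header prediction gone, callers no longer need to tell tcp_ack() whether the full RFC window-update checks may be skipped, so every ACK takes the complete path. The 0x100 value is simply left unused; neighbouring flags are not renumbered. For context, header prediction worked by caching the expected raw header word in tp->pred_flags; the helper below is quoted from include/net/tcp.h as it stood before this patch (it goes away with the field, in hunks not shown in this tcp_input.c excerpt):

	static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd)
	{
		/* Cache the data offset, an ACK-only flag byte and the
		 * advertised window; an incoming header matching this word
		 * needed no further validation.
		 */
		tp->pred_flags = htonl((tp->tcp_header_len << 26) |
				       ntohl(TCP_FLAG_ACK) |
				       snd_wnd);
	}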
@@ -3367,12 +3366,6 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32
 	if (tp->snd_wnd != nwin) {
 		tp->snd_wnd = nwin;
 
-		/* Note, it is the only place, where
-		 * fast path is recovered for sending TCP.
-		 */
-		tp->pred_flags = 0;
-		tcp_fast_path_check(sk);
-
 		if (tcp_send_head(sk))
 			tcp_slow_start_after_idle_check(sk);
 
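As the removed comment notes, tcp_ack_update_window() was the only place that re-armed the fast path for the sending side. The helper it called is shown below, roughly as defined in include/net/tcp.h in kernels of this vintage (quoted for reference; the exact shape of the out-of-order-queue test varies by version). It re-enabled prediction only when there were no out-of-order segments, no urgent data, a non-zero receive window and spare receive buffer:

	static inline void tcp_fast_path_check(struct sock *sk)
	{
		struct tcp_sock *tp = tcp_sk(sk);

		if (RB_EMPTY_ROOT(&tp->out_of_order_queue) &&
		    tp->rcv_wnd &&
		    atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf &&
		    !tp->urg_data)
			tcp_fast_path_on(tp);
	}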
@@ -3554,6 +3547,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 	u32 lost = tp->lost;
 	int acked = 0; /* Number of packets newly acked */
 	int rexmit = REXMIT_NONE; /* Flag to (re)transmit to recover losses */
+	u32 ack_ev_flags = 0;
 
 	sack_state.first_sackt = 0;
 	sack_state.rate = &rs;
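The ack_ev_flags variable moves from the former slow-path block (see the next hunk) up to function scope, initialised to 0 rather than CA_ACK_SLOWPATH: once only one code path exists, the slow-path qualifier carries no information.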
@@ -3597,42 +3591,26 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 	if (flag & FLAG_UPDATE_TS_RECENT)
 		tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
 
-	if (!(flag & FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
-		/* Window is constant, pure forward advance.
-		 * No more checks are required.
-		 * Note, we use the fact that SND.UNA>=SND.WL2.
-		 */
-		tcp_update_wl(tp, ack_seq);
-		tcp_snd_una_update(tp, ack);
-		flag |= FLAG_WIN_UPDATE;
-
-		tcp_in_ack_event(sk, CA_ACK_WIN_UPDATE);
-
-		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPACKS);
-	} else {
-		u32 ack_ev_flags = CA_ACK_SLOWPATH;
-
-		if (ack_seq != TCP_SKB_CB(skb)->end_seq)
-			flag |= FLAG_DATA;
-		else
-			NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPUREACKS);
+	if (ack_seq != TCP_SKB_CB(skb)->end_seq)
+		flag |= FLAG_DATA;
+	else
+		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPUREACKS);
 
-		flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);
+	flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);
 
-		if (TCP_SKB_CB(skb)->sacked)
-			flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
-							&sack_state);
+	if (TCP_SKB_CB(skb)->sacked)
+		flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
+						&sack_state);
 
-		if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb))) {
-			flag |= FLAG_ECE;
-			ack_ev_flags |= CA_ACK_ECE;
-		}
+	if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb))) {
+		flag |= FLAG_ECE;
+		ack_ev_flags = CA_ACK_ECE;
+	}
 
-		if (flag & FLAG_WIN_UPDATE)
-			ack_ev_flags |= CA_ACK_WIN_UPDATE;
+	if (flag & FLAG_WIN_UPDATE)
+		ack_ev_flags |= CA_ACK_WIN_UPDATE;
 
-		tcp_in_ack_event(sk, ack_ev_flags);
-	}
+	tcp_in_ack_event(sk, ack_ev_flags);
 
 	/* We passed data and got it acked, remove any soft error
 	 * log. Something worked...
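tcp_ack() now unconditionally does what used to be the slow path: window update, SACK tagging and ECN-echo processing run for every ACK, and the predicted-ACK counter (LINUX_MIB_TCPHPACKS) disappears. Congestion-control modules still receive an event mask through tcp_in_ack_event(); a hypothetical module-side consumer is sketched below (example_in_ack_event and its comments are illustrative, not part of this patch), and any real module that tested CA_ACK_SLOWPATH needs a companion change:

	/* Hypothetical sketch of an .in_ack_event handler consuming the mask
	 * built above; only window-update and ECE remain meaningful. */
	static void example_in_ack_event(struct sock *sk, u32 ack_ev_flags)
	{
		if (ack_ev_flags & CA_ACK_WIN_UPDATE) {
			/* this ACK changed the peer's advertised window */
		}
		if (ack_ev_flags & CA_ACK_ECE) {
			/* this ACK carried an ECN echo: congestion signal */
		}
	}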
@@ -4398,8 +4376,6 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
 		return;
 	}
 
-	/* Disable header prediction. */
-	tp->pred_flags = 0;
 	inet_csk_schedule_ack(sk);
 
 	NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOQUEUE);
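An out-of-order arrival used to clear tp->pred_flags because the fast path only handled the exactly-next expected segment; until the hole was filled, everything had to take the slow path anyway. With no prediction left, there is nothing to disable here.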
@@ -4611,32 +4587,14 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 			goto out_of_window;
 
 		/* Ok. In sequence. In window. */
-		if (tp->ucopy.task == current &&
-		    tp->copied_seq == tp->rcv_nxt && tp->ucopy.len &&
-		    sock_owned_by_user(sk) && !tp->urg_data) {
-			int chunk = min_t(unsigned int, skb->len,
-					  tp->ucopy.len);
-
-			__set_current_state(TASK_RUNNING);
-
-			if (!skb_copy_datagram_msg(skb, 0, tp->ucopy.msg, chunk)) {
-				tp->ucopy.len -= chunk;
-				tp->copied_seq += chunk;
-				eaten = (chunk == skb->len);
-				tcp_rcv_space_adjust(sk);
-			}
-		}
-
-		if (eaten <= 0) {
 queue_and_out:
-			if (eaten < 0) {
-				if (skb_queue_len(&sk->sk_receive_queue) == 0)
-					sk_forced_mem_schedule(sk, skb->truesize);
-				else if (tcp_try_rmem_schedule(sk, skb, skb->truesize))
-					goto drop;
-			}
-			eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen);
+		if (eaten < 0) {
+			if (skb_queue_len(&sk->sk_receive_queue) == 0)
+				sk_forced_mem_schedule(sk, skb->truesize);
+			else if (tcp_try_rmem_schedule(sk, skb, skb->truesize))
+				goto drop;
 		}
+		eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen);
 		tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
 		if (skb->len)
 			tcp_event_data_recv(sk, skb);
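This hunk deletes the receive-side short cut that copied an in-sequence segment straight into a blocked reader's buffer via tp->ucopy; all in-sequence data now reaches userspace only through sk_receive_queue and tcp_recvmsg(). The memory-accounting block is kept, re-indented one level now that the wrapping if (eaten <= 0) test is gone: when the receive queue is empty, sk_forced_mem_schedule() charges the skb unconditionally so that the next expected segment is never dropped for accounting reasons.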
@@ -4656,8 +4614,6 @@ queue_and_out:
 		if (tp->rx_opt.num_sacks)
 			tcp_sack_remove(tp);
 
-		tcp_fast_path_check(sk);
-
 		if (eaten > 0)
 			kfree_skb_partial(skb, fragstolen);
 		if (!sock_flag(sk, SOCK_DEAD))
@@ -4983,7 +4939,6 @@ static int tcp_prune_queue(struct sock *sk)
 	NET_INC_STATS(sock_net(sk), LINUX_MIB_RCVPRUNED);
 
 	/* Massive buffer overcommit. */
-	tp->pred_flags = 0;
 	return -1;
 }
 
@@ -5155,9 +5110,6 @@ static void tcp_check_urg(struct sock *sk, const struct tcphdr *th)
 
 	tp->urg_data = TCP_URG_NOTYET;
 	tp->urg_seq = ptr;
-
-	/* Disable header prediction. */
-	tp->pred_flags = 0;
 }
 
 /* This is the 'fast' part of urgent handling. */
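Urgent data was one of the conditions that forced the slow path, since the fast path never examined the URG pointer; clearing pred_flags here is now moot.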
@@ -5186,26 +5138,6 @@ static void tcp_urg(struct sock *sk, struct sk_buff *skb, const struct tcphdr *t
 	}
 }
 
-static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-	int chunk = skb->len - hlen;
-	int err;
-
-	if (skb_csum_unnecessary(skb))
-		err = skb_copy_datagram_msg(skb, hlen, tp->ucopy.msg, chunk);
-	else
-		err = skb_copy_and_csum_datagram_msg(skb, hlen, tp->ucopy.msg);
-
-	if (!err) {
-		tp->ucopy.len -= chunk;
-		tp->copied_seq += chunk;
-		tcp_rcv_space_adjust(sk);
-	}
-
-	return err;
-}
-
 /* Accept RST for rcv_nxt - 1 after a FIN.
  * When tcp connections are abruptly terminated from Mac OSX (via ^C), a
  * FIN is sent followed by a RST packet. The RST is sent with the same
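tcp_copy_to_iovec()'s only caller is the header-prediction fast path in tcp_rcv_established(), removed two hunks below, so the helper can go as well.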
@@ -5336,26 +5268,6 @@ discard:
 
 /*
  * TCP receive function for the ESTABLISHED state.
- *
- * It is split into a fast path and a slow path. The fast path is
- * disabled when:
- *	- A zero window was announced from us - zero window probing
- *	  is only handled properly in the slow path.
- *	- Out of order segments arrived.
- *	- Urgent data is expected.
- *	- There is no buffer space left
- *	- Unexpected TCP flags/window values/header lengths are received
- *	  (detected by checking the TCP header against pred_flags)
- *	- Data is sent in both directions. Fast path only supports pure senders
- *	  or pure receivers (this means either the sequence number or the ack
- *	  value must stay constant)
- *	- Unexpected TCP option.
- *
- * When these conditions are not satisfied it drops into a standard
- * receive procedure patterned after RFC793 to handle all cases.
- * The first three cases are guaranteed by proper pred_flags setting,
- * the rest is checked inline. Fast processing is turned on in
- * tcp_data_queue when everything is OK.
 */
void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
			 const struct tcphdr *th)
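The function's documentation loses its description of the fast/slow split; the bullet list of conditions that disabled the fast path (zero window, out-of-order data, urgent data, memory pressure, unexpected headers, two-way data) corresponds to the set of pred_flags writers deleted elsewhere in this patch.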
@@ -5366,172 +5278,19 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 	tcp_mstamp_refresh(tp);
 	if (unlikely(!sk->sk_rx_dst))
 		inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb);
-	/*
-	 *	Header prediction.
-	 *	The code loosely follows the one in the famous
-	 *	"30 instruction TCP receive" Van Jacobson mail.
-	 *
-	 *	Van's trick is to deposit buffers into socket queue
-	 *	on a device interrupt, to call tcp_recv function
-	 *	on the receive process context and checksum and copy
-	 *	the buffer to user space. smart...
-	 *
-	 *	Our current scheme is not silly either but we take the
-	 *	extra cost of the net_bh soft interrupt processing...
-	 *	We do checksum and copy also but from device to kernel.
-	 */
 
 	tp->rx_opt.saw_tstamp = 0;
 
-	/*	pred_flags is 0xS?10 << 16 + snd_wnd
-	 *	if header_prediction is to be made
-	 *	'S' will always be tp->tcp_header_len >> 2
-	 *	'?' will be 0 for the fast path, otherwise pred_flags is 0 to
-	 *	turn it off (when there are holes in the receive
-	 *	 space for instance)
-	 *	PSH flag is ignored.
-	 */
-
-	if ((tcp_flag_word(th) & TCP_HP_BITS) == tp->pred_flags &&
-	    TCP_SKB_CB(skb)->seq == tp->rcv_nxt &&
-	    !after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt)) {
-		int tcp_header_len = tp->tcp_header_len;
-
-		/* Timestamp header prediction: tcp_header_len
-		 * is automatically equal to th->doff*4 due to pred_flags
-		 * match.
-		 */
-
-		/* Check timestamp */
-		if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) {
-			/* No? Slow path! */
-			if (!tcp_parse_aligned_timestamp(tp, th))
-				goto slow_path;
-
-			/* If PAWS failed, check it more carefully in slow path */
-			if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) < 0)
-				goto slow_path;
-
-			/* DO NOT update ts_recent here, if checksum fails
-			 * and timestamp was corrupted part, it will result
-			 * in a hung connection since we will drop all
-			 * future packets due to the PAWS test.
-			 */
-		}
-
-		if (len <= tcp_header_len) {
-			/* Bulk data transfer: sender */
-			if (len == tcp_header_len) {
-				/* Predicted packet is in window by definition.
-				 * seq == rcv_nxt and rcv_wup <= rcv_nxt.
-				 * Hence, check seq<=rcv_wup reduces to:
-				 */
-				if (tcp_header_len ==
-				    (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
-				    tp->rcv_nxt == tp->rcv_wup)
-					tcp_store_ts_recent(tp);
-
-				/* We know that such packets are checksummed
-				 * on entry.
-				 */
-				tcp_ack(sk, skb, 0);
-				__kfree_skb(skb);
-				tcp_data_snd_check(sk);
-				return;
-			} else { /* Header too small */
-				TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
-				goto discard;
-			}
-		} else {
-			int eaten = 0;
-			bool fragstolen = false;
-
-			if (tp->ucopy.task == current &&
-			    tp->copied_seq == tp->rcv_nxt &&
-			    len - tcp_header_len <= tp->ucopy.len &&
-			    sock_owned_by_user(sk)) {
-				__set_current_state(TASK_RUNNING);
-
-				if (!tcp_copy_to_iovec(sk, skb, tcp_header_len)) {
-					/* Predicted packet is in window by definition.
-					 * seq == rcv_nxt and rcv_wup <= rcv_nxt.
-					 * Hence, check seq<=rcv_wup reduces to:
-					 */
-					if (tcp_header_len ==
-					    (sizeof(struct tcphdr) +
-					     TCPOLEN_TSTAMP_ALIGNED) &&
-					    tp->rcv_nxt == tp->rcv_wup)
-						tcp_store_ts_recent(tp);
-
-					tcp_rcv_rtt_measure_ts(sk, skb);
-
-					__skb_pull(skb, tcp_header_len);
-					tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
-					NET_INC_STATS(sock_net(sk),
-						      LINUX_MIB_TCPHPHITSTOUSER);
-					eaten = 1;
-				}
-			}
-			if (!eaten) {
-				if (tcp_checksum_complete(skb))
-					goto csum_error;
-
-				if ((int)skb->truesize > sk->sk_forward_alloc)
-					goto step5;
-
-				/* Predicted packet is in window by definition.
-				 * seq == rcv_nxt and rcv_wup <= rcv_nxt.
-				 * Hence, check seq<=rcv_wup reduces to:
-				 */
-				if (tcp_header_len ==
-				    (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
-				    tp->rcv_nxt == tp->rcv_wup)
-					tcp_store_ts_recent(tp);
-
-				tcp_rcv_rtt_measure_ts(sk, skb);
-
-				NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS);
-
-				/* Bulk data transfer: receiver */
-				eaten = tcp_queue_rcv(sk, skb, tcp_header_len,
-						      &fragstolen);
-			}
-
-			tcp_event_data_recv(sk, skb);
-
-			if (TCP_SKB_CB(skb)->ack_seq != tp->snd_una) {
-				/* Well, only one small jumplet in fast path... */
-				tcp_ack(sk, skb, FLAG_DATA);
-				tcp_data_snd_check(sk);
-				if (!inet_csk_ack_scheduled(sk))
-					goto no_ack;
-			}
-
-			__tcp_ack_snd_check(sk, 0);
-no_ack:
-			if (eaten)
-				kfree_skb_partial(skb, fragstolen);
-			sk->sk_data_ready(sk);
-			return;
-		}
-	}
-
-slow_path:
 	if (len < (th->doff << 2) || tcp_checksum_complete(skb))
 		goto csum_error;
 
 	if (!th->ack && !th->rst && !th->syn)
 		goto discard;
 
-	/*
-	 *	Standard slow path.
-	 */
-
 	if (!tcp_validate_incoming(sk, skb, th, 1))
 		return;
 
-step5:
-	if (tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT) < 0)
+	if (tcp_ack(sk, skb, FLAG_UPDATE_TS_RECENT) < 0)
 		goto discard;
 
 	tcp_rcv_rtt_measure_ts(sk, skb);
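This is the heart of the patch: the Van Jacobson header-prediction fast path is removed wholesale, together with the slow_path and step5 labels, so every established-state segment now runs through tcp_validate_incoming() and a full tcp_ack(). For reference, the gate it used is restated below; TCP_HP_BITS is quoted from include/net/tcp.h of the same vintage and the conditional from the removed lines above (neither is part of the resulting file):

	/* A raw header word matched pred_flags iff the data offset, the flag
	 * byte (only ACK set; PSH is masked out) and the advertised window
	 * were all exactly as predicted. */
	#define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH))

	if ((tcp_flag_word(th) & TCP_HP_BITS) == tp->pred_flags &&
	    TCP_SKB_CB(skb)->seq == tp->rcv_nxt &&
	    !after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt))
		/* ... take the fast path ... */

In the removed comment's notation, pred_flags was "0xS?10 << 16 + snd_wnd": S is the data offset (tp->tcp_header_len >> 2), the 0x10 nibble is the bare ACK flag, and the low 16 bits are the unscaled send window.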
@@ -5585,11 +5344,10 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
 	if (sock_flag(sk, SOCK_KEEPOPEN))
 		inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp));
 
-	if (!tp->rx_opt.snd_wscale)
-		__tcp_fast_path_on(tp, tp->snd_wnd);
-	else
-		tp->pred_flags = 0;
-
+	if (!sock_flag(sk, SOCK_DEAD)) {
+		sk->sk_state_change(sk);
+		sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
+	}
 }
 
 static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
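Connection setup on the active side no longer computes pred_flags. Previously, prediction could be armed here only when the peer did not use window scaling (an unscaled snd_wnd fits the 16-bit window field of the predicted header word); with scaling in use it stayed off until a later window update re-armed it. Both cases are now irrelevant.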
@@ -5718,7 +5476,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 		tcp_ecn_rcv_synack(tp, th);
 
 		tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
-		tcp_ack(sk, skb, FLAG_SLOWPATH);
+		tcp_ack(sk, skb, 0);
 
 		/* Ok.. it's good. Set up sequence numbers and
 		 * move to established.
|
return 0;
|
|
|
|
|
|
/* step 5: check the ACK field */
|
|
|
- acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH |
|
|
|
- FLAG_UPDATE_TS_RECENT |
|
|
|
+
|
|
|
+ acceptable = tcp_ack(sk, skb, FLAG_UPDATE_TS_RECENT |
|
|
|
FLAG_NO_CHALLENGE_ACK) > 0;
|
|
|
|
|
|
if (!acceptable) {
|
|
|
@@ -6023,7 +5781,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 		tp->lsndtime = tcp_jiffies32;
 
 		tcp_initialize_rcv_mss(sk);
-		tcp_fast_path_on(tp);
 		break;
 
 	case TCP_FIN_WAIT1: {
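The final hunk drops the call that armed prediction when a passively opened connection reaches ESTABLISHED, the last pred_flags writer in this file. For completeness, the now-unused wrapper is quoted below from include/net/tcp.h of this era; that header's helpers and the pred_flags field itself would be expected to be removed alongside this change, though those hunks are not part of this excerpt:

	static inline void tcp_fast_path_on(struct tcp_sock *tp)
	{
		/* Scale the send window down before caching it: pred_flags
		 * stores the window exactly as it appears on the wire. */
		__tcp_fast_path_on(tp, tp->snd_wnd >> tp->rx_opt.snd_wscale);
	}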