@@ -4358,6 +4358,23 @@ static bool tcp_try_coalesce(struct sock *sk,
         return true;
 }
 
+static bool tcp_ooo_try_coalesce(struct sock *sk,
+                                 struct sk_buff *to,
+                                 struct sk_buff *from,
+                                 bool *fragstolen)
+{
+        bool res = tcp_try_coalesce(sk, to, from, fragstolen);
+
+        /* In case tcp_drop() is called later, update to->gso_segs */
+        if (res) {
+                u32 gso_segs = max_t(u16, 1, skb_shinfo(to)->gso_segs) +
+                               max_t(u16, 1, skb_shinfo(from)->gso_segs);
+
+                skb_shinfo(to)->gso_segs = min_t(u32, gso_segs, 0xFFFF);
+        }
+        return res;
+}
+
 static void tcp_drop(struct sock *sk, struct sk_buff *skb)
 {
         sk_drops_add(sk, skb);
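The new helper only differs from tcp_try_coalesce() in that it keeps to->gso_segs meaningful after a merge, so a later tcp_drop() on the coalesced skb accounts for every segment it absorbed. A minimal userspace sketch of that arithmetic, assuming plain integer types in place of skb_shared_info (coalesced_gso_segs is an illustrative name, not kernel API):

#include <stdint.h>
#include <stdio.h>

/* Model of the gso_segs update in tcp_ooo_try_coalesce(): each skb counts
 * as at least one segment, and the sum is clamped to the 16-bit field.
 */
static uint16_t coalesced_gso_segs(uint16_t to_segs, uint16_t from_segs)
{
        uint32_t sum = (to_segs ? to_segs : 1) +
                       (uint32_t)(from_segs ? from_segs : 1);

        return sum > 0xFFFF ? 0xFFFF : (uint16_t)sum;
}

int main(void)
{
        printf("%u\n", coalesced_gso_segs(0, 0));      /* 2: both sides count as 1 */
        printf("%u\n", coalesced_gso_segs(65535, 3));  /* 65535: clamped */
        return 0;
}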
@@ -4481,8 +4498,8 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
         /* In the typical case, we are adding an skb to the end of the list.
          * Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup.
          */
-        if (tcp_try_coalesce(sk, tp->ooo_last_skb,
-                             skb, &fragstolen)) {
+        if (tcp_ooo_try_coalesce(sk, tp->ooo_last_skb,
+                                 skb, &fragstolen)) {
 coalesce_done:
                 tcp_grow_window(sk, skb);
                 kfree_skb_partial(skb, fragstolen);
@@ -4510,7 +4527,7 @@ coalesce_done:
                                 /* All the bits are present. Drop. */
                                 NET_INC_STATS(sock_net(sk),
                                               LINUX_MIB_TCPOFOMERGE);
-                                __kfree_skb(skb);
+                                tcp_drop(sk, skb);
                                 skb = NULL;
                                 tcp_dsack_set(sk, seq, end_seq);
                                 goto add_sack;
@@ -4529,11 +4546,11 @@ coalesce_done:
                                                  TCP_SKB_CB(skb1)->end_seq);
                                 NET_INC_STATS(sock_net(sk),
                                               LINUX_MIB_TCPOFOMERGE);
-                                __kfree_skb(skb1);
+                                tcp_drop(sk, skb1);
                                 goto merge_right;
                         }
-                } else if (tcp_try_coalesce(sk, skb1,
-                                            skb, &fragstolen)) {
+                } else if (tcp_ooo_try_coalesce(sk, skb1,
+                                                skb, &fragstolen)) {
                         goto coalesce_done;
                 }
                 p = &parent->rb_right;
@@ -4902,6 +4919,7 @@ end:
 static void tcp_collapse_ofo_queue(struct sock *sk)
 {
         struct tcp_sock *tp = tcp_sk(sk);
+        u32 range_truesize, sum_tiny = 0;
         struct sk_buff *skb, *head;
         u32 start, end;
 
@@ -4913,6 +4931,7 @@ new_range:
         }
         start = TCP_SKB_CB(skb)->seq;
         end = TCP_SKB_CB(skb)->end_seq;
+        range_truesize = skb->truesize;
 
         for (head = skb;;) {
                 skb = skb_rb_next(skb);
@@ -4923,11 +4942,20 @@ new_range:
                 if (!skb ||
                     after(TCP_SKB_CB(skb)->seq, end) ||
                     before(TCP_SKB_CB(skb)->end_seq, start)) {
-                        tcp_collapse(sk, NULL, &tp->out_of_order_queue,
-                                     head, skb, start, end);
+                        /* Do not attempt collapsing tiny skbs */
+                        if (range_truesize != head->truesize ||
+                            end - start >= SKB_WITH_OVERHEAD(SK_MEM_QUANTUM)) {
+                                tcp_collapse(sk, NULL, &tp->out_of_order_queue,
+                                             head, skb, start, end);
+                        } else {
+                                sum_tiny += range_truesize;
+                                if (sum_tiny > sk->sk_rcvbuf >> 3)
+                                        return;
+                        }
                         goto new_range;
                 }
 
+                range_truesize += skb->truesize;
                 if (unlikely(before(TCP_SKB_CB(skb)->seq, start)))
                         start = TCP_SKB_CB(skb)->seq;
                 if (after(TCP_SKB_CB(skb)->end_seq, end))
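The collapse pass above now skips ranges made of a single small skb: range_truesize only stays equal to head->truesize when no further skb was merged into the range, and a range narrower than SKB_WITH_OVERHEAD(SK_MEM_QUANTUM) is not worth rebuilding; once such skipped ranges exceed sk_rcvbuf/8, the pass gives up entirely. A hedged userspace sketch of that predicate (RANGE_MIN_SPAN and the function name are illustrative stand-ins, and the constant only approximates SKB_WITH_OVERHEAD(SK_MEM_QUANTUM)):

#include <stdbool.h>
#include <stdio.h>

/* Roughly one page minus skb_shared_info overhead; arch dependent in reality. */
#define RANGE_MIN_SPAN 3776

/* Collapse only if the range holds more than the head skb, or if it spans
 * a reasonably large stretch of sequence space.
 */
static bool range_worth_collapsing(unsigned int range_truesize,
                                   unsigned int head_truesize,
                                   unsigned int start, unsigned int end)
{
        return range_truesize != head_truesize ||
               end - start >= RANGE_MIN_SPAN;
}

int main(void)
{
        /* single tiny skb: skipped, its truesize is added to sum_tiny instead */
        printf("%d\n", range_worth_collapsing(704, 704, 1000, 1001));
        /* two skbs merged into one range: collapsing can actually reclaim memory */
        printf("%d\n", range_worth_collapsing(1408, 704, 1000, 1002));
        return 0;
}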
@@ -4942,6 +4970,7 @@ new_range:
  * 2) not add too big latencies if thousands of packets sit there.
  *    (But if application shrinks SO_RCVBUF, we could still end up
  *    freeing whole queue here)
+ * 3) Drop at least 12.5 % of sk_rcvbuf to avoid malicious attacks.
  *
  * Return true if queue has shrunk.
  */
@@ -4949,20 +4978,26 @@ static bool tcp_prune_ofo_queue(struct sock *sk)
 {
         struct tcp_sock *tp = tcp_sk(sk);
         struct rb_node *node, *prev;
+        int goal;
 
         if (RB_EMPTY_ROOT(&tp->out_of_order_queue))
                 return false;
 
         NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED);
+        goal = sk->sk_rcvbuf >> 3;
         node = &tp->ooo_last_skb->rbnode;
         do {
                 prev = rb_prev(node);
                 rb_erase(node, &tp->out_of_order_queue);
+                goal -= rb_to_skb(node)->truesize;
                 tcp_drop(sk, rb_to_skb(node));
-                sk_mem_reclaim(sk);
-                if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
-                    !tcp_under_memory_pressure(sk))
-                        break;
+                if (!prev || goal <= 0) {
+                        sk_mem_reclaim(sk);
+                        if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
+                            !tcp_under_memory_pressure(sk))
+                                break;
+                        goal = sk->sk_rcvbuf >> 3;
+                }
                 node = prev;
         } while (node);
         tp->ooo_last_skb = rb_to_skb(prev);
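With the goal counter, tcp_prune_ofo_queue() frees skbs from the tail of the out-of-order tree in batches of at least sk_rcvbuf/8 bytes of truesize before paying for sk_mem_reclaim() and re-checking receive memory, instead of reclaiming after every single skb. A rough userspace model of that batching, assuming a plain array in place of the rbtree (prune_ofo_model and its parameters are illustrative names):

#include <stdio.h>

/* Walk the queue from its tail and only reclaim/re-check after at least
 * sk_rcvbuf/8 bytes of truesize have been dropped (or the queue is empty).
 * The real code additionally breaks out of the loop once sk_rmem_alloc has
 * fallen back under sk_rcvbuf and there is no memory pressure.
 */
static void prune_ofo_model(const int *truesize, int n, int sk_rcvbuf)
{
        int goal = sk_rcvbuf >> 3;      /* drop at least 12.5% per batch */

        for (int i = n - 1; i >= 0; i--) {
                goal -= truesize[i];
                printf("drop skb %d (truesize %d)\n", i, truesize[i]);
                if (i == 0 || goal <= 0) {
                        printf("  reclaim + re-check rmem here\n");
                        goal = sk_rcvbuf >> 3;
                }
        }
}

int main(void)
{
        const int truesize[] = { 704, 704, 704, 704, 704, 704, 704, 704 };

        prune_ofo_model(truesize, 8, 16384);    /* goal = 2048 per batch */
        return 0;
}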
@@ -4997,6 +5032,9 @@ static int tcp_prune_queue(struct sock *sk)
         else if (tcp_under_memory_pressure(sk))
                 tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
 
+        if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
+                return 0;
+
         tcp_collapse_ofo_queue(sk);
         if (!skb_queue_empty(&sk->sk_receive_queue))
                 tcp_collapse(sk, &sk->sk_receive_queue, NULL,