@@ -769,25 +769,26 @@ static void tcp_tasklet_func(unsigned long data)
 		list_del(&tp->tsq_node);
 
 		sk = (struct sock *)tp;
-		bh_lock_sock(sk);
-
-		if (!sock_owned_by_user(sk)) {
-			tcp_tsq_handler(sk);
-		} else {
-			/* defer the work to tcp_release_cb() */
-			set_bit(TCP_TSQ_DEFERRED, &tp->tsq_flags);
+		clear_bit(TSQ_QUEUED, &sk->sk_tsq_flags);
+
+		if (!sk->sk_lock.owned &&
+		    test_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags)) {
+			bh_lock_sock(sk);
+			if (!sock_owned_by_user(sk)) {
+				clear_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags);
+				tcp_tsq_handler(sk);
+			}
+			bh_unlock_sock(sk);
 		}
-		bh_unlock_sock(sk);
 
-		clear_bit(TSQ_QUEUED, &tp->tsq_flags);
 		sk_free(sk);
 	}
 }
 
-#define TCP_DEFERRED_ALL ((1UL << TCP_TSQ_DEFERRED) |		\
-			  (1UL << TCP_WRITE_TIMER_DEFERRED) |	\
-			  (1UL << TCP_DELACK_TIMER_DEFERRED) |	\
-			  (1UL << TCP_MTU_REDUCED_DEFERRED))
+#define TCP_DEFERRED_ALL (TCPF_TSQ_DEFERRED |		\
+			  TCPF_WRITE_TIMER_DEFERRED |	\
+			  TCPF_DELACK_TIMER_DEFERRED |	\
+			  TCPF_MTU_REDUCED_DEFERRED)
 /**
  * tcp_release_cb - tcp release_sock() callback
  * @sk: socket
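
The hunk above changes tcp_tasklet_func() so it no longer takes the socket lock unconditionally: it first peeks at sk->sk_lock.owned and at the TCP_TSQ_DEFERRED bit, and only locks (and re-checks under the lock) when there is deferred work the tasklet can actually run; otherwise the owner's release callback will do it. The stand-alone user-space sketch below illustrates that check-then-lock-then-recheck pattern; the pthread mutex and C11 atomics stand in for bh_lock_sock() and the socket flag word, and every name in it is illustrative rather than a kernel symbol.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define F_DEFERRED (1UL << 0)

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static atomic_bool owned_by_user;          /* analogue of sk->sk_lock.owned */
static atomic_ulong flags = F_DEFERRED;    /* analogue of sk->sk_tsq_flags */

static void handle_deferred(void)
{
	puts("running deferred work");
}

static void tasklet_body(void)
{
	/* Lockless fast path: skip the lock when the owner will run the
	 * work itself from its release callback, or when nothing is pending.
	 */
	if (atomic_load(&owned_by_user) || !(atomic_load(&flags) & F_DEFERRED))
		return;

	pthread_mutex_lock(&lock);
	/* Re-check under the lock, and consume the flag only if we won it. */
	if (!atomic_load(&owned_by_user) &&
	    (atomic_fetch_and(&flags, ~F_DEFERRED) & F_DEFERRED))
		handle_deferred();
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	tasklet_body();	/* flag was set and lock uncontended: runs the work */
	tasklet_body();	/* fast path: flag already consumed, no locking */
	return 0;
}
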
@@ -797,18 +798,17 @@ static void tcp_tasklet_func(unsigned long data)
  */
 void tcp_release_cb(struct sock *sk)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
 	unsigned long flags, nflags;
 
 	/* perform an atomic operation only if at least one flag is set */
 	do {
-		flags = tp->tsq_flags;
+		flags = sk->sk_tsq_flags;
 		if (!(flags & TCP_DEFERRED_ALL))
 			return;
 		nflags = flags & ~TCP_DEFERRED_ALL;
-	} while (cmpxchg(&tp->tsq_flags, flags, nflags) != flags);
+	} while (cmpxchg(&sk->sk_tsq_flags, flags, nflags) != flags);
 
-	if (flags & (1UL << TCP_TSQ_DEFERRED))
+	if (flags & TCPF_TSQ_DEFERRED)
 		tcp_tsq_handler(sk);
 
 	/* Here begins the tricky part :
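
tcp_release_cb() above grabs every deferred-work bit in one shot: it snapshots the flag word, returns immediately if no deferred bit is set (so the common release path pays no atomic operation), and otherwise clears all of them with a single cmpxchg() before acting on the snapshot. Below is a minimal user-space sketch of that pattern, using C11 atomics in place of cmpxchg(); the flag names and the work they trigger are made up for the example.

#include <stdatomic.h>
#include <stdio.h>

#define F_TASKLET_DEFERRED (1UL << 0)
#define F_TIMER_DEFERRED   (1UL << 1)
#define F_DEFERRED_ALL     (F_TASKLET_DEFERRED | F_TIMER_DEFERRED)

static atomic_ulong tsq_flags = F_TIMER_DEFERRED;

static void release_cb(void)
{
	unsigned long flags, nflags;

	do {
		flags = atomic_load(&tsq_flags);
		if (!(flags & F_DEFERRED_ALL))
			return;	/* fast path: nothing deferred, no atomic op */
		nflags = flags & ~F_DEFERRED_ALL;
		/* One compare-and-swap claims every pending bit at once;
		 * on contention the loop re-reads and retries.
		 */
	} while (!atomic_compare_exchange_weak(&tsq_flags, &flags, nflags));

	/* 'flags' is the snapshot this caller is now responsible for. */
	if (flags & F_TASKLET_DEFERRED)
		puts("running deferred tasklet work");
	if (flags & F_TIMER_DEFERRED)
		puts("running deferred timer work");
}

int main(void)
{
	release_cb();	/* handles the timer bit */
	release_cb();	/* fast path: no bits set */
	return 0;
}
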
@@ -822,15 +822,15 @@ void tcp_release_cb(struct sock *sk)
 	 */
 	sock_release_ownership(sk);
 
-	if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED)) {
+	if (flags & TCPF_WRITE_TIMER_DEFERRED) {
 		tcp_write_timer_handler(sk);
 		__sock_put(sk);
 	}
-	if (flags & (1UL << TCP_DELACK_TIMER_DEFERRED)) {
+	if (flags & TCPF_DELACK_TIMER_DEFERRED) {
 		tcp_delack_timer_handler(sk);
 		__sock_put(sk);
 	}
-	if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED)) {
+	if (flags & TCPF_MTU_REDUCED_DEFERRED) {
 		inet_csk(sk)->icsk_af_ops->mtu_reduced(sk);
 		__sock_put(sk);
 	}
@@ -860,6 +860,7 @@ void tcp_wfree(struct sk_buff *skb)
 {
 	struct sock *sk = skb->sk;
 	struct tcp_sock *tp = tcp_sk(sk);
+	unsigned long flags, nval, oval;
 	int wmem;
 
 	/* Keep one reference on sk_wmem_alloc.
@@ -877,16 +878,25 @@ void tcp_wfree(struct sk_buff *skb)
 	if (wmem >= SKB_TRUESIZE(1) && this_cpu_ksoftirqd() == current)
 		goto out;
 
-	if (test_and_clear_bit(TSQ_THROTTLED, &tp->tsq_flags) &&
-	    !test_and_set_bit(TSQ_QUEUED, &tp->tsq_flags)) {
-		unsigned long flags;
+	for (oval = READ_ONCE(sk->sk_tsq_flags);; oval = nval) {
 		struct tsq_tasklet *tsq;
+		bool empty;
+
+		if (!(oval & TSQF_THROTTLED) || (oval & TSQF_QUEUED))
+			goto out;
+
+		nval = (oval & ~TSQF_THROTTLED) | TSQF_QUEUED | TCPF_TSQ_DEFERRED;
+		nval = cmpxchg(&sk->sk_tsq_flags, oval, nval);
+		if (nval != oval)
+			continue;
 
 		/* queue this socket to tasklet queue */
 		local_irq_save(flags);
 		tsq = this_cpu_ptr(&tsq_tasklet);
+		empty = list_empty(&tsq->head);
 		list_add(&tp->tsq_node, &tsq->head);
-		tasklet_schedule(&tsq->tasklet);
+		if (empty)
+			tasklet_schedule(&tsq->tasklet);
 		local_irq_restore(flags);
 		return;
 	}
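
The tcp_wfree() rework above replaces a test_and_clear_bit()/test_and_set_bit() pair (two locked operations) with a single cmpxchg() loop that moves the flag word from THROTTLED to QUEUED|DEFERRED in one transition, and it only schedules the tasklet when the per-CPU list was empty, since an already-scheduled tasklet will drain whatever is on the list. The sketch below mirrors just the flag transition in user-space C11 atomics; the constants and helper names are illustrative, not kernel symbols.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define F_THROTTLED (1UL << 0)
#define F_QUEUED    (1UL << 1)
#define F_DEFERRED  (1UL << 2)

static atomic_ulong tsq_flags = F_THROTTLED;

/* Returns true when this caller wins the right to queue the socket. */
static bool try_queue(void)
{
	unsigned long oval = atomic_load(&tsq_flags), nval;

	for (;;) {
		/* Only act when throttled and not already queued. */
		if (!(oval & F_THROTTLED) || (oval & F_QUEUED))
			return false;

		nval = (oval & ~F_THROTTLED) | F_QUEUED | F_DEFERRED;
		/* A single CAS performs the clear and both sets atomically;
		 * on failure 'oval' is refreshed and the loop retries.
		 */
		if (atomic_compare_exchange_weak(&tsq_flags, &oval, nval))
			return true;
	}
}

int main(void)
{
	printf("queued: %d\n", try_queue());	/* 1: won the transition */
	printf("queued: %d\n", try_queue());	/* 0: already queued */
	return 0;
}
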
@@ -1922,26 +1932,26 @@ static inline void tcp_mtu_check_reprobe(struct sock *sk)
  */
 static int tcp_mtu_probe(struct sock *sk)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
+	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb, *nskb, *next;
 	struct net *net = sock_net(sk);
-	int len;
 	int probe_size;
 	int size_needed;
-	int copy;
+	int copy, len;
 	int mss_now;
 	int interval;
 
 	/* Not currently probing/verifying,
 	 * not in recovery,
 	 * have enough cwnd, and
-	 * not SACKing (the variable headers throw things off) */
-	if (!icsk->icsk_mtup.enabled ||
-	    icsk->icsk_mtup.probe_size ||
-	    inet_csk(sk)->icsk_ca_state != TCP_CA_Open ||
-	    tp->snd_cwnd < 11 ||
-	    tp->rx_opt.num_sacks || tp->rx_opt.dsack)
+	 * not SACKing (the variable headers throw things off)
+	 */
+	if (likely(!icsk->icsk_mtup.enabled ||
+		   icsk->icsk_mtup.probe_size ||
+		   inet_csk(sk)->icsk_ca_state != TCP_CA_Open ||
+		   tp->snd_cwnd < 11 ||
+		   tp->rx_opt.num_sacks || tp->rx_opt.dsack))
 		return -1;
 
 	/* Use binary search for probe_size between tcp_mss_base,
@@ -2081,7 +2091,16 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
 	limit <<= factor;
 
 	if (atomic_read(&sk->sk_wmem_alloc) > limit) {
-		set_bit(TSQ_THROTTLED, &tcp_sk(sk)->tsq_flags);
+		/* Always send the 1st or 2nd skb in write queue.
+		 * No need to wait for TX completion to call us back,
+		 * after softirq/tasklet schedule.
+		 * This helps when TX completions are delayed too much.
+		 */
+		if (skb == sk->sk_write_queue.next ||
+		    skb->prev == sk->sk_write_queue.next)
+			return false;
+
+		set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags);
 		/* It is possible TX completion already happened
 		 * before we set TSQ_THROTTLED, so we must
 		 * test again the condition.
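
Two things happen in the tcp_small_queue_check() hunk above: the first or second skb in the write queue is always allowed out, so a flow whose TX completions are badly delayed cannot stall completely, and once the throttled bit is set the queue occupancy is tested again, because a TX completion may have drained the queue between the first test and the flag write. The short user-space sketch below shows that set-flag-then-retest idea; the names and the byte counter are invented for illustration.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define F_THROTTLED (1UL << 0)

static atomic_ulong tsq_flags;
static atomic_long bytes_in_flight = 200000;	/* data queued below the stack */

/* Returns true if the sender should stop and wait for completions. */
static bool small_queue_check(long limit)
{
	if (atomic_load(&bytes_in_flight) > limit) {
		atomic_fetch_or(&tsq_flags, F_THROTTLED);
		/* A completion may have drained the queue between the first
		 * test and setting the bit; without this second test the
		 * flag could be left set with nobody around to clear it,
		 * stalling the flow.
		 */
		if (atomic_load(&bytes_in_flight) > limit)
			return true;
	}
	return false;
}

int main(void)
{
	printf("throttle: %d\n", small_queue_check(128 * 1024));	/* 1 */
	atomic_store(&bytes_in_flight, 0);	/* completions arrived */
	printf("throttle: %d\n", small_queue_check(128 * 1024));	/* 0 */
	return 0;
}
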
@@ -2222,6 +2241,8 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 		    unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
 			break;
 
+		if (test_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags))
+			clear_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags);
 		if (tcp_small_queue_check(sk, skb, 0))
 			break;
 
@@ -3524,8 +3545,6 @@ void tcp_send_ack(struct sock *sk)
 	/* We do not want pure acks influencing TCP Small Queues or fq/pacing
 	 * too much.
 	 * SKB_TRUESIZE(max(1 .. 66, MAX_TCP_HEADER)) is unfortunately ~784
-	 * We also avoid tcp_wfree() overhead (cache line miss accessing
-	 * tp->tsq_flags) by using regular sock_wfree()
 	 */
 	skb_set_tcp_pure_ack(buff);
 