|
|
@@ -915,7 +915,9 @@ struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size)
|
|
|
uarg = (void *)skb->cb;
|
|
|
|
|
|
uarg->callback = sock_zerocopy_callback;
|
|
|
- uarg->desc = atomic_inc_return(&sk->sk_zckey) - 1;
|
|
|
+ uarg->id = ((u32)atomic_inc_return(&sk->sk_zckey)) - 1;
|
|
|
+ uarg->len = 1;
|
|
|
+ uarg->bytelen = size;
|
|
|
uarg->zerocopy = 1;
|
|
|
atomic_set(&uarg->refcnt, 0);
|
|
|
sock_hold(sk);
|
|
|
@@ -929,26 +931,101 @@ static inline struct sk_buff *skb_from_uarg(struct ubuf_info *uarg)
|
|
|
return container_of((void *)uarg, struct sk_buff, cb);
|
|
|
}
|
|
|
|
|
|
+struct ubuf_info *sock_zerocopy_realloc(struct sock *sk, size_t size, /* @size is added to uarg->bytelen */
|
|
|
+ struct ubuf_info *uarg) /* @uarg may be NULL: a new one is then allocated */
|
|
|
+{ /* Extend @uarg by one notification id when possible, else fall back to sock_zerocopy_alloc() */
|
|
|
+ if (uarg) {
|
|
|
+ const u32 byte_limit = 1 << 19; /* limit to a few TSO */
|
|
|
+ u32 bytelen, next;
|
|
|
+
|
|
|
+ /* realloc only when socket is locked (TCP, UDP cork),
|
|
|
+ * so uarg->len and sk_zckey access is serialized
|
|
|
+ */
|
|
|
+ if (!sock_owned_by_user(sk)) {
|
|
|
+ WARN_ON_ONCE(1);
|
|
|
+ return NULL; /* caller does not own the socket: warn once and hand out nothing */
|
|
|
+ }
|
|
|
+
|
|
|
+ bytelen = uarg->bytelen + size; /* byte total if this call were merged into @uarg */
|
|
|
+ if (uarg->len == USHRT_MAX - 1 || bytelen > byte_limit) { /* @uarg is full: id count or byte cap reached */
|
|
|
+ /* TCP can create new skb to attach new uarg */
|
|
|
+ if (sk->sk_type == SOCK_STREAM)
|
|
|
+ goto new_alloc;
|
|
|
+ return NULL; /* non-stream sockets get no uarg in this case */
|
|
|
+ }
|
|
|
+
|
|
|
+ next = (u32)atomic_read(&sk->sk_zckey); /* id that the next allocation would be given */
|
|
|
+ if ((u32)(uarg->id + uarg->len) == next) { /* @uarg's id range ends right before it (compared modulo 2^32) */
|
|
|
+ uarg->len++;
|
|
|
+ uarg->bytelen = bytelen;
|
|
|
+ atomic_set(&sk->sk_zckey, ++next); /* consume that id on behalf of @uarg */
|
|
|
+ return uarg;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+new_alloc: /* reached when @uarg is NULL, full on a stream socket, or not contiguous with sk_zckey */
|
|
|
+ return sock_zerocopy_alloc(sk, size);
|
|
|
+}
|
|
|
+EXPORT_SYMBOL_GPL(sock_zerocopy_realloc);
|
|
|
+
|
|
|
+static bool skb_zerocopy_notify_extend(struct sk_buff *skb, u32 lo, u16 len) /* try to append @len ids starting at @lo to @skb's stored range */
|
|
|
+{ /* returns true when @skb's range was extended, false when the caller's range could not be merged */
|
|
|
+ struct sock_exterr_skb *serr = SKB_EXT_ERR(skb);
|
|
|
+ u32 old_lo, old_hi;
|
|
|
+ u64 sum_len;
|
|
|
+
|
|
|
+ old_lo = serr->ee.ee_info; /* lower bound of the range already stored in @skb */
|
|
|
+ old_hi = serr->ee.ee_data; /* upper bound (inclusive) of that range */
|
|
|
+ sum_len = old_hi - old_lo + 1ULL + len; /* u32 difference may wrap; sum is then widened to 64 bits */
|
|
|
+
|
|
|
+ if (sum_len >= (1ULL << 32)) /* merged range would cover 2^32 ids or more */
|
|
|
+ return false;
|
|
|
+
|
|
|
+ if (lo != old_hi + 1) /* new range must begin directly after the stored one (u32 arithmetic) */
|
|
|
+ return false;
|
|
|
+
|
|
|
+ serr->ee.ee_data += len; /* move the stored upper bound forward by @len */
|
|
|
+ return true;
|
|
|
+}
|
|
|
+
|
|
|
void sock_zerocopy_callback(struct ubuf_info *uarg, bool success)
|
|
|
{
|
|
|
- struct sk_buff *skb = skb_from_uarg(uarg);
|
|
|
+ struct sk_buff *tail, *skb = skb_from_uarg(uarg);
|
|
|
struct sock_exterr_skb *serr;
|
|
|
struct sock *sk = skb->sk;
|
|
|
- u16 id = uarg->desc;
|
|
|
+ struct sk_buff_head *q;
|
|
|
+ unsigned long flags;
|
|
|
+ u32 lo, hi;
|
|
|
+ u16 len;
|
|
|
|
|
|
- if (sock_flag(sk, SOCK_DEAD))
|
|
|
+ /* if !len, there was only 1 call, and it was aborted
|
|
|
+ * so do not queue a completion notification
|
|
|
+ */
|
|
|
+ if (!uarg->len || sock_flag(sk, SOCK_DEAD))
|
|
|
goto release;
|
|
|
|
|
|
+ len = uarg->len;
|
|
|
+ lo = uarg->id;
|
|
|
+ hi = uarg->id + len - 1;
|
|
|
+
|
|
|
serr = SKB_EXT_ERR(skb);
|
|
|
memset(serr, 0, sizeof(*serr));
|
|
|
serr->ee.ee_errno = 0;
|
|
|
serr->ee.ee_origin = SO_EE_ORIGIN_ZEROCOPY;
|
|
|
- serr->ee.ee_data = id;
|
|
|
+ serr->ee.ee_data = hi;
|
|
|
+ serr->ee.ee_info = lo;
|
|
|
if (!success)
|
|
|
serr->ee.ee_code |= SO_EE_CODE_ZEROCOPY_COPIED;
|
|
|
|
|
|
- skb_queue_tail(&sk->sk_error_queue, skb);
|
|
|
- skb = NULL;
|
|
|
+ q = &sk->sk_error_queue;
|
|
|
+ spin_lock_irqsave(&q->lock, flags);
|
|
|
+ tail = skb_peek_tail(q);
|
|
|
+ if (!tail || SKB_EXT_ERR(tail)->ee.ee_origin != SO_EE_ORIGIN_ZEROCOPY ||
|
|
|
+ !skb_zerocopy_notify_extend(tail, lo, len)) {
|
|
|
+ __skb_queue_tail(q, skb);
|
|
|
+ skb = NULL;
|
|
|
+ }
|
|
|
+ spin_unlock_irqrestore(&q->lock, flags);
|
|
|
|
|
|
sk->sk_error_report(sk);
|
|
|
|
|
|
@@ -975,6 +1052,7 @@ void sock_zerocopy_put_abort(struct ubuf_info *uarg)
|
|
|
struct sock *sk = skb_from_uarg(uarg)->sk;
|
|
|
|
|
|
atomic_dec(&sk->sk_zckey);
|
|
|
+ uarg->len--;
|
|
|
|
|
|
/* sock_zerocopy_put expects a ref. Most sockets take one per
|
|
|
* skb, which is zero on abort. tcp_sendmsg holds one extra, to
|
|
|
@@ -995,9 +1073,16 @@ int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
|
|
|
struct msghdr *msg, int len,
|
|
|
struct ubuf_info *uarg)
|
|
|
{
|
|
|
+ struct ubuf_info *orig_uarg = skb_zcopy(skb);
|
|
|
struct iov_iter orig_iter = msg->msg_iter;
|
|
|
int err, orig_len = skb->len;
|
|
|
|
|
|
+ /* An skb can only point to one uarg. This edge case happens when
|
|
|
+ * TCP appends to an skb, but zerocopy_realloc triggered a new alloc.
|
|
|
+ */
|
|
|
+ if (orig_uarg && uarg != orig_uarg)
|
|
|
+ return -EEXIST;
|
|
|
+
|
|
|
err = __zerocopy_sg_from_iter(sk, skb, &msg->msg_iter, len);
|
|
|
if (err == -EFAULT || (err == -EMSGSIZE && skb->len == orig_len)) {
|
|
|
/* Streams do not free skb on error. Reset to prev state. */
|