@@ -347,94 +347,18 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size)
 }
 EXPORT_SYMBOL(build_skb);
 
-struct netdev_alloc_cache {
-	struct page_frag	frag;
-	/* we maintain a pagecount bias, so that we dont dirty cache line
-	 * containing page->_count every time we allocate a fragment.
-	 */
-	unsigned int		pagecnt_bias;
-};
-static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache);
-static DEFINE_PER_CPU(struct netdev_alloc_cache, napi_alloc_cache);
-
-static struct page *__page_frag_refill(struct netdev_alloc_cache *nc,
-				       gfp_t gfp_mask)
-{
-	const unsigned int order = NETDEV_FRAG_PAGE_MAX_ORDER;
-	struct page *page = NULL;
-	gfp_t gfp = gfp_mask;
-
-	if (order) {
-		gfp_mask |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY |
-			    __GFP_NOMEMALLOC;
-		page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, order);
-		nc->frag.size = PAGE_SIZE << (page ? order : 0);
-	}
-
-	if (unlikely(!page))
-		page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);
-
-	nc->frag.page = page;
-
-	return page;
-}
-
-static void *__alloc_page_frag(struct netdev_alloc_cache __percpu *cache,
-			       unsigned int fragsz, gfp_t gfp_mask)
-{
-	struct netdev_alloc_cache *nc = this_cpu_ptr(cache);
-	struct page *page = nc->frag.page;
-	unsigned int size;
-	int offset;
-
-	if (unlikely(!page)) {
-refill:
-		page = __page_frag_refill(nc, gfp_mask);
-		if (!page)
-			return NULL;
-
-		/* if size can vary use frag.size else just use PAGE_SIZE */
-		size = NETDEV_FRAG_PAGE_MAX_ORDER ? nc->frag.size : PAGE_SIZE;
-
-		/* Even if we own the page, we do not use atomic_set().
-		 * This would break get_page_unless_zero() users.
-		 */
-		atomic_add(size - 1, &page->_count);
-
-		/* reset page count bias and offset to start of new frag */
-		nc->pagecnt_bias = size;
-		nc->frag.offset = size;
-	}
-
-	offset = nc->frag.offset - fragsz;
-	if (unlikely(offset < 0)) {
-		if (!atomic_sub_and_test(nc->pagecnt_bias, &page->_count))
-			goto refill;
-
-		/* if size can vary use frag.size else just use PAGE_SIZE */
-		size = NETDEV_FRAG_PAGE_MAX_ORDER ? nc->frag.size : PAGE_SIZE;
-
-		/* OK, page count is 0, we can safely set it */
-		atomic_set(&page->_count, size);
-
-		/* reset page count bias and offset to start of new frag */
-		nc->pagecnt_bias = size;
-		offset = size - fragsz;
-	}
-
-	nc->pagecnt_bias--;
-	nc->frag.offset = offset;
-
-	return page_address(page) + offset;
-}
+static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache);
+static DEFINE_PER_CPU(struct page_frag_cache, napi_alloc_cache);
 
 static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
 {
+	struct page_frag_cache *nc;
 	unsigned long flags;
 	void *data;
 
 	local_irq_save(flags);
-	data = __alloc_page_frag(&netdev_alloc_cache, fragsz, gfp_mask);
+	nc = this_cpu_ptr(&netdev_alloc_cache);
+	data = __alloc_page_frag(nc, fragsz, gfp_mask);
 	local_irq_restore(flags);
 	return data;
 }
@@ -454,7 +378,9 @@ EXPORT_SYMBOL(netdev_alloc_frag);
 
 static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
 {
-	return __alloc_page_frag(&napi_alloc_cache, fragsz, gfp_mask);
+	struct page_frag_cache *nc = this_cpu_ptr(&napi_alloc_cache);
+
+	return __alloc_page_frag(nc, fragsz, gfp_mask);
 }
 
 void *napi_alloc_frag(unsigned int fragsz)
@@ -464,76 +390,64 @@ void *napi_alloc_frag(unsigned int fragsz)
 EXPORT_SYMBOL(napi_alloc_frag);
 
 /**
- *	__alloc_rx_skb - allocate an skbuff for rx
+ *	__netdev_alloc_skb - allocate an skbuff for rx on a specific device
+ *	@dev: network device to receive on
  *	@length: length to allocate
  *	@gfp_mask: get_free_pages mask, passed to alloc_skb
- *	@flags:	If SKB_ALLOC_RX is set, __GFP_MEMALLOC will be used for
- *		allocations in case we have to fallback to __alloc_skb()
- *		If SKB_ALLOC_NAPI is set, page fragment will be allocated
- *		from napi_cache instead of netdev_cache.
  *
  *	Allocate a new &sk_buff and assign it a usage count of one. The
- *	buffer has unspecified headroom built in. Users should allocate
+ *	buffer has NET_SKB_PAD headroom built in. Users should allocate
  *	the headroom they think they need without accounting for the
  *	built in space. The built in space is used for optimisations.
  *
  *	%NULL is returned if there is no free memory.
  */
-static struct sk_buff *__alloc_rx_skb(unsigned int length, gfp_t gfp_mask,
-				      int flags)
+struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len,
+				   gfp_t gfp_mask)
 {
-	struct sk_buff *skb = NULL;
-	unsigned int fragsz = SKB_DATA_ALIGN(length) +
-			      SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+	struct page_frag_cache *nc;
+	unsigned long flags;
+	struct sk_buff *skb;
+	bool pfmemalloc;
+	void *data;
 
-	if (fragsz <= PAGE_SIZE && !(gfp_mask & (__GFP_WAIT | GFP_DMA))) {
-		void *data;
+	len += NET_SKB_PAD;
 
-		if (sk_memalloc_socks())
-			gfp_mask |= __GFP_MEMALLOC;
+	if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) ||
+	    (gfp_mask & (__GFP_WAIT | GFP_DMA)))
+		return __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE);
 
-		data = (flags & SKB_ALLOC_NAPI) ?
-			__napi_alloc_frag(fragsz, gfp_mask) :
-			__netdev_alloc_frag(fragsz, gfp_mask);
+	len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+	len = SKB_DATA_ALIGN(len);
 
-		if (likely(data)) {
-			skb = build_skb(data, fragsz);
-			if (unlikely(!skb))
-				put_page(virt_to_head_page(data));
-		}
-	} else {
-		skb = __alloc_skb(length, gfp_mask,
-				  SKB_ALLOC_RX, NUMA_NO_NODE);
-	}
-	return skb;
-}
+	if (sk_memalloc_socks())
+		gfp_mask |= __GFP_MEMALLOC;
 
-/**
- *	__netdev_alloc_skb - allocate an skbuff for rx on a specific device
- *	@dev: network device to receive on
- *	@length: length to allocate
- *	@gfp_mask: get_free_pages mask, passed to alloc_skb
- *
- *	Allocate a new &sk_buff and assign it a usage count of one. The
- *	buffer has NET_SKB_PAD headroom built in. Users should allocate
- *	the headroom they think they need without accounting for the
- *	built in space. The built in space is used for optimisations.
- *
- *	%NULL is returned if there is no free memory.
- */
-struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
-				   unsigned int length, gfp_t gfp_mask)
-{
-	struct sk_buff *skb;
+	local_irq_save(flags);
 
-	length += NET_SKB_PAD;
-	skb = __alloc_rx_skb(length, gfp_mask, 0);
+	nc = this_cpu_ptr(&netdev_alloc_cache);
+	data = __alloc_page_frag(nc, len, gfp_mask);
+	pfmemalloc = nc->pfmemalloc;
 
-	if (likely(skb)) {
-		skb_reserve(skb, NET_SKB_PAD);
-		skb->dev = dev;
+	local_irq_restore(flags);
+
+	if (unlikely(!data))
+		return NULL;
+
+	skb = __build_skb(data, len);
+	if (unlikely(!skb)) {
+		skb_free_frag(data);
+		return NULL;
 	}
 
+	/* use OR instead of assignment to avoid clearing of bits in mask */
+	if (pfmemalloc)
+		skb->pfmemalloc = 1;
+	skb->head_frag = 1;
+
+	skb_reserve(skb, NET_SKB_PAD);
+	skb->dev = dev;
+
 	return skb;
 }
 EXPORT_SYMBOL(__netdev_alloc_skb);
@@ -551,19 +465,43 @@ EXPORT_SYMBOL(__netdev_alloc_skb);
  *
  *	%NULL is returned if there is no free memory.
  */
-struct sk_buff *__napi_alloc_skb(struct napi_struct *napi,
-				 unsigned int length, gfp_t gfp_mask)
+struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
+				 gfp_t gfp_mask)
 {
+	struct page_frag_cache *nc = this_cpu_ptr(&napi_alloc_cache);
 	struct sk_buff *skb;
+	void *data;
+
+	len += NET_SKB_PAD + NET_IP_ALIGN;
 
-	length += NET_SKB_PAD + NET_IP_ALIGN;
-	skb = __alloc_rx_skb(length, gfp_mask, SKB_ALLOC_NAPI);
+	if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) ||
+	    (gfp_mask & (__GFP_WAIT | GFP_DMA)))
+		return __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE);
 
-	if (likely(skb)) {
-		skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
-		skb->dev = napi->dev;
+	len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+	len = SKB_DATA_ALIGN(len);
+
+	if (sk_memalloc_socks())
+		gfp_mask |= __GFP_MEMALLOC;
+
+	data = __alloc_page_frag(nc, len, gfp_mask);
+	if (unlikely(!data))
+		return NULL;
+
+	skb = __build_skb(data, len);
+	if (unlikely(!skb)) {
+		skb_free_frag(data);
+		return NULL;
 	}
 
+	/* use OR instead of assignment to avoid clearing of bits in mask */
+	if (nc->pfmemalloc)
+		skb->pfmemalloc = 1;
+	skb->head_frag = 1;
+
+	skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
+	skb->dev = napi->dev;
+
 	return skb;
 }
 EXPORT_SYMBOL(__napi_alloc_skb);
@@ -611,10 +549,12 @@ static void skb_clone_fraglist(struct sk_buff *skb)
 
 static void skb_free_head(struct sk_buff *skb)
 {
+	unsigned char *head = skb->head;
+
 	if (skb->head_frag)
-		put_page(virt_to_head_page(skb->head));
+		skb_free_frag(head);
 	else
-		kfree(skb->head);
+		kfree(head);
 }
 
 static void skb_release_data(struct sk_buff *skb)