
Merge branch 'bpf-fix-cpu-and-devmap-teardown'

Jesper Dangaard Brouer says:

====================
Removing entries from cpumap and devmap goes through a number of
synchronization steps to make sure no new xdp_frames can be enqueued.
But there is a small chance that some xdp_frames remain which have not
been flushed/processed yet.  Flushing these during teardown happens
from RCU context and not, as usual, under RX NAPI context.

The optimization introduced in commit 389ab7f01af9 ("xdp: introduce
xdp_return_frame_rx_napi") missed that the flush operation can also
be called from RCU context.  Thus, we cannot always use the
xdp_return_frame_rx_napi call, which relies on the protection
provided by XDP RX running under NAPI (see the sketch after this
quote).

The samples/bpf xdp_redirect_cpu program has a --stress-mode, which is
adjusted here to make the issue easier to reproduce (verified by Red Hat QA).
====================
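
The core of the fix, applied to both cpumap and devmap in the diff below,
is to tell the bulk-flush helper whether it is running under RX NAPI. A
minimal, illustrative sketch of that pattern follows; return_frames() is a
made-up name for this note, while xdp_return_frame_rx_napi() and
xdp_return_frame() are the real helpers used in the hunks:

/* Illustrative sketch only -- mirrors the pattern in the hunks below. */
static void return_frames(struct xdp_frame **frames, int n, bool in_napi_ctx)
{
	int i;

	for (i = 0; i < n; i++) {
		if (likely(in_napi_ctx))
			/* Fast path: only valid inside the RX NAPI poll loop. */
			xdp_return_frame_rx_napi(frames[i]);
		else
			/* Teardown path (RCU context): use the generic return. */
			xdp_return_frame(frames[i]);
	}
}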

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Daniel Borkmann, 7 years ago
parent
commit 9c95420117
4 changed files with 21 additions and 14 deletions
  1. kernel/bpf/cpumap.c                  (+9 -6)
  2. kernel/bpf/devmap.c                  (+9 -5)
  3. samples/bpf/xdp_redirect_cpu_kern.c  (+1 -1)
  4. samples/bpf/xdp_redirect_cpu_user.c  (+2 -2)

kernel/bpf/cpumap.c (+9 -6)

@@ -69,7 +69,7 @@ struct bpf_cpu_map {
 };
 
 static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
-			     struct xdp_bulk_queue *bq);
+			     struct xdp_bulk_queue *bq, bool in_napi_ctx);
 
 static u64 cpu_map_bitmap_size(const union bpf_attr *attr)
 {
@@ -375,7 +375,7 @@ static void __cpu_map_entry_free(struct rcu_head *rcu)
 		struct xdp_bulk_queue *bq = per_cpu_ptr(rcpu->bulkq, cpu);
 
 		/* No concurrent bq_enqueue can run at this point */
-		bq_flush_to_queue(rcpu, bq);
+		bq_flush_to_queue(rcpu, bq, false);
 	}
 	free_percpu(rcpu->bulkq);
 	/* Cannot kthread_stop() here, last put free rcpu resources */
@@ -558,7 +558,7 @@ const struct bpf_map_ops cpu_map_ops = {
 };
 
 static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
-			     struct xdp_bulk_queue *bq)
+			     struct xdp_bulk_queue *bq, bool in_napi_ctx)
 {
 	unsigned int processed = 0, drops = 0;
 	const int to_cpu = rcpu->cpu;
@@ -578,7 +578,10 @@ static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
 		err = __ptr_ring_produce(q, xdpf);
 		if (err) {
 			drops++;
-			xdp_return_frame_rx_napi(xdpf);
+			if (likely(in_napi_ctx))
+				xdp_return_frame_rx_napi(xdpf);
+			else
+				xdp_return_frame(xdpf);
 		}
 		processed++;
 	}
@@ -598,7 +601,7 @@ static int bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf)
 	struct xdp_bulk_queue *bq = this_cpu_ptr(rcpu->bulkq);
 
 	if (unlikely(bq->count == CPU_MAP_BULK_SIZE))
-		bq_flush_to_queue(rcpu, bq);
+		bq_flush_to_queue(rcpu, bq, true);
 
 	/* Notice, xdp_buff/page MUST be queued here, long enough for
 	 * driver to code invoking us to finished, due to driver
@@ -661,7 +664,7 @@ void __cpu_map_flush(struct bpf_map *map)
 
 		/* Flush all frames in bulkq to real queue */
 		bq = this_cpu_ptr(rcpu->bulkq);
-		bq_flush_to_queue(rcpu, bq);
+		bq_flush_to_queue(rcpu, bq, true);
 
 		/* If already running, costs spin_lock_irqsave + smb_mb */
 		wake_up_process(rcpu->kthread);
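
For quick reference, the cpumap call sites touched above and the context
each runs in, as read from the hunks (the one-line context descriptions
are my reading, not text from the patch):

/* __cpu_map_entry_free(struct rcu_head *) -> bq_flush_to_queue(..., false)
 *     runs as an RCU callback during entry teardown, not under RX NAPI
 * bq_enqueue()                            -> bq_flush_to_queue(..., true)
 *     runs while frames are being redirected on the XDP RX path
 * __cpu_map_flush()                       -> bq_flush_to_queue(..., true)
 *     the end-of-poll flush, also on the XDP RX path
 */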

kernel/bpf/devmap.c (+9 -5)

@@ -217,7 +217,8 @@ void __dev_map_insert_ctx(struct bpf_map *map, u32 bit)
 }
 
 static int bq_xmit_all(struct bpf_dtab_netdev *obj,
-		       struct xdp_bulk_queue *bq, u32 flags)
+		       struct xdp_bulk_queue *bq, u32 flags,
+		       bool in_napi_ctx)
 {
 	struct net_device *dev = obj->dev;
 	int sent = 0, drops = 0, err = 0;
@@ -254,7 +255,10 @@ error:
 		struct xdp_frame *xdpf = bq->q[i];
 
 		/* RX path under NAPI protection, can return frames faster */
-		xdp_return_frame_rx_napi(xdpf);
+		if (likely(in_napi_ctx))
+			xdp_return_frame_rx_napi(xdpf);
+		else
+			xdp_return_frame(xdpf);
 		drops++;
 	}
 	goto out;
@@ -286,7 +290,7 @@ void __dev_map_flush(struct bpf_map *map)
 		__clear_bit(bit, bitmap);
 
 		bq = this_cpu_ptr(dev->bulkq);
-		bq_xmit_all(dev, bq, XDP_XMIT_FLUSH);
+		bq_xmit_all(dev, bq, XDP_XMIT_FLUSH, true);
 	}
 }
 
@@ -316,7 +320,7 @@ static int bq_enqueue(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf,
 	struct xdp_bulk_queue *bq = this_cpu_ptr(obj->bulkq);
 
 	if (unlikely(bq->count == DEV_MAP_BULK_SIZE))
-		bq_xmit_all(obj, bq, 0);
+		bq_xmit_all(obj, bq, 0, true);
 
 	/* Ingress dev_rx will be the same for all xdp_frame's in
 	 * bulk_queue, because bq stored per-CPU and must be flushed
@@ -385,7 +389,7 @@ static void dev_map_flush_old(struct bpf_dtab_netdev *dev)
 			__clear_bit(dev->bit, bitmap);
 
 			bq = per_cpu_ptr(dev->bulkq, cpu);
-			bq_xmit_all(dev, bq, XDP_XMIT_FLUSH);
+			bq_xmit_all(dev, bq, XDP_XMIT_FLUSH, false);
 		}
 	}
 }
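
The same split for devmap, again taken from the hunks above; only the
teardown-time flush loses the NAPI assumption (the context notes are my
reading of the call sites, not text from the patch):

/* __dev_map_flush()   -> bq_xmit_all(..., XDP_XMIT_FLUSH, true)   XDP RX / NAPI
 * bq_enqueue()        -> bq_xmit_all(..., 0, true)                XDP RX / NAPI
 * dev_map_flush_old() -> bq_xmit_all(..., XDP_XMIT_FLUSH, false)  entry teardown
 */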

samples/bpf/xdp_redirect_cpu_kern.c (+1 -1)

@@ -14,7 +14,7 @@
 #include <uapi/linux/bpf.h>
 #include "bpf_helpers.h"
 
-#define MAX_CPUS 12 /* WARNING - sync with _user.c */
+#define MAX_CPUS 64 /* WARNING - sync with _user.c */
 
 /* Special map type that can XDP_REDIRECT frames to another CPU */
 struct bpf_map_def SEC("maps") cpu_map = {

samples/bpf/xdp_redirect_cpu_user.c (+2 -2)

@@ -19,7 +19,7 @@ static const char *__doc__ =
 #include <arpa/inet.h>
 #include <linux/if_link.h>
 
-#define MAX_CPUS 12 /* WARNING - sync with _kern.c */
+#define MAX_CPUS 64 /* WARNING - sync with _kern.c */
 
 /* How many xdp_progs are defined in _kern.c */
 #define MAX_PROG 5
@@ -527,7 +527,7 @@ static void stress_cpumap(void)
 	 * procedure.
 	 */
 	create_cpu_entry(1,  1024, 0, false);
-	create_cpu_entry(1,   128, 0, false);
+	create_cpu_entry(1,     8, 0, false);
 	create_cpu_entry(1, 16000, 0, false);
 }
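
On the samples side, MAX_CPUS goes from 12 to 64 and the middle
stress-mode entry now uses a much smaller cpumap queue. A plausible
reading (treating create_cpu_entry()'s second argument as the queue size
is inferred from the values, not stated in this diff): cycling the same
CPU slot between queue sizes 1024, 8 and 16000 repeatedly tears down and
recreates the entry under load, and the tiny 8-slot ring makes it much
more likely that frames are still pending when the old entry is freed,
which is exactly the RCU-context flush this series fixes:

/* stress_cpumap() after this patch (values copied from the hunk above): */
create_cpu_entry(1,  1024, 0, false);
create_cpu_entry(1,     8, 0, false);	/* tiny queue: frames likely pending at teardown */
create_cpu_entry(1, 16000, 0, false);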
 }