@@ -53,6 +53,7 @@ struct bpf_dtab_netdev {
 struct bpf_dtab {
	struct bpf_map map;
	struct bpf_dtab_netdev **netdev_map;
+	unsigned long int __percpu *flush_needed;
 };
 
 static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
@@ -87,6 +88,7 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
 
	/* make sure page count doesn't overflow */
	cost = (u64) dtab->map.max_entries * sizeof(struct bpf_dtab_netdev *);
+	cost += BITS_TO_LONGS(attr->max_entries) * sizeof(unsigned long);
	if (cost >= U32_MAX - PAGE_SIZE)
		goto free_dtab;
 
@@ -97,6 +99,14 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
	if (err)
		goto free_dtab;
 
+	/* A per cpu bitfield with a bit per possible net device */
+	dtab->flush_needed = __alloc_percpu(
+				BITS_TO_LONGS(attr->max_entries) *
+				sizeof(unsigned long),
+				__alignof__(unsigned long));
+	if (!dtab->flush_needed)
+		goto free_dtab;
+
	dtab->netdev_map = bpf_map_area_alloc(dtab->map.max_entries *
					      sizeof(struct bpf_dtab_netdev *));
	if (!dtab->netdev_map)
@@ -105,6 +115,7 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
	return &dtab->map;
 
 free_dtab:
+	free_percpu(dtab->flush_needed);
	kfree(dtab);
	return ERR_PTR(err);
 }
@@ -112,7 +123,7 @@ free_dtab:
 static void dev_map_free(struct bpf_map *map)
 {
	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
-	int i;
+	int i, cpu;
 
	/* At this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
	 * so the programs (can be more than one that used this map) were
@@ -123,6 +134,18 @@ static void dev_map_free(struct bpf_map *map)
	 */
	synchronize_rcu();
 
+	/* To ensure all pending flush operations have completed wait for flush
+	 * bitmap to indicate all flush_needed bits to be zero on _all_ cpus.
+	 * Because the above synchronize_rcu() ensures the map is disconnected
+	 * from the program we can assume no new bits will be set.
+	 */
+	for_each_online_cpu(cpu) {
+		unsigned long *bitmap = per_cpu_ptr(dtab->flush_needed, cpu);
+
+		while (!bitmap_empty(bitmap, dtab->map.max_entries))
+			cpu_relax();
+	}
+
	for (i = 0; i < dtab->map.max_entries; i++) {
		struct bpf_dtab_netdev *dev;
 
@@ -137,6 +160,7 @@ static void dev_map_free(struct bpf_map *map)
	/* At this point bpf program is detached and all pending operations
	 * _must_ be complete
	 */
+	free_percpu(dtab->flush_needed);
	bpf_map_area_free(dtab->netdev_map);
	kfree(dtab);
 }
@@ -159,6 +183,14 @@ static int dev_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
	return 0;
 }
 
+void __dev_map_insert_ctx(struct bpf_map *map, u32 key)
+{
+	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+	unsigned long *bitmap = this_cpu_ptr(dtab->flush_needed);
+
+	__set_bit(key, bitmap);
+}
+
 struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
 {
	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
@@ -171,6 +203,39 @@ struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
	return dev ? dev->dev : NULL;
 }
 
+/* __dev_map_flush is called from xdp_do_flush_map() which _must_ be signaled
+ * from the driver before returning from its napi->poll() routine. The poll()
+ * routine is called either from busy_poll context or net_rx_action signaled
+ * from NET_RX_SOFTIRQ. Either way the poll routine must complete before the
+ * net device can be torn down. On devmap tear down we ensure the ctx bitmap
+ * is zeroed before completing to ensure all flush operations have completed.
+ */
+void __dev_map_flush(struct bpf_map *map)
+{
+	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+	unsigned long *bitmap = this_cpu_ptr(dtab->flush_needed);
+	u32 bit;
+
+	for_each_set_bit(bit, bitmap, map->max_entries) {
+		struct bpf_dtab_netdev *dev = READ_ONCE(dtab->netdev_map[bit]);
+		struct net_device *netdev;
+
+		/* This is possible if the dev entry is removed by user space
+		 * between xdp redirect and flush op.
+		 */
+		if (unlikely(!dev))
+			continue;
+
+		netdev = dev->dev;
+
+		__clear_bit(bit, bitmap);
+		if (unlikely(!netdev || !netdev->netdev_ops->ndo_xdp_flush))
+			continue;
+
+		netdev->netdev_ops->ndo_xdp_flush(netdev);
+	}
+}
+
 /* rcu_read_lock (from syscall and BPF contexts) ensures that if a delete and/or
  * update happens in parallel here a dev_put wont happen until after reading the
  * ifindex.
@@ -188,11 +253,28 @@ static void *dev_map_lookup_elem(struct bpf_map *map, void *key)
	return dev ? &dev->dev->ifindex : NULL;
 }
 
+static void dev_map_flush_old(struct bpf_dtab_netdev *old_dev)
+{
+	if (old_dev->dev->netdev_ops->ndo_xdp_flush) {
+		struct net_device *fl = old_dev->dev;
+		unsigned long *bitmap;
+		int cpu;
+
+		for_each_online_cpu(cpu) {
+			bitmap = per_cpu_ptr(old_dev->dtab->flush_needed, cpu);
+			__clear_bit(old_dev->key, bitmap);
+
+			fl->netdev_ops->ndo_xdp_flush(old_dev->dev);
+		}
+	}
+}
+
 static void __dev_map_entry_free(struct rcu_head *rcu)
 {
	struct bpf_dtab_netdev *old_dev;
 
	old_dev = container_of(rcu, struct bpf_dtab_netdev, rcu);
+	dev_map_flush_old(old_dev);
	dev_put(old_dev->dev);
	kfree(old_dev);
 }
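
For context, the sketch below illustrates the driver-side contract described in the comment above __dev_map_flush(): each XDP_REDIRECT handled during a poll cycle marks a bit in the per-cpu flush_needed bitmap (via __dev_map_insert_ctx() on the xdp_do_redirect() path), and the driver calls xdp_do_flush_map() once before leaving napi->poll() so every marked device gets its ndo_xdp_flush() kicked. This is an illustrative sketch, not code from the patch: struct example_ring, the example_* helpers, and the budget handling are hypothetical placeholders for a real driver's receive path.

/*
 * Illustrative only -- not part of the patch. Assumes an XDP program is
 * attached; bpf_prog_run_xdp(), xdp_do_redirect() and xdp_do_flush_map()
 * are the real entry points that reach __dev_map_insert_ctx() and
 * __dev_map_flush().
 */
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/netdevice.h>

struct example_ring {
	struct napi_struct napi;
	struct net_device *netdev;
	struct bpf_prog *xdp_prog;
};

/* Hypothetical ring helpers a real driver would provide. */
static bool example_ring_has_frames(struct example_ring *ring);
static void example_fill_xdp_buff(struct example_ring *ring, struct xdp_buff *xdp);
static void example_drop_frame(struct example_ring *ring, struct xdp_buff *xdp);

static int example_napi_poll(struct napi_struct *napi, int budget)
{
	struct example_ring *ring = container_of(napi, struct example_ring, napi);
	struct bpf_prog *xdp_prog;
	int done = 0;

	rcu_read_lock();
	xdp_prog = READ_ONCE(ring->xdp_prog);

	while (done < budget && example_ring_has_frames(ring)) {
		struct xdp_buff xdp;

		example_fill_xdp_buff(ring, &xdp);	/* set data/data_end */

		switch (bpf_prog_run_xdp(xdp_prog, &xdp)) {
		case XDP_REDIRECT:
			/* Marks the target devmap slot in this cpu's
			 * flush_needed bitmap via __dev_map_insert_ctx().
			 */
			xdp_do_redirect(ring->netdev, &xdp, xdp_prog);
			break;
		default:
			example_drop_frame(ring, &xdp);
			break;
		}
		done++;
	}

	/* Must run before poll() returns: walks this cpu's bitmap and
	 * calls ndo_xdp_flush() on every device marked above.
	 */
	xdp_do_flush_map();
	rcu_read_unlock();

	if (done < budget)
		napi_complete_done(napi, done);

	return done;
}

Batching the flush this way is the point of the per-cpu bitmap: a redirect only sets a bit for the target device, and the comparatively expensive ndo_xdp_flush() doorbell is rung once per poll cycle rather than once per packet.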