@@ -26,11 +26,18 @@ struct bpf_htab {
 	struct bucket *buckets;
 	void *elems;
 	struct pcpu_freelist freelist;
+	void __percpu *extra_elems;
 	atomic_t count;	/* number of elements in this hashtable */
 	u32 n_buckets;	/* number of hash buckets */
 	u32 elem_size;	/* size of each element in bytes */
 };
 
+enum extra_elem_state {
+	HTAB_NOT_AN_EXTRA_ELEM = 0,
+	HTAB_EXTRA_ELEM_FREE,
+	HTAB_EXTRA_ELEM_USED
+};
+
 /* each htab element is struct htab_elem + key + value */
 struct htab_elem {
 	union {
@@ -38,7 +45,10 @@ struct htab_elem {
 		struct bpf_htab *htab;
 		struct pcpu_freelist_node fnode;
 	};
-	struct rcu_head rcu;
+	union {
+		struct rcu_head rcu;
+		enum extra_elem_state state;
+	};
 	u32 hash;
 	char key[0] __aligned(8);
 };
@@ -113,6 +123,23 @@ free_elems:
 	return err;
 }
 
+static int alloc_extra_elems(struct bpf_htab *htab)
+{
+	void __percpu *pptr;
+	int cpu;
+
+	pptr = __alloc_percpu_gfp(htab->elem_size, 8, GFP_USER | __GFP_NOWARN);
+	if (!pptr)
+		return -ENOMEM;
+
+	for_each_possible_cpu(cpu) {
+		((struct htab_elem *)per_cpu_ptr(pptr, cpu))->state =
+			HTAB_EXTRA_ELEM_FREE;
+	}
+	htab->extra_elems = pptr;
+	return 0;
+}
+
 /* Called from syscall */
 static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 {
@@ -185,6 +212,8 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 	if (percpu)
 		cost += (u64) round_up(htab->map.value_size, 8) *
 			num_possible_cpus() * htab->map.max_entries;
+	else
+		cost += (u64) htab->elem_size * num_possible_cpus();
 
 	if (cost >= U32_MAX - PAGE_SIZE)
 		/* make sure page count doesn't overflow */
@@ -212,14 +241,22 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 		raw_spin_lock_init(&htab->buckets[i].lock);
 	}
 
+	if (!percpu) {
+		err = alloc_extra_elems(htab);
+		if (err)
+			goto free_buckets;
+	}
+
 	if (!(attr->map_flags & BPF_F_NO_PREALLOC)) {
 		err = prealloc_elems_and_freelist(htab);
 		if (err)
-			goto free_buckets;
+			goto free_extra_elems;
 	}
 
 	return &htab->map;
 
+free_extra_elems:
+	free_percpu(htab->extra_elems);
 free_buckets:
 	kvfree(htab->buckets);
 free_htab:
@@ -349,7 +386,6 @@ static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l)
 	if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH)
 		free_percpu(htab_elem_get_ptr(l, htab->map.key_size));
 	kfree(l);
-
 }
 
 static void htab_elem_free_rcu(struct rcu_head *head)
@@ -370,6 +406,11 @@ static void htab_elem_free_rcu(struct rcu_head *head)
 
 static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
 {
+	if (l->state == HTAB_EXTRA_ELEM_USED) {
+		l->state = HTAB_EXTRA_ELEM_FREE;
+		return;
+	}
+
 	if (!(htab->map.map_flags & BPF_F_NO_PREALLOC)) {
 		pcpu_freelist_push(&htab->freelist, &l->fnode);
 	} else {
@@ -381,25 +422,44 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
 
 static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
 					 void *value, u32 key_size, u32 hash,
-					 bool percpu, bool onallcpus)
+					 bool percpu, bool onallcpus,
+					 bool old_elem_exists)
 {
 	u32 size = htab->map.value_size;
 	bool prealloc = !(htab->map.map_flags & BPF_F_NO_PREALLOC);
 	struct htab_elem *l_new;
 	void __percpu *pptr;
+	int err = 0;
 
 	if (prealloc) {
 		l_new = (struct htab_elem *)pcpu_freelist_pop(&htab->freelist);
 		if (!l_new)
-			return ERR_PTR(-E2BIG);
+			err = -E2BIG;
 	} else {
 		if (atomic_inc_return(&htab->count) > htab->map.max_entries) {
 			atomic_dec(&htab->count);
-			return ERR_PTR(-E2BIG);
+			err = -E2BIG;
+		} else {
+			l_new = kmalloc(htab->elem_size,
+					GFP_ATOMIC | __GFP_NOWARN);
+			if (!l_new)
+				return ERR_PTR(-ENOMEM);
 		}
-		l_new = kmalloc(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN);
-		if (!l_new)
-			return ERR_PTR(-ENOMEM);
+	}
+
+	if (err) {
+		if (!old_elem_exists)
+			return ERR_PTR(err);
+
+		/* if we're updating the existing element and the hash table
+		 * is full, use per-cpu extra elems
+		 */
+		l_new = this_cpu_ptr(htab->extra_elems);
+		if (l_new->state != HTAB_EXTRA_ELEM_FREE)
+			return ERR_PTR(-E2BIG);
+		l_new->state = HTAB_EXTRA_ELEM_USED;
+	} else {
+		l_new->state = HTAB_NOT_AN_EXTRA_ELEM;
 	}
 
 	memcpy(l_new->key, key, key_size);
@@ -489,7 +549,8 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
 	if (ret)
 		goto err;
 
-	l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false);
+	l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false,
+				!!l_old);
 	if (IS_ERR(l_new)) {
 		/* all pre-allocated elements are in use or memory exhausted */
 		ret = PTR_ERR(l_new);
@@ -563,7 +624,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
 		}
 	} else {
 		l_new = alloc_htab_elem(htab, key, value, key_size,
-					hash, true, onallcpus);
+					hash, true, onallcpus, false);
 		if (IS_ERR(l_new)) {
 			ret = PTR_ERR(l_new);
 			goto err;
@@ -652,6 +713,7 @@ static void htab_map_free(struct bpf_map *map)
 		htab_free_elems(htab);
 		pcpu_freelist_destroy(&htab->freelist);
 	}
+	free_percpu(htab->extra_elems);
 	kvfree(htab->buckets);
 	kfree(htab);
 }
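
To illustrate the path the later hunks add: when the map is full and an update targets an existing key, alloc_htab_elem() hands out this cpu's preallocated extra element instead of returning -E2BIG, and free_htab_elem() later just marks that element free again rather than pushing it to the freelist. The standalone C sketch below mirrors that per-cpu state machine in userspace; it is not kernel code, NR_CPUS_SIM, get_extra() and put_extra() are invented names, a plain array stands in for the __percpu area, and RCU is left out entirely.

/* Userspace sketch (not kernel code) of the extra_elems state machine. */
#include <stdio.h>

enum extra_elem_state {
	NOT_AN_EXTRA_ELEM = 0,	/* regular (freelist/kmalloc) element */
	EXTRA_ELEM_FREE,	/* this cpu's spare element is available */
	EXTRA_ELEM_USED,	/* spare element currently linked into the table */
};

struct elem {
	enum extra_elem_state state;
	int value;
};

#define NR_CPUS_SIM 4

static struct elem extra[NR_CPUS_SIM];	/* stands in for the __percpu area */

/* Fallback used only when the map is full and the key already exists:
 * take this cpu's spare element instead of failing with -E2BIG.
 */
static struct elem *get_extra(int cpu)
{
	struct elem *e = &extra[cpu];

	if (e->state != EXTRA_ELEM_FREE)
		return NULL;	/* previous replacement not yet released */
	e->state = EXTRA_ELEM_USED;
	return e;
}

/* Releasing a spare element only flips its state back; regular elements
 * would go to the freelist or kfree_rcu() instead.
 */
static void put_extra(struct elem *e)
{
	if (e->state == EXTRA_ELEM_USED)
		e->state = EXTRA_ELEM_FREE;
}

int main(void)
{
	int cpu;
	struct elem *e;

	/* analogue of alloc_extra_elems(): one free spare per cpu */
	for (cpu = 0; cpu < NR_CPUS_SIM; cpu++)
		extra[cpu].state = EXTRA_ELEM_FREE;

	e = get_extra(0);	/* map full, key exists: reuse cpu0's spare */
	if (e) {
		e->value = 42;
		printf("update served by cpu0 extra elem, value=%d\n", e->value);
		put_extra(e);	/* old element "freed": spare available again */
	}
	return 0;
}

The point of the design is that an update of an existing key never needs to grow the table, so one spare element per cpu is enough to guarantee the replace-then-free sequence can always complete even when max_entries is reached.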