@@ -30,18 +30,12 @@ struct bpf_htab {
 		struct pcpu_freelist freelist;
 		struct bpf_lru lru;
 	};
-	void __percpu *extra_elems;
+	struct htab_elem *__percpu *extra_elems;
 	atomic_t count;	/* number of elements in this hashtable */
 	u32 n_buckets;	/* number of hash buckets */
 	u32 elem_size;	/* size of each element in bytes */
 };
 
-enum extra_elem_state {
-	HTAB_NOT_AN_EXTRA_ELEM = 0,
-	HTAB_EXTRA_ELEM_FREE,
-	HTAB_EXTRA_ELEM_USED
-};
-
 /* each htab element is struct htab_elem + key + value */
 struct htab_elem {
 	union {
@@ -56,7 +50,6 @@ struct htab_elem {
 	};
 	union {
 		struct rcu_head rcu;
-		enum extra_elem_state state;
 		struct bpf_lru_node lru_node;
 	};
 	u32 hash;
@@ -77,6 +70,11 @@ static bool htab_is_percpu(const struct bpf_htab *htab)
 	       htab->map.map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
 }
 
+static bool htab_is_prealloc(const struct bpf_htab *htab)
+{
+	return !(htab->map.map_flags & BPF_F_NO_PREALLOC);
+}
+
 static inline void htab_elem_set_ptr(struct htab_elem *l, u32 key_size,
 				     void __percpu *pptr)
 {
@@ -128,17 +126,20 @@ static struct htab_elem *prealloc_lru_pop(struct bpf_htab *htab, void *key,
 
 static int prealloc_init(struct bpf_htab *htab)
 {
+	u32 num_entries = htab->map.max_entries;
 	int err = -ENOMEM, i;
 
-	htab->elems = bpf_map_area_alloc(htab->elem_size *
-					 htab->map.max_entries);
+	if (!htab_is_percpu(htab) && !htab_is_lru(htab))
+		num_entries += num_possible_cpus();
+
+	htab->elems = bpf_map_area_alloc(htab->elem_size * num_entries);
 	if (!htab->elems)
 		return -ENOMEM;
 
 	if (!htab_is_percpu(htab))
 		goto skip_percpu_elems;
 
-	for (i = 0; i < htab->map.max_entries; i++) {
+	for (i = 0; i < num_entries; i++) {
 		u32 size = round_up(htab->map.value_size, 8);
 		void __percpu *pptr;
 
@@ -166,11 +167,11 @@ skip_percpu_elems:
 	if (htab_is_lru(htab))
 		bpf_lru_populate(&htab->lru, htab->elems,
 				 offsetof(struct htab_elem, lru_node),
-				 htab->elem_size, htab->map.max_entries);
+				 htab->elem_size, num_entries);
 	else
 		pcpu_freelist_populate(&htab->freelist,
 				       htab->elems + offsetof(struct htab_elem, fnode),
-				       htab->elem_size, htab->map.max_entries);
+				       htab->elem_size, num_entries);
 
 	return 0;
 
@@ -191,16 +192,22 @@ static void prealloc_destroy(struct bpf_htab *htab)
 
 static int alloc_extra_elems(struct bpf_htab *htab)
 {
-	void __percpu *pptr;
+	struct htab_elem *__percpu *pptr, *l_new;
+	struct pcpu_freelist_node *l;
 	int cpu;
 
-	pptr = __alloc_percpu_gfp(htab->elem_size, 8, GFP_USER | __GFP_NOWARN);
+	pptr = __alloc_percpu_gfp(sizeof(struct htab_elem *), 8,
+				  GFP_USER | __GFP_NOWARN);
 	if (!pptr)
 		return -ENOMEM;
 
 	for_each_possible_cpu(cpu) {
-		((struct htab_elem *)per_cpu_ptr(pptr, cpu))->state =
-			HTAB_EXTRA_ELEM_FREE;
+		l = pcpu_freelist_pop(&htab->freelist);
+		/* pop will succeed, since prealloc_init()
+		 * preallocated extra num_possible_cpus elements
+		 */
+		l_new = container_of(l, struct htab_elem, fnode);
+		*per_cpu_ptr(pptr, cpu) = l_new;
 	}
 	htab->extra_elems = pptr;
 	return 0;
@@ -342,25 +349,25 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 		raw_spin_lock_init(&htab->buckets[i].lock);
 	}
 
-	if (!percpu && !lru) {
-		/* lru itself can remove the least used element, so
-		 * there is no need for an extra elem during map_update.
-		 */
-		err = alloc_extra_elems(htab);
-		if (err)
-			goto free_buckets;
-	}
-
 	if (prealloc) {
 		err = prealloc_init(htab);
 		if (err)
-			goto free_extra_elems;
+			goto free_buckets;
+
+		if (!percpu && !lru) {
+			/* lru itself can remove the least used element, so
+			 * there is no need for an extra elem during map_update.
+			 */
+			err = alloc_extra_elems(htab);
+			if (err)
+				goto free_prealloc;
+		}
 	}
 
 	return &htab->map;
 
-free_extra_elems:
-	free_percpu(htab->extra_elems);
+free_prealloc:
+	prealloc_destroy(htab);
 free_buckets:
 	bpf_map_area_free(htab->buckets);
 free_htab:
@@ -575,12 +582,7 @@ static void htab_elem_free_rcu(struct rcu_head *head)
 
 static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
 {
-	if (l->state == HTAB_EXTRA_ELEM_USED) {
-		l->state = HTAB_EXTRA_ELEM_FREE;
-		return;
-	}
-
-	if (!(htab->map.map_flags & BPF_F_NO_PREALLOC)) {
+	if (htab_is_prealloc(htab)) {
 		pcpu_freelist_push(&htab->freelist, &l->fnode);
 	} else {
 		atomic_dec(&htab->count);
@@ -610,47 +612,43 @@ static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr,
 static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
 					 void *value, u32 key_size, u32 hash,
 					 bool percpu, bool onallcpus,
-					 bool old_elem_exists)
+					 struct htab_elem *old_elem)
 {
 	u32 size = htab->map.value_size;
-	bool prealloc = !(htab->map.map_flags & BPF_F_NO_PREALLOC);
-	struct htab_elem *l_new;
+	bool prealloc = htab_is_prealloc(htab);
+	struct htab_elem *l_new, **pl_new;
 	void __percpu *pptr;
-	int err = 0;
 
 	if (prealloc) {
-		struct pcpu_freelist_node *l;
+		if (old_elem) {
+			/* if we're updating the existing element,
+			 * use per-cpu extra elems to avoid freelist_pop/push
+			 */
+			pl_new = this_cpu_ptr(htab->extra_elems);
+			l_new = *pl_new;
+			*pl_new = old_elem;
+		} else {
+			struct pcpu_freelist_node *l;
 
-		l = pcpu_freelist_pop(&htab->freelist);
-		if (!l)
-			err = -E2BIG;
-		else
+			l = pcpu_freelist_pop(&htab->freelist);
+			if (!l)
+				return ERR_PTR(-E2BIG);
 			l_new = container_of(l, struct htab_elem, fnode);
-	} else {
-		if (atomic_inc_return(&htab->count) > htab->map.max_entries) {
-			atomic_dec(&htab->count);
-			err = -E2BIG;
-		} else {
-			l_new = kmalloc(htab->elem_size,
-					GFP_ATOMIC | __GFP_NOWARN);
-			if (!l_new)
-				return ERR_PTR(-ENOMEM);
 		}
-	}
-
-	if (err) {
-		if (!old_elem_exists)
-			return ERR_PTR(err);
-
-		/* if we're updating the existing element and the hash table
-		 * is full, use per-cpu extra elems
-		 */
-		l_new = this_cpu_ptr(htab->extra_elems);
-		if (l_new->state != HTAB_EXTRA_ELEM_FREE)
-			return ERR_PTR(-E2BIG);
-		l_new->state = HTAB_EXTRA_ELEM_USED;
 	} else {
-		l_new->state = HTAB_NOT_AN_EXTRA_ELEM;
+		if (atomic_inc_return(&htab->count) > htab->map.max_entries)
+			if (!old_elem) {
+				/* when map is full and update() is replacing
+				 * old element, it's ok to allocate, since
+				 * old element will be freed immediately.
+				 * Otherwise return an error
+				 */
+				atomic_dec(&htab->count);
+				return ERR_PTR(-E2BIG);
+			}
+		l_new = kmalloc(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN);
+		if (!l_new)
+			return ERR_PTR(-ENOMEM);
 	}
 
 	memcpy(l_new->key, key, key_size);
@@ -731,7 +729,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
 		goto err;
 
 	l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false,
-				!!l_old);
+				l_old);
 	if (IS_ERR(l_new)) {
 		/* all pre-allocated elements are in use or memory exhausted */
 		ret = PTR_ERR(l_new);
@@ -744,7 +742,8 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
 	hlist_nulls_add_head_rcu(&l_new->hash_node, head);
 	if (l_old) {
 		hlist_nulls_del_rcu(&l_old->hash_node);
-		free_htab_elem(htab, l_old);
+		if (!htab_is_prealloc(htab))
+			free_htab_elem(htab, l_old);
 	}
 	ret = 0;
 err:
@@ -856,7 +855,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
 				value, onallcpus);
 	} else {
 		l_new = alloc_htab_elem(htab, key, value, key_size,
-					hash, true, onallcpus, false);
+					hash, true, onallcpus, NULL);
 		if (IS_ERR(l_new)) {
 			ret = PTR_ERR(l_new);
 			goto err;
@@ -1024,8 +1023,7 @@ static void delete_all_elements(struct bpf_htab *htab)
 
 		hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
 			hlist_nulls_del_rcu(&l->hash_node);
-			if (l->state != HTAB_EXTRA_ELEM_USED)
-				htab_elem_free(htab, l);
+			htab_elem_free(htab, l);
 		}
 	}
 }
@@ -1045,7 +1043,7 @@ static void htab_map_free(struct bpf_map *map)
 	 * not have executed. Wait for them.
 	 */
 	rcu_barrier();
-	if (htab->map.map_flags & BPF_F_NO_PREALLOC)
+	if (!htab_is_prealloc(htab))
 		delete_all_elements(htab);
 	else
 		prealloc_destroy(htab);
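
The core of the change is how a preallocated map reuses its per-CPU spare element when an existing key is updated: the spare becomes the new element, and the replaced element becomes the next spare, so no freelist pop/push (and no state flag) is needed. Below is a minimal userspace sketch of that swap, under the assumption of a single slot and a single spare; the names (storage, slot, spare, elem_swap_update) are illustrative only and are not the kernel API, and the bucket lock, per-CPU handling, and RCU protection present in the real code are not modeled.

/* Illustrative model of the extra_elems swap; not kernel code. */
#include <stdio.h>

struct elem {
	int key;
	int value;
};

static struct elem storage[2];	/* stand-in for preallocated elements */
static struct elem *slot;	/* element currently "in the map" */
static struct elem *spare;	/* stand-in for the per-CPU extra element */

/* Update an existing key: write the new copy into the spare, publish it,
 * and keep the replaced element as the next spare. No allocation needed.
 */
static void elem_swap_update(int key, int value)
{
	struct elem *l_new = spare;

	l_new->key = key;
	l_new->value = value;
	spare = slot;		/* replaced element becomes the spare */
	slot = l_new;		/* publish the new element */
}

int main(void)
{
	slot = &storage[0];
	spare = &storage[1];
	slot->key = 1;
	slot->value = 10;

	elem_swap_update(1, 20);
	printf("key=%d value=%d\n", slot->key, slot->value);	/* key=1 value=20 */
	elem_swap_update(1, 30);
	printf("key=%d value=%d\n", slot->key, slot->value);	/* key=1 value=30 */
	return 0;
}

This is the same exchange alloc_htab_elem() performs through *this_cpu_ptr(htab->extra_elems) when old_elem is non-NULL, and it is why htab_map_update_elem() no longer calls free_htab_elem() on the replaced element for preallocated maps: the old element is retained as the CPU's spare rather than being pushed back to the freelist.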