Ver código fonte

Merge branch 'bpf-lpm-delete'

Craig Gallek says:

====================
Implement delete for BPF LPM trie

This was previously left as a TODO.  Add the implementation and
extend the test to cover it.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
David S. Miller 8 anos atrás
pai
commit
7670712707
2 arquivos alterados com 273 adições e 8 exclusões
  1. 77 3
      kernel/bpf/lpm_trie.c
  2. 196 5
      tools/testing/selftests/bpf/test_lpm_map.c

+ 77 - 3
kernel/bpf/lpm_trie.c

@@ -389,10 +389,84 @@ out:
 	return ret;
 }
 
-static int trie_delete_elem(struct bpf_map *map, void *key)
+/* Remove the entry whose prefix exactly equals @_key from the LPM trie.
+ *
+ * Called from syscall or from eBPF program.
+ *
+ * @map:  map embedded in the struct lpm_trie to operate on
+ * @_key: struct bpf_lpm_trie_key carrying the prefix length and data
+ *
+ * Returns 0 on success, -EINVAL if the key's prefixlen exceeds the trie's
+ * max_prefixlen, or -ENOENT if no non-intermediate node with exactly this
+ * prefix exists.
+ */
+static int trie_delete_elem(struct bpf_map *map, void *_key)
 {
-	/* TODO */
-	return -ENOSYS;
+	struct lpm_trie *trie = container_of(map, struct lpm_trie, map);
+	struct bpf_lpm_trie_key *key = _key;
+	struct lpm_trie_node __rcu **trim;
+	struct lpm_trie_node *node;
+	unsigned long irq_flags;
+	unsigned int next_bit;
+	size_t matchlen = 0;
+	int ret = 0;
+
+	if (key->prefixlen > trie->max_prefixlen)
+		return -EINVAL;
+
+	raw_spin_lock_irqsave(&trie->lock, irq_flags);
+
+	/* Walk the tree looking for an exact key/length match and keeping
+	 * track of where we could begin trimming the tree.  The trim-point
+	 * is the sub-tree along the walk consisting of only single-child
+	 * intermediate nodes and ending at a leaf node that we want to
+	 * remove.
+	 */
+	trim = &trie->root;
+	node = rcu_dereference_protected(
+		trie->root, lockdep_is_held(&trie->lock));
+	while (node) {
+		matchlen = longest_prefix_match(trie, node, key);
+
+		if (node->prefixlen != matchlen ||
+		    node->prefixlen == key->prefixlen)
+			break;
+
+		next_bit = extract_bit(key->data, node->prefixlen);
+		/* If we hit a node that has more than one child or is a valid
+		 * prefix itself, do not remove it. Reset the root of the trim
+		 * path to its descendant on our path.
+		 */
+		if (!(node->flags & LPM_TREE_NODE_FLAG_IM) ||
+		    (rcu_access_pointer(node->child[0]) &&
+		     rcu_access_pointer(node->child[1])))
+			trim = &node->child[next_bit];
+		node = rcu_dereference_protected(
+			node->child[next_bit], lockdep_is_held(&trie->lock));
+	}
+
+	/* The walk above stops both when the node does not fully match the
+	 * key (node->prefixlen != matchlen) and when the lengths are equal.
+	 * Both can be true at once, so the full-match condition has to be
+	 * re-checked here; otherwise a node of the same length but with a
+	 * different prefix would be deleted.
+	 */
+	if (!node || node->prefixlen != key->prefixlen ||
+	    node->prefixlen != matchlen ||
+	    (node->flags & LPM_TREE_NODE_FLAG_IM)) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	trie->n_entries--;
+
+	/* If the node we are removing is not a leaf node, simply mark it
+	 * as intermediate and we are done.
+	 */
+	if (rcu_access_pointer(node->child[0]) ||
+	    rcu_access_pointer(node->child[1])) {
+		node->flags |= LPM_TREE_NODE_FLAG_IM;
+		goto out;
+	}
+
+	/* trim should now point to the slot holding the start of a path from
+	 * zero or more intermediate nodes to our leaf node for deletion.
+	 */
+	while ((node = rcu_dereference_protected(
+			*trim, lockdep_is_held(&trie->lock)))) {
+		RCU_INIT_POINTER(*trim, NULL);
+		/* Pick the next slot before handing the node to kfree_rcu() */
+		trim = rcu_access_pointer(node->child[0]) ?
+			&node->child[0] :
+			&node->child[1];
+		kfree_rcu(node, rcu);
+	}
+
+out:
+	raw_spin_unlock_irqrestore(&trie->lock, irq_flags);
+
+	return ret;
 }
 
 #define LPM_DATA_SIZE_MAX	256

+ 196 - 5
tools/testing/selftests/bpf/test_lpm_map.c

@@ -31,6 +31,10 @@ struct tlpm_node {
 	uint8_t key[];
 };
 
+static struct tlpm_node *tlpm_match(struct tlpm_node *list,
+				    const uint8_t *key,
+				    size_t n_bits);
+
 static struct tlpm_node *tlpm_add(struct tlpm_node *list,
 				  const uint8_t *key,
 				  size_t n_bits)
@@ -38,9 +42,17 @@ static struct tlpm_node *tlpm_add(struct tlpm_node *list,
 	struct tlpm_node *node;
 	size_t n;
 
+	n = (n_bits + 7) / 8;
+
+	/* 'overwrite' an equivalent entry if one already exists */
+	node = tlpm_match(list, key, n_bits);
+	if (node && node->n_bits == n_bits) {
+		memcpy(node->key, key, n);
+		return list;
+	}
+
 	/* add new entry with @key/@n_bits to @list and return new head */
 
-	n = (n_bits + 7) / 8;
 	node = malloc(sizeof(*node) + n);
 	assert(node);
 
@@ -92,6 +104,34 @@ static struct tlpm_node *tlpm_match(struct tlpm_node *list,
 	return best;
 }
 
+/* Unlink and free the entry of @list that tlpm_match() returns for
+ * @key/@n_bits, provided its length is exactly @n_bits.  Returns the
+ * (possibly new) head of the list; the list is returned untouched when
+ * no such entry exists.
+ */
+static struct tlpm_node *tlpm_delete(struct tlpm_node *list,
+				     const uint8_t *key,
+				     size_t n_bits)
+{
+	struct tlpm_node *victim = tlpm_match(list, key, n_bits);
+	struct tlpm_node **link;
+
+	/* a match of a different length is not the entry we were asked for */
+	if (!victim || victim->n_bits != n_bits)
+		return list;
+
+	/* Walk the links rather than the nodes so that removing the head
+	 * is handled by the same code path as removing any other entry.
+	 */
+	for (link = &list; *link; link = &(*link)->next) {
+		if (*link != victim)
+			continue;
+
+		*link = victim->next;
+		free(victim);
+		return list;
+	}
+
+	/* should never get here */
+	assert(0);
+	return list;
+}
+
+
 static void test_lpm_basic(void)
 {
 	struct tlpm_node *list = NULL, *t1, *t2;
@@ -114,6 +154,13 @@ static void test_lpm_basic(void)
 	assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff, 0xff }, 15));
 	assert(!tlpm_match(list, (uint8_t[]){ 0x7f, 0xff }, 16));
 
+	list = tlpm_delete(list, (uint8_t[]){ 0xff, 0xff }, 16);
+	assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff }, 8));
+	assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff, 0xff }, 16));
+
+	list = tlpm_delete(list, (uint8_t[]){ 0xff }, 8);
+	assert(!tlpm_match(list, (uint8_t[]){ 0xff }, 8));
+
 	tlpm_clear(list);
 }
 
@@ -158,7 +205,7 @@ static void test_lpm_order(void)
 
 static void test_lpm_map(int keysize)
 {
-	size_t i, j, n_matches, n_nodes, n_lookups;
+	size_t i, j, n_matches, n_matches_after_delete, n_nodes, n_lookups;
 	struct tlpm_node *t, *list = NULL;
 	struct bpf_lpm_trie_key *key;
 	uint8_t *data, *value;
@@ -170,6 +217,7 @@ static void test_lpm_map(int keysize)
 	 */
 
 	n_matches = 0;
+	n_matches_after_delete = 0;
 	n_nodes = 1 << 8;
 	n_lookups = 1 << 16;
 
@@ -223,15 +271,54 @@ static void test_lpm_map(int keysize)
 		}
 	}
 
+	/* Remove the first half of the elements in the tlpm and the
+	 * corresponding nodes from the bpf-lpm.  Then run the same
+	 * large number of random lookups in both and make sure they match.
+	 * Note: we need to count the number of nodes actually inserted
+	 * since there may have been duplicates.
+	 */
+	for (i = 0, t = list; t; i++, t = t->next)
+		;
+	for (j = 0; j < i / 2; ++j) {
+		key->prefixlen = list->n_bits;
+		memcpy(key->data, list->key, keysize);
+		r = bpf_map_delete_elem(map, key);
+		assert(!r);
+		list = tlpm_delete(list, list->key, list->n_bits);
+		assert(list);
+	}
+	for (i = 0; i < n_lookups; ++i) {
+		for (j = 0; j < keysize; ++j)
+			data[j] = rand() & 0xff;
+
+		t = tlpm_match(list, data, 8 * keysize);
+
+		key->prefixlen = 8 * keysize;
+		memcpy(key->data, data, keysize);
+		r = bpf_map_lookup_elem(map, key, value);
+		assert(!r || errno == ENOENT);
+		assert(!t == !!r);
+
+		if (t) {
+			++n_matches_after_delete;
+			assert(t->n_bits == value[keysize]);
+			for (j = 0; j < t->n_bits; ++j)
+				assert((t->key[j / 8] & (1 << (7 - j % 8))) ==
+				       (value[j / 8] & (1 << (7 - j % 8))));
+		}
+	}
+
 	close(map);
 	tlpm_clear(list);
 
 	/* With 255 random nodes in the map, we are pretty likely to match
 	 * something on every lookup. For statistics, use this:
 	 *
-	 *     printf("  nodes: %zu\n"
-	 *            "lookups: %zu\n"
-	 *            "matches: %zu\n", n_nodes, n_lookups, n_matches);
+	 *     printf("          nodes: %zu\n"
+	 *            "        lookups: %zu\n"
+	 *            "        matches: %zu\n"
+	 *            "matches(delete): %zu\n",
+	 *            n_nodes, n_lookups, n_matches, n_matches_after_delete);
 	 */
 }
 
@@ -331,6 +418,108 @@ static void test_lpm_ipaddr(void)
 	close(map_fd_ipv6);
 }
 
+/* Exercise deletion on a small IPv4 LPM trie map: a delete of a key that
+ * was never inserted must fail with ENOENT, and after each successful
+ * delete a lookup must fall back to the next-best remaining prefix.
+ */
+static void test_lpm_delete(void)
+{
+	struct bpf_lpm_trie_key *key;
+	size_t key_size;
+	int map_fd;
+	__u64 value;
+
+	key_size = sizeof(*key) + sizeof(__u32);
+	key = alloca(key_size);
+
+	map_fd = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE,
+				key_size, sizeof(value),
+				100, BPF_F_NO_PREALLOC);
+	assert(map_fd >= 0);
+
+	/* Add nodes:
+	 * 192.168.0.0/16   (1)
+	 * 192.168.0.0/24   (2)
+	 * 192.168.128.0/24 (3)
+	 * 192.168.1.0/24   (4)
+	 *
+	 *         (1)
+	 *        /   \
+	 *     (IM)    (3)
+	 *    /   \
+	 *   (2)  (4)
+	 */
+	value = 1;
+	key->prefixlen = 16;
+	inet_pton(AF_INET, "192.168.0.0", key->data);
+	assert(bpf_map_update_elem(map_fd, key, &value, 0) == 0);
+
+	value = 2;
+	key->prefixlen = 24;
+	inet_pton(AF_INET, "192.168.0.0", key->data);
+	assert(bpf_map_update_elem(map_fd, key, &value, 0) == 0);
+
+	value = 3;
+	key->prefixlen = 24;
+	inet_pton(AF_INET, "192.168.128.0", key->data);
+	assert(bpf_map_update_elem(map_fd, key, &value, 0) == 0);
+
+	value = 4;
+	key->prefixlen = 24;
+	inet_pton(AF_INET, "192.168.1.0", key->data);
+	assert(bpf_map_update_elem(map_fd, key, &value, 0) == 0);
+
+	/* remove non-existent node: the lookup must miss and the delete
+	 * itself must fail with ENOENT
+	 */
+	key->prefixlen = 32;
+	inet_pton(AF_INET, "10.0.0.1", key->data);
+	assert(bpf_map_lookup_elem(map_fd, key, &value) == -1 &&
+		errno == ENOENT);
+	assert(bpf_map_delete_elem(map_fd, key) == -1 &&
+		errno == ENOENT);
+
+	/* assert initial lookup */
+	key->prefixlen = 32;
+	inet_pton(AF_INET, "192.168.0.1", key->data);
+	assert(bpf_map_lookup_elem(map_fd, key, &value) == 0);
+	assert(value == 2);
+
+	/* remove leaf node */
+	key->prefixlen = 24;
+	inet_pton(AF_INET, "192.168.0.0", key->data);
+	assert(bpf_map_delete_elem(map_fd, key) == 0);
+
+	/* with (2) gone, the lookup falls back to the /16 entry (1) */
+	key->prefixlen = 32;
+	inet_pton(AF_INET, "192.168.0.1", key->data);
+	assert(bpf_map_lookup_elem(map_fd, key, &value) == 0);
+	assert(value == 1);
+
+	/* remove leaf (and intermediary) node */
+	key->prefixlen = 24;
+	inet_pton(AF_INET, "192.168.1.0", key->data);
+	assert(bpf_map_delete_elem(map_fd, key) == 0);
+
+	key->prefixlen = 32;
+	inet_pton(AF_INET, "192.168.1.1", key->data);
+	assert(bpf_map_lookup_elem(map_fd, key, &value) == 0);
+	assert(value == 1);
+
+	/* remove root node */
+	key->prefixlen = 16;
+	inet_pton(AF_INET, "192.168.0.0", key->data);
+	assert(bpf_map_delete_elem(map_fd, key) == 0);
+
+	/* (3) must still be reachable after its parent entry was removed */
+	key->prefixlen = 32;
+	inet_pton(AF_INET, "192.168.128.1", key->data);
+	assert(bpf_map_lookup_elem(map_fd, key, &value) == 0);
+	assert(value == 3);
+
+	/* remove last node */
+	key->prefixlen = 24;
+	inet_pton(AF_INET, "192.168.128.0", key->data);
+	assert(bpf_map_delete_elem(map_fd, key) == 0);
+
+	key->prefixlen = 32;
+	inet_pton(AF_INET, "192.168.128.1", key->data);
+	assert(bpf_map_lookup_elem(map_fd, key, &value) == -1 &&
+		errno == ENOENT);
+
+	close(map_fd);
+}
+
+
 int main(void)
 {
 	struct rlimit limit  = { RLIM_INFINITY, RLIM_INFINITY };
@@ -353,6 +542,8 @@ int main(void)
 
 	test_lpm_ipaddr();
 
+	test_lpm_delete();
+
 	printf("test_lpm: OK\n");
 	return 0;
 }