queue_stack_maps.c 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * queue_stack_maps.c: BPF queue and stack maps
  4. *
  5. * Copyright (c) 2018 Politecnico di Torino
  6. */
  7. #include <linux/bpf.h>
  8. #include <linux/list.h>
  9. #include <linux/slab.h>
  10. #include "percpu_freelist.h"
  11. #define QUEUE_STACK_CREATE_FLAG_MASK \
  12. (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
  13. struct bpf_queue_stack {
  14. struct bpf_map map;
  15. raw_spinlock_t lock;
  16. u32 head, tail;
  17. u32 size; /* max_entries + 1 */
  18. char elements[0] __aligned(8);
  19. };
  20. static struct bpf_queue_stack *bpf_queue_stack(struct bpf_map *map)
  21. {
  22. return container_of(map, struct bpf_queue_stack, map);
  23. }
  24. static bool queue_stack_map_is_empty(struct bpf_queue_stack *qs)
  25. {
  26. return qs->head == qs->tail;
  27. }
  28. static bool queue_stack_map_is_full(struct bpf_queue_stack *qs)
  29. {
  30. u32 head = qs->head + 1;
  31. if (unlikely(head >= qs->size))
  32. head = 0;
  33. return head == qs->tail;
  34. }
  35. /* Called from syscall */
  36. static int queue_stack_map_alloc_check(union bpf_attr *attr)
  37. {
  38. /* check sanity of attributes */
  39. if (attr->max_entries == 0 || attr->key_size != 0 ||
  40. attr->map_flags & ~QUEUE_STACK_CREATE_FLAG_MASK)
  41. return -EINVAL;
  42. if (attr->value_size > KMALLOC_MAX_SIZE)
  43. /* if value_size is bigger, the user space won't be able to
  44. * access the elements.
  45. */
  46. return -E2BIG;
  47. return 0;
  48. }
  49. static struct bpf_map *queue_stack_map_alloc(union bpf_attr *attr)
  50. {
  51. int ret, numa_node = bpf_map_attr_numa_node(attr);
  52. struct bpf_queue_stack *qs;
  53. u32 size, value_size;
  54. u64 queue_size, cost;
  55. size = attr->max_entries + 1;
  56. value_size = attr->value_size;
  57. queue_size = sizeof(*qs) + (u64) value_size * size;
  58. cost = queue_size;
  59. if (cost >= U32_MAX - PAGE_SIZE)
  60. return ERR_PTR(-E2BIG);
  61. cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
  62. ret = bpf_map_precharge_memlock(cost);
  63. if (ret < 0)
  64. return ERR_PTR(ret);
  65. qs = bpf_map_area_alloc(queue_size, numa_node);
  66. if (!qs)
  67. return ERR_PTR(-ENOMEM);
  68. memset(qs, 0, sizeof(*qs));
  69. bpf_map_init_from_attr(&qs->map, attr);
  70. qs->map.pages = cost;
  71. qs->size = size;
  72. raw_spin_lock_init(&qs->lock);
  73. return &qs->map;
  74. }
  75. /* Called when map->refcnt goes to zero, either from workqueue or from syscall */
  76. static void queue_stack_map_free(struct bpf_map *map)
  77. {
  78. struct bpf_queue_stack *qs = bpf_queue_stack(map);
  79. /* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
  80. * so the programs (can be more than one that used this map) were
  81. * disconnected from events. Wait for outstanding critical sections in
  82. * these programs to complete
  83. */
  84. synchronize_rcu();
  85. bpf_map_area_free(qs);
  86. }
  87. static int __queue_map_get(struct bpf_map *map, void *value, bool delete)
  88. {
  89. struct bpf_queue_stack *qs = bpf_queue_stack(map);
  90. unsigned long flags;
  91. int err = 0;
  92. void *ptr;
  93. raw_spin_lock_irqsave(&qs->lock, flags);
  94. if (queue_stack_map_is_empty(qs)) {
  95. err = -ENOENT;
  96. goto out;
  97. }
  98. ptr = &qs->elements[qs->tail * qs->map.value_size];
  99. memcpy(value, ptr, qs->map.value_size);
  100. if (delete) {
  101. if (unlikely(++qs->tail >= qs->size))
  102. qs->tail = 0;
  103. }
  104. out:
  105. raw_spin_unlock_irqrestore(&qs->lock, flags);
  106. return err;
  107. }
  108. static int __stack_map_get(struct bpf_map *map, void *value, bool delete)
  109. {
  110. struct bpf_queue_stack *qs = bpf_queue_stack(map);
  111. unsigned long flags;
  112. int err = 0;
  113. void *ptr;
  114. u32 index;
  115. raw_spin_lock_irqsave(&qs->lock, flags);
  116. if (queue_stack_map_is_empty(qs)) {
  117. err = -ENOENT;
  118. goto out;
  119. }
  120. index = qs->head - 1;
  121. if (unlikely(index >= qs->size))
  122. index = qs->size - 1;
  123. ptr = &qs->elements[index * qs->map.value_size];
  124. memcpy(value, ptr, qs->map.value_size);
  125. if (delete)
  126. qs->head = index;
  127. out:
  128. raw_spin_unlock_irqrestore(&qs->lock, flags);
  129. return err;
  130. }
  131. /* Called from syscall or from eBPF program */
  132. static int queue_map_peek_elem(struct bpf_map *map, void *value)
  133. {
  134. return __queue_map_get(map, value, false);
  135. }
  136. /* Called from syscall or from eBPF program */
  137. static int stack_map_peek_elem(struct bpf_map *map, void *value)
  138. {
  139. return __stack_map_get(map, value, false);
  140. }
  141. /* Called from syscall or from eBPF program */
  142. static int queue_map_pop_elem(struct bpf_map *map, void *value)
  143. {
  144. return __queue_map_get(map, value, true);
  145. }
  146. /* Called from syscall or from eBPF program */
  147. static int stack_map_pop_elem(struct bpf_map *map, void *value)
  148. {
  149. return __stack_map_get(map, value, true);
  150. }
  151. /* Called from syscall or from eBPF program */
  152. static int queue_stack_map_push_elem(struct bpf_map *map, void *value,
  153. u64 flags)
  154. {
  155. struct bpf_queue_stack *qs = bpf_queue_stack(map);
  156. unsigned long irq_flags;
  157. int err = 0;
  158. void *dst;
  159. /* BPF_EXIST is used to force making room for a new element in case the
  160. * map is full
  161. */
  162. bool replace = (flags & BPF_EXIST);
  163. /* Check supported flags for queue and stack maps */
  164. if (flags & BPF_NOEXIST || flags > BPF_EXIST)
  165. return -EINVAL;
  166. raw_spin_lock_irqsave(&qs->lock, irq_flags);
  167. if (queue_stack_map_is_full(qs)) {
  168. if (!replace) {
  169. err = -E2BIG;
  170. goto out;
  171. }
  172. /* advance tail pointer to overwrite oldest element */
  173. if (unlikely(++qs->tail >= qs->size))
  174. qs->tail = 0;
  175. }
  176. dst = &qs->elements[qs->head * qs->map.value_size];
  177. memcpy(dst, value, qs->map.value_size);
  178. if (unlikely(++qs->head >= qs->size))
  179. qs->head = 0;
  180. out:
  181. raw_spin_unlock_irqrestore(&qs->lock, irq_flags);
  182. return err;
  183. }
  184. /* Called from syscall or from eBPF program */
  185. static void *queue_stack_map_lookup_elem(struct bpf_map *map, void *key)
  186. {
  187. return NULL;
  188. }
  189. /* Called from syscall or from eBPF program */
  190. static int queue_stack_map_update_elem(struct bpf_map *map, void *key,
  191. void *value, u64 flags)
  192. {
  193. return -EINVAL;
  194. }
  195. /* Called from syscall or from eBPF program */
  196. static int queue_stack_map_delete_elem(struct bpf_map *map, void *key)
  197. {
  198. return -EINVAL;
  199. }
  200. /* Called from syscall */
  201. static int queue_stack_map_get_next_key(struct bpf_map *map, void *key,
  202. void *next_key)
  203. {
  204. return -EINVAL;
  205. }
  206. const struct bpf_map_ops queue_map_ops = {
  207. .map_alloc_check = queue_stack_map_alloc_check,
  208. .map_alloc = queue_stack_map_alloc,
  209. .map_free = queue_stack_map_free,
  210. .map_lookup_elem = queue_stack_map_lookup_elem,
  211. .map_update_elem = queue_stack_map_update_elem,
  212. .map_delete_elem = queue_stack_map_delete_elem,
  213. .map_push_elem = queue_stack_map_push_elem,
  214. .map_pop_elem = queue_map_pop_elem,
  215. .map_peek_elem = queue_map_peek_elem,
  216. .map_get_next_key = queue_stack_map_get_next_key,
  217. };
  218. const struct bpf_map_ops stack_map_ops = {
  219. .map_alloc_check = queue_stack_map_alloc_check,
  220. .map_alloc = queue_stack_map_alloc,
  221. .map_free = queue_stack_map_free,
  222. .map_lookup_elem = queue_stack_map_lookup_elem,
  223. .map_update_elem = queue_stack_map_update_elem,
  224. .map_delete_elem = queue_stack_map_delete_elem,
  225. .map_push_elem = queue_stack_map_push_elem,
  226. .map_pop_elem = stack_map_pop_elem,
  227. .map_peek_elem = stack_map_peek_elem,
  228. .map_get_next_key = queue_stack_map_get_next_key,
  229. };