queue_stack_maps.c 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * queue_stack_maps.c: BPF queue and stack maps
  4. *
  5. * Copyright (c) 2018 Politecnico di Torino
  6. */
  7. #include <linux/bpf.h>
  8. #include <linux/list.h>
  9. #include <linux/slab.h>
  10. #include "percpu_freelist.h"
  11. #define QUEUE_STACK_CREATE_FLAG_MASK \
  12. (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
  13. struct bpf_queue_stack {
  14. struct bpf_map map;
  15. raw_spinlock_t lock;
  16. u32 head, tail;
  17. u32 size; /* max_entries + 1 */
  18. char elements[0] __aligned(8);
  19. };
  20. static struct bpf_queue_stack *bpf_queue_stack(struct bpf_map *map)
  21. {
  22. return container_of(map, struct bpf_queue_stack, map);
  23. }
  24. static bool queue_stack_map_is_empty(struct bpf_queue_stack *qs)
  25. {
  26. return qs->head == qs->tail;
  27. }
  28. static bool queue_stack_map_is_full(struct bpf_queue_stack *qs)
  29. {
  30. u32 head = qs->head + 1;
  31. if (unlikely(head >= qs->size))
  32. head = 0;
  33. return head == qs->tail;
  34. }
  35. /* Called from syscall */
  36. static int queue_stack_map_alloc_check(union bpf_attr *attr)
  37. {
  38. /* check sanity of attributes */
  39. if (attr->max_entries == 0 || attr->key_size != 0 ||
  40. attr->map_flags & ~QUEUE_STACK_CREATE_FLAG_MASK)
  41. return -EINVAL;
  42. if (attr->value_size > KMALLOC_MAX_SIZE)
  43. /* if value_size is bigger, the user space won't be able to
  44. * access the elements.
  45. */
  46. return -E2BIG;
  47. return 0;
  48. }
  49. static struct bpf_map *queue_stack_map_alloc(union bpf_attr *attr)
  50. {
  51. int ret, numa_node = bpf_map_attr_numa_node(attr);
  52. struct bpf_queue_stack *qs;
  53. u32 size, value_size;
  54. u64 queue_size, cost;
  55. size = attr->max_entries + 1;
  56. value_size = attr->value_size;
  57. queue_size = sizeof(*qs) + (u64) value_size * size;
  58. cost = queue_size;
  59. if (cost >= U32_MAX - PAGE_SIZE)
  60. return ERR_PTR(-E2BIG);
  61. cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
  62. ret = bpf_map_precharge_memlock(cost);
  63. if (ret < 0)
  64. return ERR_PTR(ret);
  65. qs = bpf_map_area_alloc(queue_size, numa_node);
  66. if (!qs)
  67. return ERR_PTR(-ENOMEM);
  68. memset(qs, 0, sizeof(*qs));
  69. bpf_map_init_from_attr(&qs->map, attr);
  70. qs->map.pages = cost;
  71. qs->size = size;
  72. raw_spin_lock_init(&qs->lock);
  73. return &qs->map;
  74. }
  75. /* Called when map->refcnt goes to zero, either from workqueue or from syscall */
  76. static void queue_stack_map_free(struct bpf_map *map)
  77. {
  78. struct bpf_queue_stack *qs = bpf_queue_stack(map);
  79. /* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
  80. * so the programs (can be more than one that used this map) were
  81. * disconnected from events. Wait for outstanding critical sections in
  82. * these programs to complete
  83. */
  84. synchronize_rcu();
  85. bpf_map_area_free(qs);
  86. }
  87. static int __queue_map_get(struct bpf_map *map, void *value, bool delete)
  88. {
  89. struct bpf_queue_stack *qs = bpf_queue_stack(map);
  90. unsigned long flags;
  91. int err = 0;
  92. void *ptr;
  93. raw_spin_lock_irqsave(&qs->lock, flags);
  94. if (queue_stack_map_is_empty(qs)) {
  95. memset(value, 0, qs->map.value_size);
  96. err = -ENOENT;
  97. goto out;
  98. }
  99. ptr = &qs->elements[qs->tail * qs->map.value_size];
  100. memcpy(value, ptr, qs->map.value_size);
  101. if (delete) {
  102. if (unlikely(++qs->tail >= qs->size))
  103. qs->tail = 0;
  104. }
  105. out:
  106. raw_spin_unlock_irqrestore(&qs->lock, flags);
  107. return err;
  108. }
  109. static int __stack_map_get(struct bpf_map *map, void *value, bool delete)
  110. {
  111. struct bpf_queue_stack *qs = bpf_queue_stack(map);
  112. unsigned long flags;
  113. int err = 0;
  114. void *ptr;
  115. u32 index;
  116. raw_spin_lock_irqsave(&qs->lock, flags);
  117. if (queue_stack_map_is_empty(qs)) {
  118. memset(value, 0, qs->map.value_size);
  119. err = -ENOENT;
  120. goto out;
  121. }
  122. index = qs->head - 1;
  123. if (unlikely(index >= qs->size))
  124. index = qs->size - 1;
  125. ptr = &qs->elements[index * qs->map.value_size];
  126. memcpy(value, ptr, qs->map.value_size);
  127. if (delete)
  128. qs->head = index;
  129. out:
  130. raw_spin_unlock_irqrestore(&qs->lock, flags);
  131. return err;
  132. }
  133. /* Called from syscall or from eBPF program */
  134. static int queue_map_peek_elem(struct bpf_map *map, void *value)
  135. {
  136. return __queue_map_get(map, value, false);
  137. }
  138. /* Called from syscall or from eBPF program */
  139. static int stack_map_peek_elem(struct bpf_map *map, void *value)
  140. {
  141. return __stack_map_get(map, value, false);
  142. }
  143. /* Called from syscall or from eBPF program */
  144. static int queue_map_pop_elem(struct bpf_map *map, void *value)
  145. {
  146. return __queue_map_get(map, value, true);
  147. }
  148. /* Called from syscall or from eBPF program */
  149. static int stack_map_pop_elem(struct bpf_map *map, void *value)
  150. {
  151. return __stack_map_get(map, value, true);
  152. }
  153. /* Called from syscall or from eBPF program */
  154. static int queue_stack_map_push_elem(struct bpf_map *map, void *value,
  155. u64 flags)
  156. {
  157. struct bpf_queue_stack *qs = bpf_queue_stack(map);
  158. unsigned long irq_flags;
  159. int err = 0;
  160. void *dst;
  161. /* BPF_EXIST is used to force making room for a new element in case the
  162. * map is full
  163. */
  164. bool replace = (flags & BPF_EXIST);
  165. /* Check supported flags for queue and stack maps */
  166. if (flags & BPF_NOEXIST || flags > BPF_EXIST)
  167. return -EINVAL;
  168. raw_spin_lock_irqsave(&qs->lock, irq_flags);
  169. if (queue_stack_map_is_full(qs)) {
  170. if (!replace) {
  171. err = -E2BIG;
  172. goto out;
  173. }
  174. /* advance tail pointer to overwrite oldest element */
  175. if (unlikely(++qs->tail >= qs->size))
  176. qs->tail = 0;
  177. }
  178. dst = &qs->elements[qs->head * qs->map.value_size];
  179. memcpy(dst, value, qs->map.value_size);
  180. if (unlikely(++qs->head >= qs->size))
  181. qs->head = 0;
  182. out:
  183. raw_spin_unlock_irqrestore(&qs->lock, irq_flags);
  184. return err;
  185. }
  186. /* Called from syscall or from eBPF program */
  187. static void *queue_stack_map_lookup_elem(struct bpf_map *map, void *key)
  188. {
  189. return NULL;
  190. }
  191. /* Called from syscall or from eBPF program */
  192. static int queue_stack_map_update_elem(struct bpf_map *map, void *key,
  193. void *value, u64 flags)
  194. {
  195. return -EINVAL;
  196. }
  197. /* Called from syscall or from eBPF program */
  198. static int queue_stack_map_delete_elem(struct bpf_map *map, void *key)
  199. {
  200. return -EINVAL;
  201. }
  202. /* Called from syscall */
  203. static int queue_stack_map_get_next_key(struct bpf_map *map, void *key,
  204. void *next_key)
  205. {
  206. return -EINVAL;
  207. }
  208. const struct bpf_map_ops queue_map_ops = {
  209. .map_alloc_check = queue_stack_map_alloc_check,
  210. .map_alloc = queue_stack_map_alloc,
  211. .map_free = queue_stack_map_free,
  212. .map_lookup_elem = queue_stack_map_lookup_elem,
  213. .map_update_elem = queue_stack_map_update_elem,
  214. .map_delete_elem = queue_stack_map_delete_elem,
  215. .map_push_elem = queue_stack_map_push_elem,
  216. .map_pop_elem = queue_map_pop_elem,
  217. .map_peek_elem = queue_map_peek_elem,
  218. .map_get_next_key = queue_stack_map_get_next_key,
  219. };
  220. const struct bpf_map_ops stack_map_ops = {
  221. .map_alloc_check = queue_stack_map_alloc_check,
  222. .map_alloc = queue_stack_map_alloc,
  223. .map_free = queue_stack_map_free,
  224. .map_lookup_elem = queue_stack_map_lookup_elem,
  225. .map_update_elem = queue_stack_map_update_elem,
  226. .map_delete_elem = queue_stack_map_delete_elem,
  227. .map_push_elem = queue_stack_map_push_elem,
  228. .map_pop_elem = stack_map_pop_elem,
  229. .map_peek_elem = stack_map_peek_elem,
  230. .map_get_next_key = queue_stack_map_get_next_key,
  231. };