queue_stack_maps.c 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * queue_stack_maps.c: BPF queue and stack maps
  4. *
  5. * Copyright (c) 2018 Politecnico di Torino
  6. */
  7. #include <linux/bpf.h>
  8. #include <linux/list.h>
  9. #include <linux/slab.h>
  10. #include <linux/capability.h>
  11. #include "percpu_freelist.h"
  12. #define QUEUE_STACK_CREATE_FLAG_MASK \
  13. (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
  14. struct bpf_queue_stack {
  15. struct bpf_map map;
  16. raw_spinlock_t lock;
  17. u32 head, tail;
  18. u32 size; /* max_entries + 1 */
  19. char elements[0] __aligned(8);
  20. };
  21. static struct bpf_queue_stack *bpf_queue_stack(struct bpf_map *map)
  22. {
  23. return container_of(map, struct bpf_queue_stack, map);
  24. }
  25. static bool queue_stack_map_is_empty(struct bpf_queue_stack *qs)
  26. {
  27. return qs->head == qs->tail;
  28. }
  29. static bool queue_stack_map_is_full(struct bpf_queue_stack *qs)
  30. {
  31. u32 head = qs->head + 1;
  32. if (unlikely(head >= qs->size))
  33. head = 0;
  34. return head == qs->tail;
  35. }
  36. /* Called from syscall */
  37. static int queue_stack_map_alloc_check(union bpf_attr *attr)
  38. {
  39. if (!capable(CAP_SYS_ADMIN))
  40. return -EPERM;
  41. /* check sanity of attributes */
  42. if (attr->max_entries == 0 || attr->key_size != 0 ||
  43. attr->value_size == 0 ||
  44. attr->map_flags & ~QUEUE_STACK_CREATE_FLAG_MASK)
  45. return -EINVAL;
  46. if (attr->value_size > KMALLOC_MAX_SIZE)
  47. /* if value_size is bigger, the user space won't be able to
  48. * access the elements.
  49. */
  50. return -E2BIG;
  51. return 0;
  52. }
  53. static struct bpf_map *queue_stack_map_alloc(union bpf_attr *attr)
  54. {
  55. int ret, numa_node = bpf_map_attr_numa_node(attr);
  56. struct bpf_queue_stack *qs;
  57. u64 size, queue_size, cost;
  58. size = (u64) attr->max_entries + 1;
  59. cost = queue_size = sizeof(*qs) + size * attr->value_size;
  60. if (cost >= U32_MAX - PAGE_SIZE)
  61. return ERR_PTR(-E2BIG);
  62. cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
  63. ret = bpf_map_precharge_memlock(cost);
  64. if (ret < 0)
  65. return ERR_PTR(ret);
  66. qs = bpf_map_area_alloc(queue_size, numa_node);
  67. if (!qs)
  68. return ERR_PTR(-ENOMEM);
  69. memset(qs, 0, sizeof(*qs));
  70. bpf_map_init_from_attr(&qs->map, attr);
  71. qs->map.pages = cost;
  72. qs->size = size;
  73. raw_spin_lock_init(&qs->lock);
  74. return &qs->map;
  75. }
  76. /* Called when map->refcnt goes to zero, either from workqueue or from syscall */
  77. static void queue_stack_map_free(struct bpf_map *map)
  78. {
  79. struct bpf_queue_stack *qs = bpf_queue_stack(map);
  80. /* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
  81. * so the programs (can be more than one that used this map) were
  82. * disconnected from events. Wait for outstanding critical sections in
  83. * these programs to complete
  84. */
  85. synchronize_rcu();
  86. bpf_map_area_free(qs);
  87. }
  88. static int __queue_map_get(struct bpf_map *map, void *value, bool delete)
  89. {
  90. struct bpf_queue_stack *qs = bpf_queue_stack(map);
  91. unsigned long flags;
  92. int err = 0;
  93. void *ptr;
  94. raw_spin_lock_irqsave(&qs->lock, flags);
  95. if (queue_stack_map_is_empty(qs)) {
  96. memset(value, 0, qs->map.value_size);
  97. err = -ENOENT;
  98. goto out;
  99. }
  100. ptr = &qs->elements[qs->tail * qs->map.value_size];
  101. memcpy(value, ptr, qs->map.value_size);
  102. if (delete) {
  103. if (unlikely(++qs->tail >= qs->size))
  104. qs->tail = 0;
  105. }
  106. out:
  107. raw_spin_unlock_irqrestore(&qs->lock, flags);
  108. return err;
  109. }
  110. static int __stack_map_get(struct bpf_map *map, void *value, bool delete)
  111. {
  112. struct bpf_queue_stack *qs = bpf_queue_stack(map);
  113. unsigned long flags;
  114. int err = 0;
  115. void *ptr;
  116. u32 index;
  117. raw_spin_lock_irqsave(&qs->lock, flags);
  118. if (queue_stack_map_is_empty(qs)) {
  119. memset(value, 0, qs->map.value_size);
  120. err = -ENOENT;
  121. goto out;
  122. }
  123. index = qs->head - 1;
  124. if (unlikely(index >= qs->size))
  125. index = qs->size - 1;
  126. ptr = &qs->elements[index * qs->map.value_size];
  127. memcpy(value, ptr, qs->map.value_size);
  128. if (delete)
  129. qs->head = index;
  130. out:
  131. raw_spin_unlock_irqrestore(&qs->lock, flags);
  132. return err;
  133. }
  134. /* Called from syscall or from eBPF program */
  135. static int queue_map_peek_elem(struct bpf_map *map, void *value)
  136. {
  137. return __queue_map_get(map, value, false);
  138. }
  139. /* Called from syscall or from eBPF program */
  140. static int stack_map_peek_elem(struct bpf_map *map, void *value)
  141. {
  142. return __stack_map_get(map, value, false);
  143. }
  144. /* Called from syscall or from eBPF program */
  145. static int queue_map_pop_elem(struct bpf_map *map, void *value)
  146. {
  147. return __queue_map_get(map, value, true);
  148. }
  149. /* Called from syscall or from eBPF program */
  150. static int stack_map_pop_elem(struct bpf_map *map, void *value)
  151. {
  152. return __stack_map_get(map, value, true);
  153. }
  154. /* Called from syscall or from eBPF program */
  155. static int queue_stack_map_push_elem(struct bpf_map *map, void *value,
  156. u64 flags)
  157. {
  158. struct bpf_queue_stack *qs = bpf_queue_stack(map);
  159. unsigned long irq_flags;
  160. int err = 0;
  161. void *dst;
  162. /* BPF_EXIST is used to force making room for a new element in case the
  163. * map is full
  164. */
  165. bool replace = (flags & BPF_EXIST);
  166. /* Check supported flags for queue and stack maps */
  167. if (flags & BPF_NOEXIST || flags > BPF_EXIST)
  168. return -EINVAL;
  169. raw_spin_lock_irqsave(&qs->lock, irq_flags);
  170. if (queue_stack_map_is_full(qs)) {
  171. if (!replace) {
  172. err = -E2BIG;
  173. goto out;
  174. }
  175. /* advance tail pointer to overwrite oldest element */
  176. if (unlikely(++qs->tail >= qs->size))
  177. qs->tail = 0;
  178. }
  179. dst = &qs->elements[qs->head * qs->map.value_size];
  180. memcpy(dst, value, qs->map.value_size);
  181. if (unlikely(++qs->head >= qs->size))
  182. qs->head = 0;
  183. out:
  184. raw_spin_unlock_irqrestore(&qs->lock, irq_flags);
  185. return err;
  186. }
  187. /* Called from syscall or from eBPF program */
  188. static void *queue_stack_map_lookup_elem(struct bpf_map *map, void *key)
  189. {
  190. return NULL;
  191. }
  192. /* Called from syscall or from eBPF program */
  193. static int queue_stack_map_update_elem(struct bpf_map *map, void *key,
  194. void *value, u64 flags)
  195. {
  196. return -EINVAL;
  197. }
  198. /* Called from syscall or from eBPF program */
  199. static int queue_stack_map_delete_elem(struct bpf_map *map, void *key)
  200. {
  201. return -EINVAL;
  202. }
  203. /* Called from syscall */
  204. static int queue_stack_map_get_next_key(struct bpf_map *map, void *key,
  205. void *next_key)
  206. {
  207. return -EINVAL;
  208. }
  209. const struct bpf_map_ops queue_map_ops = {
  210. .map_alloc_check = queue_stack_map_alloc_check,
  211. .map_alloc = queue_stack_map_alloc,
  212. .map_free = queue_stack_map_free,
  213. .map_lookup_elem = queue_stack_map_lookup_elem,
  214. .map_update_elem = queue_stack_map_update_elem,
  215. .map_delete_elem = queue_stack_map_delete_elem,
  216. .map_push_elem = queue_stack_map_push_elem,
  217. .map_pop_elem = queue_map_pop_elem,
  218. .map_peek_elem = queue_map_peek_elem,
  219. .map_get_next_key = queue_stack_map_get_next_key,
  220. };
  221. const struct bpf_map_ops stack_map_ops = {
  222. .map_alloc_check = queue_stack_map_alloc_check,
  223. .map_alloc = queue_stack_map_alloc,
  224. .map_free = queue_stack_map_free,
  225. .map_lookup_elem = queue_stack_map_lookup_elem,
  226. .map_update_elem = queue_stack_map_update_elem,
  227. .map_delete_elem = queue_stack_map_delete_elem,
  228. .map_push_elem = queue_stack_map_push_elem,
  229. .map_pop_elem = stack_map_pop_elem,
  230. .map_peek_elem = stack_map_peek_elem,
  231. .map_get_next_key = queue_stack_map_get_next_key,
  232. };