//SPDX-License-Identifier: GPL-2.0
#include <linux/bpf-cgroup.h>
#include <linux/bpf.h>
#include <linux/bug.h>
#include <linux/filter.h>
#include <linux/mm.h>
#include <linux/rbtree.h>
#include <linux/slab.h>

DEFINE_PER_CPU(void*, bpf_cgroup_storage);

#ifdef CONFIG_CGROUP_BPF

#define LOCAL_STORAGE_CREATE_FLAG_MASK \
	(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)

struct bpf_cgroup_storage_map {
	struct bpf_map map;

	spinlock_t lock;
	struct bpf_prog *prog;
	struct rb_root root;
	struct list_head list;
};

static struct bpf_cgroup_storage_map *map_to_storage(struct bpf_map *map)
{
	return container_of(map, struct bpf_cgroup_storage_map, map);
}

static int bpf_cgroup_storage_key_cmp(
	const struct bpf_cgroup_storage_key *key1,
	const struct bpf_cgroup_storage_key *key2)
{
	if (key1->cgroup_inode_id < key2->cgroup_inode_id)
		return -1;
	else if (key1->cgroup_inode_id > key2->cgroup_inode_id)
		return 1;
	else if (key1->attach_type < key2->attach_type)
		return -1;
	else if (key1->attach_type > key2->attach_type)
		return 1;
	return 0;
}

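/*
 * Look up the storage for a (cgroup_inode_id, attach_type) key by walking
 * the map's rbtree.  When @locked is false the map lock is taken around the
 * walk; callers that already hold map->lock pass true.
 */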
static struct bpf_cgroup_storage *cgroup_storage_lookup(
	struct bpf_cgroup_storage_map *map, struct bpf_cgroup_storage_key *key,
	bool locked)
{
	struct rb_root *root = &map->root;
	struct rb_node *node;

	if (!locked)
		spin_lock_bh(&map->lock);

	node = root->rb_node;
	while (node) {
		struct bpf_cgroup_storage *storage;

		storage = container_of(node, struct bpf_cgroup_storage, node);

		switch (bpf_cgroup_storage_key_cmp(key, &storage->key)) {
		case -1:
			node = node->rb_left;
			break;
		case 1:
			node = node->rb_right;
			break;
		default:
			if (!locked)
				spin_unlock_bh(&map->lock);
			return storage;
		}
	}

	if (!locked)
		spin_unlock_bh(&map->lock);

	return NULL;
}

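/*
 * Insert @storage into the map's rbtree, keyed like the lookup above.
 * Returns -EEXIST if an entry with the same key is already present.
 * Called with map->lock held.
 */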
static int cgroup_storage_insert(struct bpf_cgroup_storage_map *map,
				 struct bpf_cgroup_storage *storage)
{
	struct rb_root *root = &map->root;
	struct rb_node **new = &(root->rb_node), *parent = NULL;

	while (*new) {
		struct bpf_cgroup_storage *this;

		this = container_of(*new, struct bpf_cgroup_storage, node);

		parent = *new;
		switch (bpf_cgroup_storage_key_cmp(&storage->key, &this->key)) {
		case -1:
			new = &((*new)->rb_left);
			break;
		case 1:
			new = &((*new)->rb_right);
			break;
		default:
			return -EEXIST;
		}
	}

	rb_link_node(&storage->node, parent, new);
	rb_insert_color(&storage->node, root);

	return 0;
}

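/* map_lookup_elem callback: return a pointer to the current value buffer. */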
static void *cgroup_storage_lookup_elem(struct bpf_map *_map, void *_key)
{
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
	struct bpf_cgroup_storage_key *key = _key;
	struct bpf_cgroup_storage *storage;

	storage = cgroup_storage_lookup(map, key, false);
	if (!storage)
		return NULL;

	return &READ_ONCE(storage->buf)->data[0];
}

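/*
 * map_update_elem callback: replace the value of an existing storage.  A new
 * buffer is allocated and published with xchg(); the old buffer is freed
 * after an RCU grace period.  BPF_NOEXIST is rejected since elements cannot
 * be created through this path.
 */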
static int cgroup_storage_update_elem(struct bpf_map *map, void *_key,
				      void *value, u64 flags)
{
	struct bpf_cgroup_storage_key *key = _key;
	struct bpf_cgroup_storage *storage;
	struct bpf_storage_buffer *new;

	if (flags & BPF_NOEXIST)
		return -EINVAL;

	storage = cgroup_storage_lookup((struct bpf_cgroup_storage_map *)map,
					key, false);
	if (!storage)
		return -ENOENT;

	new = kmalloc_node(sizeof(struct bpf_storage_buffer) +
			   map->value_size, __GFP_ZERO | GFP_USER,
			   map->numa_node);
	if (!new)
		return -ENOMEM;

	memcpy(&new->data[0], value, map->value_size);

	new = xchg(&storage->buf, new);
	kfree_rcu(new, rcu);

	return 0;
}

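/*
 * map_get_next_key callback: iterate over allocated storages via the map's
 * list.  With no @_key, return the first entry; otherwise return the entry
 * following the one that matches @_key.
 */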
static int cgroup_storage_get_next_key(struct bpf_map *_map, void *_key,
				       void *_next_key)
{
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
	struct bpf_cgroup_storage_key *key = _key;
	struct bpf_cgroup_storage_key *next = _next_key;
	struct bpf_cgroup_storage *storage;

	spin_lock_bh(&map->lock);

	if (list_empty(&map->list))
		goto enoent;

	if (key) {
		storage = cgroup_storage_lookup(map, key, true);
		if (!storage)
			goto enoent;

		storage = list_next_entry(storage, list);
		if (!storage)
			goto enoent;
	} else {
		storage = list_first_entry(&map->list,
					   struct bpf_cgroup_storage, list);
	}

	spin_unlock_bh(&map->lock);
	next->attach_type = storage->key.attach_type;
	next->cgroup_inode_id = storage->key.cgroup_inode_id;
	return 0;

enoent:
	spin_unlock_bh(&map->lock);
	return -ENOENT;
}

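/*
 * map_alloc callback: the key must be a struct bpf_cgroup_storage_key, the
 * value size is limited to one page, only the NUMA_NODE/RDONLY/WRONLY flags
 * are accepted, and max_entries must be 0.
 */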
static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
{
	int numa_node = bpf_map_attr_numa_node(attr);
	struct bpf_cgroup_storage_map *map;

	if (attr->key_size != sizeof(struct bpf_cgroup_storage_key))
		return ERR_PTR(-EINVAL);

	if (attr->value_size > PAGE_SIZE)
		return ERR_PTR(-E2BIG);

	if (attr->map_flags & ~LOCAL_STORAGE_CREATE_FLAG_MASK)
		/* reserved bits should not be used */
		return ERR_PTR(-EINVAL);

	if (attr->max_entries)
		/* max_entries is not used and enforced to be 0 */
		return ERR_PTR(-EINVAL);

	map = kmalloc_node(sizeof(struct bpf_cgroup_storage_map),
			   __GFP_ZERO | GFP_USER, numa_node);
	if (!map)
		return ERR_PTR(-ENOMEM);

	map->map.pages = round_up(sizeof(struct bpf_cgroup_storage_map),
				  PAGE_SIZE) >> PAGE_SHIFT;

	/* copy mandatory map attributes */
	bpf_map_init_from_attr(&map->map, attr);

	spin_lock_init(&map->lock);
	map->root = RB_ROOT;
	INIT_LIST_HEAD(&map->list);

	return &map->map;
}

static void cgroup_storage_map_free(struct bpf_map *_map)
{
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);

	WARN_ON(!RB_EMPTY_ROOT(&map->root));
	WARN_ON(!list_empty(&map->list));

	kfree(map);
}

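/* Deleting individual elements from user space is not supported. */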
static int cgroup_storage_delete_elem(struct bpf_map *map, void *key)
{
	return -EINVAL;
}

const struct bpf_map_ops cgroup_storage_map_ops = {
	.map_alloc = cgroup_storage_map_alloc,
	.map_free = cgroup_storage_map_free,
	.map_get_next_key = cgroup_storage_get_next_key,
	.map_lookup_elem = cgroup_storage_lookup_elem,
	.map_update_elem = cgroup_storage_update_elem,
	.map_delete_elem = cgroup_storage_delete_elem,
	.map_check_btf = map_check_no_btf,
};

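/*
 * Bind a cgroup storage map to a program: a map can only be used by a single
 * program, and a program can only own a single cgroup storage map.  Any other
 * combination fails with -EBUSY.
 */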
int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *_map)
{
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
	int ret = -EBUSY;

	spin_lock_bh(&map->lock);

	if (map->prog && map->prog != prog)
		goto unlock;
	if (prog->aux->cgroup_storage && prog->aux->cgroup_storage != _map)
		goto unlock;

	map->prog = prog;
	prog->aux->cgroup_storage = _map;
	ret = 0;
unlock:
	spin_unlock_bh(&map->lock);

	return ret;
}

void bpf_cgroup_storage_release(struct bpf_prog *prog, struct bpf_map *_map)
{
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);

	spin_lock_bh(&map->lock);

	if (map->prog == prog) {
		WARN_ON(prog->aux->cgroup_storage != _map);
		map->prog = NULL;
		prog->aux->cgroup_storage = NULL;
	}

	spin_unlock_bh(&map->lock);
}

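/*
 * Allocate a bpf_cgroup_storage and its value buffer for the map owned by
 * @prog, charging the pages against the map's memlock limit.  Returns NULL
 * when the program uses no cgroup storage map.
 */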
struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog)
{
	struct bpf_cgroup_storage *storage;
	struct bpf_map *map;
	u32 pages;

	map = prog->aux->cgroup_storage;
	if (!map)
		return NULL;

	pages = round_up(sizeof(struct bpf_cgroup_storage) +
			 sizeof(struct bpf_storage_buffer) +
			 map->value_size, PAGE_SIZE) >> PAGE_SHIFT;
	if (bpf_map_charge_memlock(map, pages))
		return ERR_PTR(-EPERM);

	storage = kmalloc_node(sizeof(struct bpf_cgroup_storage),
			       __GFP_ZERO | GFP_USER, map->numa_node);
	if (!storage) {
		bpf_map_uncharge_memlock(map, pages);
		return ERR_PTR(-ENOMEM);
	}

	storage->buf = kmalloc_node(sizeof(struct bpf_storage_buffer) +
				    map->value_size, __GFP_ZERO | GFP_USER,
				    map->numa_node);
	if (!storage->buf) {
		bpf_map_uncharge_memlock(map, pages);
		kfree(storage);
		return ERR_PTR(-ENOMEM);
	}

	storage->map = (struct bpf_cgroup_storage_map *)map;

	return storage;
}

void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage)
{
	u32 pages;
	struct bpf_map *map;

	if (!storage)
		return;

	map = &storage->map->map;
	pages = round_up(sizeof(struct bpf_cgroup_storage) +
			 sizeof(struct bpf_storage_buffer) +
			 map->value_size, PAGE_SIZE) >> PAGE_SHIFT;
	bpf_map_uncharge_memlock(map, pages);

	kfree_rcu(storage->buf, rcu);
	kfree_rcu(storage, rcu);
}

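/*
 * Link a freshly allocated storage to a (cgroup, attach_type) pair: fill in
 * its key and insert it into the map's rbtree and list under the map lock.
 * bpf_cgroup_storage_unlink() below does the reverse.
 */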
void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage,
			     struct cgroup *cgroup,
			     enum bpf_attach_type type)
{
	struct bpf_cgroup_storage_map *map;

	if (!storage)
		return;

	storage->key.attach_type = type;
	storage->key.cgroup_inode_id = cgroup->kn->id.id;

	map = storage->map;

	spin_lock_bh(&map->lock);
	WARN_ON(cgroup_storage_insert(map, storage));
	list_add(&storage->list, &map->list);
	spin_unlock_bh(&map->lock);
}

void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage)
{
	struct bpf_cgroup_storage_map *map;
	struct rb_root *root;

	if (!storage)
		return;

	map = storage->map;

	spin_lock_bh(&map->lock);
	root = &map->root;
	rb_erase(&storage->node, root);
	list_del(&storage->list);
	spin_unlock_bh(&map->lock);
}

#endif