cgroup.c 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238
  1. /*
  2. * Functions to manage eBPF programs attached to cgroups
  3. *
  4. * Copyright (c) 2016 Daniel Mack
  5. *
  6. * This file is subject to the terms and conditions of version 2 of the GNU
  7. * General Public License. See the file COPYING in the main directory of the
  8. * Linux distribution for more details.
  9. */
  10. #include <linux/kernel.h>
  11. #include <linux/atomic.h>
  12. #include <linux/cgroup.h>
  13. #include <linux/slab.h>
  14. #include <linux/bpf.h>
  15. #include <linux/bpf-cgroup.h>
  16. #include <net/sock.h>
  /*
   * Static key, initially false. Within this file it is incremented each time
   * a program is attached through __cgroup_bpf_update() and decremented each
   * time an attached program is released, so it tracks how many cgroup BPF
   * programs are currently attached. Exported for use outside this file.
   */
  17. DEFINE_STATIC_KEY_FALSE(cgroup_bpf_enabled_key);
  18. EXPORT_SYMBOL(cgroup_bpf_enabled_key);
  19. /**
  20. * cgroup_bpf_put() - put references of all bpf programs
  21. * @cgrp: the cgroup to modify
  22. */
  23. void cgroup_bpf_put(struct cgroup *cgrp)
  24. {
  25. unsigned int type;
  26. for (type = 0; type < ARRAY_SIZE(cgrp->bpf.prog); type++) {
  27. struct bpf_prog *prog = cgrp->bpf.prog[type];
  28. if (prog) {
  29. bpf_prog_put(prog);
  30. static_branch_dec(&cgroup_bpf_enabled_key);
  31. }
  32. }
  33. }
  34. /**
  35. * cgroup_bpf_inherit() - inherit effective programs from parent
  36. * @cgrp: the cgroup to modify
  37. * @parent: the parent to inherit from
  38. */
  39. void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent)
  40. {
  41. unsigned int type;
  42. for (type = 0; type < ARRAY_SIZE(cgrp->bpf.effective); type++) {
  43. struct bpf_prog *e;
  44. e = rcu_dereference_protected(parent->bpf.effective[type],
  45. lockdep_is_held(&cgroup_mutex));
  46. rcu_assign_pointer(cgrp->bpf.effective[type], e);
  47. cgrp->bpf.disallow_override[type] = parent->bpf.disallow_override[type];
  48. }
  49. }
  50. /**
  51. * __cgroup_bpf_update() - Update the pinned program of a cgroup, and
  52. * propagate the change to descendants
  53. * @cgrp: The cgroup which descendants to traverse
  54. * @parent: The parent of @cgrp, or %NULL if @cgrp is the root
  55. * @prog: A new program to pin
  56. * @type: Type of pinning operation (ingress/egress)
  57. *
  58. * Each cgroup has a set of two pointers for bpf programs; one for eBPF
  59. * programs it owns, and which is effective for execution.
  60. *
  61. * If @prog is not %NULL, this function attaches a new program to the cgroup
  62. * and releases the one that is currently attached, if any. @prog is then made
  63. * the effective program of type @type in that cgroup.
  64. *
  65. * If @prog is %NULL, the currently attached program of type @type is released,
  66. * and the effective program of the parent cgroup (if any) is inherited to
  67. * @cgrp.
  68. *
  69. * Then, the descendants of @cgrp are walked and the effective program for
  70. * each of them is set to the effective program of @cgrp unless the
  71. * descendant has its own program attached, in which case the subbranch is
  72. * skipped. This ensures that delegated subcgroups with own programs are left
  73. * untouched.
  74. *
  75. * Must be called with cgroup_mutex held.
  76. */
  77. int __cgroup_bpf_update(struct cgroup *cgrp, struct cgroup *parent,
  78. struct bpf_prog *prog, enum bpf_attach_type type,
  79. bool new_overridable)
  80. {
  81. struct bpf_prog *old_prog, *effective = NULL;
  82. struct cgroup_subsys_state *pos;
  83. bool overridable = true;
  84. if (parent) {
  85. overridable = !parent->bpf.disallow_override[type];
  86. effective = rcu_dereference_protected(parent->bpf.effective[type],
  87. lockdep_is_held(&cgroup_mutex));
  88. }
  89. if (prog && effective && !overridable)
  90. /* if parent has non-overridable prog attached, disallow
  91. * attaching new programs to descendent cgroup
  92. */
  93. return -EPERM;
  94. if (prog && effective && overridable != new_overridable)
  95. /* if parent has overridable prog attached, only
  96. * allow overridable programs in descendent cgroup
  97. */
  98. return -EPERM;
  99. old_prog = cgrp->bpf.prog[type];
  100. if (prog) {
  101. overridable = new_overridable;
  102. effective = prog;
  103. if (old_prog &&
  104. cgrp->bpf.disallow_override[type] == new_overridable)
  105. /* disallow attaching non-overridable on top
  106. * of existing overridable in this cgroup
  107. * and vice versa
  108. */
  109. return -EPERM;
  110. }
  111. if (!prog && !old_prog)
  112. /* report error when trying to detach and nothing is attached */
  113. return -ENOENT;
  114. cgrp->bpf.prog[type] = prog;
  115. css_for_each_descendant_pre(pos, &cgrp->self) {
  116. struct cgroup *desc = container_of(pos, struct cgroup, self);
  117. /* skip the subtree if the descendant has its own program */
  118. if (desc->bpf.prog[type] && desc != cgrp) {
  119. pos = css_rightmost_descendant(pos);
  120. } else {
  121. rcu_assign_pointer(desc->bpf.effective[type],
  122. effective);
  123. desc->bpf.disallow_override[type] = !overridable;
  124. }
  125. }
  126. if (prog)
  127. static_branch_inc(&cgroup_bpf_enabled_key);
  128. if (old_prog) {
  129. bpf_prog_put(old_prog);
  130. static_branch_dec(&cgroup_bpf_enabled_key);
  131. }
  132. return 0;
  133. }
  134. /**
  135. * __cgroup_bpf_run_filter_skb() - Run a program for packet filtering
  136. * @sk: The socket sending or receiving traffic
  137. * @skb: The skb that is being sent or received
  138. * @type: The type of program to be exectuted
  139. *
  140. * If no socket is passed, or the socket is not of type INET or INET6,
  141. * this function does nothing and returns 0.
  142. *
  143. * The program type passed in via @type must be suitable for network
  144. * filtering. No further check is performed to assert that.
  145. *
  146. * This function will return %-EPERM if any if an attached program was found
  147. * and if it returned != 1 during execution. In all other cases, 0 is returned.
  148. */
  149. int __cgroup_bpf_run_filter_skb(struct sock *sk,
  150. struct sk_buff *skb,
  151. enum bpf_attach_type type)
  152. {
  153. struct bpf_prog *prog;
  154. struct cgroup *cgrp;
  155. int ret = 0;
  156. if (!sk || !sk_fullsock(sk))
  157. return 0;
  158. if (sk->sk_family != AF_INET &&
  159. sk->sk_family != AF_INET6)
  160. return 0;
  161. cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
  162. rcu_read_lock();
  163. prog = rcu_dereference(cgrp->bpf.effective[type]);
  164. if (prog) {
  165. unsigned int offset = skb->data - skb_network_header(skb);
  166. struct sock *save_sk = skb->sk;
  167. skb->sk = sk;
  168. __skb_push(skb, offset);
  169. ret = bpf_prog_run_save_cb(prog, skb) == 1 ? 0 : -EPERM;
  170. __skb_pull(skb, offset);
  171. skb->sk = save_sk;
  172. }
  173. rcu_read_unlock();
  174. return ret;
  175. }
  176. EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb);
  177. /**
  178. * __cgroup_bpf_run_filter_sk() - Run a program on a sock
  179. * @sk: sock structure to manipulate
  180. * @type: The type of program to be exectuted
  181. *
  182. * socket is passed is expected to be of type INET or INET6.
  183. *
  184. * The program type passed in via @type must be suitable for sock
  185. * filtering. No further check is performed to assert that.
  186. *
  187. * This function will return %-EPERM if any if an attached program was found
  188. * and if it returned != 1 during execution. In all other cases, 0 is returned.
  189. */
  190. int __cgroup_bpf_run_filter_sk(struct sock *sk,
  191. enum bpf_attach_type type)
  192. {
  193. struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
  194. struct bpf_prog *prog;
  195. int ret = 0;
  196. rcu_read_lock();
  197. prog = rcu_dereference(cgrp->bpf.effective[type]);
  198. if (prog)
  199. ret = BPF_PROG_RUN(prog, sk) == 1 ? 0 : -EPERM;
  200. rcu_read_unlock();
  201. return ret;
  202. }
  203. EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);