9 years ago · b5b5eca9aa
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -36,31 +36,44 @@ void cgroup_bpf_update(struct cgroup *cgrp,
 
				 		       struct bpf_prog *prog,
			
 
				 		       enum bpf_attach_type type);
			
 
				 
			
 
				-int __cgroup_bpf_run_filter(struct sock *sk,
			
 
				-			    struct sk_buff *skb,
			
 
				-			    enum bpf_attach_type type);
			
 
				-
			
 
				-/* Wrappers for __cgroup_bpf_run_filter() guarded by cgroup_bpf_enabled. */
			
 
				-#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb)			\
			
 
				-({									\
			
 
				-	int __ret = 0;							\
			
 
				-	if (cgroup_bpf_enabled)						\
			
 
				-		__ret = __cgroup_bpf_run_filter(sk, skb,		\
			
 
				-						BPF_CGROUP_INET_INGRESS); \
			
 
				-									\
			
 
				-	__ret;								\
			
 
				+int __cgroup_bpf_run_filter_skb(struct sock *sk,
			
 
				+				struct sk_buff *skb,
			
 
				+				enum bpf_attach_type type);
			
 
				+
			
 
				+int __cgroup_bpf_run_filter_sk(struct sock *sk,
			
 
				+			       enum bpf_attach_type type);
			
 
				+
			
 
				+/* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. */
			
 
				+#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb)			      \
			
 
				+({									      \
			
 
				+	int __ret = 0;							      \
			
 
				+	if (cgroup_bpf_enabled)						      \
			
 
				+		__ret = __cgroup_bpf_run_filter_skb(sk, skb,		      \
			
 
				+						    BPF_CGROUP_INET_INGRESS); \
			
 
				+									      \
			
 
				+	__ret;								      \
			
 
				 })
			
 
				 
			
 
				-#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb)				\
			
 
				-({									\
			
 
				-	int __ret = 0;							\
			
 
				-	if (cgroup_bpf_enabled && sk && sk == skb->sk) {		\
			
 
				-		typeof(sk) __sk = sk_to_full_sk(sk);			\
			
 
				-		if (sk_fullsock(__sk))					\
			
 
				-			__ret = __cgroup_bpf_run_filter(__sk, skb,	\
			
 
				-						BPF_CGROUP_INET_EGRESS); \
			
 
				-	}								\
			
 
				-	__ret;								\
			
 
				+#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb)			       \
			
 
				+({									       \
			
 
				+	int __ret = 0;							       \
			
 
				+	if (cgroup_bpf_enabled && sk && sk == skb->sk) {		       \
			
 
				+		typeof(sk) __sk = sk_to_full_sk(sk);			       \
			
 
				+		if (sk_fullsock(__sk))					       \
			
 
				+			__ret = __cgroup_bpf_run_filter_skb(__sk, skb,	       \
			
 
				+						      BPF_CGROUP_INET_EGRESS); \
			
 
				+	}								       \
			
 
				+	__ret;								       \
			
 
				+})
			
 
				+
			
 
				+#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk)				       \
			
 
				+({									       \
			
 
				+	int __ret = 0;							       \
			
 
				+	if (cgroup_bpf_enabled && sk) {					       \
			
 
				+		__ret = __cgroup_bpf_run_filter_sk(sk,			       \
			
 
				+						 BPF_CGROUP_INET_SOCK_CREATE); \
			
 
				+	}								       \
			
 
				+	__ret;								       \
			
 
				 })
			
 
				 
			
 
				 #else
			
@@ -72,6 +85,7 @@ static inline void cgroup_bpf_inherit(struct cgroup *cgrp,
 
				 
			
 
				 #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
			
 
				 #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
			
 
				+#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; })
			
 
				 
			
 
				 #endif /* CONFIG_CGROUP_BPF */
			
 
				 
			
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -389,6 +389,21 @@ struct sock {
 
				 	 * Because of non atomicity rules, all
			
 
				 	 * changes are protected by socket lock.
			
 
				 	 */
			
 
				+	unsigned int		__sk_flags_offset[0];
			
 
				+#ifdef __BIG_ENDIAN_BITFIELD
			
 
				+#define SK_FL_PROTO_SHIFT  16
			
 
				+#define SK_FL_PROTO_MASK   0x00ff0000
			
 
				+
			
 
				+#define SK_FL_TYPE_SHIFT   0
			
 
				+#define SK_FL_TYPE_MASK    0x0000ffff
			
 
				+#else
			
 
				+#define SK_FL_PROTO_SHIFT  8
			
 
				+#define SK_FL_PROTO_MASK   0x0000ff00
			
 
				+
			
 
				+#define SK_FL_TYPE_SHIFT   16
			
 
				+#define SK_FL_TYPE_MASK    0xffff0000
			
 
				+#endif
			
 
				+
			
 
				 	kmemcheck_bitfield_begin(flags);
			
 
				 	unsigned int		sk_padding : 2,
			
 
				 				sk_no_check_tx : 1,
			
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -101,6 +101,7 @@ enum bpf_prog_type {
 
				 	BPF_PROG_TYPE_XDP,
			
 
				 	BPF_PROG_TYPE_PERF_EVENT,
			
 
				 	BPF_PROG_TYPE_CGROUP_SKB,
			
 
				+	BPF_PROG_TYPE_CGROUP_SOCK,
			
 
				 	BPF_PROG_TYPE_LWT_IN,
			
 
				 	BPF_PROG_TYPE_LWT_OUT,
			
 
				 	BPF_PROG_TYPE_LWT_XMIT,
			
@@ -109,6 +110,7 @@ enum bpf_prog_type {
 
				 enum bpf_attach_type {
			
 
				 	BPF_CGROUP_INET_INGRESS,
			
 
				 	BPF_CGROUP_INET_EGRESS,
			
 
				+	BPF_CGROUP_INET_SOCK_CREATE,
			
 
				 	__MAX_BPF_ATTACH_TYPE
			
 
				 };
			
 
				 
			
@@ -567,6 +569,13 @@ enum bpf_ret_code {
 
				 	/* >127 are reserved for prog type specific return codes */
			
 
				 };
			
 
				 
			
 
				+struct bpf_sock {
			
 
				+	__u32 bound_dev_if;
			
 
				+	__u32 family;
			
 
				+	__u32 type;
			
 
				+	__u32 protocol;
			
 
				+};
			
 
				+
			
 
				 /* User return codes for XDP prog type.
			
 
				  * A valid XDP program must return one of these defined values. All other
			
 
				  * return codes are reserved for future use. Unknown return codes will result
			
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -118,7 +118,7 @@ void __cgroup_bpf_update(struct cgroup *cgrp,
 
				 }
			
 
				 
			
 
				 /**
			
 
				- * __cgroup_bpf_run_filter() - Run a program for packet filtering
			
 
				+ * __cgroup_bpf_run_filter_skb() - Run a program for packet filtering
			
 
				  * @sk: The socken sending or receiving traffic
			
 
				  * @skb: The skb that is being sent or received
			
 
				  * @type: The type of program to be exectuted
			
@@ -132,9 +132,9 @@ void __cgroup_bpf_update(struct cgroup *cgrp,
 
				  * This function will return %-EPERM if any if an attached program was found
			
 
				  * and if it returned != 1 during execution. In all other cases, 0 is returned.
			
 
				  */
			
 
				-int __cgroup_bpf_run_filter(struct sock *sk,
			
 
				-			    struct sk_buff *skb,
			
 
				-			    enum bpf_attach_type type)
			
 
				+int __cgroup_bpf_run_filter_skb(struct sock *sk,
			
 
				+				struct sk_buff *skb,
			
 
				+				enum bpf_attach_type type)
			
 
				 {
			
 
				 	struct bpf_prog *prog;
			
 
				 	struct cgroup *cgrp;
			
@@ -164,4 +164,37 @@ int __cgroup_bpf_run_filter(struct sock *sk,
 
				 
			
 
				 	return ret;
			
 
				 }
			
 
				-EXPORT_SYMBOL(__cgroup_bpf_run_filter);
			
 
				+EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb);
			
 
				+
			
 
				+/**
				+ * __cgroup_bpf_run_filter_sk() - Run a program on a sock
				+ * @sk: sock structure handed to the program as its context
				+ * @type: The attach type whose effective program is executed
				+ *
				+ * The socket passed in is expected to be of type INET or INET6.
				+ *
				+ * The attach type passed in via @type must be suitable for sock
				+ * filtering. No further check is performed to assert that.
				+ *
				+ * Return: %-EPERM if an attached program was found and it returned
				+ * anything other than 1 during execution. In all other cases,
				+ * including when no program is attached, 0 is returned.
				+ */
				+int __cgroup_bpf_run_filter_sk(struct sock *sk,
				+			       enum bpf_attach_type type)
				+{
				+	struct cgroup *cg = sock_cgroup_ptr(&sk->sk_cgrp_data);
				+	struct bpf_prog *filter;
				+	int err = 0;
				+
				+	rcu_read_lock();
				+
				+	/* The effective program slot is only dereferenced under RCU. */
				+	filter = rcu_dereference(cg->bpf.effective[type]);
				+	if (filter && BPF_PROG_RUN(filter, sk) != 1)
				+		err = -EPERM;
				+
				+	rcu_read_unlock();
				+
				+	return err;
				+}
				+EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
			
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -856,6 +856,7 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 
				 {
			
 
				 	struct bpf_prog *prog;
			
 
				 	struct cgroup *cgrp;
			
 
				+	enum bpf_prog_type ptype;
			
 
				 
			
 
				 	if (!capable(CAP_NET_ADMIN))
			
 
				 		return -EPERM;
			
@@ -866,25 +867,28 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 
				 	switch (attr->attach_type) {
			
 
				 	case BPF_CGROUP_INET_INGRESS:
			
 
				 	case BPF_CGROUP_INET_EGRESS:
			
 
				-		prog = bpf_prog_get_type(attr->attach_bpf_fd,
			
 
				-					 BPF_PROG_TYPE_CGROUP_SKB);
			
 
				-		if (IS_ERR(prog))
			
 
				-			return PTR_ERR(prog);
			
 
				-
			
 
				-		cgrp = cgroup_get_from_fd(attr->target_fd);
			
 
				-		if (IS_ERR(cgrp)) {
			
 
				-			bpf_prog_put(prog);
			
 
				-			return PTR_ERR(cgrp);
			
 
				-		}
			
 
				-
			
 
				-		cgroup_bpf_update(cgrp, prog, attr->attach_type);
			
 
				-		cgroup_put(cgrp);
			
 
				+		ptype = BPF_PROG_TYPE_CGROUP_SKB;
			
 
				+		break;
			
 
				+	case BPF_CGROUP_INET_SOCK_CREATE:
			
 
				+		ptype = BPF_PROG_TYPE_CGROUP_SOCK;
			
 
				 		break;
			
 
				-
			
 
				 	default:
			
 
				 		return -EINVAL;
			
 
				 	}
			
 
				 
			
 
				+	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
			
 
				+	if (IS_ERR(prog))
			
 
				+		return PTR_ERR(prog);
			
 
				+
			
 
				+	cgrp = cgroup_get_from_fd(attr->target_fd);
			
 
				+	if (IS_ERR(cgrp)) {
			
 
				+		bpf_prog_put(prog);
			
 
				+		return PTR_ERR(cgrp);
			
 
				+	}
			
 
				+
			
 
				+	cgroup_bpf_update(cgrp, prog, attr->attach_type);
			
 
				+	cgroup_put(cgrp);
			
 
				+
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
@@ -903,6 +907,7 @@ static int bpf_prog_detach(const union bpf_attr *attr)
 
				 	switch (attr->attach_type) {
			
 
				 	case BPF_CGROUP_INET_INGRESS:
			
 
				 	case BPF_CGROUP_INET_EGRESS:
			
 
				+	case BPF_CGROUP_INET_SOCK_CREATE:
			
 
				 		cgrp = cgroup_get_from_fd(attr->target_fd);
			
 
				 		if (IS_ERR(cgrp))
			
 
				 			return PTR_ERR(cgrp);
			
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2818,6 +2818,32 @@ static bool lwt_is_valid_access(int off, int size,
 
				 	return __is_valid_access(off, size, type);
			
 
				 }
			
 
				 
			
 
				+/*
				+ * Context access check for programs whose context is struct bpf_sock.
				+ *
				+ * An access is accepted only if it is a 32-bit access, aligned to its
				+ * size, lying entirely inside struct bpf_sock. Writes are further
				+ * restricted to the bound_dev_if field. @reg_type is not used.
				+ */
				+static bool sock_filter_is_valid_access(int off, int size,
				+					enum bpf_access_type type,
				+					enum bpf_reg_type *reg_type)
				+{
				+	/* bound_dev_if is the only field a program may store to. */
				+	if (type == BPF_WRITE &&
				+	    off != offsetof(struct bpf_sock, bound_dev_if))
				+		return false;
				+
				+	if (off < 0 || off + size > sizeof(struct bpf_sock))
				+		return false;
				+
				+	/* The verifier guarantees that size > 0. */
				+	if (off % size != 0)
				+		return false;
				+
				+	return size == sizeof(__u32);
				+}
			
 
				+
			
 
				 static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write,
			
 
				 			       const struct bpf_prog *prog)
			
 
				 {
			
@@ -3076,6 +3102,51 @@ static u32 sk_filter_convert_ctx_access(enum bpf_access_type type, int dst_reg,
 
				 	return insn - insn_buf;
			
 
				 }
			
 
				 
			
 
				+/*
				+ * Rewrite an access to a struct bpf_sock field at @ctx_off into
				+ * instruction(s) operating on the underlying struct sock.
				+ *
				+ * Instructions are written to @insn_buf and the number written is
				+ * returned; an offset matching no bpf_sock field emits nothing and
				+ * yields 0. Only bound_dev_if has a store form; the other fields
				+ * always emit loads. @prog is not used.
				+ */
				+static u32 sock_filter_convert_ctx_access(enum bpf_access_type type,
				+					  int dst_reg, int src_reg,
				+					  int ctx_off,
				+					  struct bpf_insn *insn_buf,
				+					  struct bpf_prog *prog)
				+{
				+	struct bpf_insn *out = insn_buf;
				+
				+	switch (ctx_off) {
				+	case offsetof(struct bpf_sock, bound_dev_if):
				+		BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_bound_dev_if) != 4);
				+
				+		if (type == BPF_WRITE) {
				+			*out++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg,
				+					offsetof(struct sock, sk_bound_dev_if));
				+			break;
				+		}
				+
				+		*out++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
				+				     offsetof(struct sock, sk_bound_dev_if));
				+		break;
				+
				+	case offsetof(struct bpf_sock, family):
				+		BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_family) != 2);
				+
				+		/* sk_family is 16 bits wide, hence the half-word load. */
				+		*out++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
				+				     offsetof(struct sock, sk_family));
				+		break;
				+
				+	case offsetof(struct bpf_sock, type):
				+		/* Load the flags word, then mask and shift out the type. */
				+		*out++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
				+				     offsetof(struct sock, __sk_flags_offset));
				+		*out++ = BPF_ALU32_IMM(BPF_AND, dst_reg, SK_FL_TYPE_MASK);
				+		*out++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, SK_FL_TYPE_SHIFT);
				+		break;
				+
				+	case offsetof(struct bpf_sock, protocol):
				+		/* Same flags word; mask and shift out the protocol. */
				+		*out++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
				+				     offsetof(struct sock, __sk_flags_offset));
				+		*out++ = BPF_ALU32_IMM(BPF_AND, dst_reg, SK_FL_PROTO_MASK);
				+		*out++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, SK_FL_PROTO_SHIFT);
				+		break;
				+	}
				+
				+	return out - insn_buf;
				+}
			
 
				+
			
 
				 static u32 tc_cls_act_convert_ctx_access(enum bpf_access_type type, int dst_reg,
			
 
				 					 int src_reg, int ctx_off,
			
 
				 					 struct bpf_insn *insn_buf,
			
@@ -3162,6 +3233,12 @@ static const struct bpf_verifier_ops lwt_xmit_ops = {
 
				 	.gen_prologue		= tc_cls_act_prologue,
			
 
				 };
			
 
				 
			
 
				+static const struct bpf_verifier_ops cg_sock_ops = {
			
 
				+	.get_func_proto		= sk_filter_func_proto,
			
 
				+	.is_valid_access	= sock_filter_is_valid_access,
			
 
				+	.convert_ctx_access	= sock_filter_convert_ctx_access,
			
 
				+};
			
 
				+
			
 
				 static struct bpf_prog_type_list sk_filter_type __read_mostly = {
			
 
				 	.ops	= &sk_filter_ops,
			
 
				 	.type	= BPF_PROG_TYPE_SOCKET_FILTER,
			
@@ -3202,6 +3279,11 @@ static struct bpf_prog_type_list lwt_xmit_type __read_mostly = {
 
				 	.type	= BPF_PROG_TYPE_LWT_XMIT,
			
 
				 };
			
 
				 
			
 
				+static struct bpf_prog_type_list cg_sock_type __read_mostly = {
			
 
				+	.ops	= &cg_sock_ops,
			
 
				+	.type	= BPF_PROG_TYPE_CGROUP_SOCK
			
 
				+};
			
 
				+
			
 
				 static int __init register_sk_filter_ops(void)
			
 
				 {
			
 
				 	bpf_register_prog_type(&sk_filter_type);
			
@@ -3209,6 +3291,7 @@ static int __init register_sk_filter_ops(void)
 
				 	bpf_register_prog_type(&sched_act_type);
			
 
				 	bpf_register_prog_type(&xdp_type);
			
 
				 	bpf_register_prog_type(&cg_skb_type);
			
 
				+	bpf_register_prog_type(&cg_sock_type);
			
 
				 	bpf_register_prog_type(&lwt_in_type);
			
 
				 	bpf_register_prog_type(&lwt_out_type);
			
 
				 	bpf_register_prog_type(&lwt_xmit_type);
			
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -374,8 +374,18 @@ static int inet_create(struct net *net, struct socket *sock, int protocol,
 
				 
			
 
				 	if (sk->sk_prot->init) {
			
 
				 		err = sk->sk_prot->init(sk);
			
 
				-		if (err)
			
 
				+		if (err) {
			
 
				+			sk_common_release(sk);
			
 
				+			goto out;
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	if (!kern) {
			
 
				+		err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk);
			
 
				+		if (err) {
			
 
				 			sk_common_release(sk);
			
 
				+			goto out;
			
 
				+		}
			
 
				 	}
			
 
				 out:
			
 
				 	return err;
			
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -258,6 +258,14 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol,
 
				 			goto out;
			
 
				 		}
			
 
				 	}
			
 
				+
			
 
				+	if (!kern) {
			
 
				+		err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk);
			
 
				+		if (err) {
			
 
				+			sk_common_release(sk);
			
 
				+			goto out;
			
 
				+		}
			
 
				+	}
			
 
				 out:
			
 
				 	return err;
			
 
				 out_rcu_unlock:
			
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -23,6 +23,8 @@ hostprogs-y += map_perf_test
 
				 hostprogs-y += test_overhead
			
 
				 hostprogs-y += test_cgrp2_array_pin
			
 
				 hostprogs-y += test_cgrp2_attach
			
 
				+hostprogs-y += test_cgrp2_sock
			
 
				+hostprogs-y += test_cgrp2_sock2
			
 
				 hostprogs-y += xdp1
			
 
				 hostprogs-y += xdp2
			
 
				 hostprogs-y += test_current_task_under_cgroup
			
@@ -52,6 +54,8 @@ map_perf_test-objs := bpf_load.o libbpf.o map_perf_test_user.o
 
				 test_overhead-objs := bpf_load.o libbpf.o test_overhead_user.o
			
 
				 test_cgrp2_array_pin-objs := libbpf.o test_cgrp2_array_pin.o
			
 
				 test_cgrp2_attach-objs := libbpf.o test_cgrp2_attach.o
			
 
				+test_cgrp2_sock-objs := libbpf.o test_cgrp2_sock.o
			
 
				+test_cgrp2_sock2-objs := bpf_load.o libbpf.o test_cgrp2_sock2.o
			
 
				 xdp1-objs := bpf_load.o libbpf.o xdp1_user.o
			
 
				 # reuse xdp1 source intentionally
			
 
				 xdp2-objs := bpf_load.o libbpf.o xdp1_user.o
			
@@ -73,6 +77,7 @@ always += tracex3_kern.o
 
				 always += tracex4_kern.o
			
 
				 always += tracex5_kern.o
			
 
				 always += tracex6_kern.o
			
 
				+always += sock_flags_kern.o
			
 
				 always += test_probe_write_user_kern.o
			
 
				 always += trace_output_kern.o
			
 
				 always += tcbpf1_kern.o
			
@@ -107,6 +112,7 @@ HOSTLOADLIBES_tracex3 += -lelf
 
				 HOSTLOADLIBES_tracex4 += -lelf -lrt
			
 
				 HOSTLOADLIBES_tracex5 += -lelf
			
 
				 HOSTLOADLIBES_tracex6 += -lelf
			
 
				+HOSTLOADLIBES_test_cgrp2_sock2 += -lelf
			
 
				 HOSTLOADLIBES_test_probe_write_user += -lelf
			
 
				 HOSTLOADLIBES_trace_output += -lelf -lrt
			
 
				 HOSTLOADLIBES_lathist += -lelf
			
--- a/samples/bpf/bpf_load.c
+++ b/samples/bpf/bpf_load.c
@@ -52,6 +52,8 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
 
				 	bool is_tracepoint = strncmp(event, "tracepoint/", 11) == 0;
			
 
				 	bool is_xdp = strncmp(event, "xdp", 3) == 0;
			
 
				 	bool is_perf_event = strncmp(event, "perf_event", 10) == 0;
			
 
				+	bool is_cgroup_skb = strncmp(event, "cgroup/skb", 10) == 0;
			
 
				+	bool is_cgroup_sk = strncmp(event, "cgroup/sock", 11) == 0;
			
 
				 	enum bpf_prog_type prog_type;
			
 
				 	char buf[256];
			
 
				 	int fd, efd, err, id;
			
@@ -72,6 +74,10 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
 
				 		prog_type = BPF_PROG_TYPE_XDP;
			
 
				 	} else if (is_perf_event) {
			
 
				 		prog_type = BPF_PROG_TYPE_PERF_EVENT;
			
 
				+	} else if (is_cgroup_skb) {
			
 
				+		prog_type = BPF_PROG_TYPE_CGROUP_SKB;
			
 
				+	} else if (is_cgroup_sk) {
			
 
				+		prog_type = BPF_PROG_TYPE_CGROUP_SOCK;
			
 
				 	} else {
			
 
				 		printf("Unknown event '%s'\n", event);
			
 
				 		return -1;
			
@@ -85,7 +91,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
 
				 
			
 
				 	prog_fd[prog_cnt++] = fd;
			
 
				 
			
 
				-	if (is_xdp || is_perf_event)
			
 
				+	if (is_xdp || is_perf_event || is_cgroup_skb || is_cgroup_sk)
			
 
				 		return 0;
			
 
				 
			
 
				 	if (is_socket) {
			
@@ -334,7 +340,8 @@ int load_bpf_file(char *path)
 
				 			    memcmp(shname_prog, "tracepoint/", 11) == 0 ||
			
 
				 			    memcmp(shname_prog, "xdp", 3) == 0 ||
			
 
				 			    memcmp(shname_prog, "perf_event", 10) == 0 ||
			
 
				-			    memcmp(shname_prog, "socket", 6) == 0)
			
 
				+			    memcmp(shname_prog, "socket", 6) == 0 ||
			
 
				+			    memcmp(shname_prog, "cgroup/", 7) == 0)
			
 
				 				load_and_attach(shname_prog, insns, data_prog->d_size);
			
 
				 		}
			
 
				 	}
			
@@ -353,7 +360,8 @@ int load_bpf_file(char *path)
 
				 		    memcmp(shname, "tracepoint/", 11) == 0 ||
			
 
				 		    memcmp(shname, "xdp", 3) == 0 ||
			
 
				 		    memcmp(shname, "perf_event", 10) == 0 ||
			
 
				-		    memcmp(shname, "socket", 6) == 0)
			
 
				+		    memcmp(shname, "socket", 6) == 0 ||
			
 
				+		    memcmp(shname, "cgroup/", 7) == 0)
			
 
				 			load_and_attach(shname, data->d_buf, data->d_size);
			
 
				 	}
			
 
				 
			
--- a/samples/bpf/bpf_load.h
+++ b/samples/bpf/bpf_load.h
@@ -7,6 +7,7 @@
 
				 extern int map_fd[MAX_MAPS];
			
 
				 extern int prog_fd[MAX_PROGS];
			
 
				 extern int event_fd[MAX_PROGS];
			
 
				+extern int prog_cnt;
			
 
				 
			
 
				 /* parses elf file compiled by llvm .c->.o
			
 
				  * . parses 'maps' section and creates maps via BPF syscall
			
--- a/samples/bpf/sock_flags_kern.c
+++ b/samples/bpf/sock_flags_kern.c
@@ -0,0 +1,44 @@
 
				+#include <uapi/linux/bpf.h>
			
 
				+#include <linux/socket.h>
			
 
				+#include <linux/net.h>
			
 
				+#include <uapi/linux/in.h>
			
 
				+#include <uapi/linux/in6.h>
			
 
				+#include "bpf_helpers.h"
			
 
				+
			
 
				+/* Trace every socket seen, then block PF_INET6, SOCK_RAW, IPPROTO_ICMPV6
				+ * sockets, ie., make ping6 fail. Returns 0 for a blocked socket and 1
				+ * for every other socket.
				+ */
				+SEC("cgroup/sock1")
				+int bpf_prog1(struct bpf_sock *sk)
				+{
				+	char fmt[] = "socket: family %d type %d protocol %d\n";
				+
				+	bpf_trace_printk(fmt, sizeof(fmt), sk->family, sk->type, sk->protocol);
				+
				+	if (sk->family != PF_INET6)
				+		return 1;
				+
				+	if (sk->type != SOCK_RAW)
				+		return 1;
				+
				+	return sk->protocol == IPPROTO_ICMPV6 ? 0 : 1;
				+}
			
 
				+
			
 
				+/* Trace every socket seen, then block PF_INET, SOCK_RAW, IPPROTO_ICMP
				+ * sockets, ie., make ping fail. Returns 0 for a blocked socket and 1
				+ * for every other socket.
				+ */
				+SEC("cgroup/sock2")
				+int bpf_prog2(struct bpf_sock *sk)
				+{
				+	char fmt[] = "socket: family %d type %d protocol %d\n";
				+
				+	bpf_trace_printk(fmt, sizeof(fmt), sk->family, sk->type, sk->protocol);
				+
				+	if (sk->family != PF_INET)
				+		return 1;
				+
				+	if (sk->type != SOCK_RAW)
				+		return 1;
				+
				+	return sk->protocol == IPPROTO_ICMP ? 0 : 1;
				+}
			
 
				+
			
 
				+char _license[] SEC("license") = "GPL";
			
--- a/samples/bpf/test_cgrp2_sock.c
+++ b/samples/bpf/test_cgrp2_sock.c
@@ -0,0 +1,83 @@
 
				+/* eBPF example program:
			
 
				+ *
			
 
				+ * - Loads eBPF program
			
 
				+ *
			
 
				+ *   The eBPF program sets the sk_bound_dev_if index in new AF_INET{6}
			
 
				+ *   sockets opened by processes in the cgroup.
			
 
				+ *
			
 
				+ * - Attaches the new program to a cgroup using BPF_PROG_ATTACH
			
 
				+ */
			
 
				+
			
 
				+#define _GNU_SOURCE
			
 
				+
			
 
				+#include <stdio.h>
			
 
				+#include <stdlib.h>
			
 
				+#include <stddef.h>
			
 
				+#include <string.h>
			
 
				+#include <unistd.h>
			
 
				+#include <assert.h>
			
 
				+#include <errno.h>
			
 
				+#include <fcntl.h>
			
 
				+#include <net/if.h>
			
 
				+#include <linux/bpf.h>
			
 
				+
			
 
				+#include "libbpf.h"
			
 
				+
			
 
				+/*
				+ * Build and load a BPF_PROG_TYPE_CGROUP_SOCK program that stores @idx
				+ * into bpf_sock->bound_dev_if and exits with verdict 1.
				+ *
				+ * Returns the result of bpf_prog_load() unchanged.
				+ */
				+static int prog_load(int idx)
				+{
				+	const int dev_off = offsetof(struct bpf_sock, bound_dev_if);
				+	struct bpf_insn insns[] = {
				+		/* r6 = ctx; no later instruction reads r6 */
				+		BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
				+		/* r3 = device index to bind to */
				+		BPF_MOV64_IMM(BPF_REG_3, idx),
				+		/* r2 = field offset; no later instruction reads r2 */
				+		BPF_MOV64_IMM(BPF_REG_2, dev_off),
				+		/* ctx->bound_dev_if = r3 */
				+		BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, dev_off),
				+		BPF_MOV64_IMM(BPF_REG_0, 1), /* r0 = verdict */
				+		BPF_EXIT_INSN(),
				+	};
				+
				+	return bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, insns, sizeof(insns),
				+			     "GPL", 0);
				+}
			
 
				+
			
 
				+/*
				+ * Print the command-line synopsis and return EXIT_FAILURE so callers
				+ * can write "return usage(argv[0]);".
				+ *
				+ * The second argument is a device name: main() resolves it with
				+ * if_nametoindex().
				+ */
				+static int usage(const char *argv0)
				+{
				+	printf("Usage: %s cg-path device-name\n", argv0);
				+	return EXIT_FAILURE;
				+}
			
 
				+
			
 
				+/*
				+ * argv[1]: path of the cgroup directory to attach to
				+ * argv[2]: network device name, resolved with if_nametoindex()
				+ *
				+ * Loads the program built by prog_load() and attaches it to the cgroup
				+ * as BPF_CGROUP_INET_SOCK_CREATE. Returns EXIT_SUCCESS on success and
				+ * EXIT_FAILURE on any error.
				+ */
				+int main(int argc, char **argv)
				+{
				+	int cg_fd, prog_fd, ret;
				+	unsigned int idx;
				+
				+	/* argv[2] is read below, so both arguments are mandatory. */
				+	if (argc < 3)
				+		return usage(argv[0]);
				+
				+	idx = if_nametoindex(argv[2]);
				+	if (!idx) {
				+		printf("Invalid device name\n");
				+		return EXIT_FAILURE;
				+	}
				+
				+	cg_fd = open(argv[1], O_DIRECTORY | O_RDONLY);
				+	if (cg_fd < 0) {
				+		printf("Failed to open cgroup path: '%s'\n", strerror(errno));
				+		return EXIT_FAILURE;
				+	}
				+
				+	prog_fd = prog_load(idx);
				+	/* The log is printed before the result check, so it shows on failure too. */
				+	printf("Output from kernel verifier:\n%s\n-------\n", bpf_log_buf);
				+
				+	if (prog_fd < 0) {
				+		printf("Failed to load prog: '%s'\n", strerror(errno));
				+		return EXIT_FAILURE;
				+	}
				+
				+	ret = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_INET_SOCK_CREATE);
				+	if (ret < 0) {
				+		printf("Failed to attach prog to cgroup: '%s'\n",
				+		       strerror(errno));
				+		return EXIT_FAILURE;
				+	}
				+
				+	return EXIT_SUCCESS;
				+}
			
--- a/samples/bpf/test_cgrp2_sock.sh
+++ b/samples/bpf/test_cgrp2_sock.sh
@@ -0,0 +1,47 @@
 
				+#!/bin/bash
			
 
				+
			
 
				+function config_device {
			
 
				+	ip netns add at_ns0
			
 
				+	ip link add veth0 type veth peer name veth0b
			
 
				+	ip link set veth0b up
			
 
				+	ip link set veth0 netns at_ns0
			
 
				+	ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0
			
 
				+	ip netns exec at_ns0 ip addr add 2401:db00::1/64 dev veth0 nodad
			
 
				+	ip netns exec at_ns0 ip link set dev veth0 up
			
 
				+	ip link add foo type vrf table 1234
			
 
				+	ip link set foo up
			
 
				+	ip addr add 172.16.1.101/24 dev veth0b
			
 
				+	ip addr add 2401:db00::2/64 dev veth0b nodad
			
 
				+	ip link set veth0b master foo
			
 
				+}
			
 
				+
			
 
				+function attach_bpf {
			
 
				+	rm -rf /tmp/cgroupv2
			
 
				+	mkdir -p /tmp/cgroupv2
			
 
				+	mount -t cgroup2 none /tmp/cgroupv2
			
 
				+	mkdir -p /tmp/cgroupv2/foo
			
 
				+	test_cgrp2_sock /tmp/cgroupv2/foo foo
			
 
				+	echo $$ >> /tmp/cgroupv2/foo/cgroup.procs
			
 
				+}
			
 
				+
			
 
				+function cleanup {
			
 
				+	set +ex
			
 
				+	ip netns delete at_ns0
			
 
				+	ip link del veth0
			
 
				+	ip link del foo
			
 
				+	umount /tmp/cgroupv2
			
 
				+	rm -rf /tmp/cgroupv2
			
 
				+	set -ex
			
 
				+}
			
 
				+
			
 
				+function do_test {
			
 
				+	ping -c1 -w1 172.16.1.100
			
 
				+	ping6 -c1 -w1 2401:db00::1
			
 
				+}
			
 
				+
			
 
				+cleanup 2>/dev/null
			
 
				+config_device
			
 
				+attach_bpf
			
 
				+do_test
			
 
				+cleanup
			
 
				+echo "*** PASS ***"
			
--- a/samples/bpf/test_cgrp2_sock2.c
+++ b/samples/bpf/test_cgrp2_sock2.c
@@ -0,0 +1,66 @@
 
				+/* eBPF example program:
			
 
				+ *
			
 
				+ * - Loads eBPF program
			
 
				+ *
			
 
				+ *   The eBPF program loads a filter from file and attaches the
			
 
				+ *   program to a cgroup using BPF_PROG_ATTACH
			
 
				+ */
			
 
				+
			
 
				+#define _GNU_SOURCE
			
 
				+
			
 
				+#include <stdio.h>
			
 
				+#include <stdlib.h>
			
 
				+#include <stddef.h>
			
 
				+#include <string.h>
			
 
				+#include <unistd.h>
			
 
				+#include <assert.h>
			
 
				+#include <errno.h>
			
 
				+#include <fcntl.h>
			
 
				+#include <net/if.h>
			
 
				+#include <linux/bpf.h>
			
 
				+
			
 
				+#include "libbpf.h"
			
 
				+#include "bpf_load.h"
			
 
				+
			
 
				+/*
				+ * Print the command-line synopsis and hand back EXIT_FAILURE so that
				+ * main() can return the result directly.
				+ */
				+static int usage(const char *argv0)
				+{
				+	const int status = EXIT_FAILURE;
				+
				+	printf("Usage: %s cg-path filter-path [filter-id]\n", argv0);
				+
				+	return status;
				+}
			
 
				+
			
 
				+/*
				+ * argv[1]: path of the cgroup directory to attach to
				+ * argv[2]: path of the object file passed to load_bpf_file()
				+ * argv[3]: optional index into prog_fd[] of the program to attach
				+ *          (defaults to 0)
				+ *
				+ * Attaches the selected program to the cgroup as
				+ * BPF_CGROUP_INET_SOCK_CREATE. Returns EXIT_SUCCESS on success and
				+ * EXIT_FAILURE on any error.
				+ */
				+int main(int argc, char **argv)
				+{
				+	int cg_fd, ret, filter_id = 0;
				+
				+	if (argc < 3)
				+		return usage(argv[0]);
				+
				+	cg_fd = open(argv[1], O_DIRECTORY | O_RDONLY);
				+	if (cg_fd < 0) {
				+		printf("Failed to open cgroup path: '%s'\n", strerror(errno));
				+		return EXIT_FAILURE;
				+	}
				+
				+	if (load_bpf_file(argv[2]))
				+		return EXIT_FAILURE;
				+
				+	printf("Output from kernel verifier:\n%s\n-------\n", bpf_log_buf);
				+
				+	if (argc > 3)
				+		filter_id = atoi(argv[3]);
				+
				+	/*
				+	 * Valid indices are 0 .. prog_cnt - 1. Reject prog_cnt itself and
				+	 * negative values so prog_fd[] is never read at a slot that
				+	 * load_bpf_file() did not fill in.
				+	 */
				+	if (filter_id < 0 || filter_id >= prog_cnt) {
				+		printf("Invalid program id; program not found in file\n");
				+		return EXIT_FAILURE;
				+	}
				+
				+	ret = bpf_prog_attach(prog_fd[filter_id], cg_fd,
				+			      BPF_CGROUP_INET_SOCK_CREATE);
				+	if (ret < 0) {
				+		printf("Failed to attach prog to cgroup: '%s'\n",
				+		       strerror(errno));
				+		return EXIT_FAILURE;
				+	}
				+
				+	return EXIT_SUCCESS;
				+}
			
--- a/samples/bpf/test_cgrp2_sock2.sh
+++ b/samples/bpf/test_cgrp2_sock2.sh
@@ -0,0 +1,81 @@
 
				+#!/bin/bash
			
 
				+
			
 
				+function config_device {
			
 
				+	ip netns add at_ns0
			
 
				+	ip link add veth0 type veth peer name veth0b
			
 
				+	ip link set veth0b up
			
 
				+	ip link set veth0 netns at_ns0
			
 
				+	ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0
			
 
				+	ip netns exec at_ns0 ip addr add 2401:db00::1/64 dev veth0 nodad
			
 
				+	ip netns exec at_ns0 ip link set dev veth0 up
			
 
				+	ip addr add 172.16.1.101/24 dev veth0b
			
 
				+	ip addr add 2401:db00::2/64 dev veth0b nodad
			
 
				+}
			
 
				+
			
 
				+function config_cgroup {
			
 
				+	rm -rf /tmp/cgroupv2
			
 
				+	mkdir -p /tmp/cgroupv2
			
 
				+	mount -t cgroup2 none /tmp/cgroupv2
			
 
				+	mkdir -p /tmp/cgroupv2/foo
			
 
				+	echo $$ >> /tmp/cgroupv2/foo/cgroup.procs
			
 
				+}
			
 
				+
			
 
				+
			
 
				+# Load sock_flags_kern.o and attach the program with index $1 to the
				+# test cgroup. If that fails, run cleanup before exiting, like the other
				+# failure paths in this script, so that the netns, veth pair and cgroup2
				+# mount are not left behind.
				+function attach_bpf {
				+	test_cgrp2_sock2 /tmp/cgroupv2/foo sock_flags_kern.o $1
				+	if [ $? -ne 0 ]; then
				+		cleanup
				+		exit 1
				+	fi
				+}
			
 
				+
			
 
				+function cleanup {
			
 
				+	ip link del veth0b
			
 
				+	ip netns delete at_ns0
			
 
				+	umount /tmp/cgroupv2
			
 
				+	rm -rf /tmp/cgroupv2
			
 
				+}
			
 
				+
			
 
				+cleanup 2>/dev/null
			
 
				+
			
 
				+set -e
			
 
				+config_device
			
 
				+config_cgroup
			
 
				+set +e
			
 
				+
			
 
				+#
			
 
				+# Test 1 - fail ping6
			
 
				+#
			
 
				+attach_bpf 0
			
 
				+ping -c1 -w1 172.16.1.100
			
 
				+if [ $? -ne 0 ]; then
			
 
				+	echo "ping failed when it should succeed"
			
 
				+	cleanup
			
 
				+	exit 1
			
 
				+fi
			
 
				+
			
 
				+ping6 -c1 -w1 2401:db00::1
			
 
				+if [ $? -eq 0 ]; then
			
 
				+	echo "ping6 succeeded when it should not"
			
 
				+	cleanup
			
 
				+	exit 1
			
 
				+fi
			
 
				+
			
 
				+#
			
 
				+# Test 2 - fail ping
			
 
				+#
			
 
				+attach_bpf 1
			
 
				+ping6 -c1 -w1 2401:db00::1
			
 
				+if [ $? -ne 0 ]; then
			
 
				+	echo "ping6 failed when it should succeed"
			
 
				+	cleanup
			
 
				+	exit 1
			
 
				+fi
			
 
				+
			
 
				+ping -c1 -w1 172.16.1.100
			
 
				+if [ $? -eq 0 ]; then
			
 
				+	echo "ping succeeded when it should not"
			
 
				+	cleanup
			
 
				+	exit 1
			
 
				+fi
			
 
				+
			
 
				+cleanup
			
 
				+echo
			
 
				+echo "*** PASS ***"